From d62afa05a09119df1dec7610cecb4fa0c9580777 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Fri, 23 Mar 2018 11:46:03 +0000
Subject: [PATCH 001/123] ATLAS-158 Fix assertion in Polygon

---
 src/atlas/util/Polygon.cc | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/atlas/util/Polygon.cc b/src/atlas/util/Polygon.cc
index 7c365768d..ba4f24edf 100644
--- a/src/atlas/util/Polygon.cc
+++ b/src/atlas/util/Polygon.cc
@@ -118,6 +118,8 @@ PolygonCoordinates::PolygonCoordinates( const Polygon& poly, const atlas::Field&
     coordinatesMin_ = PointLonLat( ll( poly[0], LON ), ll( poly[0], LAT ) );
     coordinatesMax_ = coordinatesMin_;
 
+    size_t nb_removed_points_due_to_alignment = 0;
+
     for ( size_t i = 0; i < poly.size(); ++i ) {
         PointLonLat A( ll( poly[i], LON ), ll( poly[i], LAT ) );
         coordinatesMin_ = PointLonLat::componentsMin( coordinatesMin_, A );
@@ -130,6 +132,7 @@ PolygonCoordinates::PolygonCoordinates( const Polygon& poly, const atlas::Field&
             const PointLonLat& C = coordinates_[coordinates_.size() - 2];
             if ( eckit::types::is_approximately_equal( 0., cross_product_analog( A, B, C ) ) ) {
                 coordinates_.back() = A;
+                ++nb_removed_points_due_to_alignment;
                 continue;
             }
         }
@@ -137,7 +140,7 @@ PolygonCoordinates::PolygonCoordinates( const Polygon& poly, const atlas::Field&
         coordinates_.push_back( A );
     }
 
-    ASSERT( coordinates_.size() == poly.size() );
+    ASSERT( coordinates_.size() == poly.size() - nb_removed_points_due_to_alignment );
 }
 
 PolygonCoordinates::PolygonCoordinates( const std::vector<PointLonLat>& points ) : coordinates_( points ) {

From d9d237cc0221f99e579725a6f3b6c562d462c262 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Wed, 18 Apr 2018 08:26:18 +0100
Subject: [PATCH 002/123] bamboo: CLANG-env to use gfortran 6.3.0

---
 bamboo/CLANG-env.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bamboo/CLANG-env.sh b/bamboo/CLANG-env.sh
index 33c46d1dc..7549c741d 100644
--- a/bamboo/CLANG-env.sh
+++ b/bamboo/CLANG-env.sh
@@ -11,3 +11,5 @@ module unload fftw
 module unload libemos
 
 module switch gnu clang
+export FC=/usr/local/apps/gcc/6.3.0/bin/gfortran
+

From 4f2c483d4489da220c82efc71f4251863ea29ecd Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Tue, 27 Feb 2018 16:52:19 +0000
Subject: [PATCH 003/123] duplicated TransLocal into TransLocalopt.
 Test_transgeneral is using both to compare their results and timings

---
 src/atlas/CMakeLists.txt                      |  10 +
 src/atlas/trans/Trans.cc                      |   2 +
 src/atlas/trans/VorDivToUV.cc                 |   2 +
 .../trans/localopt/FourierTransformsopt.cc    |  78 +++++
 .../trans/localopt/FourierTransformsopt.h     |  38 ++
 .../trans/localopt/LegendrePolynomialsopt.cc  | 153 ++++++++
 .../trans/localopt/LegendrePolynomialsopt.h   |  43 +++
 .../trans/localopt/LegendreTransformsopt.cc   |  61 ++++
 .../trans/localopt/LegendreTransformsopt.h    |  36 ++
 src/atlas/trans/localopt/TransLocalopt.cc     | 327 ++++++++++++++++++
 src/atlas/trans/localopt/TransLocalopt.h      | 121 +++++++
 .../trans/localopt/VorDivToUVLocalopt.cc      | 184 ++++++++++
 src/atlas/trans/localopt/VorDivToUVLocalopt.h |  67 ++++
 src/tests/trans/test_transgeneral.cc          |  16 +-
 14 files changed, 1136 insertions(+), 2 deletions(-)
 create mode 100644 src/atlas/trans/localopt/FourierTransformsopt.cc
 create mode 100644 src/atlas/trans/localopt/FourierTransformsopt.h
 create mode 100644 src/atlas/trans/localopt/LegendrePolynomialsopt.cc
 create mode 100644 src/atlas/trans/localopt/LegendrePolynomialsopt.h
 create mode 100644 src/atlas/trans/localopt/LegendreTransformsopt.cc
 create mode 100644 src/atlas/trans/localopt/LegendreTransformsopt.h
 create mode 100644 src/atlas/trans/localopt/TransLocalopt.cc
 create mode 100644 src/atlas/trans/localopt/TransLocalopt.h
 create mode 100644 src/atlas/trans/localopt/VorDivToUVLocalopt.cc
 create mode 100644 src/atlas/trans/localopt/VorDivToUVLocalopt.h

diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt
index 1675b693e..ecbbbbb06 100644
--- a/src/atlas/CMakeLists.txt
+++ b/src/atlas/CMakeLists.txt
@@ -331,6 +331,16 @@ trans/local/FourierTransforms.h
 trans/local/FourierTransforms.cc
 trans/local/VorDivToUVLocal.h
 trans/local/VorDivToUVLocal.cc
+trans/localopt/TransLocalopt.h
+trans/localopt/TransLocalopt.cc
+trans/localopt/LegendrePolynomialsopt.h
+trans/localopt/LegendrePolynomialsopt.cc
+trans/localopt/LegendreTransformsopt.h
+trans/localopt/LegendreTransformsopt.cc
+trans/localopt/FourierTransformsopt.h
+trans/localopt/FourierTransformsopt.cc
+trans/localopt/VorDivToUVLocalopt.h
+trans/localopt/VorDivToUVLocalopt.cc
 
 )
 if( ATLAS_HAVE_TRANS )
diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc
index c10408891..9c555170d 100644
--- a/src/atlas/trans/Trans.cc
+++ b/src/atlas/trans/Trans.cc
@@ -28,6 +28,7 @@
 #define TRANS_DEFAULT "local"
 #endif
 #include "atlas/trans/local/TransLocal.h"
+#include "atlas/trans/localopt/TransLocalopt.h"
 
 namespace atlas {
 namespace trans {
@@ -62,6 +63,7 @@ struct force_link {
         load_builder_grid<TransIFS>();
 #endif
         load_builder_grid<TransLocal>();
+        load_builder_grid<TransLocalopt>();
     }
 };
 
diff --git a/src/atlas/trans/VorDivToUV.cc b/src/atlas/trans/VorDivToUV.cc
index f71e2a6c2..b958599fb 100644
--- a/src/atlas/trans/VorDivToUV.cc
+++ b/src/atlas/trans/VorDivToUV.cc
@@ -27,6 +27,7 @@
 #define TRANS_DEFAULT "local"
 #endif
 #include "atlas/trans/local/VorDivToUVLocal.h"
+#include "atlas/trans/localopt/VorDivToUVLocalopt.h"
 
 namespace atlas {
 namespace trans {
@@ -55,6 +56,7 @@ struct force_link {
         load_builder<VorDivToUVIFS>();
 #endif
         load_builder<VorDivToUVLocal>();
+        load_builder<VorDivToUVLocalopt>();
     }
 };
 
diff --git a/src/atlas/trans/localopt/FourierTransformsopt.cc b/src/atlas/trans/localopt/FourierTransformsopt.cc
new file mode 100644
index 000000000..ede59cea8
--- /dev/null
+++ b/src/atlas/trans/localopt/FourierTransformsopt.cc
@@ -0,0 +1,78 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor
+ * does it submit to any jurisdiction.
+ */
+
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+
+#include "atlas/trans/localopt/FourierTransformsopt.h"
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+
+void invtrans_fourieropt( const size_t trcFT,
+                       const double lon,         // longitude in radians (in)
+                       const int nb_fields,      // Number of fields
+                       const double rlegReal[],  // associated Legendre functions, size (trc+1)*trc/2 (in)
+                       const double rlegImag[],  // associated Legendre functions, size (trc+1)*trc/2 (in)
+                       double rgp[] )            // gridpoint
+{
+    for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
+        rgp[jfld] = 0.;
+    }
+    // local Fourier transformation:
+    for ( int jm = 0; jm <= trcFT; ++jm ) {
+        const double cos = std::cos( jm * lon );
+        const double sin = std::sin( jm * lon );
+        for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
+            double real = cos * rlegReal[jm * nb_fields + jfld];
+            double imag = sin * rlegImag[jm * nb_fields + jfld];
+            rgp[jfld] += real - imag;
+        }
+    }
+}
+
+int fourier_truncationopt( const int truncation,    // truncation
+                        const int nx,            // number of longitudes
+                        const int nxmax,         // maximum nx
+                        const int ndgl,          // number of latitudes
+                        const double lat,        // latitude in radian
+                        const bool fullgrid ) {  // regular grid
+    int trc     = truncation;
+    int trclin  = ndgl - 1;
+    int trcquad = ndgl * 2 / 3 - 1;
+    if ( truncation >= trclin || fullgrid ) {
+        // linear
+        trc = ( nx - 1 ) / 2;
+    }
+    else if ( truncation >= trcquad ) {
+        // quadratic
+        double weight = 3 * ( trclin - truncation ) / ndgl;
+        double sqcos  = std::pow( std::cos( lat ), 2 );
+
+        trc = ( nx - 1 ) / ( 2 + weight * sqcos );
+    }
+    else {
+        // cubic
+        double sqcos = std::pow( std::cos( lat ), 2 );
+
+        trc = ( nx - 1 ) / ( 2 + sqcos ) - 1;
+    }
+    trc = std::min( truncation, trc );
+    return trc;
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt/FourierTransformsopt.h b/src/atlas/trans/localopt/FourierTransformsopt.h
new file mode 100644
index 000000000..0ddfdfcdf
--- /dev/null
+++ b/src/atlas/trans/localopt/FourierTransformsopt.h
@@ -0,0 +1,38 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#pragma once
+
+#include <cstddef>
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+// Routine to compute the local Fourier transformation
+//
+// Author:
+// Andreas Mueller *ECMWF*
+//
+
+void invtrans_fourieropt( const size_t trcFT,
+                       const double lon,         // longitude in radians (in)
+                       const int nb_fields,      // Number of fields
+                       const double rlegReal[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
+                       const double rlegImag[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
+                       double rgp[] );           // gridpoint
+
+int fourier_truncationopt( const int truncation, const int nx, const int nxmax, const int ndgl, const double lat,
+                        const bool fullgrid );
+
+// --------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
new file mode 100644
index 000000000..948cf90a1
--- /dev/null
+++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
@@ -0,0 +1,153 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor
+ * does it submit to any jurisdiction.
+ */
+
+#include <cmath>
+#include <limits>
+
+#include "atlas/array.h"
+#include "atlas/trans/localopt/LegendrePolynomialsopt.h"
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+
+void compute_legendre_polynomialsopt(
+    const size_t trc,  // truncation (in)
+    const double lat,  // latitude in radians (in)
+    double legpol[] )  // values of associated Legendre functions, size (trc+1)*trc/2 (out)
+{
+    array::ArrayT<int> idxmn_( trc + 1, trc + 1 );
+    array::ArrayView<int, 2> idxmn = array::make_view<int, 2>( idxmn_ );
+
+    int j = 0;
+    for ( int jm = 0; jm <= trc; ++jm ) {
+        for ( int jn = jm; jn <= trc; ++jn ) {
+            idxmn( jm, jn ) = j++;
+        }
+    }
+
+    array::ArrayT<double> zfn_( trc + 1, trc + 1 );
+    array::ArrayView<double, 2> zfn = array::make_view<double, 2>( zfn_ );
+
+    int iodd;
+
+    // Compute coefficients for Taylor series in Belousov (19) and (21)
+    // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.)
+    // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1
+    zfn( 0, 0 ) = 2.;
+    for ( int jn = 1; jn <= trc; ++jn ) {
+        double zfnn = zfn( 0, 0 );
+        for ( int jgl = 1; jgl <= jn; ++jgl ) {
+            zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) );
+        }
+        iodd          = jn % 2;
+        zfn( jn, jn ) = zfnn;
+        for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) {
+            double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) );  // new factor numerator
+            double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) );           // new factor denominator
+
+            zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd;
+        }
+    }
+
+    // --------------------
+    // 1. First two columns
+    // --------------------
+    double zdlx1   = ( M_PI_2 - lat );               // theta
+    double zdlx    = std::cos( zdlx1 );              // cos(theta)
+    double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
+
+    legpol[0]       = 1.;
+    double zdl1sita = 0.;
+
+    // if we are less than 1 meter from the pole,
+    if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits<double>::epsilon() ) ) {
+        zdlx    = 1.;
+        zdlsita = 0.;
+    }
+    else {
+        zdl1sita = 1. / zdlsita;
+    }
+
+    // ordinary Legendre polynomials from series expansion
+    // ---------------------------------------------------
+
+    // even N
+    for ( int jn = 2; jn <= trc; jn += 2 ) {
+        double zdlk   = 0.5 * zfn( jn, 0 );
+        double zdlldn = 0.0;
+        double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
+        // represented by only even k
+        for ( int jk = 2; jk <= jn; jk += 2 ) {
+            // normalised ordinary Legendre polynomial == \overbar{P_n}^0
+            zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
+            // normalised associated Legendre polynomial == \overbar{P_n}^1
+            zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+        }
+        legpol[idxmn( 0, jn )] = zdlk;
+        legpol[idxmn( 1, jn )] = zdlldn;
+    }
+
+    // odd N
+    for ( int jn = 1; jn <= trc; jn += 2 ) {
+        zfn( jn, 0 )  = 0.;
+        double zdlk   = 0.;
+        double zdlldn = 0.0;
+        double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
+        // represented by only even k
+        for ( int jk = 1; jk <= jn; jk += 2 ) {
+            // normalised ordinary Legendre polynomial == \overbar{P_n}^0
+            zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
+            // normalised associated Legendre polynomial == \overbar{P_n}^1
+            zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+        }
+        legpol[idxmn( 0, jn )] = zdlk;
+        legpol[idxmn( 1, jn )] = zdlldn;
+    }
+
+    // --------------------------------------------------------------
+    // 2. Diagonal (the terms 0,0 and 1,1 have already been computed)
+    //    Belousov, equation (23)
+    // --------------------------------------------------------------
+
+    double zdls = zdl1sita * std::numeric_limits<double>::min();
+    for ( int jn = 2; jn <= trc; ++jn ) {
+        double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) );
+
+        legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq;
+        if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0;
+    }
+
+    // ---------------------------------------------
+    // 3. General recurrence (Belousov, equation 17)
+    // ---------------------------------------------
+
+    for ( int jn = 3; jn <= trc; ++jn ) {
+        for ( int jm = 2; jm < jn; ++jm ) {
+            double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) );  // numerator of c in Belousov
+            double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of c in Belousov
+            double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) );  // numerator of d in Belousov
+            double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of d in Belousov
+            double en = ( ( 2. * jn + 1. ) * ( jn - jm ) );                          // numerator of e in Belousov
+            double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) );                          // denominator of e in Belousov
+
+            legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] -
+                                      std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx +
+                                      std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx;
+        }
+    }
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.h b/src/atlas/trans/localopt/LegendrePolynomialsopt.h
new file mode 100644
index 000000000..1162eb6f4
--- /dev/null
+++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.h
@@ -0,0 +1,43 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#pragma once
+
+#include <cstddef>
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+// Routine to compute the Legendre polynomials in serial according to Belousov
+// (using correction by Swarztrauber)
+//
+// Reference:
+// S.L. Belousov, Tables of normalized associated Legendre Polynomials, Pergamon
+// Press (1962)
+// P.N. Swarztrauber, On computing the points and weights for Gauss-Legendre
+// quadrature,
+//      SIAM J. Sci. Comput. Vol. 24 (3) pp. 945-954 (2002)
+//
+// Author of Fortran version:
+// Mats Hamrud, Philippe Courtier, Nils Wedi *ECMWF*
+//
+// Ported to C++ by:
+// Andreas Mueller *ECMWF*
+//
+void compute_legendre_polynomialsopt(
+    const size_t trc,   // truncation (in)
+    const double lat,   // latitude in radians (in)
+    double legpol[] );  // values of associated Legendre functions, size (trc+1)*trc/2 (out)
+
+// --------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt/LegendreTransformsopt.cc b/src/atlas/trans/localopt/LegendreTransformsopt.cc
new file mode 100644
index 000000000..e88feda69
--- /dev/null
+++ b/src/atlas/trans/localopt/LegendreTransformsopt.cc
@@ -0,0 +1,61 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#include <cstddef>
+
+#include "atlas/trans/localopt/LegendreTransformsopt.h"
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+
+void invtrans_legendreopt( const size_t trc,    // truncation (in)
+                        const size_t trcFT,  // truncation for Fourier transformation (in)
+                        const size_t trcLP,  // truncation of Legendre polynomials data legpol. Needs to be >= trc (in)
+                        const double legpol[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
+                        const int nb_fields,    // number of fields
+                        const double spec[],    // spectral data, size (trc+1)*trc (in)
+                        double leg_real[],      // values of associated Legendre functions, size (trc+1)*trc/2 (out)
+                        double leg_imag[] )     // values of associated Legendre functions, size (trc+1)*trc/2 (out)
+{
+    // Legendre transformation:
+    int k = 0, klp = 0;
+    for ( int jm = 0; jm <= trcFT; ++jm ) {
+        for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
+            leg_real[jm * nb_fields + jfld] = 0.;
+            leg_imag[jm * nb_fields + jfld] = 0.;
+        }
+        for ( int jn = jm; jn <= trcLP; ++jn, ++klp ) {
+            if ( jn <= trc ) {
+                for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
+                    // not completely sure where this factor 2 comes from. One possible
+                    // explanation:
+                    // normalization of trigonometric functions in the spherical harmonics
+                    // integral over square of trig function is 1 for m=0 and 0.5 (?) for
+                    // m>0
+                    leg_real[jm * nb_fields + jfld] += 2. * spec[( 2 * k ) * nb_fields + jfld] * legpol[klp];
+                    leg_imag[jm * nb_fields + jfld] += 2. * spec[( 2 * k + 1 ) * nb_fields + jfld] * legpol[klp];
+                }
+                ++k;
+            }
+        }
+    }
+    // Undo factor 2 for (jm == 0)
+    for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
+        leg_real[jfld] /= 2.;
+        leg_imag[jfld] /= 2.;
+    }
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt/LegendreTransformsopt.h b/src/atlas/trans/localopt/LegendreTransformsopt.h
new file mode 100644
index 000000000..55dcf0212
--- /dev/null
+++ b/src/atlas/trans/localopt/LegendreTransformsopt.h
@@ -0,0 +1,36 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#pragma once
+
+#include <cstddef>
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+// Routine to compute the Legendre transformation
+//
+// Author:
+// Andreas Mueller *ECMWF*
+//
+void invtrans_legendreopt( const size_t trc,    // truncation (in)
+                        const size_t trcFT,  // truncation for Fourier transformation (in)
+                        const size_t trcLP,  // truncation of Legendre polynomials data legpol. Needs to be >= trc (in)
+                        const double legpol[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
+                        const int nb_fields,    // number of fields
+                        const double spec[],    // spectral data, size (trc+1)*trc (in)
+                        double leg_real[],      // values of associated Legendre functions, size (trc+1)*trc/2 (out)
+                        double leg_imag[] );    // values of associated Legendre functions, size (trc+1)*trc/2 (out)
+
+// --------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
new file mode 100644
index 000000000..0a1477519
--- /dev/null
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -0,0 +1,327 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#include "atlas/trans/localopt/TransLocalopt.h"
+#include "atlas/array.h"
+#include "atlas/option.h"
+#include "atlas/parallel/mpi/mpi.h"
+#include "atlas/runtime/ErrorHandling.h"
+#include "atlas/runtime/Log.h"
+#include "atlas/trans/VorDivToUV.h"
+#include "atlas/trans/localopt/FourierTransformsopt.h"
+#include "atlas/trans/localopt/LegendrePolynomialsopt.h"
+#include "atlas/trans/localopt/LegendreTransformsopt.h"
+#include "atlas/util/Constants.h"
+
+namespace atlas {
+namespace trans {
+
+namespace {
+static TransBuilderGrid<TransLocalopt> builder( "localopt" );
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+// Helper functions
+// --------------------------------------------------------------------------------------------------------------------
+namespace {  // anonymous
+
+size_t legendre_size( const size_t truncation ) {
+    return ( truncation + 2 ) * ( truncation + 1 ) / 2;
+}
+
+}  // namespace
+
+// --------------------------------------------------------------------------------------------------------------------
+// Class TransLocalopt
+// --------------------------------------------------------------------------------------------------------------------
+
+TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long truncation,
+                              const eckit::Configuration& config ) :
+    grid_( grid ),
+    truncation_( truncation ),
+    precompute_( config.getBool( "precompute", true ) ) {
+    if ( precompute_ ) {
+        if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
+            ATLAS_TRACE( "Precompute legendre structured opt" );
+            grid::StructuredGrid g( grid_ );
+            size_t size( 0 );
+            legendre_begin_.resize( g.ny() );
+            for ( size_t j = 0; j < g.ny(); ++j ) {
+                legendre_begin_[j] = size;
+                size += legendre_size( truncation_ + 1 );
+            }
+            legendre_.resize( size );
+
+            for ( size_t j = 0; j < g.ny(); ++j ) {
+                double lat = g.y( j ) * util::Constants::degreesToRadians();
+                compute_legendre_polynomialsopt( truncation_ + 1, lat, legendre_data( j ) );
+            }
+        }
+        else {
+            ATLAS_TRACE( "Precompute legendre unstructured opt" );
+            size_t size( 0 );
+            legendre_begin_.resize( grid_.size() );
+            for ( size_t j = 0; j < grid_.size(); ++j ) {
+                legendre_begin_[j] = size;
+                size += legendre_size( truncation_ + 1 );
+            }
+            legendre_.resize( size );
+            int j( 0 );
+            for ( PointXY p : grid_.xy() ) {
+                double lat = p.y() * util::Constants::degreesToRadians();
+                compute_legendre_polynomialsopt( truncation_ + 1, lat, legendre_data( j++ ) );
+            }
+        }
+    }
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+TransLocalopt::TransLocalopt( const Grid& grid, const long truncation, const eckit::Configuration& config ) :
+    TransLocalopt( Cache(), grid, truncation, config ) {}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+TransLocalopt::~TransLocalopt() {}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt::invtrans( const Field& spfield, Field& gpfield, const eckit::Configuration& config ) const {
+    NOTIMP;
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt::invtrans( const FieldSet& spfields, FieldSet& gpfields, const eckit::Configuration& config ) const {
+    NOTIMP;
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt::invtrans_grad( const Field& spfield, Field& gradfield, const eckit::Configuration& config ) const {
+    NOTIMP;
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields,
+                                   const eckit::Configuration& config ) const {
+    NOTIMP;
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind,
+                                          const eckit::Configuration& config ) const {
+    NOTIMP;
+}
+
+void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[],
+                              const eckit::Configuration& config ) const {
+    invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields, config );
+}
+
+void gp_transposeopt( const int nb_size, const int nb_fields, const double gp_tmp[], double gp_fields[] ) {
+    for ( int jgp = 0; jgp < nb_size; jgp++ ) {
+        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+            gp_fields[jfld * nb_size + jgp] = gp_tmp[jgp * nb_fields + jfld];
+        }
+    }
+}
+
+//-----------------------------------------------------------------------------
+// Routine to compute the spectral transform by using a localopt Fourier
+// transformation
+// for a grid (same latitude for all longitudes, allows to compute Legendre
+// functions
+// once for all longitudes). U and v components are divided by cos(latitude) for
+// nb_vordiv_fields > 0.
+//
+// Author:
+// Andreas Mueller *ECMWF*
+//
+void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
+                                 const double scalar_spectra[], double gp_fields[],
+                                 const eckit::Configuration& config ) const {
+    if ( nb_scalar_fields > 0 ) {
+        int nb_fields = nb_scalar_fields;
+
+        // Depending on "precompute_legendre_", we have to compute the
+        // legendre polynomials for every latitute
+        std::vector<double> recomputed_legendre_;
+
+        auto legPol = [&]( double lat, int j ) -> const double* {
+            if ( precompute_ ) { return legendre_data( j ); }
+            else {
+                recomputed_legendre_.resize( legendre_size( truncation ) );
+                compute_legendre_polynomialsopt( truncation, lat, recomputed_legendre_.data() );
+                return recomputed_legendre_.data();
+            }
+        };
+
+        // Temporary storage for legendre space
+        std::vector<double> legReal( nb_fields * ( truncation + 1 ) );
+        std::vector<double> legImag( nb_fields * ( truncation + 1 ) );
+        std::vector<double> gp_tmp( nb_fields * grid_.size(), 0. );
+
+        // Transform
+        if ( grid::StructuredGrid g = grid_ ) {
+            ATLAS_TRACE( "invtrans_uv structured opt" );
+            int idx = 0;
+            for ( size_t j = 0; j < g.ny(); ++j ) {
+                double lat = g.y( j ) * util::Constants::degreesToRadians();
+                double trcFT =
+                    fourier_truncationopt( truncation, g.nx( j ), g.nxmax(), g.ny(), lat, grid::RegularGrid( grid_ ) );
+
+                // Legendre transform:
+                invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, j ), nb_fields, scalar_spectra,
+                                   legReal.data(), legImag.data() );
+
+                // Fourier transform:
+                for ( size_t i = 0; i < g.nx( j ); ++i ) {
+                    double lon = g.x( i, j ) * util::Constants::degreesToRadians();
+                    invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
+                                      gp_tmp.data() + ( nb_fields * idx ) );
+                    for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+                        gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                    }
+                    ++idx;
+                }
+            }
+        }
+        else {
+            ATLAS_TRACE( "invtrans_uv unstructured opt" );
+            int idx = 0;
+            for ( PointXY p : grid_.xy() ) {
+                double lon   = p.x() * util::Constants::degreesToRadians();
+                double lat   = p.y() * util::Constants::degreesToRadians();
+                double trcFT = truncation;
+
+                // Legendre transform:
+                invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
+                                   legReal.data(), legImag.data() );
+
+                // Fourier transform:
+                invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
+                                  gp_tmp.data() + ( nb_fields * idx ) );
+                for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+                    gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                }
+                ++idx;
+            }
+        }
+
+        // transpose result (gp_tmp: jfld is fastest index. gp_fields: jfld needs to
+        // be slowest index)
+        gp_transposeopt( grid_.size(), nb_fields, gp_tmp.data(), gp_fields );
+    }
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
+                              const double divergence_spectra[], double gp_fields[],
+                              const eckit::Configuration& config ) const {
+    invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
+}
+
+void extend_truncationopt( const int old_truncation, const int nb_fields, const double old_spectra[],
+                        double new_spectra[] ) {
+    int k = 0, k_old = 0;
+    for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
+        for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
+            for ( int imag = 0; imag < 2; imag++ ) {              // imaginary/real part
+                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {  // field
+                    if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; }
+                    else {
+                        new_spectra[k++] = old_spectra[k_old++];
+                    }
+                }
+            }
+        }
+    }
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
+                              const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
+                              const eckit::Configuration& config ) const {
+    ATLAS_TRACE( "TransLocalopt::invtrans" );
+    int nb_gp = grid_.size();
+
+    // increase truncation in vorticity_spectra and divergence_spectra:
+    int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
+    std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
+    std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
+    extend_truncationopt( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() );
+    extend_truncationopt( truncation_, nb_vordiv_fields, divergence_spectra, divergence_spectra_extended.data() );
+
+    // call vd2uv to compute u and v in spectral space
+    std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
+    std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
+    trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) );
+    vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
+                              divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
+
+    // perform spectral transform to compute all fields in grid point space
+    invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
+    invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
+                 gp_fields + nb_gp * nb_vordiv_fields, config );
+    invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt::dirtrans( const FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
+                                          const eckit::Configuration& config ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
+                              const eckit::Configuration& ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
+                              double divergence_spectra[], const eckit::Configuration& ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt/TransLocalopt.h b/src/atlas/trans/localopt/TransLocalopt.h
new file mode 100644
index 000000000..7b4a4d202
--- /dev/null
+++ b/src/atlas/trans/localopt/TransLocalopt.h
@@ -0,0 +1,121 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#pragma once
+
+#include <vector>
+
+#include "atlas/grid/Grid.h"
+#include "atlas/trans/Trans.h"
+
+//-----------------------------------------------------------------------------
+// Forward declarations
+
+namespace atlas {
+class Field;
+class FieldSet;
+}  // namespace atlas
+
+//-----------------------------------------------------------------------------
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+
+/// @class TransLocalopt
+///
+/// Localopt spherical harmonics transformations to any grid
+/// Optimisations are present for structured grids
+/// For global grids, please consider using TransIFS instead.
+///
+/// @todo:
+///  - support multiple fields
+///  - support atlas::Field and atlas::FieldSet based on function spaces
+///
+/// @note: Direct transforms are not implemented and cannot be unless
+///        the grid is global. There are no plans to support this at the moment.
+class TransLocalopt : public trans::TransImpl {
+public:
+    TransLocalopt( const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransLocalopt( const Cache&, const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() );
+
+    virtual ~TransLocalopt();
+
+    virtual int truncation() const override { return truncation_; }
+    virtual size_t spectralCoefficients() const override { return ( truncation_ + 1 ) * ( truncation_ + 2 ); }
+
+    virtual const Grid& grid() const override { return grid_; }
+
+    virtual void invtrans( const Field& spfield, Field& gpfield,
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void invtrans( const FieldSet& spfields, FieldSet& gpfields,
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void invtrans_grad( const Field& spfield, Field& gradfield,
+                                const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void invtrans_grad( const FieldSet& spfields, FieldSet& gradfields,
+                                const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind,
+                                       const eckit::Configuration& = util::NoConfig() ) const override;
+
+    // -- IFS style API --
+
+    virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
+                           const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[],
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
+                           const double divergence_spectra[], double gp_fields[],
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    // -- NOT SUPPORTED -- //
+
+    virtual void dirtrans( const Field& gpfield, Field& spfield,
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void dirtrans( const FieldSet& gpfields, FieldSet& spfields,
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
+                                       const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
+                           double divergence_spectra[], const eckit::Configuration& = util::NoConfig() ) const override;
+
+private:
+    const double* legendre_data( int j ) const { return legendre_.data() + legendre_begin_[j]; }
+    double* legendre_data( int j ) { return legendre_.data() + legendre_begin_[j]; }
+
+    void invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
+                      const double scalar_spectra[], double gp_fields[],
+                      const eckit::Configuration& = util::NoConfig() ) const;
+
+private:
+    Grid grid_;
+    int truncation_;
+    bool precompute_;
+    std::vector<double> legendre_;
+    std::vector<size_t> legendre_begin_;
+};
+
+//-----------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt/VorDivToUVLocalopt.cc b/src/atlas/trans/localopt/VorDivToUVLocalopt.cc
new file mode 100644
index 000000000..8ed6ed09c
--- /dev/null
+++ b/src/atlas/trans/localopt/VorDivToUVLocalopt.cc
@@ -0,0 +1,184 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#include "atlas/trans/localopt/VorDivToUVLocalopt.h"
+#include <cmath>  // for std::sqrt
+#include "atlas/functionspace/Spectral.h"
+#include "atlas/runtime/Log.h"
+#include "atlas/util/Earth.h"
+
+using atlas::FunctionSpace;
+using atlas::functionspace::Spectral;
+
+namespace atlas {
+namespace trans {
+
+namespace {
+static VorDivToUVBuilder<VorDivToUVLocalopt> builder( "localopt" );
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+// Routine to copy spectral data into internal storage form of IFS trans
+// Ported to C++ by: Andreas Mueller *ECMWF*
+void prfi1bopt( const int truncation,
+             const int km,          // zonal wavenumber
+             const int nb_fields,   // number of fields
+             const double rspec[],  // spectral data
+             double pia[] )         // spectral components in data layout of trans library
+{
+    int ilcm = truncation + 1 - km, ioff = ( 2 * truncation - km + 3 ) * km,
+        nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2;
+    for ( int j = 1; j <= ilcm; j++ ) {
+        int inm = ioff + ( ilcm - j ) * 2;
+        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+            int ir = 2 * jfld, ii = ir + 1;
+            pia[ir * nlei1 + j + 1] = rspec[inm * nb_fields + jfld];
+            pia[ii * nlei1 + j + 1] = rspec[( inm + 1 ) * nb_fields + jfld];
+        }
+    }
+
+    for ( int jfld = 0; jfld < 2 * nb_fields; jfld++ ) {
+        pia[jfld * nlei1]            = 0.;
+        pia[jfld * nlei1 + 1]        = 0.;
+        pia[jfld * nlei1 + ilcm + 2] = 0.;
+    }
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+// Routine to compute spectral velocities (*cos(latitude)) out of spectral
+// vorticity and divergence
+// Reference:
+//        ECMWF Research Department documentation of the IFS
+//        Temperton, 1991, MWR 119 p1303
+// Ported to C++ by: Andreas Mueller *ECMWF*
+void vd2uvopt( const int truncation,               // truncation
+            const int km,                       // zonal wavenumber
+            const int nb_vordiv_fields,         // number of vorticity and divergence fields
+            const double vorticity_spectra[],   // spectral data of vorticity
+            const double divergence_spectra[],  // spectral data of divergence
+            double U[],                         // spectral data of U
+            double V[],                         // spectral data of V
+            const eckit::Configuration& config ) {
+    int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2;
+
+    // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991]
+    std::vector<double> repsnm( ( truncation + 1 ) * ( truncation + 6 ) / 2 );
+    int idx = 0;
+    for ( int jm = 0; jm <= truncation; ++jm ) {
+        for ( int jn = jm; jn <= truncation + 2; ++jn, ++idx ) {
+            repsnm[idx] = std::sqrt( ( jn * jn - jm * jm ) / ( 4. * jn * jn - 1. ) );
+        }
+    }
+    repsnm[0] = 0.;
+
+    // rlapin: constant factor from eq.(2.2) and (2.3) in [Temperton 1991]
+    double ra = util::Earth::radius();
+    std::vector<double> rlapin( truncation + 3 );
+    for ( int jn = 1; jn <= truncation + 2; ++jn ) {
+        rlapin[jn] = -ra * ra / ( jn * ( jn + 1. ) );
+    }
+    rlapin[0] = 0.;
+
+    // inverse the order of repsnm and rlapin for improved accuracy
+    std::vector<double> zepsnm( truncation + 6 );
+    std::vector<double> zlapin( truncation + 6 );
+    std::vector<double> zn( truncation + 6 );
+    for ( int jn = km - 1; jn <= truncation + 2; ++jn ) {
+        int ij = truncation + 3 - jn;
+        if ( jn >= 0 ) {
+            zlapin[ij] = rlapin[jn];
+            if ( jn < km ) { zepsnm[ij] = 0.; }
+            else {
+                zepsnm[ij] = repsnm[jn + ( 2 * truncation - km + 5 ) * km / 2];
+            }
+        }
+        else {
+            zlapin[ij] = 0.;
+            zepsnm[ij] = 0.;
+        }
+        zn[ij] = jn;
+    }
+    zn[0] = truncation + 3;
+
+    // copy spectral data into internal trans storage:
+    std::vector<double> rvor( 2 * nb_vordiv_fields * nlei1 );
+    std::vector<double> rdiv( 2 * nb_vordiv_fields * nlei1 );
+    std::vector<double> ru( 2 * nb_vordiv_fields * nlei1 );
+    std::vector<double> rv( 2 * nb_vordiv_fields * nlei1 );
+    prfi1bopt( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() );
+    prfi1bopt( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() );
+
+    // compute eq.(2.12) and (2.13) in [Temperton 1991]:
+    if ( km == 0 ) {
+        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+            int ir = 2 * jfld * nlei1 - 1;
+            for ( int ji = 2; ji < truncation + 4 - km; ++ji ) {
+                double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1];
+                double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1];
+                ru[ir + ji]  = +psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1];
+                rv[ir + ji]  = -psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1];
+            }
+        }
+    }
+    else {
+        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+            int ir = 2 * jfld * nlei1 - 1, ii = ir + nlei1;
+            for ( int ji = 2; ji < truncation + 4 - km; ++ji ) {
+                double chiIm = km * zlapin[ji];
+                double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1];
+                double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1];
+                ru[ir + ji]  = -chiIm * rdiv[ii + ji] + psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1];
+                ru[ii + ji]  = +chiIm * rdiv[ir + ji] + psiM1 * rvor[ii + ji + 1] - psiP1 * rvor[ii + ji - 1];
+                rv[ir + ji]  = -chiIm * rvor[ii + ji] - psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1];
+                rv[ii + ji]  = +chiIm * rvor[ir + ji] - psiM1 * rdiv[ii + ji + 1] + psiP1 * rdiv[ii + ji - 1];
+            }
+        }
+    }
+
+    // copy data from internal storage back to external spectral data:
+    int ilcm = truncation - km;
+    int ioff = ( 2 * truncation - km + 3 ) * km;
+    // ioff: start index of zonal wavenumber km in spectral data
+    double za_r = 1. / util::Earth::radius();
+    for ( int j = 0; j <= ilcm; ++j ) {
+        // ilcm-j = total wavenumber
+        int inm = ioff + ( ilcm - j ) * 2;
+        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+            int ir = 2 * jfld * nlei1, ii = ir + nlei1;
+            int idx = inm * nb_vordiv_fields + jfld;
+            // real part:
+            U[idx] = ru[ir + j + 2] * za_r;
+            V[idx] = rv[ir + j + 2] * za_r;
+            idx += nb_vordiv_fields;
+            // imaginary part:
+            U[idx] = ru[ii + j + 2] * za_r;
+            V[idx] = rv[ii + j + 2] * za_r;
+        }
+    }
+}
+
+void VorDivToUVLocalopt::execute( const int nb_coeff, const int nb_fields, const double vorticity[],
+                               const double divergence[], double U[], double V[],
+                               const eckit::Configuration& config ) const {
+    for ( int jm = 0; jm <= truncation_; ++jm ) {
+        vd2uvopt( truncation_, jm, nb_fields, vorticity, divergence, U, V, config );
+    }
+}
+
+VorDivToUVLocalopt::VorDivToUVLocalopt( const int truncation, const eckit::Configuration& config ) :
+    truncation_( truncation ) {}
+
+VorDivToUVLocalopt::VorDivToUVLocalopt( const FunctionSpace& fs, const eckit::Configuration& config ) :
+    truncation_( Spectral( fs ).truncation() ) {}
+
+VorDivToUVLocalopt::~VorDivToUVLocalopt() {}
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt/VorDivToUVLocalopt.h b/src/atlas/trans/localopt/VorDivToUVLocalopt.h
new file mode 100644
index 000000000..4bb7eda88
--- /dev/null
+++ b/src/atlas/trans/localopt/VorDivToUVLocalopt.h
@@ -0,0 +1,67 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#pragma once
+
+#include "atlas/trans/VorDivToUV.h"
+
+//-----------------------------------------------------------------------------
+// Forward declarations
+
+namespace atlas {
+class FunctionSpace;
+}
+
+//-----------------------------------------------------------------------------
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+
+class VorDivToUVLocalopt : public trans::VorDivToUVImpl {
+public:
+    VorDivToUVLocalopt( const FunctionSpace&, const eckit::Configuration& = util::NoConfig() );
+    VorDivToUVLocalopt( int truncation, const eckit::Configuration& = util::NoConfig() );
+
+    virtual ~VorDivToUVLocalopt();
+
+    virtual int truncation() const override { return truncation_; }
+
+    // pure virtual interface
+
+    // -- IFS style API --
+    // These fields have special interpretation required. You need to know what
+    // you're doing.
+    // See IFS trans library.
+
+    /*!
+ * @brief Compute spectral wind (U/V) from spectral vorticity/divergence
+ *
+ * U = u*cos(lat)
+ * V = v*cos(lat)
+ *
+ * @param nb_fields [in] Number of fields
+ * @param vorticity [in] Spectral vorticity
+ * @param divergence [in] Spectral divergence
+ * @param U [out] Spectral wind U = u*cos(lat)
+ * @param V [out] Spectral wind V = v*cos(lat)
+ */
+    virtual void execute( const int nb_coeff, const int nb_fields, const double vorticity[], const double divergence[],
+                          double U[], double V[], const eckit::Configuration& = util::NoConfig() ) const override;
+
+private:
+    int truncation_;
+};
+
+// ------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 03b4536b0..5991f2a26 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -721,6 +721,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
     trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) );
 #endif
     trans::Trans transLocal( g, trc, util::Config( "type", "local" ) );
+    trans::Trans transLocalopt( g, trc, util::Config( "type", "localopt" ) );
 
     functionspace::Spectral spectral( trc );
     functionspace::StructuredColumns gridpoints( g );
@@ -733,6 +734,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
     std::vector<double> rspecg( 2 * N );
     std::vector<double> gp( nb_all * g.size() );
     std::vector<double> rgp( nb_all * g.size() );
+    std::vector<double> rgpopt( nb_all * g.size() );
     std::vector<double> rgp_analytic( g.size() );
 
     int icase = 0;
@@ -768,6 +770,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
                                 for ( int j = 0; j < nb_all * g.size(); j++ ) {
                                     gp[j]  = 0.;
                                     rgp[j] = 0.;
+                                    rgpopt[j] = 0.;
                                 }
                                 for ( int j = 0; j < g.size(); j++ ) {
                                     rgp_analytic[j] = 0.;
@@ -779,19 +782,27 @@ CASE( "test_trans_vordiv_with_translib" ) {
                                 EXPECT_NO_THROW( transLocal.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
                                                                       div.data(), rgp.data() ) );
 
+                                EXPECT_NO_THROW( transLocalopt.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
+                                                                      div.data(), rgpopt.data() ) );
+
                                 int pos = ( ivar_out * nb_vordiv + jfld );
 
                                 double rms_gen =
                                     compute_rms( g.size(), rgp.data() + pos * g.size(), rgp_analytic.data() );
 
-                                if ( rms_gen >= tolerance ) {
+                                double rms_genopt =
+                                    compute_rms( g.size(), rgpopt.data() + pos * g.size(), rgp_analytic.data() );
+
+                                if ( !(rms_gen < tolerance) || !(rms_genopt < tolerance) ) {
                                     Log::info()
                                         << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
                                         << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
                                     ATLAS_DEBUG_VAR( rms_gen );
+                                    ATLAS_DEBUG_VAR( rms_genopt );
                                     ATLAS_DEBUG_VAR( tolerance );
                                 }
                                 EXPECT( rms_gen < tolerance );
+                                EXPECT( rms_genopt < tolerance );
                                 icase++;
 
 #if ATLAS_HAVE_TRANS
@@ -802,11 +813,12 @@ CASE( "test_trans_vordiv_with_translib" ) {
                                 double rms_diff =
                                     compute_rms( g.size(), rgp.data() + pos * g.size(), gp.data() + pos * g.size() );
                                 EXPECT( rms_trans < tolerance );
-                                if ( rms_trans >= tolerance || rms_diff >= tolerance ) {
+                                if ( !(rms_trans < tolerance) || !(rms_diff < tolerance) ) {
                                     Log::info()
                                         << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
                                         << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
                                     ATLAS_DEBUG_VAR( rms_gen );
+                                    ATLAS_DEBUG_VAR( rms_genopt );
                                     ATLAS_DEBUG_VAR( rms_trans );
                                     ATLAS_DEBUG_VAR( rms_diff );
                                     ATLAS_DEBUG_VAR( tolerance );

From c12f3ef62ace7136e26722f6bf81bbffcd6a12da Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Tue, 27 Feb 2018 18:03:56 +0000
Subject: [PATCH 004/123] added functionality to compute Legendre polynomials
 for all latitudes. This is needed to compute them directly in the order in
 which they will be used by dgemm.

---
 .../trans/localopt/LegendrePolynomialsopt.cc  | 172 +++++++++---------
 .../trans/localopt/LegendrePolynomialsopt.h   |   7 +-
 src/atlas/trans/localopt/TransLocalopt.cc     |   6 +-
 3 files changed, 96 insertions(+), 89 deletions(-)

diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
index 948cf90a1..78ebdf8ee 100644
--- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
+++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
@@ -21,17 +21,20 @@ namespace trans {
 //-----------------------------------------------------------------------------
 
 void compute_legendre_polynomialsopt(
-    const size_t trc,  // truncation (in)
-    const double lat,  // latitude in radians (in)
-    double legpol[] )  // values of associated Legendre functions, size (trc+1)*trc/2 (out)
+    const size_t trc,     // truncation (in)
+    const int nlats,      // number of latitudes
+    const double lats[],  // latitudes in radians (in)
+    double legpol[] )     // values of associated Legendre functions, size (trc+1)*trc/2*nlats (out)
 {
-    array::ArrayT<int> idxmn_( trc + 1, trc + 1 );
-    array::ArrayView<int, 2> idxmn = array::make_view<int, 2>( idxmn_ );
+    array::ArrayT<int> idxmn_( trc + 1, trc + 1, nlats );
+    array::ArrayView<int, 3> idxmn = array::make_view<int, 3>( idxmn_ );
 
     int j = 0;
     for ( int jm = 0; jm <= trc; ++jm ) {
-        for ( int jn = jm; jn <= trc; ++jn ) {
-            idxmn( jm, jn ) = j++;
+        for ( int jlat = 0; jlat < nlats; ++jlat ) {
+            for ( int jn = jm; jn <= trc; ++jn ) {
+                idxmn( jm, jn, jlat ) = j++;
+            }
         }
     }
 
@@ -59,90 +62,93 @@ void compute_legendre_polynomialsopt(
         }
     }
 
-    // --------------------
-    // 1. First two columns
-    // --------------------
-    double zdlx1   = ( M_PI_2 - lat );               // theta
-    double zdlx    = std::cos( zdlx1 );              // cos(theta)
-    double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
-
-    legpol[0]       = 1.;
-    double zdl1sita = 0.;
-
-    // if we are less than 1 meter from the pole,
-    if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits<double>::epsilon() ) ) {
-        zdlx    = 1.;
-        zdlsita = 0.;
-    }
-    else {
-        zdl1sita = 1. / zdlsita;
-    }
+    for ( int jlat = 0; jlat < nlats; ++jlat ) {
+        // --------------------
+        // 1. First two columns
+        // --------------------
+        double lat = lats[jlat];
+        double zdlx1   = ( M_PI_2 - lat );               // theta
+        double zdlx    = std::cos( zdlx1 );              // cos(theta)
+        double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
+
+        legpol[0]       = 1.;
+        double zdl1sita = 0.;
+
+        // if we are less than 1 meter from the pole,
+        if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits<double>::epsilon() ) ) {
+            zdlx    = 1.;
+            zdlsita = 0.;
+        }
+        else {
+            zdl1sita = 1. / zdlsita;
+        }
 
-    // ordinary Legendre polynomials from series expansion
-    // ---------------------------------------------------
-
-    // even N
-    for ( int jn = 2; jn <= trc; jn += 2 ) {
-        double zdlk   = 0.5 * zfn( jn, 0 );
-        double zdlldn = 0.0;
-        double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
-        // represented by only even k
-        for ( int jk = 2; jk <= jn; jk += 2 ) {
-            // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-            zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
-            // normalised associated Legendre polynomial == \overbar{P_n}^1
-            zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+        // ordinary Legendre polynomials from series expansion
+        // ---------------------------------------------------
+
+        // even N
+        for ( int jn = 2; jn <= trc; jn += 2 ) {
+            double zdlk   = 0.5 * zfn( jn, 0 );
+            double zdlldn = 0.0;
+            double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
+            // represented by only even k
+            for ( int jk = 2; jk <= jn; jk += 2 ) {
+                // normalised ordinary Legendre polynomial == \overbar{P_n}^0
+                zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
+                // normalised associated Legendre polynomial == \overbar{P_n}^1
+                zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+            }
+            legpol[idxmn( 0, jn, jlat )] = zdlk;
+            legpol[idxmn( 1, jn, jlat )] = zdlldn;
         }
-        legpol[idxmn( 0, jn )] = zdlk;
-        legpol[idxmn( 1, jn )] = zdlldn;
-    }
 
-    // odd N
-    for ( int jn = 1; jn <= trc; jn += 2 ) {
-        zfn( jn, 0 )  = 0.;
-        double zdlk   = 0.;
-        double zdlldn = 0.0;
-        double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
-        // represented by only even k
-        for ( int jk = 1; jk <= jn; jk += 2 ) {
-            // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-            zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
-            // normalised associated Legendre polynomial == \overbar{P_n}^1
-            zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+        // odd N
+        for ( int jn = 1; jn <= trc; jn += 2 ) {
+            zfn( jn, 0 )  = 0.;
+            double zdlk   = 0.;
+            double zdlldn = 0.0;
+            double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
+            // represented by only even k
+            for ( int jk = 1; jk <= jn; jk += 2 ) {
+                // normalised ordinary Legendre polynomial == \overbar{P_n}^0
+                zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
+                // normalised associated Legendre polynomial == \overbar{P_n}^1
+                zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+            }
+            legpol[idxmn( 0, jn, jlat )] = zdlk;
+            legpol[idxmn( 1, jn, jlat )] = zdlldn;
         }
-        legpol[idxmn( 0, jn )] = zdlk;
-        legpol[idxmn( 1, jn )] = zdlldn;
-    }
 
-    // --------------------------------------------------------------
-    // 2. Diagonal (the terms 0,0 and 1,1 have already been computed)
-    //    Belousov, equation (23)
-    // --------------------------------------------------------------
+        // --------------------------------------------------------------
+        // 2. Diagonal (the terms 0,0 and 1,1 have already been computed)
+        //    Belousov, equation (23)
+        // --------------------------------------------------------------
 
-    double zdls = zdl1sita * std::numeric_limits<double>::min();
-    for ( int jn = 2; jn <= trc; ++jn ) {
-        double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) );
+        double zdls = zdl1sita * std::numeric_limits<double>::min();
+        for ( int jn = 2; jn <= trc; ++jn ) {
+            double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) );
 
-        legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq;
-        if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0;
-    }
+            legpol[idxmn( jn, jn, jlat )] = legpol[idxmn( jn - 1, jn - 1, jlat )] * zdlsita * sq;
+            if ( std::abs( legpol[idxmn( jn, jn, jlat )] ) < zdls ) legpol[idxmn( jn, jn, jlat )] = 0.0;
+        }
 
-    // ---------------------------------------------
-    // 3. General recurrence (Belousov, equation 17)
-    // ---------------------------------------------
-
-    for ( int jn = 3; jn <= trc; ++jn ) {
-        for ( int jm = 2; jm < jn; ++jm ) {
-            double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) );  // numerator of c in Belousov
-            double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of c in Belousov
-            double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) );  // numerator of d in Belousov
-            double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of d in Belousov
-            double en = ( ( 2. * jn + 1. ) * ( jn - jm ) );                          // numerator of e in Belousov
-            double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) );                          // denominator of e in Belousov
-
-            legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] -
-                                      std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx +
-                                      std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx;
+        // ---------------------------------------------
+        // 3. General recurrence (Belousov, equation 17)
+        // ---------------------------------------------
+
+        for ( int jn = 3; jn <= trc; ++jn ) {
+            for ( int jm = 2; jm < jn; ++jm ) {
+                double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) );  // numerator of c in Belousov
+                double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of c in Belousov
+                double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) );  // numerator of d in Belousov
+                double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of d in Belousov
+                double en = ( ( 2. * jn + 1. ) * ( jn - jm ) );                          // numerator of e in Belousov
+                double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) );                          // denominator of e in Belousov
+
+                legpol[idxmn( jm, jn, jlat )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2, jlat )] -
+                                        std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1, jlat )] * zdlx +
+                                        std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1, jlat )] * zdlx;
+            }
         }
     }
 }
diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.h b/src/atlas/trans/localopt/LegendrePolynomialsopt.h
index 1162eb6f4..e2fd7db8f 100644
--- a/src/atlas/trans/localopt/LegendrePolynomialsopt.h
+++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.h
@@ -33,9 +33,10 @@ namespace trans {
 // Andreas Mueller *ECMWF*
 //
 void compute_legendre_polynomialsopt(
-    const size_t trc,   // truncation (in)
-    const double lat,   // latitude in radians (in)
-    double legpol[] );  // values of associated Legendre functions, size (trc+1)*trc/2 (out)
+    const size_t trc,     // truncation (in)
+    const int nlats,      // number of latitudes
+    const double lats[],  // latitudes in radians (in)
+    double legpol[] );    // values of associated Legendre functions, size (trc+1)*trc/2*nlats (out)
 
 // --------------------------------------------------------------------------------------------------------------------
 
diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index 0a1477519..c71826e32 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -61,7 +61,7 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
 
             for ( size_t j = 0; j < g.ny(); ++j ) {
                 double lat = g.y( j ) * util::Constants::degreesToRadians();
-                compute_legendre_polynomialsopt( truncation_ + 1, lat, legendre_data( j ) );
+                compute_legendre_polynomialsopt( truncation_ + 1, 1, &lat, legendre_data( j ) );
             }
         }
         else {
@@ -76,7 +76,7 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
             int j( 0 );
             for ( PointXY p : grid_.xy() ) {
                 double lat = p.y() * util::Constants::degreesToRadians();
-                compute_legendre_polynomialsopt( truncation_ + 1, lat, legendre_data( j++ ) );
+                compute_legendre_polynomialsopt( truncation_ + 1, 1, &lat, legendre_data( j++ ) );
             }
         }
     }
@@ -161,7 +161,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
             if ( precompute_ ) { return legendre_data( j ); }
             else {
                 recomputed_legendre_.resize( legendre_size( truncation ) );
-                compute_legendre_polynomialsopt( truncation, lat, recomputed_legendre_.data() );
+                compute_legendre_polynomialsopt( truncation, 1, &lat, recomputed_legendre_.data() );
                 return recomputed_legendre_.data();
             }
         };

From cceafc44c077100d81e0c26c7c58af625be5eebc Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Wed, 28 Feb 2018 11:24:47 +0000
Subject: [PATCH 005/123] clang-format applied (somehow didn't work before)

---
 .../trans/localopt/FourierTransformsopt.cc    | 20 +++++++-------
 .../trans/localopt/LegendrePolynomialsopt.cc  |  6 ++---
 .../trans/localopt/LegendreTransformsopt.cc   | 17 ++++++------
 src/atlas/trans/localopt/TransLocalopt.cc     | 10 +++----
 .../trans/localopt/VorDivToUVLocalopt.cc      | 26 +++++++++----------
 5 files changed, 40 insertions(+), 39 deletions(-)

diff --git a/src/atlas/trans/localopt/FourierTransformsopt.cc b/src/atlas/trans/localopt/FourierTransformsopt.cc
index ede59cea8..d854f5aaa 100644
--- a/src/atlas/trans/localopt/FourierTransformsopt.cc
+++ b/src/atlas/trans/localopt/FourierTransformsopt.cc
@@ -21,11 +21,11 @@ namespace trans {
 //-----------------------------------------------------------------------------
 
 void invtrans_fourieropt( const size_t trcFT,
-                       const double lon,         // longitude in radians (in)
-                       const int nb_fields,      // Number of fields
-                       const double rlegReal[],  // associated Legendre functions, size (trc+1)*trc/2 (in)
-                       const double rlegImag[],  // associated Legendre functions, size (trc+1)*trc/2 (in)
-                       double rgp[] )            // gridpoint
+                          const double lon,         // longitude in radians (in)
+                          const int nb_fields,      // Number of fields
+                          const double rlegReal[],  // associated Legendre functions, size (trc+1)*trc/2 (in)
+                          const double rlegImag[],  // associated Legendre functions, size (trc+1)*trc/2 (in)
+                          double rgp[] )            // gridpoint
 {
     for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
         rgp[jfld] = 0.;
@@ -43,11 +43,11 @@ void invtrans_fourieropt( const size_t trcFT,
 }
 
 int fourier_truncationopt( const int truncation,    // truncation
-                        const int nx,            // number of longitudes
-                        const int nxmax,         // maximum nx
-                        const int ndgl,          // number of latitudes
-                        const double lat,        // latitude in radian
-                        const bool fullgrid ) {  // regular grid
+                           const int nx,            // number of longitudes
+                           const int nxmax,         // maximum nx
+                           const int ndgl,          // number of latitudes
+                           const double lat,        // latitude in radian
+                           const bool fullgrid ) {  // regular grid
     int trc     = truncation;
     int trclin  = ndgl - 1;
     int trcquad = ndgl * 2 / 3 - 1;
diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
index 78ebdf8ee..b935c1bd8 100644
--- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
+++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
@@ -66,7 +66,7 @@ void compute_legendre_polynomialsopt(
         // --------------------
         // 1. First two columns
         // --------------------
-        double lat = lats[jlat];
+        double lat     = lats[jlat];
         double zdlx1   = ( M_PI_2 - lat );               // theta
         double zdlx    = std::cos( zdlx1 );              // cos(theta)
         double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
@@ -146,8 +146,8 @@ void compute_legendre_polynomialsopt(
                 double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) );                          // denominator of e in Belousov
 
                 legpol[idxmn( jm, jn, jlat )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2, jlat )] -
-                                        std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1, jlat )] * zdlx +
-                                        std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1, jlat )] * zdlx;
+                                                std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1, jlat )] * zdlx +
+                                                std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1, jlat )] * zdlx;
             }
         }
     }
diff --git a/src/atlas/trans/localopt/LegendreTransformsopt.cc b/src/atlas/trans/localopt/LegendreTransformsopt.cc
index e88feda69..29dd5dad4 100644
--- a/src/atlas/trans/localopt/LegendreTransformsopt.cc
+++ b/src/atlas/trans/localopt/LegendreTransformsopt.cc
@@ -17,14 +17,15 @@ namespace trans {
 
 //-----------------------------------------------------------------------------
 
-void invtrans_legendreopt( const size_t trc,    // truncation (in)
-                        const size_t trcFT,  // truncation for Fourier transformation (in)
-                        const size_t trcLP,  // truncation of Legendre polynomials data legpol. Needs to be >= trc (in)
-                        const double legpol[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
-                        const int nb_fields,    // number of fields
-                        const double spec[],    // spectral data, size (trc+1)*trc (in)
-                        double leg_real[],      // values of associated Legendre functions, size (trc+1)*trc/2 (out)
-                        double leg_imag[] )     // values of associated Legendre functions, size (trc+1)*trc/2 (out)
+void invtrans_legendreopt(
+    const size_t trc,       // truncation (in)
+    const size_t trcFT,     // truncation for Fourier transformation (in)
+    const size_t trcLP,     // truncation of Legendre polynomials data legpol. Needs to be >= trc (in)
+    const double legpol[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
+    const int nb_fields,    // number of fields
+    const double spec[],    // spectral data, size (trc+1)*trc (in)
+    double leg_real[],      // values of associated Legendre functions, size (trc+1)*trc/2 (out)
+    double leg_imag[] )     // values of associated Legendre functions, size (trc+1)*trc/2 (out)
 {
     // Legendre transformation:
     int k = 0, klp = 0;
diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index c71826e32..11de7630c 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -182,13 +182,13 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
 
                 // Legendre transform:
                 invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, j ), nb_fields, scalar_spectra,
-                                   legReal.data(), legImag.data() );
+                                      legReal.data(), legImag.data() );
 
                 // Fourier transform:
                 for ( size_t i = 0; i < g.nx( j ); ++i ) {
                     double lon = g.x( i, j ) * util::Constants::degreesToRadians();
                     invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
-                                      gp_tmp.data() + ( nb_fields * idx ) );
+                                         gp_tmp.data() + ( nb_fields * idx ) );
                     for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
                         gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
                     }
@@ -206,11 +206,11 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
 
                 // Legendre transform:
                 invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
-                                   legReal.data(), legImag.data() );
+                                      legReal.data(), legImag.data() );
 
                 // Fourier transform:
                 invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
-                                  gp_tmp.data() + ( nb_fields * idx ) );
+                                     gp_tmp.data() + ( nb_fields * idx ) );
                 for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
                     gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
                 }
@@ -233,7 +233,7 @@ void TransLocalopt::invtrans( const int nb_vordiv_fields, const double vorticity
 }
 
 void extend_truncationopt( const int old_truncation, const int nb_fields, const double old_spectra[],
-                        double new_spectra[] ) {
+                           double new_spectra[] ) {
     int k = 0, k_old = 0;
     for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
         for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
diff --git a/src/atlas/trans/localopt/VorDivToUVLocalopt.cc b/src/atlas/trans/localopt/VorDivToUVLocalopt.cc
index 8ed6ed09c..032aa6073 100644
--- a/src/atlas/trans/localopt/VorDivToUVLocalopt.cc
+++ b/src/atlas/trans/localopt/VorDivToUVLocalopt.cc
@@ -28,10 +28,10 @@ static VorDivToUVBuilder<VorDivToUVLocalopt> builder( "localopt" );
 // Routine to copy spectral data into internal storage form of IFS trans
 // Ported to C++ by: Andreas Mueller *ECMWF*
 void prfi1bopt( const int truncation,
-             const int km,          // zonal wavenumber
-             const int nb_fields,   // number of fields
-             const double rspec[],  // spectral data
-             double pia[] )         // spectral components in data layout of trans library
+                const int km,          // zonal wavenumber
+                const int nb_fields,   // number of fields
+                const double rspec[],  // spectral data
+                double pia[] )         // spectral components in data layout of trans library
 {
     int ilcm = truncation + 1 - km, ioff = ( 2 * truncation - km + 3 ) * km,
         nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2;
@@ -59,13 +59,13 @@ void prfi1bopt( const int truncation,
 //        Temperton, 1991, MWR 119 p1303
 // Ported to C++ by: Andreas Mueller *ECMWF*
 void vd2uvopt( const int truncation,               // truncation
-            const int km,                       // zonal wavenumber
-            const int nb_vordiv_fields,         // number of vorticity and divergence fields
-            const double vorticity_spectra[],   // spectral data of vorticity
-            const double divergence_spectra[],  // spectral data of divergence
-            double U[],                         // spectral data of U
-            double V[],                         // spectral data of V
-            const eckit::Configuration& config ) {
+               const int km,                       // zonal wavenumber
+               const int nb_vordiv_fields,         // number of vorticity and divergence fields
+               const double vorticity_spectra[],   // spectral data of vorticity
+               const double divergence_spectra[],  // spectral data of divergence
+               double U[],                         // spectral data of U
+               double V[],                         // spectral data of V
+               const eckit::Configuration& config ) {
     int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2;
 
     // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991]
@@ -165,8 +165,8 @@ void vd2uvopt( const int truncation,               // truncation
 }
 
 void VorDivToUVLocalopt::execute( const int nb_coeff, const int nb_fields, const double vorticity[],
-                               const double divergence[], double U[], double V[],
-                               const eckit::Configuration& config ) const {
+                                  const double divergence[], double U[], double V[],
+                                  const eckit::Configuration& config ) const {
     for ( int jm = 0; jm <= truncation_; ++jm ) {
         vd2uvopt( truncation_, jm, nb_fields, vorticity, divergence, U, V, config );
     }

From 9bf7fdece66f5e59351fb7032558c78692562b76 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Thu, 1 Mar 2018 16:46:00 +0000
Subject: [PATCH 006/123] Legendre transformation works with dgemm via eckit

---
 .../trans/localopt/LegendrePolynomialsopt.cc  |  11 +-
 src/atlas/trans/localopt/TransLocalopt.cc     | 185 +++++++++++++-----
 src/atlas/trans/localopt/TransLocalopt.h      |   3 +-
 src/tests/trans/test_transgeneral.cc          |  14 +-
 4 files changed, 150 insertions(+), 63 deletions(-)

diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
index b935c1bd8..25fda7d04 100644
--- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
+++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
@@ -71,9 +71,9 @@ void compute_legendre_polynomialsopt(
         double zdlx    = std::cos( zdlx1 );              // cos(theta)
         double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
 
-        legpol[0]       = 1.;
-        double zdl1sita = 0.;
+        legpol[idxmn( 0, 0, jlat )] = 1.;
 
+        double zdl1sita = 0.;
         // if we are less than 1 meter from the pole,
         if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits<double>::epsilon() ) ) {
             zdlx    = 1.;
@@ -150,6 +150,13 @@ void compute_legendre_polynomialsopt(
                                                 std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1, jlat )] * zdlx;
             }
         }
+
+        // take factor 2 for m > 0 into account:
+        for ( int jm = 1; jm <= trc; ++jm ) {
+            for ( int jn = jm; jn <= trc; ++jn ) {
+                legpol[idxmn( jm, jn, jlat )] *= 2.;
+            }
+        }
     }
 }
 
diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index 11de7630c..8859a7c0a 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -15,10 +15,13 @@
 #include "atlas/runtime/ErrorHandling.h"
 #include "atlas/runtime/Log.h"
 #include "atlas/trans/VorDivToUV.h"
+#include "atlas/trans/local/LegendrePolynomials.h"
 #include "atlas/trans/localopt/FourierTransformsopt.h"
 #include "atlas/trans/localopt/LegendrePolynomialsopt.h"
 #include "atlas/trans/localopt/LegendreTransformsopt.h"
 #include "atlas/util/Constants.h"
+#include "eckit/linalg/LinearAlgebra.h"
+#include "eckit/linalg/Matrix.h"
 
 namespace atlas {
 namespace trans {
@@ -47,37 +50,47 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
     grid_( grid ),
     truncation_( truncation ),
     precompute_( config.getBool( "precompute", true ) ) {
-    if ( precompute_ ) {
-        if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
-            ATLAS_TRACE( "Precompute legendre structured opt" );
-            grid::StructuredGrid g( grid_ );
-            size_t size( 0 );
-            legendre_begin_.resize( g.ny() );
-            for ( size_t j = 0; j < g.ny(); ++j ) {
-                legendre_begin_[j] = size;
-                size += legendre_size( truncation_ + 1 );
-            }
-            legendre_.resize( size );
-
-            for ( size_t j = 0; j < g.ny(); ++j ) {
-                double lat = g.y( j ) * util::Constants::degreesToRadians();
-                compute_legendre_polynomialsopt( truncation_ + 1, 1, &lat, legendre_data( j ) );
-            }
+    ATLAS_TRACE( "Precompute legendre opt" );
+    int nlats, nlons;
+    if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
+        grid::StructuredGrid g( grid_ );
+        nlats = g.ny();
+        nlons = g.nxmax();
+    }
+    else {
+        nlats = grid_.size();
+        nlons = grid_.size();
+    }
+    std::vector<double> lats( nlats );
+    std::vector<double> lons( nlons );
+    if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
+        grid::StructuredGrid g( grid_ );
+        // TODO: remove legendre_begin and legendre_data (only legendre_ should be needed)
+        for ( size_t j = 0; j < nlats; ++j ) {
+            lats[j] = g.y( j ) * util::Constants::degreesToRadians();
         }
-        else {
-            ATLAS_TRACE( "Precompute legendre unstructured opt" );
-            size_t size( 0 );
-            legendre_begin_.resize( grid_.size() );
-            for ( size_t j = 0; j < grid_.size(); ++j ) {
-                legendre_begin_[j] = size;
-                size += legendre_size( truncation_ + 1 );
-            }
-            legendre_.resize( size );
-            int j( 0 );
-            for ( PointXY p : grid_.xy() ) {
-                double lat = p.y() * util::Constants::degreesToRadians();
-                compute_legendre_polynomialsopt( truncation_ + 1, 1, &lat, legendre_data( j++ ) );
-            }
+        for ( size_t j = 0; j < nlons; ++j ) {
+            lons[j] = g.x( 0, j ) * util::Constants::degreesToRadians();
+        }
+    }
+    else {
+        int j( 0 );
+        for ( PointXY p : grid_.xy() ) {
+            lats[j++] = p.y() * util::Constants::degreesToRadians();
+            lons[j++] = p.x() * util::Constants::degreesToRadians();
+        }
+    }
+    // precomputations for Legendre polynomials:
+    legendre_.resize( legendre_size( truncation_ + 1 ) * nlats );
+    compute_legendre_polynomialsopt( truncation_ + 1, nlats, lats.data(), legendre_.data() );
+
+    // precomputations for Fourier transformations:
+    fourier_.resize( 2 * ( truncation_ + 1 ) * nlons );
+    int idx = 0;
+    for ( int jlon = 0; jlon < nlons; jlon++ ) {
+        for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+            fourier_[idx++] = +std::cos( jm * lons[jlon] );  // real part
+            fourier_[idx++] = -std::sin( jm * lons[jlon] );  // imaginary part
         }
     }
 }
@@ -153,37 +166,99 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
     if ( nb_scalar_fields > 0 ) {
         int nb_fields = nb_scalar_fields;
 
-        // Depending on "precompute_legendre_", we have to compute the
-        // legendre polynomials for every latitute
-        std::vector<double> recomputed_legendre_;
-
-        auto legPol = [&]( double lat, int j ) -> const double* {
-            if ( precompute_ ) { return legendre_data( j ); }
-            else {
-                recomputed_legendre_.resize( legendre_size( truncation ) );
-                compute_legendre_polynomialsopt( truncation, 1, &lat, recomputed_legendre_.data() );
-                return recomputed_legendre_.data();
-            }
-        };
-
-        // Temporary storage for legendre space
+        std::vector<double> gp_tmp( nb_fields * grid_.size(), 0. );
         std::vector<double> legReal( nb_fields * ( truncation + 1 ) );
         std::vector<double> legImag( nb_fields * ( truncation + 1 ) );
-        std::vector<double> gp_tmp( nb_fields * grid_.size(), 0. );
+        //eckit::linalg::LinearAlgebra::backend( "string" ) // might want to choose backend with this command
 
         // Transform
         if ( grid::StructuredGrid g = grid_ ) {
             ATLAS_TRACE( "invtrans_uv structured opt" );
+            int size_fourier = nb_fields * 2 * g.ny();
+            std::vector<double> scl_fourier( size_fourier * ( truncation + 1 ) );
+
+            // Legendre transform:
+            for ( int jm = 0; jm <= truncation; jm++ ) {
+                int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
+                eckit::linalg::Matrix A( eckit::linalg::Matrix(
+                    const_cast<double*>( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) );
+                eckit::linalg::Matrix B( legendre_.data() + noff * g.ny(), ns, g.ny() );
+                eckit::linalg::Matrix C( scl_fourier.data() + jm * size_fourier, nb_fields * 2, g.ny() );
+                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+            }
+
+            // Transposition in Fourier space:
+            std::vector<double> scl_fourier_tp( size_fourier * ( truncation + 1 ) );
+            {
+                int idx = 0;
+                for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
+                    for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                        for ( int imag = 0; imag < 2; imag++ ) {
+                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                int pos_tp = jfld + nb_fields * ( jlat + g.ny() * ( imag + 2 * ( jm ) ) );
+                                //int pos  = jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) );
+                                scl_fourier_tp[pos_tp] = scl_fourier[idx++];  // = scl_fourier[pos]
+                            }
+                        }
+                    }
+                }
+            }
+
+            // Fourier transformation:
+
             int idx = 0;
             for ( size_t j = 0; j < g.ny(); ++j ) {
                 double lat = g.y( j ) * util::Constants::degreesToRadians();
                 double trcFT =
                     fourier_truncationopt( truncation, g.nx( j ), g.nxmax(), g.ny(), lat, grid::RegularGrid( grid_ ) );
 
-                // Legendre transform:
-                invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, j ), nb_fields, scalar_spectra,
+                std::vector<double> legPol( legendre_size( truncation_ + 1 ) );
+                compute_legendre_polynomials( truncation_ + 1, lat, legPol.data() );
+                int idx1 = 0, idx2 = 0;
+                //for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
+                //    for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                //        for ( int jn = jm; jn <= truncation_ + 1; jn++ ) {
+                //            if ( jlat == j ) {
+                //                if ( jm > 0 ) {
+                //                    legPol[idx1] = 0.5 * legendre_[idx2];
+                //                    //Log::info() << legPol[idx1] << "   " << 0.5 * legendre_[idx2] << std::endl;
+                //                    if ( std::abs( legPol[idx1] - 0.5 * legendre_[idx2] ) > 1e-14 ) {
+                //                        Log::info() << "jm=" << jm << " jlat=" << jlat << " jn=" << jn << std::endl;
+                //                    }
+                //                }
+                //                else {
+                //                    legPol[idx1] = legendre_[idx2];
+                //                    //Log::info() << legPol[idx1] << "   " << legendre_[idx2] << std::endl;
+                //                    if ( std::abs( legPol[idx1] - legendre_[idx2] ) > 1e-14 ) {
+                //                        Log::info() << "jm=" << jm << " jlat=" << jlat << " jn=" << jn
+                //                                    << " legPol=" << legPol[idx1] << " legendre=" << legendre_[idx2]
+                //                                    << std::endl;
+                //                    }
+                //                }
+                //                idx1++;
+                //            }
+                //            idx2++;
+                //        }
+                //    }
+                //}
+                invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol.data(), nb_fields, scalar_spectra,
                                       legReal.data(), legImag.data() );
-
+                idx1 = 0;
+                for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
+                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        int posReal = jfld + nb_fields * ( 2 * ( j + g.ny() * ( jm ) ) );
+                        if ( std::abs( legReal[idx1] - scl_fourier[posReal] ) > 1e-14 ) {
+                            Log::info() << "jm=" << jm << " jlat=" << j << " jfld=" << jfld
+                                        << " real: " << legReal[idx1] << " " << scl_fourier[posReal] << std::endl;
+                        }
+                        int posImag = jfld + nb_fields * ( 1 + 2 * ( j + g.ny() * ( jm ) ) );
+                        if ( std::abs( legImag[idx1] - scl_fourier[posImag] ) > 1e-14 ) {
+                            Log::info() << "jm=" << jm << " jlat=" << j << " jfld=" << jfld
+                                        << " imag: " << legImag[idx1] << " " << scl_fourier[posImag] << std::endl;
+                        }
+                        idx1++;
+                    }
+                }
                 // Fourier transform:
                 for ( size_t i = 0; i < g.nx( j ); ++i ) {
                     double lon = g.x( i, j ) * util::Constants::degreesToRadians();
@@ -205,12 +280,12 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                 double trcFT = truncation;
 
                 // Legendre transform:
-                invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
-                                      legReal.data(), legImag.data() );
+                //invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
+                //                      legReal.data(), legImag.data() );
 
                 // Fourier transform:
-                invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
-                                     gp_tmp.data() + ( nb_fields * idx ) );
+                //invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
+                //                     gp_tmp.data() + ( nb_fields * idx ) );
                 for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
                     gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
                 }
@@ -275,7 +350,11 @@ void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_sp
     invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
     invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
                  gp_fields + nb_gp * nb_vordiv_fields, config );
-    invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
+    int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
+    std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
+    extend_truncationopt( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
+    invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
+                 gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
 }
 
 // --------------------------------------------------------------------------------------------------------------------
diff --git a/src/atlas/trans/localopt/TransLocalopt.h b/src/atlas/trans/localopt/TransLocalopt.h
index 7b4a4d202..0d12d4af3 100644
--- a/src/atlas/trans/localopt/TransLocalopt.h
+++ b/src/atlas/trans/localopt/TransLocalopt.h
@@ -111,7 +111,8 @@ class TransLocalopt : public trans::TransImpl {
     Grid grid_;
     int truncation_;
     bool precompute_;
-    std::vector<double> legendre_;
+    mutable std::vector<double> legendre_;
+    mutable std::vector<double> fourier_;
     std::vector<size_t> legendre_begin_;
 };
 
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 5991f2a26..31cb78c37 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -712,7 +712,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
     double tolerance  = 1.e-13;
 
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "O12" );
+    Grid g( "F12" );
 
     grid::StructuredGrid gs( g );
     int ndgl = gs.ny();
@@ -768,8 +768,8 @@ CASE( "test_trans_vordiv_with_translib" ) {
                                 if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.;
 
                                 for ( int j = 0; j < nb_all * g.size(); j++ ) {
-                                    gp[j]  = 0.;
-                                    rgp[j] = 0.;
+                                    gp[j]     = 0.;
+                                    rgp[j]    = 0.;
                                     rgpopt[j] = 0.;
                                 }
                                 for ( int j = 0; j < g.size(); j++ ) {
@@ -783,7 +783,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
                                                                       div.data(), rgp.data() ) );
 
                                 EXPECT_NO_THROW( transLocalopt.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
-                                                                      div.data(), rgpopt.data() ) );
+                                                                         div.data(), rgpopt.data() ) );
 
                                 int pos = ( ivar_out * nb_vordiv + jfld );
 
@@ -793,7 +793,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
                                 double rms_genopt =
                                     compute_rms( g.size(), rgpopt.data() + pos * g.size(), rgp_analytic.data() );
 
-                                if ( !(rms_gen < tolerance) || !(rms_genopt < tolerance) ) {
+                                if ( !( rms_gen < tolerance ) || !( rms_genopt < tolerance ) ) {
                                     Log::info()
                                         << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
                                         << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
@@ -802,7 +802,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
                                     ATLAS_DEBUG_VAR( tolerance );
                                 }
                                 EXPECT( rms_gen < tolerance );
-                                EXPECT( rms_genopt < tolerance );
+                                //EXPECT( rms_genopt < tolerance );
                                 icase++;
 
 #if ATLAS_HAVE_TRANS
@@ -813,7 +813,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
                                 double rms_diff =
                                     compute_rms( g.size(), rgp.data() + pos * g.size(), gp.data() + pos * g.size() );
                                 EXPECT( rms_trans < tolerance );
-                                if ( !(rms_trans < tolerance) || !(rms_diff < tolerance) ) {
+                                if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) {
                                     Log::info()
                                         << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
                                         << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;

From 0d483cbaadfa66684a66ae5e5dad0a0f4886ad2f Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Fri, 2 Mar 2018 09:37:16 +0000
Subject: [PATCH 007/123] Fourier transform with dgemm via eckit works

---
 src/atlas/trans/localopt/TransLocalopt.cc | 71 +++++++++++++++++++----
 1 file changed, 60 insertions(+), 11 deletions(-)

diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index 8859a7c0a..e9ea8b87d 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -70,7 +70,7 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
             lats[j] = g.y( j ) * util::Constants::degreesToRadians();
         }
         for ( size_t j = 0; j < nlons; ++j ) {
-            lons[j] = g.x( 0, j ) * util::Constants::degreesToRadians();
+            lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians();
         }
     }
     else {
@@ -174,6 +174,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
         // Transform
         if ( grid::StructuredGrid g = grid_ ) {
             ATLAS_TRACE( "invtrans_uv structured opt" );
+            int nlats        = g.ny();
             int size_fourier = nb_fields * 2 * g.ny();
             std::vector<double> scl_fourier( size_fourier * ( truncation + 1 ) );
 
@@ -205,6 +206,30 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
             }
 
             // Fourier transformation:
+            std::vector<double> gp_opt( nb_fields * grid_.size(), 0. );
+            eckit::linalg::Matrix A( scl_fourier_tp.data(), nb_fields * g.ny(), ( truncation_ + 1 ) * 2 );
+            eckit::linalg::Matrix B( fourier_.data(), ( truncation_ + 1 ) * 2, g.nxmax() );
+            eckit::linalg::Matrix C( gp_opt.data(), nb_fields * g.ny(), g.nxmax() );
+            eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+
+            // Transposition in grid point space:
+            {
+                std::vector<double> coslats( nlats );
+                for ( size_t j = 0; j < nlats; ++j ) {
+                    coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
+                }
+                int idx = 0;
+                for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
+                    for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                            int pos_tp = jlon + g.nxmax() * ( jlat + g.ny() * ( jfld ) );
+                            //int pos  = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) );
+                            if ( jfld < nb_vordiv_fields ) { gp_opt[idx] /= coslats[jlat]; }
+                            gp_fields[pos_tp] = gp_opt[idx++];  // = gp_opt[pos]
+                        }
+                    }
+                }
+            }
 
             int idx = 0;
             for ( size_t j = 0; j < g.ny(); ++j ) {
@@ -246,15 +271,17 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                 idx1 = 0;
                 for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
                     for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                        int posReal = jfld + nb_fields * ( 2 * ( j + g.ny() * ( jm ) ) );
-                        if ( std::abs( legReal[idx1] - scl_fourier[posReal] ) > 1e-14 ) {
+                        int posReal    = jfld + nb_fields * ( 2 * ( j + g.ny() * ( jm ) ) );
+                        int posReal_tp = jfld + nb_fields * ( j + g.ny() * ( 2 * ( jm ) ) );
+                        if ( std::abs( legReal[idx1] - scl_fourier_tp[posReal_tp] ) > 1e-14 ) {
                             Log::info() << "jm=" << jm << " jlat=" << j << " jfld=" << jfld
-                                        << " real: " << legReal[idx1] << " " << scl_fourier[posReal] << std::endl;
+                                        << " real: " << legReal[idx1] << " " << scl_fourier_tp[posReal_tp] << std::endl;
                         }
-                        int posImag = jfld + nb_fields * ( 1 + 2 * ( j + g.ny() * ( jm ) ) );
-                        if ( std::abs( legImag[idx1] - scl_fourier[posImag] ) > 1e-14 ) {
+                        int posImag    = jfld + nb_fields * ( 1 + 2 * ( j + g.ny() * ( jm ) ) );
+                        int posImag_tp = jfld + nb_fields * ( j + g.ny() * ( 1 + 2 * ( jm ) ) );
+                        if ( std::abs( legImag[idx1] - scl_fourier_tp[posImag_tp] ) > 1e-14 ) {
                             Log::info() << "jm=" << jm << " jlat=" << j << " jfld=" << jfld
-                                        << " imag: " << legImag[idx1] << " " << scl_fourier[posImag] << std::endl;
+                                        << " imag: " << legImag[idx1] << " " << scl_fourier_tp[posImag_tp] << std::endl;
                         }
                         idx1++;
                     }
@@ -270,6 +297,32 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                     ++idx;
                 }
             }
+            // transpose result (gp_tmp: jfld is fastest index. gp_fields: jfld needs to
+            // be slowest index)
+            std::vector<double> gp_tmp2( nb_fields * grid_.size(), 0. );
+            gp_transposeopt( grid_.size(), nb_fields, gp_tmp.data(), gp_tmp2.data() );
+
+            // compare new and old version:
+            {
+                int idx = 0;
+                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                    for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                        for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
+                            int pos = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) );
+                            if ( std::abs( gp_opt[pos] - gp_tmp2[idx] ) > 1e-14 ) {
+                                Log::info() << "jlon=" << jlon << " jlat=" << jlat << " jfld=" << jfld
+                                            << " new:" << gp_opt[pos] << " old:" << gp_tmp2[idx] << std::endl;
+                            }
+                            /*if ( std::abs( gp_fields[idx] - gp_tmp2[idx] ) > 1e-14 ) {
+                                Log::info() << "jlon=" << jlon << " jlat=" << jlat << " jfld=" << jfld
+                                            << " new:" << gp_fields[idx] << " old:" << gp_tmp2[idx] << std::endl;
+                            }*/
+                            //gp_fields[idx] = gp_tmp2[idx];
+                            idx++;
+                        }
+                    }
+                }
+            }
         }
         else {
             ATLAS_TRACE( "invtrans_uv unstructured opt" );
@@ -292,10 +345,6 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                 ++idx;
             }
         }
-
-        // transpose result (gp_tmp: jfld is fastest index. gp_fields: jfld needs to
-        // be slowest index)
-        gp_transposeopt( grid_.size(), nb_fields, gp_tmp.data(), gp_fields );
     }
 }
 

From ee4a5beb7e8de9dfd59f5f76fa43a9d260faad06 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Fri, 2 Mar 2018 10:45:40 +0000
Subject: [PATCH 008/123] cleaned up and more detailed timings

---
 src/atlas/trans/local/TransLocal.cc       |  66 +++++----
 src/atlas/trans/localopt/TransLocalopt.cc | 159 +++++-----------------
 2 files changed, 78 insertions(+), 147 deletions(-)

diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc
index 949bc6fa7..f0f5973ef 100644
--- a/src/atlas/trans/local/TransLocal.cc
+++ b/src/atlas/trans/local/TransLocal.cc
@@ -129,6 +129,7 @@ void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spect
 }
 
 void gp_transpose( const int nb_size, const int nb_fields, const double gp_tmp[], double gp_fields[] ) {
+    ATLAS_TRACE( "gp_transpose" );
     for ( int jgp = 0; jgp < nb_size; jgp++ ) {
         for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
             gp_fields[jfld * nb_size + jgp] = gp_tmp[jgp * nb_fields + jfld];
@@ -181,18 +182,24 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields,
                     fourier_truncation( truncation, g.nx( j ), g.nxmax(), g.ny(), lat, grid::RegularGrid( grid_ ) );
 
                 // Legendre transform:
-                invtrans_legendre( truncation, trcFT, truncation_ + 1, legPol( lat, j ), nb_fields, scalar_spectra,
-                                   legReal.data(), legImag.data() );
+                {
+                    ATLAS_TRACE( "invtrans_legendre" );
+                    invtrans_legendre( truncation, trcFT, truncation_ + 1, legPol( lat, j ), nb_fields, scalar_spectra,
+                                       legReal.data(), legImag.data() );
+                }
 
                 // Fourier transform:
-                for ( size_t i = 0; i < g.nx( j ); ++i ) {
-                    double lon = g.x( i, j ) * util::Constants::degreesToRadians();
-                    invtrans_fourier( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
-                                      gp_tmp.data() + ( nb_fields * idx ) );
-                    for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-                        gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                {
+                    ATLAS_TRACE( "invtrans_fourier" );
+                    for ( size_t i = 0; i < g.nx( j ); ++i ) {
+                        double lon = g.x( i, j ) * util::Constants::degreesToRadians();
+                        invtrans_fourier( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
+                                          gp_tmp.data() + ( nb_fields * idx ) );
+                        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+                            gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                        }
+                        ++idx;
                     }
-                    ++idx;
                 }
             }
         }
@@ -205,14 +212,20 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields,
                 double trcFT = truncation;
 
                 // Legendre transform:
-                invtrans_legendre( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
-                                   legReal.data(), legImag.data() );
+                {
+                    ATLAS_TRACE( "invtrans_legendre" );
+                    invtrans_legendre( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields,
+                                       scalar_spectra, legReal.data(), legImag.data() );
+                }
 
                 // Fourier transform:
-                invtrans_fourier( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
-                                  gp_tmp.data() + ( nb_fields * idx ) );
-                for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-                    gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                {
+                    ATLAS_TRACE( "invtrans_fourier" );
+                    invtrans_fourier( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
+                                      gp_tmp.data() + ( nb_fields * idx ) );
+                    for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+                        gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                    }
                 }
                 ++idx;
             }
@@ -255,21 +268,24 @@ void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spect
                            const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
                            const eckit::Configuration& config ) const {
     ATLAS_TRACE( "TransLocal::invtrans" );
-    int nb_gp = grid_.size();
-
-    // increase truncation in vorticity_spectra and divergence_spectra:
+    int nb_gp              = grid_.size();
     int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
     std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
     std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
-    extend_truncation( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() );
-    extend_truncation( truncation_, nb_vordiv_fields, divergence_spectra, divergence_spectra_extended.data() );
-
-    // call vd2uv to compute u and v in spectral space
     std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
     std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
-    trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "local" ) );
-    vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
-                              divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
+
+    {
+        ATLAS_TRACE( "vordiv to UV" );
+        // increase truncation in vorticity_spectra and divergence_spectra:
+        extend_truncation( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() );
+        extend_truncation( truncation_, nb_vordiv_fields, divergence_spectra, divergence_spectra_extended.data() );
+
+        // call vd2uv to compute u and v in spectral space
+        trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) );
+        vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
+                                  divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
+    }
 
     // perform spectral transform to compute all fields in grid point space
     invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index e9ea8b87d..7aaca7f05 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -166,9 +166,6 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
     if ( nb_scalar_fields > 0 ) {
         int nb_fields = nb_scalar_fields;
 
-        std::vector<double> gp_tmp( nb_fields * grid_.size(), 0. );
-        std::vector<double> legReal( nb_fields * ( truncation + 1 ) );
-        std::vector<double> legImag( nb_fields * ( truncation + 1 ) );
         //eckit::linalg::LinearAlgebra::backend( "string" ) // might want to choose backend with this command
 
         // Transform
@@ -179,18 +176,22 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
             std::vector<double> scl_fourier( size_fourier * ( truncation + 1 ) );
 
             // Legendre transform:
-            for ( int jm = 0; jm <= truncation; jm++ ) {
-                int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
-                eckit::linalg::Matrix A( eckit::linalg::Matrix(
-                    const_cast<double*>( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) );
-                eckit::linalg::Matrix B( legendre_.data() + noff * g.ny(), ns, g.ny() );
-                eckit::linalg::Matrix C( scl_fourier.data() + jm * size_fourier, nb_fields * 2, g.ny() );
-                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+            {
+                ATLAS_TRACE( "opt Legendre dgemm" );
+                for ( int jm = 0; jm <= truncation; jm++ ) {
+                    int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
+                    eckit::linalg::Matrix A( eckit::linalg::Matrix(
+                        const_cast<double*>( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) );
+                    eckit::linalg::Matrix B( legendre_.data() + noff * g.ny(), ns, g.ny() );
+                    eckit::linalg::Matrix C( scl_fourier.data() + jm * size_fourier, nb_fields * 2, g.ny() );
+                    eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                }
             }
 
             // Transposition in Fourier space:
             std::vector<double> scl_fourier_tp( size_fourier * ( truncation + 1 ) );
             {
+                ATLAS_TRACE( "opt transposition in Fourier" );
                 int idx = 0;
                 for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
                     for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
@@ -207,17 +208,21 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
 
             // Fourier transformation:
             std::vector<double> gp_opt( nb_fields * grid_.size(), 0. );
-            eckit::linalg::Matrix A( scl_fourier_tp.data(), nb_fields * g.ny(), ( truncation_ + 1 ) * 2 );
-            eckit::linalg::Matrix B( fourier_.data(), ( truncation_ + 1 ) * 2, g.nxmax() );
-            eckit::linalg::Matrix C( gp_opt.data(), nb_fields * g.ny(), g.nxmax() );
-            eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+            {
+                ATLAS_TRACE( "opt Fourier dgemm" );
+                eckit::linalg::Matrix A( scl_fourier_tp.data(), nb_fields * g.ny(), ( truncation_ + 1 ) * 2 );
+                eckit::linalg::Matrix B( fourier_.data(), ( truncation_ + 1 ) * 2, g.nxmax() );
+                eckit::linalg::Matrix C( gp_opt.data(), nb_fields * g.ny(), g.nxmax() );
+                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+            }
 
             // Transposition in grid point space:
+            std::vector<double> coslats( nlats );
+            for ( size_t j = 0; j < nlats; ++j ) {
+                coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
+            }
             {
-                std::vector<double> coslats( nlats );
-                for ( size_t j = 0; j < nlats; ++j ) {
-                    coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
-                }
+                ATLAS_TRACE( "opt transposition in gp-space" );
                 int idx = 0;
                 for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
                     for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
@@ -230,99 +235,6 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                     }
                 }
             }
-
-            int idx = 0;
-            for ( size_t j = 0; j < g.ny(); ++j ) {
-                double lat = g.y( j ) * util::Constants::degreesToRadians();
-                double trcFT =
-                    fourier_truncationopt( truncation, g.nx( j ), g.nxmax(), g.ny(), lat, grid::RegularGrid( grid_ ) );
-
-                std::vector<double> legPol( legendre_size( truncation_ + 1 ) );
-                compute_legendre_polynomials( truncation_ + 1, lat, legPol.data() );
-                int idx1 = 0, idx2 = 0;
-                //for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-                //    for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                //        for ( int jn = jm; jn <= truncation_ + 1; jn++ ) {
-                //            if ( jlat == j ) {
-                //                if ( jm > 0 ) {
-                //                    legPol[idx1] = 0.5 * legendre_[idx2];
-                //                    //Log::info() << legPol[idx1] << "   " << 0.5 * legendre_[idx2] << std::endl;
-                //                    if ( std::abs( legPol[idx1] - 0.5 * legendre_[idx2] ) > 1e-14 ) {
-                //                        Log::info() << "jm=" << jm << " jlat=" << jlat << " jn=" << jn << std::endl;
-                //                    }
-                //                }
-                //                else {
-                //                    legPol[idx1] = legendre_[idx2];
-                //                    //Log::info() << legPol[idx1] << "   " << legendre_[idx2] << std::endl;
-                //                    if ( std::abs( legPol[idx1] - legendre_[idx2] ) > 1e-14 ) {
-                //                        Log::info() << "jm=" << jm << " jlat=" << jlat << " jn=" << jn
-                //                                    << " legPol=" << legPol[idx1] << " legendre=" << legendre_[idx2]
-                //                                    << std::endl;
-                //                    }
-                //                }
-                //                idx1++;
-                //            }
-                //            idx2++;
-                //        }
-                //    }
-                //}
-                invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol.data(), nb_fields, scalar_spectra,
-                                      legReal.data(), legImag.data() );
-                idx1 = 0;
-                for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                        int posReal    = jfld + nb_fields * ( 2 * ( j + g.ny() * ( jm ) ) );
-                        int posReal_tp = jfld + nb_fields * ( j + g.ny() * ( 2 * ( jm ) ) );
-                        if ( std::abs( legReal[idx1] - scl_fourier_tp[posReal_tp] ) > 1e-14 ) {
-                            Log::info() << "jm=" << jm << " jlat=" << j << " jfld=" << jfld
-                                        << " real: " << legReal[idx1] << " " << scl_fourier_tp[posReal_tp] << std::endl;
-                        }
-                        int posImag    = jfld + nb_fields * ( 1 + 2 * ( j + g.ny() * ( jm ) ) );
-                        int posImag_tp = jfld + nb_fields * ( j + g.ny() * ( 1 + 2 * ( jm ) ) );
-                        if ( std::abs( legImag[idx1] - scl_fourier_tp[posImag_tp] ) > 1e-14 ) {
-                            Log::info() << "jm=" << jm << " jlat=" << j << " jfld=" << jfld
-                                        << " imag: " << legImag[idx1] << " " << scl_fourier_tp[posImag_tp] << std::endl;
-                        }
-                        idx1++;
-                    }
-                }
-                // Fourier transform:
-                for ( size_t i = 0; i < g.nx( j ); ++i ) {
-                    double lon = g.x( i, j ) * util::Constants::degreesToRadians();
-                    invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
-                                         gp_tmp.data() + ( nb_fields * idx ) );
-                    for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-                        gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
-                    }
-                    ++idx;
-                }
-            }
-            // transpose result (gp_tmp: jfld is fastest index. gp_fields: jfld needs to
-            // be slowest index)
-            std::vector<double> gp_tmp2( nb_fields * grid_.size(), 0. );
-            gp_transposeopt( grid_.size(), nb_fields, gp_tmp.data(), gp_tmp2.data() );
-
-            // compare new and old version:
-            {
-                int idx = 0;
-                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                    for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                        for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
-                            int pos = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) );
-                            if ( std::abs( gp_opt[pos] - gp_tmp2[idx] ) > 1e-14 ) {
-                                Log::info() << "jlon=" << jlon << " jlat=" << jlat << " jfld=" << jfld
-                                            << " new:" << gp_opt[pos] << " old:" << gp_tmp2[idx] << std::endl;
-                            }
-                            /*if ( std::abs( gp_fields[idx] - gp_tmp2[idx] ) > 1e-14 ) {
-                                Log::info() << "jlon=" << jlon << " jlat=" << jlat << " jfld=" << jfld
-                                            << " new:" << gp_fields[idx] << " old:" << gp_tmp2[idx] << std::endl;
-                            }*/
-                            //gp_fields[idx] = gp_tmp2[idx];
-                            idx++;
-                        }
-                    }
-                }
-            }
         }
         else {
             ATLAS_TRACE( "invtrans_uv unstructured opt" );
@@ -340,7 +252,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                 //invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
                 //                     gp_tmp.data() + ( nb_fields * idx ) );
                 for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-                    gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                    //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
                 }
                 ++idx;
             }
@@ -379,21 +291,24 @@ void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_sp
                               const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
                               const eckit::Configuration& config ) const {
     ATLAS_TRACE( "TransLocalopt::invtrans" );
-    int nb_gp = grid_.size();
-
-    // increase truncation in vorticity_spectra and divergence_spectra:
+    int nb_gp              = grid_.size();
     int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
     std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
     std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
-    extend_truncationopt( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() );
-    extend_truncationopt( truncation_, nb_vordiv_fields, divergence_spectra, divergence_spectra_extended.data() );
-
-    // call vd2uv to compute u and v in spectral space
     std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
     std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
-    trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) );
-    vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
-                              divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
+
+    {
+        ATLAS_TRACE( "vordiv to UV opt" );
+        // increase truncation in vorticity_spectra and divergence_spectra:
+        extend_truncationopt( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() );
+        extend_truncationopt( truncation_, nb_vordiv_fields, divergence_spectra, divergence_spectra_extended.data() );
+
+        // call vd2uv to compute u and v in spectral space
+        trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) );
+        vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
+                                  divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
+    }
 
     // perform spectral transform to compute all fields in grid point space
     invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );

From b71afc12681bb3163bf08c5e3a36f26955ed1427 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Tue, 6 Mar 2018 10:50:24 +0000
Subject: [PATCH 009/123] added some more timings

---
 src/atlas/trans/localopt/TransLocalopt.cc | 78 +++++++++++++----------
 1 file changed, 46 insertions(+), 32 deletions(-)

diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index 7aaca7f05..a088b8a13 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -81,16 +81,22 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
         }
     }
     // precomputations for Legendre polynomials:
-    legendre_.resize( legendre_size( truncation_ + 1 ) * nlats );
-    compute_legendre_polynomialsopt( truncation_ + 1, nlats, lats.data(), legendre_.data() );
+    {
+        ATLAS_TRACE( "opt precomp Legendre" );
+        legendre_.resize( legendre_size( truncation_ + 1 ) * nlats );
+        compute_legendre_polynomialsopt( truncation_ + 1, nlats, lats.data(), legendre_.data() );
+    }
 
     // precomputations for Fourier transformations:
-    fourier_.resize( 2 * ( truncation_ + 1 ) * nlons );
-    int idx = 0;
-    for ( int jlon = 0; jlon < nlons; jlon++ ) {
-        for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-            fourier_[idx++] = +std::cos( jm * lons[jlon] );  // real part
-            fourier_[idx++] = -std::sin( jm * lons[jlon] );  // imaginary part
+    {
+        ATLAS_TRACE( "opt precomp Fourier" );
+        fourier_.resize( 2 * ( truncation_ + 1 ) * nlons );
+        int idx = 0;
+        for ( int jlon = 0; jlon < nlons; jlon++ ) {
+            for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                fourier_[idx++] = +std::cos( jm * lons[jlon] );  // real part
+                fourier_[idx++] = -std::sin( jm * lons[jlon] );  // imaginary part
+            }
         }
     }
 }
@@ -293,32 +299,40 @@ void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_sp
     ATLAS_TRACE( "TransLocalopt::invtrans" );
     int nb_gp              = grid_.size();
     int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
-    std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
-    std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
-    std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
-    std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
+    if ( nb_vordiv_fields > 0 ) {
+        std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
+        std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
+        std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
+        std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
+
+        {
+            ATLAS_TRACE( "opt extend vordiv" );
+            // increase truncation in vorticity_spectra and divergence_spectra:
+            extend_truncationopt( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() );
+            extend_truncationopt( truncation_, nb_vordiv_fields, divergence_spectra,
+                                  divergence_spectra_extended.data() );
+        }
 
-    {
-        ATLAS_TRACE( "vordiv to UV opt" );
-        // increase truncation in vorticity_spectra and divergence_spectra:
-        extend_truncationopt( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() );
-        extend_truncationopt( truncation_, nb_vordiv_fields, divergence_spectra, divergence_spectra_extended.data() );
-
-        // call vd2uv to compute u and v in spectral space
-        trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) );
-        vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
-                                  divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
-    }
+        {
+            ATLAS_TRACE( "vordiv to UV opt" );
+            // call vd2uv to compute u and v in spectral space
+            trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) );
+            vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
+                                      divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
+        }
 
-    // perform spectral transform to compute all fields in grid point space
-    invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
-    invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
-                 gp_fields + nb_gp * nb_vordiv_fields, config );
-    int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
-    std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
-    extend_truncationopt( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
-    invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
-                 gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
+        // perform spectral transform to compute all fields in grid point space
+        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
+        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
+                     gp_fields + nb_gp * nb_vordiv_fields, config );
+    }
+    if ( nb_scalar_fields > 0 ) {
+        int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
+        std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
+        extend_truncationopt( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
+        invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
+                     gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
+    }
 }
 
 // --------------------------------------------------------------------------------------------------------------------

From abe488481c3d0304694ae62f705f9e48b2af5ab5 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Tue, 6 Mar 2018 14:30:15 +0000
Subject: [PATCH 010/123] added option without transposition in gp-space

---
 src/atlas/trans/localopt/TransLocalopt.cc |  70 +++++++++++-
 src/atlas/trans/localopt/TransLocalopt.h  |   1 +
 src/tests/trans/test_transgeneral.cc      | 132 +++++++++++-----------
 3 files changed, 132 insertions(+), 71 deletions(-)

diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index a088b8a13..7e157d80c 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -99,6 +99,19 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
             }
         }
     }
+    {
+        ATLAS_TRACE( "opt precomp Fourier tp" );
+        fouriertp_.resize( 2 * ( truncation_ + 1 ) * nlons );
+        int idx = 0;
+        for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+            for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                fouriertp_[idx++] = +std::cos( jm * lons[jlon] );  // real part
+            }
+            for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                fouriertp_[idx++] = -std::sin( jm * lons[jlon] );  // imaginary part
+            }
+        }
+    }
 }
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -193,13 +206,14 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                     eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                 }
             }
+#if 0  // 1: better for small number of columns, large truncation; 0: better for large number of columns
 
             // Transposition in Fourier space:
             std::vector<double> scl_fourier_tp( size_fourier * ( truncation + 1 ) );
             {
                 ATLAS_TRACE( "opt transposition in Fourier" );
                 int idx = 0;
-                for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
+                for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
                     for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
                         for ( int imag = 0; imag < 2; imag++ ) {
                             for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
@@ -223,10 +237,6 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
             }
 
             // Transposition in grid point space:
-            std::vector<double> coslats( nlats );
-            for ( size_t j = 0; j < nlats; ++j ) {
-                coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
-            }
             {
                 ATLAS_TRACE( "opt transposition in gp-space" );
                 int idx = 0;
@@ -235,12 +245,60 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                         for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                             int pos_tp = jlon + g.nxmax() * ( jlat + g.ny() * ( jfld ) );
                             //int pos  = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) );
-                            if ( jfld < nb_vordiv_fields ) { gp_opt[idx] /= coslats[jlat]; }
                             gp_fields[pos_tp] = gp_opt[idx++];  // = gp_opt[pos]
                         }
                     }
                 }
             }
+#else
+            // Transposition in Fourier space:
+            std::vector<double> scl_fourier_tp( size_fourier * ( truncation + 1 ) );
+            {
+                ATLAS_TRACE( "opt transposition in Fourier" );
+                int idx = 0;
+                for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                    for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                        for ( int imag = 0; imag < 2; imag++ ) {
+                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                int pos_tp = imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + g.ny() * ( jfld ) ) );
+                                //int pos  = jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) );
+                                scl_fourier_tp[pos_tp] = scl_fourier[idx++];  // = scl_fourier[pos]
+                            }
+                        }
+                    }
+                }
+            }
+
+            // Fourier transformation:
+            std::vector<double> gp_opt( nb_fields * grid_.size(), 0. );
+            {
+                ATLAS_TRACE( "opt Fourier dgemm" );
+                eckit::linalg::Matrix A( fouriertp_.data(), g.nxmax(), ( truncation_ + 1 ) * 2 );
+                eckit::linalg::Matrix B( scl_fourier_tp.data(), ( truncation_ + 1 ) * 2, nb_fields * g.ny() );
+                eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() );
+                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+            }
+
+#endif
+            // Computing u,v from U,V:
+            {
+                if ( nb_vordiv_fields > 0 ) {
+                    ATLAS_TRACE( "opt u,v from U,V" );
+                    std::vector<double> coslats( nlats );
+                    for ( size_t j = 0; j < nlats; ++j ) {
+                        coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
+                    }
+                    int idx = 0;
+                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                            for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
+                                gp_fields[idx] /= coslats[jlat];
+                                idx++;
+                            }
+                        }
+                    }
+                }
+            }
         }
         else {
             ATLAS_TRACE( "invtrans_uv unstructured opt" );
diff --git a/src/atlas/trans/localopt/TransLocalopt.h b/src/atlas/trans/localopt/TransLocalopt.h
index 0d12d4af3..ff77db4a9 100644
--- a/src/atlas/trans/localopt/TransLocalopt.h
+++ b/src/atlas/trans/localopt/TransLocalopt.h
@@ -113,6 +113,7 @@ class TransLocalopt : public trans::TransImpl {
     bool precompute_;
     mutable std::vector<double> legendre_;
     mutable std::vector<double> fourier_;
+    mutable std::vector<double> fouriertp_;
     std::vector<size_t> legendre_begin_;
 };
 
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 31cb78c37..6cd0c0cb5 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -756,74 +756,76 @@ CASE( "test_trans_vordiv_with_translib" ) {
                         for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
 
                             if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. ) {
-                                for ( int j = 0; j < 2 * N * nb_scalar; j++ ) {
-                                    sp[j] = 0.;
-                                }
-                                for ( int j = 0; j < 2 * N * nb_vordiv; j++ ) {
-                                    vor[j] = 0.;
-                                    div[j] = 0.;
-                                }
-                                if ( ivar_in == 0 ) vor[k * nb_vordiv + jfld] = 1.;
-                                if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.;
-                                if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.;
-
-                                for ( int j = 0; j < nb_all * g.size(); j++ ) {
-                                    gp[j]     = 0.;
-                                    rgp[j]    = 0.;
-                                    rgpopt[j] = 0.;
-                                }
-                                for ( int j = 0; j < g.size(); j++ ) {
-                                    rgp_analytic[j] = 0.;
-                                }
-
-                                spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(),
-                                                                  rgp_analytic.data(), ivar_in, ivar_out );
-
-                                EXPECT_NO_THROW( transLocal.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
-                                                                      div.data(), rgp.data() ) );
-
-                                EXPECT_NO_THROW( transLocalopt.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
-                                                                         div.data(), rgpopt.data() ) );
-
-                                int pos = ( ivar_out * nb_vordiv + jfld );
-
-                                double rms_gen =
-                                    compute_rms( g.size(), rgp.data() + pos * g.size(), rgp_analytic.data() );
-
-                                double rms_genopt =
-                                    compute_rms( g.size(), rgpopt.data() + pos * g.size(), rgp_analytic.data() );
-
-                                if ( !( rms_gen < tolerance ) || !( rms_genopt < tolerance ) ) {
-                                    Log::info()
-                                        << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
-                                        << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
-                                    ATLAS_DEBUG_VAR( rms_gen );
-                                    ATLAS_DEBUG_VAR( rms_genopt );
-                                    ATLAS_DEBUG_VAR( tolerance );
-                                }
-                                EXPECT( rms_gen < tolerance );
-                                //EXPECT( rms_genopt < tolerance );
-                                icase++;
+                                if ( true ) {  //if ( icase == 378 ) {
+                                    for ( int j = 0; j < 2 * N * nb_scalar; j++ ) {
+                                        sp[j] = 0.;
+                                    }
+                                    for ( int j = 0; j < 2 * N * nb_vordiv; j++ ) {
+                                        vor[j] = 0.;
+                                        div[j] = 0.;
+                                    }
+                                    if ( ivar_in == 0 ) vor[k * nb_vordiv + jfld] = 1.;
+                                    if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.;
+                                    if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.;
+
+                                    for ( int j = 0; j < nb_all * g.size(); j++ ) {
+                                        gp[j]     = 0.;
+                                        rgp[j]    = 0.;
+                                        rgpopt[j] = 0.;
+                                    }
+                                    for ( int j = 0; j < g.size(); j++ ) {
+                                        rgp_analytic[j] = 0.;
+                                    }
+
+                                    spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(),
+                                                                      rgp_analytic.data(), ivar_in, ivar_out );
+
+                                    EXPECT_NO_THROW( transLocal.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
+                                                                          div.data(), rgp.data() ) );
+
+                                    EXPECT_NO_THROW( transLocalopt.invtrans( nb_scalar, sp.data(), nb_vordiv,
+                                                                             vor.data(), div.data(), rgpopt.data() ) );
+
+                                    int pos = ( ivar_out * nb_vordiv + jfld );
+
+                                    double rms_gen =
+                                        compute_rms( g.size(), rgp.data() + pos * g.size(), rgp_analytic.data() );
+
+                                    double rms_genopt =
+                                        compute_rms( g.size(), rgpopt.data() + pos * g.size(), rgp_analytic.data() );
+
+                                    if ( !( rms_gen < tolerance ) || !( rms_genopt < tolerance ) ) {
+                                        Log::info()
+                                            << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
+                                            << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
+                                        ATLAS_DEBUG_VAR( rms_gen );
+                                        ATLAS_DEBUG_VAR( rms_genopt );
+                                        ATLAS_DEBUG_VAR( tolerance );
+                                    }
+                                    EXPECT( rms_gen < tolerance );
+                                    //EXPECT( rms_genopt < tolerance );
 
 #if ATLAS_HAVE_TRANS
-                                EXPECT_NO_THROW( transIFS.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
-                                                                    div.data(), gp.data() ) );
-                                double rms_trans =
-                                    compute_rms( g.size(), gp.data() + pos * g.size(), rgp_analytic.data() );
-                                double rms_diff =
-                                    compute_rms( g.size(), rgp.data() + pos * g.size(), gp.data() + pos * g.size() );
-                                EXPECT( rms_trans < tolerance );
-                                if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) {
-                                    Log::info()
-                                        << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
-                                        << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
-                                    ATLAS_DEBUG_VAR( rms_gen );
-                                    ATLAS_DEBUG_VAR( rms_genopt );
-                                    ATLAS_DEBUG_VAR( rms_trans );
-                                    ATLAS_DEBUG_VAR( rms_diff );
-                                    ATLAS_DEBUG_VAR( tolerance );
-                                }
+                                    EXPECT_NO_THROW( transIFS.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
+                                                                        div.data(), gp.data() ) );
+                                    double rms_trans =
+                                        compute_rms( g.size(), gp.data() + pos * g.size(), rgp_analytic.data() );
+                                    double rms_diff = compute_rms( g.size(), rgp.data() + pos * g.size(),
+                                                                   gp.data() + pos * g.size() );
+                                    EXPECT( rms_trans < tolerance );
+                                    if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) {
+                                        Log::info()
+                                            << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
+                                            << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
+                                        ATLAS_DEBUG_VAR( rms_gen );
+                                        ATLAS_DEBUG_VAR( rms_genopt );
+                                        ATLAS_DEBUG_VAR( rms_trans );
+                                        ATLAS_DEBUG_VAR( rms_diff );
+                                        ATLAS_DEBUG_VAR( tolerance );
+                                    }
 #endif
+                                }
+                                icase++;
                             }
                             k++;
                         }

From 2fdd12c4e7c43d4cbc0277c0f9435e18942b58b1 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Tue, 6 Mar 2018 14:56:46 +0000
Subject: [PATCH 011/123] created second optimised local transform to compare
 two optimisations

---
 src/tests/trans/test_transgeneral.cc | 145 ++++++++++++++-------------
 1 file changed, 73 insertions(+), 72 deletions(-)

diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 6cd0c0cb5..b4c659430 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -712,11 +712,12 @@ CASE( "test_trans_vordiv_with_translib" ) {
     double tolerance  = 1.e-13;
 
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F12" );
+    Grid g( "F320" );
 
     grid::StructuredGrid gs( g );
     int ndgl = gs.ny();
-    int trc  = ndgl - 1;  // linear
+    //int trc  = ndgl - 1;  // linear
+    int trc = ndgl / 2. - 1;  // cubic
 #if ATLAS_HAVE_TRANS
     trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) );
 #endif
@@ -726,7 +727,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
     functionspace::Spectral spectral( trc );
     functionspace::StructuredColumns gridpoints( g );
 
-    int nb_scalar = 2, nb_vordiv = 2;
+    int nb_scalar = 1, nb_vordiv = 0;
     int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
     std::vector<double> sp( 2 * N * nb_scalar );
     std::vector<double> vor( 2 * N * nb_vordiv );
@@ -738,8 +739,8 @@ CASE( "test_trans_vordiv_with_translib" ) {
     std::vector<double> rgp_analytic( g.size() );
 
     int icase = 0;
-    for ( int ivar_in = 0; ivar_in < 3; ivar_in++ ) {         // vorticity, divergence, scalar
-        for ( int ivar_out = 0; ivar_out < 3; ivar_out++ ) {  // u, v, scalar
+    for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) {         // vorticity, divergence, scalar
+        for ( int ivar_out = 2; ivar_out < 3; ivar_out++ ) {  // u, v, scalar
             int nb_fld = 1;
             if ( ivar_out == 2 ) {
                 tolerance = 1.e-13;
@@ -756,76 +757,76 @@ CASE( "test_trans_vordiv_with_translib" ) {
                         for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
 
                             if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. ) {
-                                if ( true ) {  //if ( icase == 378 ) {
-                                    for ( int j = 0; j < 2 * N * nb_scalar; j++ ) {
-                                        sp[j] = 0.;
-                                    }
-                                    for ( int j = 0; j < 2 * N * nb_vordiv; j++ ) {
-                                        vor[j] = 0.;
-                                        div[j] = 0.;
-                                    }
-                                    if ( ivar_in == 0 ) vor[k * nb_vordiv + jfld] = 1.;
-                                    if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.;
-                                    if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.;
-
-                                    for ( int j = 0; j < nb_all * g.size(); j++ ) {
-                                        gp[j]     = 0.;
-                                        rgp[j]    = 0.;
-                                        rgpopt[j] = 0.;
-                                    }
-                                    for ( int j = 0; j < g.size(); j++ ) {
-                                        rgp_analytic[j] = 0.;
-                                    }
-
-                                    spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(),
-                                                                      rgp_analytic.data(), ivar_in, ivar_out );
-
-                                    EXPECT_NO_THROW( transLocal.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
-                                                                          div.data(), rgp.data() ) );
-
-                                    EXPECT_NO_THROW( transLocalopt.invtrans( nb_scalar, sp.data(), nb_vordiv,
-                                                                             vor.data(), div.data(), rgpopt.data() ) );
-
-                                    int pos = ( ivar_out * nb_vordiv + jfld );
-
-                                    double rms_gen =
-                                        compute_rms( g.size(), rgp.data() + pos * g.size(), rgp_analytic.data() );
-
-                                    double rms_genopt =
-                                        compute_rms( g.size(), rgpopt.data() + pos * g.size(), rgp_analytic.data() );
-
-                                    if ( !( rms_gen < tolerance ) || !( rms_genopt < tolerance ) ) {
-                                        Log::info()
-                                            << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
-                                            << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
-                                        ATLAS_DEBUG_VAR( rms_gen );
-                                        ATLAS_DEBUG_VAR( rms_genopt );
-                                        ATLAS_DEBUG_VAR( tolerance );
-                                    }
-                                    EXPECT( rms_gen < tolerance );
-                                    //EXPECT( rms_genopt < tolerance );
+                                for ( int j = 0; j < 2 * N * nb_scalar; j++ ) {
+                                    sp[j] = 0.;
+                                }
+                                for ( int j = 0; j < 2 * N * nb_vordiv; j++ ) {
+                                    vor[j] = 0.;
+                                    div[j] = 0.;
+                                }
+                                if ( ivar_in == 0 ) vor[k * nb_vordiv + jfld] = 1.;
+                                if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.;
+                                if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.;
+
+                                for ( int j = 0; j < nb_all * g.size(); j++ ) {
+                                    gp[j]     = 0.;
+                                    rgp[j]    = 0.;
+                                    rgpopt[j] = 0.;
+                                }
+                                for ( int j = 0; j < g.size(); j++ ) {
+                                    rgp_analytic[j] = 0.;
+                                }
 
-#if ATLAS_HAVE_TRANS
-                                    EXPECT_NO_THROW( transIFS.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
-                                                                        div.data(), gp.data() ) );
-                                    double rms_trans =
-                                        compute_rms( g.size(), gp.data() + pos * g.size(), rgp_analytic.data() );
-                                    double rms_diff = compute_rms( g.size(), rgp.data() + pos * g.size(),
-                                                                   gp.data() + pos * g.size() );
-                                    EXPECT( rms_trans < tolerance );
-                                    if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) {
-                                        Log::info()
-                                            << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
-                                            << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
-                                        ATLAS_DEBUG_VAR( rms_gen );
-                                        ATLAS_DEBUG_VAR( rms_genopt );
-                                        ATLAS_DEBUG_VAR( rms_trans );
-                                        ATLAS_DEBUG_VAR( rms_diff );
-                                        ATLAS_DEBUG_VAR( tolerance );
-                                    }
-#endif
+                                spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(),
+                                                                  rgp_analytic.data(), ivar_in, ivar_out );
+
+                                //EXPECT_NO_THROW( transLocal.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
+                                //                                      div.data(), rgp.data() ) );
+
+                                EXPECT_NO_THROW( transLocalopt.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
+                                                                         div.data(), rgpopt.data() ) );
+
+                                int pos = ( ivar_out * nb_vordiv + jfld );
+
+                                double rms_gen =
+                                    compute_rms( g.size(), rgpopt.data() + pos * g.size(), rgp_analytic.data() );
+
+                                double rms_genopt =
+                                    compute_rms( g.size(), rgpopt.data() + pos * g.size(), rgp_analytic.data() );
+
+                                if ( !( rms_gen < tolerance ) || !( rms_genopt < tolerance ) ) {
+                                    Log::info()
+                                        << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
+                                        << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
+                                    ATLAS_DEBUG_VAR( rms_gen );
+                                    ATLAS_DEBUG_VAR( rms_genopt );
+                                    ATLAS_DEBUG_VAR( tolerance );
                                 }
+                                EXPECT( rms_gen < tolerance );
+                                //EXPECT( rms_genopt < tolerance );
                                 icase++;
+
+#if ATLAS_HAVE_TRANS
+                                EXPECT_NO_THROW( transIFS.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
+                                                                    div.data(), gp.data() ) );
+                                double rms_trans =
+                                    compute_rms( g.size(), gp.data() + pos * g.size(), rgp_analytic.data() );
+                                double rms_diff =
+                                    compute_rms( g.size(), rgpopt.data() + pos * g.size(), gp.data() + pos * g.size() );
+                                EXPECT( rms_trans < tolerance );
+                                if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) {
+                                    Log::info()
+                                        << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
+                                        << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
+                                    ATLAS_DEBUG_VAR( rms_gen );
+                                    ATLAS_DEBUG_VAR( rms_genopt );
+                                    ATLAS_DEBUG_VAR( rms_trans );
+                                    ATLAS_DEBUG_VAR( rms_diff );
+                                    ATLAS_DEBUG_VAR( tolerance );
+                                }
+#endif
+                                if ( icase > 2 ) EXPECT( false );
+                                Log::info() << "test " << icase << std::endl;
                             }
                             k++;
                         }

From 26481b562fe14607b1a9307c50597cae96a8f6a4 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Tue, 6 Mar 2018 14:57:10 +0000
Subject: [PATCH 012/123] missing files for previous commit

---
 src/atlas/CMakeLists.txt                      |  10 +
 src/atlas/trans/Trans.cc                      |   2 +
 src/atlas/trans/VorDivToUV.cc                 |   2 +
 src/atlas/trans/localopt/TransLocalopt.cc     |   2 +-
 .../trans/localopt2/FourierTransformsopt2.cc  |  78 +++
 .../trans/localopt2/FourierTransformsopt2.h   |  38 ++
 .../localopt2/LegendrePolynomialsopt2.cc      | 166 +++++++
 .../trans/localopt2/LegendrePolynomialsopt2.h |  44 ++
 .../trans/localopt2/LegendreTransformsopt2.cc |  62 +++
 .../trans/localopt2/LegendreTransformsopt2.h  |  37 ++
 src/atlas/trans/localopt2/TransLocalopt2.cc   | 445 ++++++++++++++++++
 src/atlas/trans/localopt2/TransLocalopt2.h    | 124 +++++
 .../trans/localopt2/VorDivToUVLocalopt2.cc    | 184 ++++++++
 .../trans/localopt2/VorDivToUVLocalopt2.h     |  67 +++
 src/tests/trans/test_transgeneral.cc          |  10 +-
 15 files changed, 1265 insertions(+), 6 deletions(-)
 create mode 100644 src/atlas/trans/localopt2/FourierTransformsopt2.cc
 create mode 100644 src/atlas/trans/localopt2/FourierTransformsopt2.h
 create mode 100644 src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
 create mode 100644 src/atlas/trans/localopt2/LegendrePolynomialsopt2.h
 create mode 100644 src/atlas/trans/localopt2/LegendreTransformsopt2.cc
 create mode 100644 src/atlas/trans/localopt2/LegendreTransformsopt2.h
 create mode 100644 src/atlas/trans/localopt2/TransLocalopt2.cc
 create mode 100644 src/atlas/trans/localopt2/TransLocalopt2.h
 create mode 100644 src/atlas/trans/localopt2/VorDivToUVLocalopt2.cc
 create mode 100644 src/atlas/trans/localopt2/VorDivToUVLocalopt2.h

diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt
index ecbbbbb06..8cb63ed1f 100644
--- a/src/atlas/CMakeLists.txt
+++ b/src/atlas/CMakeLists.txt
@@ -341,6 +341,16 @@ trans/localopt/FourierTransformsopt.h
 trans/localopt/FourierTransformsopt.cc
 trans/localopt/VorDivToUVLocalopt.h
 trans/localopt/VorDivToUVLocalopt.cc
+trans/localopt2/TransLocalopt2.h
+trans/localopt2/TransLocalopt2.cc
+trans/localopt2/LegendrePolynomialsopt2.h
+trans/localopt2/LegendrePolynomialsopt2.cc
+trans/localopt2/LegendreTransformsopt2.h
+trans/localopt2/LegendreTransformsopt2.cc
+trans/localopt2/FourierTransformsopt2.h
+trans/localopt2/FourierTransformsopt2.cc
+trans/localopt2/VorDivToUVLocalopt2.h
+trans/localopt2/VorDivToUVLocalopt2.cc
 
 )
 if( ATLAS_HAVE_TRANS )
diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc
index 9c555170d..efd8eabe9 100644
--- a/src/atlas/trans/Trans.cc
+++ b/src/atlas/trans/Trans.cc
@@ -29,6 +29,7 @@
 #endif
 #include "atlas/trans/local/TransLocal.h"
 #include "atlas/trans/localopt/TransLocalopt.h"
+#include "atlas/trans/localopt2/TransLocalopt2.h"
 
 namespace atlas {
 namespace trans {
@@ -64,6 +65,7 @@ struct force_link {
 #endif
         load_builder_grid<TransLocal>();
         load_builder_grid<TransLocalopt>();
+        load_builder_grid<TransLocalopt2>();
     }
 };
 
diff --git a/src/atlas/trans/VorDivToUV.cc b/src/atlas/trans/VorDivToUV.cc
index b958599fb..566303c52 100644
--- a/src/atlas/trans/VorDivToUV.cc
+++ b/src/atlas/trans/VorDivToUV.cc
@@ -28,6 +28,7 @@
 #endif
 #include "atlas/trans/local/VorDivToUVLocal.h"
 #include "atlas/trans/localopt/VorDivToUVLocalopt.h"
+#include "atlas/trans/localopt2/VorDivToUVLocalopt2.h"
 
 namespace atlas {
 namespace trans {
@@ -57,6 +58,7 @@ struct force_link {
 #endif
         load_builder<VorDivToUVLocal>();
         load_builder<VorDivToUVLocalopt>();
+        load_builder<VorDivToUVLocalopt2>();
     }
 };
 
diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index 7e157d80c..907be3e24 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -206,7 +206,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                     eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                 }
             }
-#if 0  // 1: better for small number of columns, large truncation; 0: better for large number of columns
+#if 1  // 1: better for small number of columns, large truncation; 0: better for large number of columns
 
             // Transposition in Fourier space:
             std::vector<double> scl_fourier_tp( size_fourier * ( truncation + 1 ) );
diff --git a/src/atlas/trans/localopt2/FourierTransformsopt2.cc b/src/atlas/trans/localopt2/FourierTransformsopt2.cc
new file mode 100644
index 000000000..71d3202dd
--- /dev/null
+++ b/src/atlas/trans/localopt2/FourierTransformsopt2.cc
@@ -0,0 +1,78 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor
+ * does it submit to any jurisdiction.
+ */
+
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+
+#include "atlas/trans/localopt2/FourierTransformsopt2.h"
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+
+void invtrans_fourieropt2( const size_t trcFT,
+                           const double lon,         // longitude in radians (in)
+                           const int nb_fields,      // Number of fields
+                           const double rlegReal[],  // associated Legendre functions, size (trc+1)*trc/2 (in)
+                           const double rlegImag[],  // associated Legendre functions, size (trc+1)*trc/2 (in)
+                           double rgp[] )            // gridpoint
+{
+    for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
+        rgp[jfld] = 0.;
+    }
+    // local Fourier transformation:
+    for ( int jm = 0; jm <= trcFT; ++jm ) {
+        const double cos = std::cos( jm * lon );
+        const double sin = std::sin( jm * lon );
+        for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
+            double real = cos * rlegReal[jm * nb_fields + jfld];
+            double imag = sin * rlegImag[jm * nb_fields + jfld];
+            rgp[jfld] += real - imag;
+        }
+    }
+}
+
+int fourier_truncationopt2( const int truncation,    // truncation
+                            const int nx,            // number of longitudes
+                            const int nxmax,         // maximum nx
+                            const int ndgl,          // number of latitudes
+                            const double lat,        // latitude in radian
+                            const bool fullgrid ) {  // regular grid
+    int trc     = truncation;
+    int trclin  = ndgl - 1;
+    int trcquad = ndgl * 2 / 3 - 1;
+    if ( truncation >= trclin || fullgrid ) {
+        // linear
+        trc = ( nx - 1 ) / 2;
+    }
+    else if ( truncation >= trcquad ) {
+        // quadratic
+        double weight = 3 * ( trclin - truncation ) / ndgl;
+        double sqcos  = std::pow( std::cos( lat ), 2 );
+
+        trc = ( nx - 1 ) / ( 2 + weight * sqcos );
+    }
+    else {
+        // cubic
+        double sqcos = std::pow( std::cos( lat ), 2 );
+
+        trc = ( nx - 1 ) / ( 2 + sqcos ) - 1;
+    }
+    trc = std::min( truncation, trc );
+    return trc;
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt2/FourierTransformsopt2.h b/src/atlas/trans/localopt2/FourierTransformsopt2.h
new file mode 100644
index 000000000..4281a92cc
--- /dev/null
+++ b/src/atlas/trans/localopt2/FourierTransformsopt2.h
@@ -0,0 +1,38 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#pragma once
+
+#include <cstddef>
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+// Routine to compute the local Fourier transformation
+//
+// Author:
+// Andreas Mueller *ECMWF*
+//
+
+void invtrans_fourieropt2( const size_t trcFT,
+                           const double lon,         // longitude in radians (in)
+                           const int nb_fields,      // Number of fields
+                           const double rlegReal[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
+                           const double rlegImag[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
+                           double rgp[] );           // gridpoint
+
+int fourier_truncationopt2( const int truncation, const int nx, const int nxmax, const int ndgl, const double lat,
+                            const bool fullgrid );
+
+// --------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
new file mode 100644
index 000000000..8f55231d5
--- /dev/null
+++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
@@ -0,0 +1,166 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor
+ * does it submit to any jurisdiction.
+ */
+
+#include <cmath>
+#include <limits>
+
+#include "atlas/array.h"
+#include "atlas/trans/localopt2/LegendrePolynomialsopt2.h"
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+
+void compute_legendre_polynomialsopt2(
+    const size_t trc,     // truncation (in)
+    const int nlats,      // number of latitudes
+    const double lats[],  // latitudes in radians (in)
+    double legpol[] )     // values of associated Legendre functions, size (trc+1)*trc/2*nlats (out)
+{
+    array::ArrayT<int> idxmn_( trc + 1, trc + 1, nlats );
+    array::ArrayView<int, 3> idxmn = array::make_view<int, 3>( idxmn_ );
+
+    int j = 0;
+    for ( int jm = 0; jm <= trc; ++jm ) {
+        for ( int jlat = 0; jlat < nlats; ++jlat ) {
+            for ( int jn = jm; jn <= trc; ++jn ) {
+                idxmn( jm, jn, jlat ) = j++;
+            }
+        }
+    }
+
+    array::ArrayT<double> zfn_( trc + 1, trc + 1 );
+    array::ArrayView<double, 2> zfn = array::make_view<double, 2>( zfn_ );
+
+    int iodd;
+
+    // Compute coefficients for Taylor series in Belousov (19) and (21)
+    // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.)
+    // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1
+    zfn( 0, 0 ) = 2.;
+    for ( int jn = 1; jn <= trc; ++jn ) {
+        double zfnn = zfn( 0, 0 );
+        for ( int jgl = 1; jgl <= jn; ++jgl ) {
+            zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) );
+        }
+        iodd          = jn % 2;
+        zfn( jn, jn ) = zfnn;
+        for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) {
+            double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) );  // new factor numerator
+            double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) );           // new factor denominator
+
+            zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd;
+        }
+    }
+
+    for ( int jlat = 0; jlat < nlats; ++jlat ) {
+        // --------------------
+        // 1. First two columns
+        // --------------------
+        double lat     = lats[jlat];
+        double zdlx1   = ( M_PI_2 - lat );               // theta
+        double zdlx    = std::cos( zdlx1 );              // cos(theta)
+        double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
+
+        legpol[idxmn( 0, 0, jlat )] = 1.;
+
+        double zdl1sita = 0.;
+        // if we are less than 1 meter from the pole,
+        if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits<double>::epsilon() ) ) {
+            zdlx    = 1.;
+            zdlsita = 0.;
+        }
+        else {
+            zdl1sita = 1. / zdlsita;
+        }
+
+        // ordinary Legendre polynomials from series expansion
+        // ---------------------------------------------------
+
+        // even N
+        for ( int jn = 2; jn <= trc; jn += 2 ) {
+            double zdlk   = 0.5 * zfn( jn, 0 );
+            double zdlldn = 0.0;
+            double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
+            // represented by only even k
+            for ( int jk = 2; jk <= jn; jk += 2 ) {
+                // normalised ordinary Legendre polynomial == \overbar{P_n}^0
+                zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
+                // normalised associated Legendre polynomial == \overbar{P_n}^1
+                zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+            }
+            legpol[idxmn( 0, jn, jlat )] = zdlk;
+            legpol[idxmn( 1, jn, jlat )] = zdlldn;
+        }
+
+        // odd N
+        for ( int jn = 1; jn <= trc; jn += 2 ) {
+            zfn( jn, 0 )  = 0.;
+            double zdlk   = 0.;
+            double zdlldn = 0.0;
+            double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
+            // represented by only even k
+            for ( int jk = 1; jk <= jn; jk += 2 ) {
+                // normalised ordinary Legendre polynomial == \overbar{P_n}^0
+                zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
+                // normalised associated Legendre polynomial == \overbar{P_n}^1
+                zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+            }
+            legpol[idxmn( 0, jn, jlat )] = zdlk;
+            legpol[idxmn( 1, jn, jlat )] = zdlldn;
+        }
+
+        // --------------------------------------------------------------
+        // 2. Diagonal (the terms 0,0 and 1,1 have already been computed)
+        //    Belousov, equation (23)
+        // --------------------------------------------------------------
+
+        double zdls = zdl1sita * std::numeric_limits<double>::min();
+        for ( int jn = 2; jn <= trc; ++jn ) {
+            double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) );
+
+            legpol[idxmn( jn, jn, jlat )] = legpol[idxmn( jn - 1, jn - 1, jlat )] * zdlsita * sq;
+            if ( std::abs( legpol[idxmn( jn, jn, jlat )] ) < zdls ) legpol[idxmn( jn, jn, jlat )] = 0.0;
+        }
+
+        // ---------------------------------------------
+        // 3. General recurrence (Belousov, equation 17)
+        // ---------------------------------------------
+
+        for ( int jn = 3; jn <= trc; ++jn ) {
+            for ( int jm = 2; jm < jn; ++jm ) {
+                double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) );  // numerator of c in Belousov
+                double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of c in Belousov
+                double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) );  // numerator of d in Belousov
+                double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of d in Belousov
+                double en = ( ( 2. * jn + 1. ) * ( jn - jm ) );                          // numerator of e in Belousov
+                double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) );                          // denominator of e in Belousov
+
+                legpol[idxmn( jm, jn, jlat )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2, jlat )] -
+                                                std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1, jlat )] * zdlx +
+                                                std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1, jlat )] * zdlx;
+            }
+        }
+
+        // take factor 2 for m > 0 into account:
+        for ( int jm = 1; jm <= trc; ++jm ) {
+            for ( int jn = jm; jn <= trc; ++jn ) {
+                legpol[idxmn( jm, jn, jlat )] *= 2.;
+            }
+        }
+    }
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h
new file mode 100644
index 000000000..ae550d30d
--- /dev/null
+++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h
@@ -0,0 +1,44 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#pragma once
+
+#include <cstddef>
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+// Routine to compute the Legendre polynomials in serial according to Belousov
+// (using correction by Swarztrauber)
+//
+// Reference:
+// S.L. Belousov, Tables of normalized associated Legendre Polynomials, Pergamon
+// Press (1962)
+// P.N. Swarztrauber, On computing the points and weights for Gauss-Legendre
+// quadrature,
+//      SIAM J. Sci. Comput. Vol. 24 (3) pp. 945-954 (2002)
+//
+// Author of Fortran version:
+// Mats Hamrud, Philippe Courtier, Nils Wedi *ECMWF*
+//
+// Ported to C++ by:
+// Andreas Mueller *ECMWF*
+//
+void compute_legendre_polynomialsopt2(
+    const size_t trc,     // truncation (in)
+    const int nlats,      // number of latitudes
+    const double lats[],  // latitudes in radians (in)
+    double legpol[] );    // values of associated Legendre functions, size (trc+1)*trc/2*nlats (out)
+
+// --------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt2/LegendreTransformsopt2.cc b/src/atlas/trans/localopt2/LegendreTransformsopt2.cc
new file mode 100644
index 000000000..ddb71a967
--- /dev/null
+++ b/src/atlas/trans/localopt2/LegendreTransformsopt2.cc
@@ -0,0 +1,62 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#include <cstddef>
+
+#include "atlas/trans/localopt2/LegendreTransformsopt2.h"
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+
+void invtrans_legendreopt2(
+    const size_t trc,       // truncation (in)
+    const size_t trcFT,     // truncation for Fourier transformation (in)
+    const size_t trcLP,     // truncation of Legendre polynomials data legpol. Needs to be >= trc (in)
+    const double legpol[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
+    const int nb_fields,    // number of fields
+    const double spec[],    // spectral data, size (trc+1)*trc (in)
+    double leg_real[],      // values of associated Legendre functions, size (trc+1)*trc/2 (out)
+    double leg_imag[] )     // values of associated Legendre functions, size (trc+1)*trc/2 (out)
+{
+    // Legendre transformation:
+    int k = 0, klp = 0;
+    for ( int jm = 0; jm <= trcFT; ++jm ) {
+        for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
+            leg_real[jm * nb_fields + jfld] = 0.;
+            leg_imag[jm * nb_fields + jfld] = 0.;
+        }
+        for ( int jn = jm; jn <= trcLP; ++jn, ++klp ) {
+            if ( jn <= trc ) {
+                for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
+                    // not completely sure where this factor 2 comes from. One possible
+                    // explanation:
+                    // normalization of trigonometric functions in the spherical harmonics
+                    // integral over square of trig function is 1 for m=0 and 0.5 (?) for
+                    // m>0
+                    leg_real[jm * nb_fields + jfld] += 2. * spec[( 2 * k ) * nb_fields + jfld] * legpol[klp];
+                    leg_imag[jm * nb_fields + jfld] += 2. * spec[( 2 * k + 1 ) * nb_fields + jfld] * legpol[klp];
+                }
+                ++k;
+            }
+        }
+    }
+    // Undo factor 2 for (jm == 0)
+    for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
+        leg_real[jfld] /= 2.;
+        leg_imag[jfld] /= 2.;
+    }
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt2/LegendreTransformsopt2.h b/src/atlas/trans/localopt2/LegendreTransformsopt2.h
new file mode 100644
index 000000000..ef10eb885
--- /dev/null
+++ b/src/atlas/trans/localopt2/LegendreTransformsopt2.h
@@ -0,0 +1,37 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#pragma once
+
+#include <cstddef>
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+// Routine to compute the Legendre transformation
+//
+// Author:
+// Andreas Mueller *ECMWF*
+//
+void invtrans_legendreopt2(
+    const size_t trc,       // truncation (in)
+    const size_t trcFT,     // truncation for Fourier transformation (in)
+    const size_t trcLP,     // truncation of Legendre polynomials data legpol. Needs to be >= trc (in)
+    const double legpol[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
+    const int nb_fields,    // number of fields
+    const double spec[],    // spectral data, size (trc+1)*trc (in)
+    double leg_real[],      // values of associated Legendre functions, size (trc+1)*trc/2 (out)
+    double leg_imag[] );    // values of associated Legendre functions, size (trc+1)*trc/2 (out)
+
+// --------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
new file mode 100644
index 000000000..45548ce12
--- /dev/null
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -0,0 +1,445 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#include "atlas/trans/localopt2/TransLocalopt2.h"
+#include "atlas/array.h"
+#include "atlas/option.h"
+#include "atlas/parallel/mpi/mpi.h"
+#include "atlas/runtime/ErrorHandling.h"
+#include "atlas/runtime/Log.h"
+#include "atlas/trans/VorDivToUV.h"
+#include "atlas/trans/local/LegendrePolynomials.h"
+#include "atlas/trans/localopt2/FourierTransformsopt2.h"
+#include "atlas/trans/localopt2/LegendrePolynomialsopt2.h"
+#include "atlas/trans/localopt2/LegendreTransformsopt2.h"
+#include "atlas/util/Constants.h"
+#include "eckit/linalg/LinearAlgebra.h"
+#include "eckit/linalg/Matrix.h"
+
+namespace atlas {
+namespace trans {
+
+namespace {
+static TransBuilderGrid<TransLocalopt2> builder( "localopt2" );
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+// Helper functions
+// --------------------------------------------------------------------------------------------------------------------
+namespace {  // anonymous
+
+size_t legendre_size( const size_t truncation ) {
+    return ( truncation + 2 ) * ( truncation + 1 ) / 2;
+}
+
+}  // namespace
+
+// --------------------------------------------------------------------------------------------------------------------
+// Class TransLocalopt2
+// --------------------------------------------------------------------------------------------------------------------
+
+TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long truncation,
+                                const eckit::Configuration& config ) :
+    grid_( grid ),
+    truncation_( truncation ),
+    precompute_( config.getBool( "precompute", true ) ) {
+    ATLAS_TRACE( "Precompute legendre opt2" );
+    int nlats, nlons;
+    if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
+        grid::StructuredGrid g( grid_ );
+        nlats = g.ny();
+        nlons = g.nxmax();
+    }
+    else {
+        nlats = grid_.size();
+        nlons = grid_.size();
+    }
+    std::vector<double> lats( nlats );
+    std::vector<double> lons( nlons );
+    if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
+        grid::StructuredGrid g( grid_ );
+        // TODO: remove legendre_begin and legendre_data (only legendre_ should be needed)
+        for ( size_t j = 0; j < nlats; ++j ) {
+            lats[j] = g.y( j ) * util::Constants::degreesToRadians();
+        }
+        for ( size_t j = 0; j < nlons; ++j ) {
+            lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians();
+        }
+    }
+    else {
+        int j( 0 );
+        for ( PointXY p : grid_.xy() ) {
+            lats[j++] = p.y() * util::Constants::degreesToRadians();
+            lons[j++] = p.x() * util::Constants::degreesToRadians();
+        }
+    }
+    // precomputations for Legendre polynomials:
+    {
+        ATLAS_TRACE( "opt2 precomp Legendre" );
+        legendre_.resize( legendre_size( truncation_ + 1 ) * nlats );
+        compute_legendre_polynomialsopt2( truncation_ + 1, nlats, lats.data(), legendre_.data() );
+    }
+
+    // precomputations for Fourier transformations:
+    {
+        ATLAS_TRACE( "opt2 precomp Fourier" );
+        fourier_.resize( 2 * ( truncation_ + 1 ) * nlons );
+        int idx = 0;
+        for ( int jlon = 0; jlon < nlons; jlon++ ) {
+            for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                fourier_[idx++] = +std::cos( jm * lons[jlon] );  // real part
+                fourier_[idx++] = -std::sin( jm * lons[jlon] );  // imaginary part
+            }
+        }
+    }
+    {
+        ATLAS_TRACE( "opt2 precomp Fourier tp" );
+        fouriertp_.resize( 2 * ( truncation_ + 1 ) * nlons );
+        int idx = 0;
+        for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+            for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                fouriertp_[idx++] = +std::cos( jm * lons[jlon] );  // real part
+            }
+            for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                fouriertp_[idx++] = -std::sin( jm * lons[jlon] );  // imaginary part
+            }
+        }
+    }
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+TransLocalopt2::TransLocalopt2( const Grid& grid, const long truncation, const eckit::Configuration& config ) :
+    TransLocalopt2( Cache(), grid, truncation, config ) {}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+TransLocalopt2::~TransLocalopt2() {}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt2::invtrans( const Field& spfield, Field& gpfield, const eckit::Configuration& config ) const {
+    NOTIMP;
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt2::invtrans( const FieldSet& spfields, FieldSet& gpfields,
+                               const eckit::Configuration& config ) const {
+    NOTIMP;
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt2::invtrans_grad( const Field& spfield, Field& gradfield, const eckit::Configuration& config ) const {
+    NOTIMP;
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt2::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields,
+                                    const eckit::Configuration& config ) const {
+    NOTIMP;
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt2::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind,
+                                           const eckit::Configuration& config ) const {
+    NOTIMP;
+}
+
+void TransLocalopt2::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[],
+                               const eckit::Configuration& config ) const {
+    invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields, config );
+}
+
+void gp_transposeopt2( const int nb_size, const int nb_fields, const double gp_tmp[], double gp_fields[] ) {
+    for ( int jgp = 0; jgp < nb_size; jgp++ ) {
+        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+            gp_fields[jfld * nb_size + jgp] = gp_tmp[jgp * nb_fields + jfld];
+        }
+    }
+}
+
+//-----------------------------------------------------------------------------
+// Routine to compute the spectral transform by using a localopt2 Fourier
+// transformation
+// for a grid (same latitude for all longitudes, allows to compute Legendre
+// functions
+// once for all longitudes). U and v components are divided by cos(latitude) for
+// nb_vordiv_fields > 0.
+//
+// Author:
+// Andreas Mueller *ECMWF*
+//
+void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
+                                  const double scalar_spectra[], double gp_fields[],
+                                  const eckit::Configuration& config ) const {
+    if ( nb_scalar_fields > 0 ) {
+        int nb_fields = nb_scalar_fields;
+
+        //eckit::linalg::LinearAlgebra::backend( "string" ) // might want to choose backend with this command
+
+        // Transform
+        if ( grid::StructuredGrid g = grid_ ) {
+            ATLAS_TRACE( "invtrans_uv structured opt2" );
+            int nlats        = g.ny();
+            int size_fourier = nb_fields * 2 * g.ny();
+            std::vector<double> scl_fourier( size_fourier * ( truncation + 1 ) );
+
+            // Legendre transform:
+            {
+                ATLAS_TRACE( "opt2 Legendre dgemm" );
+                for ( int jm = 0; jm <= truncation; jm++ ) {
+                    int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
+                    eckit::linalg::Matrix A( eckit::linalg::Matrix(
+                        const_cast<double*>( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) );
+                    eckit::linalg::Matrix B( legendre_.data() + noff * g.ny(), ns, g.ny() );
+                    eckit::linalg::Matrix C( scl_fourier.data() + jm * size_fourier, nb_fields * 2, g.ny() );
+                    eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                }
+            }
+#if 0  // 1: better for small number of columns, large truncation; 0: better for large number of columns
+
+            // Transposition in Fourier space:
+            std::vector<double> scl_fourier_tp( size_fourier * ( truncation + 1 ) );
+            {
+                ATLAS_TRACE( "opt2 transposition in Fourier" );
+                int idx = 0;
+                for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                    for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                        for ( int imag = 0; imag < 2; imag++ ) {
+                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                int pos_tp = jfld + nb_fields * ( jlat + g.ny() * ( imag + 2 * ( jm ) ) );
+                                //int pos  = jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) );
+                                scl_fourier_tp[pos_tp] = scl_fourier[idx++];  // = scl_fourier[pos]
+                            }
+                        }
+                    }
+                }
+            }
+
+            // Fourier transformation:
+            std::vector<double> gp_opt2( nb_fields * grid_.size(), 0. );
+            {
+                ATLAS_TRACE( "opt2 Fourier dgemm" );
+                eckit::linalg::Matrix A( scl_fourier_tp.data(), nb_fields * g.ny(), ( truncation_ + 1 ) * 2 );
+                eckit::linalg::Matrix B( fourier_.data(), ( truncation_ + 1 ) * 2, g.nxmax() );
+                eckit::linalg::Matrix C( gp_opt2.data(), nb_fields * g.ny(), g.nxmax() );
+                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+            }
+
+            // Transposition in grid point space:
+            {
+                ATLAS_TRACE( "opt2 transposition in gp-space" );
+                int idx = 0;
+                for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
+                    for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                            int pos_tp = jlon + g.nxmax() * ( jlat + g.ny() * ( jfld ) );
+                            //int pos  = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) );
+                            gp_fields[pos_tp] = gp_opt2[idx++];  // = gp_opt2[pos]
+                        }
+                    }
+                }
+            }
+#else
+            // Transposition in Fourier space:
+            std::vector<double> scl_fourier_tp( size_fourier * ( truncation + 1 ) );
+            {
+                ATLAS_TRACE( "opt2 transposition in Fourier" );
+                int idx = 0;
+                for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                    for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                        for ( int imag = 0; imag < 2; imag++ ) {
+                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                int pos_tp = imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + g.ny() * ( jfld ) ) );
+                                //int pos  = jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) );
+                                scl_fourier_tp[pos_tp] = scl_fourier[idx++];  // = scl_fourier[pos]
+                            }
+                        }
+                    }
+                }
+            }
+
+            // Fourier transformation:
+            std::vector<double> gp_opt2( nb_fields * grid_.size(), 0. );
+            {
+                ATLAS_TRACE( "opt2 Fourier dgemm" );
+                eckit::linalg::Matrix A( fouriertp_.data(), g.nxmax(), ( truncation_ + 1 ) * 2 );
+                eckit::linalg::Matrix B( scl_fourier_tp.data(), ( truncation_ + 1 ) * 2, nb_fields * g.ny() );
+                eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() );
+                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+            }
+
+#endif
+            // Computing u,v from U,V:
+            {
+                if ( nb_vordiv_fields > 0 ) {
+                    ATLAS_TRACE( "opt2 u,v from U,V" );
+                    std::vector<double> coslats( nlats );
+                    for ( size_t j = 0; j < nlats; ++j ) {
+                        coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
+                    }
+                    int idx = 0;
+                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                            for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
+                                gp_fields[idx] /= coslats[jlat];
+                                idx++;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        else {
+            ATLAS_TRACE( "invtrans_uv unstructured opt2" );
+            int idx = 0;
+            for ( PointXY p : grid_.xy() ) {
+                double lon   = p.x() * util::Constants::degreesToRadians();
+                double lat   = p.y() * util::Constants::degreesToRadians();
+                double trcFT = truncation;
+
+                // Legendre transform:
+                //invtrans_legendreopt2( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
+                //                      legReal.data(), legImag.data() );
+
+                // Fourier transform:
+                //invtrans_fourieropt2( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
+                //                     gp_tmp.data() + ( nb_fields * idx ) );
+                for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+                    //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                }
+                ++idx;
+            }
+        }
+    }
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt2::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
+                               const double divergence_spectra[], double gp_fields[],
+                               const eckit::Configuration& config ) const {
+    invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
+}
+
+void extend_truncationopt2( const int old_truncation, const int nb_fields, const double old_spectra[],
+                            double new_spectra[] ) {
+    int k = 0, k_old = 0;
+    for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
+        for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
+            for ( int imag = 0; imag < 2; imag++ ) {              // imaginary/real part
+                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {  // field
+                    if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; }
+                    else {
+                        new_spectra[k++] = old_spectra[k_old++];
+                    }
+                }
+            }
+        }
+    }
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt2::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
+                               const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
+                               const eckit::Configuration& config ) const {
+    ATLAS_TRACE( "TransLocalopt2::invtrans" );
+    int nb_gp              = grid_.size();
+    int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
+    if ( nb_vordiv_fields > 0 ) {
+        std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
+        std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
+        std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
+        std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
+
+        {
+            ATLAS_TRACE( "opt2 extend vordiv" );
+            // increase truncation in vorticity_spectra and divergence_spectra:
+            extend_truncationopt2( truncation_, nb_vordiv_fields, vorticity_spectra,
+                                   vorticity_spectra_extended.data() );
+            extend_truncationopt2( truncation_, nb_vordiv_fields, divergence_spectra,
+                                   divergence_spectra_extended.data() );
+        }
+
+        {
+            ATLAS_TRACE( "vordiv to UV opt2" );
+            // call vd2uv to compute u and v in spectral space
+            trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt2" ) );
+            vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
+                                      divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
+        }
+
+        // perform spectral transform to compute all fields in grid point space
+        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
+        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
+                     gp_fields + nb_gp * nb_vordiv_fields, config );
+    }
+    if ( nb_scalar_fields > 0 ) {
+        int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
+        std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
+        extend_truncationopt2( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
+        invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
+                     gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
+    }
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt2::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt2::dirtrans( const FieldSet& gpfields, FieldSet& spfields,
+                               const eckit::Configuration& config ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt2::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
+                                           const eckit::Configuration& config ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt2::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
+                               const eckit::Configuration& ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt2::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
+                               double divergence_spectra[], const eckit::Configuration& ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.h b/src/atlas/trans/localopt2/TransLocalopt2.h
new file mode 100644
index 000000000..8f3de137c
--- /dev/null
+++ b/src/atlas/trans/localopt2/TransLocalopt2.h
@@ -0,0 +1,124 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#pragma once
+
+#include <vector>
+
+#include "atlas/grid/Grid.h"
+#include "atlas/trans/Trans.h"
+
+//-----------------------------------------------------------------------------
+// Forward declarations
+
+namespace atlas {
+class Field;
+class FieldSet;
+}  // namespace atlas
+
+//-----------------------------------------------------------------------------
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+
+/// @class TransLocalopt2
+///
+/// Localopt2 spherical harmonics transformations to any grid
+/// Optimisations are present for structured grids
+/// For global grids, please consider using TransIFS instead.
+///
+/// @todo:
+///  - support multiple fields
+///  - support atlas::Field and atlas::FieldSet based on function spaces
+///
+/// @note: Direct transforms are not implemented and cannot be unless
+///        the grid is global. There are no plans to support this at the moment.
+class TransLocalopt2 : public trans::TransImpl {
+public:
+    TransLocalopt2( const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransLocalopt2( const Cache&, const Grid& g, const long truncation,
+                    const eckit::Configuration& = util::NoConfig() );
+
+    virtual ~TransLocalopt2();
+
+    virtual int truncation() const override { return truncation_; }
+    virtual size_t spectralCoefficients() const override { return ( truncation_ + 1 ) * ( truncation_ + 2 ); }
+
+    virtual const Grid& grid() const override { return grid_; }
+
+    virtual void invtrans( const Field& spfield, Field& gpfield,
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void invtrans( const FieldSet& spfields, FieldSet& gpfields,
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void invtrans_grad( const Field& spfield, Field& gradfield,
+                                const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void invtrans_grad( const FieldSet& spfields, FieldSet& gradfields,
+                                const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind,
+                                       const eckit::Configuration& = util::NoConfig() ) const override;
+
+    // -- IFS style API --
+
+    virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
+                           const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[],
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
+                           const double divergence_spectra[], double gp_fields[],
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    // -- NOT SUPPORTED -- //
+
+    virtual void dirtrans( const Field& gpfield, Field& spfield,
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void dirtrans( const FieldSet& gpfields, FieldSet& spfields,
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
+                                       const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
+                           double divergence_spectra[], const eckit::Configuration& = util::NoConfig() ) const override;
+
+private:
+    const double* legendre_data( int j ) const { return legendre_.data() + legendre_begin_[j]; }
+    double* legendre_data( int j ) { return legendre_.data() + legendre_begin_[j]; }
+
+    void invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
+                      const double scalar_spectra[], double gp_fields[],
+                      const eckit::Configuration& = util::NoConfig() ) const;
+
+private:
+    Grid grid_;
+    int truncation_;
+    bool precompute_;
+    mutable std::vector<double> legendre_;
+    mutable std::vector<double> fourier_;
+    mutable std::vector<double> fouriertp_;
+    std::vector<size_t> legendre_begin_;
+};
+
+//-----------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt2/VorDivToUVLocalopt2.cc b/src/atlas/trans/localopt2/VorDivToUVLocalopt2.cc
new file mode 100644
index 000000000..72c5a3ac9
--- /dev/null
+++ b/src/atlas/trans/localopt2/VorDivToUVLocalopt2.cc
@@ -0,0 +1,184 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#include "atlas/trans/localopt2/VorDivToUVLocalopt2.h"
+#include <cmath>  // for std::sqrt
+#include "atlas/functionspace/Spectral.h"
+#include "atlas/runtime/Log.h"
+#include "atlas/util/Earth.h"
+
+using atlas::FunctionSpace;
+using atlas::functionspace::Spectral;
+
+namespace atlas {
+namespace trans {
+
+namespace {
+static VorDivToUVBuilder<VorDivToUVLocalopt2> builder( "localopt2" );
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+// Routine to copy spectral data into internal storage form of IFS trans
+// Ported to C++ by: Andreas Mueller *ECMWF*
+void prfi1bopt2( const int truncation,
+                 const int km,          // zonal wavenumber
+                 const int nb_fields,   // number of fields
+                 const double rspec[],  // spectral data
+                 double pia[] )         // spectral components in data layout of trans library
+{
+    int ilcm = truncation + 1 - km, ioff = ( 2 * truncation - km + 3 ) * km,
+        nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2;
+    for ( int j = 1; j <= ilcm; j++ ) {
+        int inm = ioff + ( ilcm - j ) * 2;
+        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+            int ir = 2 * jfld, ii = ir + 1;
+            pia[ir * nlei1 + j + 1] = rspec[inm * nb_fields + jfld];
+            pia[ii * nlei1 + j + 1] = rspec[( inm + 1 ) * nb_fields + jfld];
+        }
+    }
+
+    for ( int jfld = 0; jfld < 2 * nb_fields; jfld++ ) {
+        pia[jfld * nlei1]            = 0.;
+        pia[jfld * nlei1 + 1]        = 0.;
+        pia[jfld * nlei1 + ilcm + 2] = 0.;
+    }
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+// Routine to compute spectral velocities (*cos(latitude)) out of spectral
+// vorticity and divergence
+// Reference:
+//        ECMWF Research Department documentation of the IFS
+//        Temperton, 1991, MWR 119 p1303
+// Ported to C++ by: Andreas Mueller *ECMWF*
+void vd2uvopt2( const int truncation,               // truncation
+                const int km,                       // zonal wavenumber
+                const int nb_vordiv_fields,         // number of vorticity and divergence fields
+                const double vorticity_spectra[],   // spectral data of vorticity
+                const double divergence_spectra[],  // spectral data of divergence
+                double U[],                         // spectral data of U
+                double V[],                         // spectral data of V
+                const eckit::Configuration& config ) {
+    int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2;
+
+    // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991]
+    std::vector<double> repsnm( ( truncation + 1 ) * ( truncation + 6 ) / 2 );
+    int idx = 0;
+    for ( int jm = 0; jm <= truncation; ++jm ) {
+        for ( int jn = jm; jn <= truncation + 2; ++jn, ++idx ) {
+            repsnm[idx] = std::sqrt( ( jn * jn - jm * jm ) / ( 4. * jn * jn - 1. ) );
+        }
+    }
+    repsnm[0] = 0.;
+
+    // rlapin: constant factor from eq.(2.2) and (2.3) in [Temperton 1991]
+    double ra = util::Earth::radius();
+    std::vector<double> rlapin( truncation + 3 );
+    for ( int jn = 1; jn <= truncation + 2; ++jn ) {
+        rlapin[jn] = -ra * ra / ( jn * ( jn + 1. ) );
+    }
+    rlapin[0] = 0.;
+
+    // inverse the order of repsnm and rlapin for improved accuracy
+    std::vector<double> zepsnm( truncation + 6 );
+    std::vector<double> zlapin( truncation + 6 );
+    std::vector<double> zn( truncation + 6 );
+    for ( int jn = km - 1; jn <= truncation + 2; ++jn ) {
+        int ij = truncation + 3 - jn;
+        if ( jn >= 0 ) {
+            zlapin[ij] = rlapin[jn];
+            if ( jn < km ) { zepsnm[ij] = 0.; }
+            else {
+                zepsnm[ij] = repsnm[jn + ( 2 * truncation - km + 5 ) * km / 2];
+            }
+        }
+        else {
+            zlapin[ij] = 0.;
+            zepsnm[ij] = 0.;
+        }
+        zn[ij] = jn;
+    }
+    zn[0] = truncation + 3;
+
+    // copy spectral data into internal trans storage:
+    std::vector<double> rvor( 2 * nb_vordiv_fields * nlei1 );
+    std::vector<double> rdiv( 2 * nb_vordiv_fields * nlei1 );
+    std::vector<double> ru( 2 * nb_vordiv_fields * nlei1 );
+    std::vector<double> rv( 2 * nb_vordiv_fields * nlei1 );
+    prfi1bopt2( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() );
+    prfi1bopt2( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() );
+
+    // compute eq.(2.12) and (2.13) in [Temperton 1991]:
+    if ( km == 0 ) {
+        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+            int ir = 2 * jfld * nlei1 - 1;
+            for ( int ji = 2; ji < truncation + 4 - km; ++ji ) {
+                double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1];
+                double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1];
+                ru[ir + ji]  = +psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1];
+                rv[ir + ji]  = -psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1];
+            }
+        }
+    }
+    else {
+        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+            int ir = 2 * jfld * nlei1 - 1, ii = ir + nlei1;
+            for ( int ji = 2; ji < truncation + 4 - km; ++ji ) {
+                double chiIm = km * zlapin[ji];
+                double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1];
+                double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1];
+                ru[ir + ji]  = -chiIm * rdiv[ii + ji] + psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1];
+                ru[ii + ji]  = +chiIm * rdiv[ir + ji] + psiM1 * rvor[ii + ji + 1] - psiP1 * rvor[ii + ji - 1];
+                rv[ir + ji]  = -chiIm * rvor[ii + ji] - psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1];
+                rv[ii + ji]  = +chiIm * rvor[ir + ji] - psiM1 * rdiv[ii + ji + 1] + psiP1 * rdiv[ii + ji - 1];
+            }
+        }
+    }
+
+    // copy data from internal storage back to external spectral data:
+    int ilcm = truncation - km;
+    int ioff = ( 2 * truncation - km + 3 ) * km;
+    // ioff: start index of zonal wavenumber km in spectral data
+    double za_r = 1. / util::Earth::radius();
+    for ( int j = 0; j <= ilcm; ++j ) {
+        // ilcm-j = total wavenumber
+        int inm = ioff + ( ilcm - j ) * 2;
+        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+            int ir = 2 * jfld * nlei1, ii = ir + nlei1;
+            int idx = inm * nb_vordiv_fields + jfld;
+            // real part:
+            U[idx] = ru[ir + j + 2] * za_r;
+            V[idx] = rv[ir + j + 2] * za_r;
+            idx += nb_vordiv_fields;
+            // imaginary part:
+            U[idx] = ru[ii + j + 2] * za_r;
+            V[idx] = rv[ii + j + 2] * za_r;
+        }
+    }
+}
+
+void VorDivToUVLocalopt2::execute( const int nb_coeff, const int nb_fields, const double vorticity[],
+                                   const double divergence[], double U[], double V[],
+                                   const eckit::Configuration& config ) const {
+    for ( int jm = 0; jm <= truncation_; ++jm ) {
+        vd2uvopt2( truncation_, jm, nb_fields, vorticity, divergence, U, V, config );
+    }
+}
+
+VorDivToUVLocalopt2::VorDivToUVLocalopt2( const int truncation, const eckit::Configuration& config ) :
+    truncation_( truncation ) {}
+
+VorDivToUVLocalopt2::VorDivToUVLocalopt2( const FunctionSpace& fs, const eckit::Configuration& config ) :
+    truncation_( Spectral( fs ).truncation() ) {}
+
+VorDivToUVLocalopt2::~VorDivToUVLocalopt2() {}
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt2/VorDivToUVLocalopt2.h b/src/atlas/trans/localopt2/VorDivToUVLocalopt2.h
new file mode 100644
index 000000000..30c386a92
--- /dev/null
+++ b/src/atlas/trans/localopt2/VorDivToUVLocalopt2.h
@@ -0,0 +1,67 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#pragma once
+
+#include "atlas/trans/VorDivToUV.h"
+
+//-----------------------------------------------------------------------------
+// Forward declarations
+
+namespace atlas {
+class FunctionSpace;
+}
+
+//-----------------------------------------------------------------------------
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+
+class VorDivToUVLocalopt2 : public trans::VorDivToUVImpl {
+public:
+    VorDivToUVLocalopt2( const FunctionSpace&, const eckit::Configuration& = util::NoConfig() );
+    VorDivToUVLocalopt2( int truncation, const eckit::Configuration& = util::NoConfig() );
+
+    virtual ~VorDivToUVLocalopt2();
+
+    virtual int truncation() const override { return truncation_; }
+
+    // pure virtual interface
+
+    // -- IFS style API --
+    // These fields have special interpretation required. You need to know what
+    // you're doing.
+    // See IFS trans library.
+
+    /*!
+ * @brief Compute spectral wind (U/V) from spectral vorticity/divergence
+ *
+ * U = u*cos(lat)
+ * V = v*cos(lat)
+ *
+ * @param nb_fields [in] Number of fields
+ * @param vorticity [in] Spectral vorticity
+ * @param divergence [in] Spectral divergence
+ * @param U [out] Spectral wind U = u*cos(lat)
+ * @param V [out] Spectral wind V = v*cos(lat)
+ */
+    virtual void execute( const int nb_coeff, const int nb_fields, const double vorticity[], const double divergence[],
+                          double U[], double V[], const eckit::Configuration& = util::NoConfig() ) const override;
+
+private:
+    int truncation_;
+};
+
+// ------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index b4c659430..5838bd796 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -721,8 +721,8 @@ CASE( "test_trans_vordiv_with_translib" ) {
 #if ATLAS_HAVE_TRANS
     trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) );
 #endif
-    trans::Trans transLocal( g, trc, util::Config( "type", "local" ) );
-    trans::Trans transLocalopt( g, trc, util::Config( "type", "localopt" ) );
+    trans::Trans transLocal( g, trc, util::Config( "type", "localopt" ) );
+    trans::Trans transLocalopt( g, trc, util::Config( "type", "localopt2" ) );
 
     functionspace::Spectral spectral( trc );
     functionspace::StructuredColumns gridpoints( g );
@@ -780,8 +780,8 @@ CASE( "test_trans_vordiv_with_translib" ) {
                                 spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(),
                                                                   rgp_analytic.data(), ivar_in, ivar_out );
 
-                                //EXPECT_NO_THROW( transLocal.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
-                                //                                      div.data(), rgp.data() ) );
+                                EXPECT_NO_THROW( transLocal.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
+                                                                      div.data(), rgp.data() ) );
 
                                 EXPECT_NO_THROW( transLocalopt.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
                                                                          div.data(), rgpopt.data() ) );
@@ -803,7 +803,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
                                     ATLAS_DEBUG_VAR( tolerance );
                                 }
                                 EXPECT( rms_gen < tolerance );
-                                //EXPECT( rms_genopt < tolerance );
+                                EXPECT( rms_genopt < tolerance );
                                 icase++;
 
 #if ATLAS_HAVE_TRANS

From 5b52a5393f2a5e33e9e6653b235bcedc6a1d568e Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Thu, 8 Mar 2018 12:06:00 +0000
Subject: [PATCH 013/123] using the symmetry of Legendre polynomials works

---
 src/atlas/trans/localopt2/TransLocalopt2.cc | 144 +++++++++++++--
 src/atlas/trans/localopt2/TransLocalopt2.h  |   4 +
 src/tests/trans/test_transgeneral.cc        | 187 +++++++++++++++++---
 3 files changed, 298 insertions(+), 37 deletions(-)

diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index 45548ce12..745c6f065 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -9,6 +9,7 @@
  */
 
 #include "atlas/trans/localopt2/TransLocalopt2.h"
+#include <math.h>
 #include "atlas/array.h"
 #include "atlas/option.h"
 #include "atlas/parallel/mpi/mpi.h"
@@ -39,6 +40,37 @@ size_t legendre_size( const size_t truncation ) {
     return ( truncation + 2 ) * ( truncation + 1 ) / 2;
 }
 
+int nlats_northernHemisphere( const int nlats ) {
+    return ceil( nlats / 2. );
+    // using ceil here should make it possible to have odd number of latitudes (with the centre latitude being the equator)
+}
+
+int num_n( const int truncation, const int m, const bool symmetric ) {
+    int len = 0;
+    if ( symmetric ) { len = ( truncation - m + 2 ) / 2; }
+    else {
+        len = ( truncation - m + 1 ) / 2;
+    }
+    return len;
+}
+
+std::vector<int> n_indices( const int truncation, const int m, const bool symmetric ) {
+    int len = num_n( truncation, m, symmetric ), jn0 = 0;
+    if ( !symmetric ) { jn0 = 1; }
+    std::vector<int> jns( len );
+    int ia = 0, id = len - 1;
+    for ( int jn = jn0; jn <= truncation - m; jn += 2, ia++, id-- ) {
+#if 1  // 1: ascending, 0: descending
+        int idx = ia;
+#else
+        int idx = id;
+#endif
+        jns[idx] = jn;
+        ASSERT( idx < len && idx >= 0 );
+    }
+    return jns;
+}
+
 }  // namespace
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -51,22 +83,26 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
     truncation_( truncation ),
     precompute_( config.getBool( "precompute", true ) ) {
     ATLAS_TRACE( "Precompute legendre opt2" );
-    int nlats, nlons;
+    int nlats   = 0;
+    int nlons   = 0;
+    int nlatsNH = nlats_northernHemisphere( nlats );
     if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
         grid::StructuredGrid g( grid_ );
-        nlats = g.ny();
-        nlons = g.nxmax();
+        nlats   = g.ny();
+        nlons   = g.nxmax();
+        nlatsNH = nlats_northernHemisphere( nlats );
     }
     else {
-        nlats = grid_.size();
-        nlons = grid_.size();
+        nlats   = grid_.size();
+        nlons   = grid_.size();
+        nlatsNH = nlats;
     }
-    std::vector<double> lats( nlats );
+    std::vector<double> lats( nlatsNH );
     std::vector<double> lons( nlons );
     if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
         grid::StructuredGrid g( grid_ );
         // TODO: remove legendre_begin and legendre_data (only legendre_ should be needed)
-        for ( size_t j = 0; j < nlats; ++j ) {
+        for ( size_t j = 0; j < nlatsNH; ++j ) {
             lats[j] = g.y( j ) * util::Constants::degreesToRadians();
         }
         for ( size_t j = 0; j < nlons; ++j ) {
@@ -83,8 +119,37 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
     // precomputations for Legendre polynomials:
     {
         ATLAS_TRACE( "opt2 precomp Legendre" );
-        legendre_.resize( legendre_size( truncation_ + 1 ) * nlats );
-        compute_legendre_polynomialsopt2( truncation_ + 1, nlats, lats.data(), legendre_.data() );
+        legendre_.resize( legendre_size( truncation_ + 1 ) * nlatsNH );
+        compute_legendre_polynomialsopt2( truncation_ + 1, nlatsNH, lats.data(), legendre_.data() );
+    }
+    {
+        ATLAS_TRACE( "opt2 split Legendre" );
+        int size_sym  = 0;
+        int size_asym = 0;
+        legendre_sym_begin_.resize( truncation_ + 3 );
+        legendre_asym_begin_.resize( truncation_ + 3 );
+        legendre_sym_begin_[0]  = 0;
+        legendre_asym_begin_[0] = 0;
+        for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
+            size_sym += num_n( truncation_ + 1, jm, true );
+            size_asym += num_n( truncation_ + 1, jm, false );
+            legendre_sym_begin_[jm + 1]  = size_sym;
+            legendre_asym_begin_[jm + 1] = size_asym;
+        }
+        legendre_sym_.resize( size_sym * nlatsNH );
+        legendre_asym_.resize( size_asym * nlatsNH );
+        int idx = 0, is = 0, ia = 0;
+        for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
+            for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                for ( int jn = 0; jn <= truncation_ - jm + 1; jn++, idx++ ) {
+                    if ( jn % 2 == 0 ) { legendre_sym_[is++] = legendre_[idx]; }
+                    else {
+                        legendre_asym_[ia++] = legendre_[idx];
+                    }
+                }
+            }
+        }
+        ASSERT( ia == size_asym * nlatsNH && is == size_sym * nlatsNH );
     }
 
     // precomputations for Fourier transformations:
@@ -192,19 +257,76 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
         if ( grid::StructuredGrid g = grid_ ) {
             ATLAS_TRACE( "invtrans_uv structured opt2" );
             int nlats        = g.ny();
+            int nlatsNH      = nlats_northernHemisphere( nlats );
             int size_fourier = nb_fields * 2 * g.ny();
             std::vector<double> scl_fourier( size_fourier * ( truncation + 1 ) );
 
             // Legendre transform:
             {
                 ATLAS_TRACE( "opt2 Legendre dgemm" );
-                for ( int jm = 0; jm <= truncation; jm++ ) {
+                for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
+#if 1  // 0: no symmetry, 1: use symmetry
+                    int size_sym  = num_n( truncation_ + 1, jm, true );
+                    int size_asym = num_n( truncation_ + 1, jm, false );
+                    std::vector<double> scalar_sym( 2 * nb_fields * size_sym, -1234. );
+                    std::vector<double> scalar_asym( 2 * nb_fields * size_asym, -1234. );
+                    std::vector<double> scl_fourier_sym( size_fourier );
+                    std::vector<double> scl_fourier_asym( size_fourier );
+                    {
+                        //ATLAS_TRACE( "opt2 Legendre split" );
+                        int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2;
+                        for ( int jn = 0; jn <= truncation_ - jm + 1; jn++ ) {
+                            for ( int imag = 0; imag < 2; imag++ ) {
+                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                    if ( jn % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
+                                    else {
+                                        scalar_asym[ia++] = scalar_spectra[idx + ioff];
+                                    }
+                                }
+                            }
+                        }
+                        ASSERT( ia == 2 * nb_fields * size_asym && is == 2 * nb_fields * size_sym );
+                    }
+                    {
+                        eckit::linalg::Matrix A( scalar_sym.data(), nb_fields * 2, size_sym );
+                        eckit::linalg::Matrix B( legendre_sym_.data() + legendre_sym_begin_[jm] * nlatsNH, size_sym,
+                                                 nlatsNH );
+                        eckit::linalg::Matrix C( scl_fourier_sym.data(), nb_fields * 2, nlatsNH );
+                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                    }
+                    if ( size_asym > 0 ) {
+                        eckit::linalg::Matrix A( scalar_asym.data(), nb_fields * 2, size_asym );
+                        eckit::linalg::Matrix B( legendre_asym_.data() + legendre_asym_begin_[jm] * nlatsNH, size_asym,
+                                                 nlatsNH );
+                        eckit::linalg::Matrix C( scl_fourier_asym.data(), nb_fields * 2, nlatsNH );
+                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                        {
+                            //ATLAS_TRACE( "opt2 merge spheres" );
+                            // northern hemisphere:
+                            int ioff = jm * size_fourier;
+                            for ( int j = 0; j < 2 * nb_fields * nlatsNH; j++ ) {
+                                scl_fourier[j + ioff] = scl_fourier_sym[j] + scl_fourier_asym[j];
+                            }
+                            // southern hemisphere:
+                            int idx = 0;
+                            for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                                for ( int imag = 0; imag < 2; imag++ ) {
+                                    for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                        int pos = jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) );
+                                        scl_fourier[pos + ioff] = scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                                    }
+                                }
+                            }
+                        }
+                    }
+#else
                     int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
                     eckit::linalg::Matrix A( eckit::linalg::Matrix(
                         const_cast<double*>( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) );
                     eckit::linalg::Matrix B( legendre_.data() + noff * g.ny(), ns, g.ny() );
                     eckit::linalg::Matrix C( scl_fourier.data() + jm * size_fourier, nb_fields * 2, g.ny() );
                     eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+#endif
                 }
             }
 #if 0  // 1: better for small number of columns, large truncation; 0: better for large number of columns
@@ -323,7 +445,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
             }
         }
     }
-}
+}  // namespace trans
 
 // --------------------------------------------------------------------------------------------------------------------
 
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.h b/src/atlas/trans/localopt2/TransLocalopt2.h
index 8f3de137c..92fb292f1 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.h
+++ b/src/atlas/trans/localopt2/TransLocalopt2.h
@@ -113,9 +113,13 @@ class TransLocalopt2 : public trans::TransImpl {
     int truncation_;
     bool precompute_;
     mutable std::vector<double> legendre_;
+    mutable std::vector<double> legendre_sym_;
+    mutable std::vector<double> legendre_asym_;
     mutable std::vector<double> fourier_;
     mutable std::vector<double> fouriertp_;
     std::vector<size_t> legendre_begin_;
+    std::vector<size_t> legendre_sym_begin_;
+    std::vector<size_t> legendre_asym_begin_;
 };
 
 //-----------------------------------------------------------------------------
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 5838bd796..a3c5285fe 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -712,7 +712,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
     double tolerance  = 1.e-13;
 
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F320" );
+    Grid g( "F120" );
 
     grid::StructuredGrid gs( g );
     int ndgl = gs.ny();
@@ -721,8 +721,8 @@ CASE( "test_trans_vordiv_with_translib" ) {
 #if ATLAS_HAVE_TRANS
     trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) );
 #endif
-    trans::Trans transLocal( g, trc, util::Config( "type", "localopt" ) );
-    trans::Trans transLocalopt( g, trc, util::Config( "type", "localopt2" ) );
+    trans::Trans transLocal1( g, trc, util::Config( "type", "localopt" ) );
+    trans::Trans transLocal2( g, trc, util::Config( "type", "localopt2" ) );
 
     functionspace::Spectral spectral( trc );
     functionspace::StructuredColumns gridpoints( g );
@@ -734,8 +734,8 @@ CASE( "test_trans_vordiv_with_translib" ) {
     std::vector<double> div( 2 * N * nb_vordiv );
     std::vector<double> rspecg( 2 * N );
     std::vector<double> gp( nb_all * g.size() );
-    std::vector<double> rgp( nb_all * g.size() );
-    std::vector<double> rgpopt( nb_all * g.size() );
+    std::vector<double> rgp1( nb_all * g.size() );
+    std::vector<double> rgp2( nb_all * g.size() );
     std::vector<double> rgp_analytic( g.size() );
 
     int icase = 0;
@@ -769,9 +769,9 @@ CASE( "test_trans_vordiv_with_translib" ) {
                                 if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.;
 
                                 for ( int j = 0; j < nb_all * g.size(); j++ ) {
-                                    gp[j]     = 0.;
-                                    rgp[j]    = 0.;
-                                    rgpopt[j] = 0.;
+                                    gp[j]   = 0.;
+                                    rgp1[j] = 0.;
+                                    rgp2[j] = 0.;
                                 }
                                 for ( int j = 0; j < g.size(); j++ ) {
                                     rgp_analytic[j] = 0.;
@@ -780,30 +780,30 @@ CASE( "test_trans_vordiv_with_translib" ) {
                                 spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(),
                                                                   rgp_analytic.data(), ivar_in, ivar_out );
 
-                                EXPECT_NO_THROW( transLocal.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
-                                                                      div.data(), rgp.data() ) );
+                                EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
+                                                                       div.data(), rgp1.data() ) );
 
-                                EXPECT_NO_THROW( transLocalopt.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
-                                                                         div.data(), rgpopt.data() ) );
+                                EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
+                                                                       div.data(), rgp2.data() ) );
 
                                 int pos = ( ivar_out * nb_vordiv + jfld );
 
-                                double rms_gen =
-                                    compute_rms( g.size(), rgpopt.data() + pos * g.size(), rgp_analytic.data() );
+                                double rms_gen1 =
+                                    compute_rms( g.size(), rgp1.data() + pos * g.size(), rgp_analytic.data() );
 
-                                double rms_genopt =
-                                    compute_rms( g.size(), rgpopt.data() + pos * g.size(), rgp_analytic.data() );
+                                double rms_gen2 =
+                                    compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() );
 
-                                if ( !( rms_gen < tolerance ) || !( rms_genopt < tolerance ) ) {
+                                if ( !( rms_gen1 < tolerance ) || !( rms_gen2 < tolerance ) ) {
                                     Log::info()
                                         << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
                                         << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
-                                    ATLAS_DEBUG_VAR( rms_gen );
-                                    ATLAS_DEBUG_VAR( rms_genopt );
+                                    ATLAS_DEBUG_VAR( rms_gen1 );
+                                    ATLAS_DEBUG_VAR( rms_gen2 );
                                     ATLAS_DEBUG_VAR( tolerance );
                                 }
-                                EXPECT( rms_gen < tolerance );
-                                EXPECT( rms_genopt < tolerance );
+                                EXPECT( rms_gen1 < tolerance );
+                                EXPECT( rms_gen2 < tolerance );
                                 icase++;
 
 #if ATLAS_HAVE_TRANS
@@ -812,21 +812,156 @@ CASE( "test_trans_vordiv_with_translib" ) {
                                 double rms_trans =
                                     compute_rms( g.size(), gp.data() + pos * g.size(), rgp_analytic.data() );
                                 double rms_diff =
-                                    compute_rms( g.size(), rgpopt.data() + pos * g.size(), gp.data() + pos * g.size() );
+                                    compute_rms( g.size(), rgp1.data() + pos * g.size(), gp.data() + pos * g.size() );
                                 EXPECT( rms_trans < tolerance );
                                 if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) {
                                     Log::info()
                                         << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
                                         << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
-                                    ATLAS_DEBUG_VAR( rms_gen );
-                                    ATLAS_DEBUG_VAR( rms_genopt );
+                                    ATLAS_DEBUG_VAR( rms_gen1 );
+                                    ATLAS_DEBUG_VAR( rms_gen2 );
                                     ATLAS_DEBUG_VAR( rms_trans );
                                     ATLAS_DEBUG_VAR( rms_diff );
                                     ATLAS_DEBUG_VAR( tolerance );
                                 }
 #endif
-                                if ( icase > 2 ) EXPECT( false );
-                                Log::info() << "test " << icase << std::endl;
+                                EXPECT( icase < 300 );
+                            }
+                            k++;
+                        }
+                    }
+                }
+            }
+        }
+    }
+    Log::info() << "Vordiv+scalar comparison with trans: all " << icase << " cases successfully passed!" << std::endl;
+}
+
+//-----------------------------------------------------------------------------
+
+CASE( "test_trans_hires" ) {
+    Log::info() << "test_trans_hires" << std::endl;
+    // test transgeneral by comparing its result with the trans library
+    // this test is based on the test_nomesh case in test_trans.cc
+
+    std::ostream& out = Log::info();
+    double tolerance  = 1.e-13;
+
+    // Grid: (Adjust the following line if the test takes too long!)
+    Grid g( "F128" );
+
+    grid::StructuredGrid gs( g );
+    int ndgl = gs.ny();
+    //int trc  = ndgl - 1;  // linear
+    int trc = ndgl / 2. - 1;  // cubic
+#if 0
+    trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) );
+#endif
+    trans::Trans transLocal1( g, trc, util::Config( "type", "localopt2" ) );
+    //trans::Trans transLocal2( g, trc, util::Config( "type", "localopt2" ) );
+
+    functionspace::Spectral spectral( trc );
+    functionspace::StructuredColumns gridpoints( g );
+
+    int nb_scalar = 1, nb_vordiv = 0;
+    int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
+    std::vector<double> sp( 2 * N * nb_scalar );
+    std::vector<double> vor( 2 * N * nb_vordiv );
+    std::vector<double> div( 2 * N * nb_vordiv );
+    std::vector<double> rspecg( 2 * N );
+    std::vector<double> gp( nb_all * g.size() );
+    std::vector<double> rgp1( nb_all * g.size() );
+    //std::vector<double> rgp2( nb_all * g.size() );
+    std::vector<double> rgp_analytic( g.size() );
+
+    int icase = 0;
+    for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) {         // vorticity, divergence, scalar
+        for ( int ivar_out = 2; ivar_out < 3; ivar_out++ ) {  // u, v, scalar
+            int nb_fld = 1;
+            if ( ivar_out == 2 ) {
+                tolerance = 1.e-13;
+                nb_fld    = nb_scalar;
+            }
+            else {
+                tolerance = 2.e-6;
+                nb_fld    = nb_vordiv;
+            }
+            for ( int jfld = 0; jfld < nb_fld; jfld++ ) {  // multiple fields
+                int k = 0;
+                for ( int m = 0; m <= trc; m++ ) {                 // zonal wavenumber
+                    for ( int n = m; n <= trc; n++ ) {             // total wavenumber
+                        for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
+
+                            if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. ) {
+                                for ( int j = 0; j < 2 * N * nb_scalar; j++ ) {
+                                    sp[j] = 0.;
+                                }
+                                for ( int j = 0; j < 2 * N * nb_vordiv; j++ ) {
+                                    vor[j] = 0.;
+                                    div[j] = 0.;
+                                }
+                                if ( ivar_in == 0 ) vor[k * nb_vordiv + jfld] = 1.;
+                                if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.;
+                                if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.;
+
+                                for ( int j = 0; j < nb_all * g.size(); j++ ) {
+                                    gp[j]   = 0.;
+                                    rgp1[j] = 0.;
+                                    //rgp2[j] = 0.;
+                                }
+                                for ( int j = 0; j < g.size(); j++ ) {
+                                    rgp_analytic[j] = 0.;
+                                }
+
+                                spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(),
+                                                                  rgp_analytic.data(), ivar_in, ivar_out );
+
+                                EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
+                                                                       div.data(), rgp1.data() ) );
+
+                                //EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
+                                //                                       div.data(), rgp2.data() ) );
+
+                                int pos = ( ivar_out * nb_vordiv + jfld );
+
+                                double rms_gen1 =
+                                    compute_rms( g.size(), rgp1.data() + pos * g.size(), rgp_analytic.data() );
+
+                                //double rms_gen2 =
+                                //    compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() );
+
+                                if ( !( rms_gen1 < tolerance ) ) {  // || !( rms_gen2 < tolerance ) ) {
+                                    Log::info()
+                                        << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
+                                        << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
+                                    ATLAS_DEBUG_VAR( rms_gen1 );
+                                    //ATLAS_DEBUG_VAR( rms_gen2 );
+                                    ATLAS_DEBUG_VAR( tolerance );
+                                }
+                                EXPECT( rms_gen1 < tolerance );
+                                //EXPECT( rms_gen2 < tolerance );
+                                icase++;
+
+#if 0
+                                EXPECT_NO_THROW( transIFS.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
+                                                                    div.data(), gp.data() ) );
+                                double rms_trans =
+                                    compute_rms( g.size(), gp.data() + pos * g.size(), rgp_analytic.data() );
+                                double rms_diff =
+                                    compute_rms( g.size(), rgp1.data() + pos * g.size(), gp.data() + pos * g.size() );
+                                EXPECT( rms_trans < tolerance );
+                                if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) {
+                                    Log::info()
+                                        << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
+                                        << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
+                                    ATLAS_DEBUG_VAR( rms_gen1 );
+                                    //ATLAS_DEBUG_VAR( rms_gen2 );
+                                    ATLAS_DEBUG_VAR( rms_trans );
+                                    ATLAS_DEBUG_VAR( rms_diff );
+                                    ATLAS_DEBUG_VAR( tolerance );
+                                }
+#endif
+                                EXPECT( icase < 300 );
                             }
                             k++;
                         }

From 7663d891d6aecb7dfbe230dd6434cca3ec26651f Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Thu, 8 Mar 2018 17:56:30 +0000
Subject: [PATCH 014/123] fftw is working. With lots of debug output

---
 CMakeLists.txt                              |   6 ++
 src/CMakeLists.txt                          |   6 ++
 src/atlas/CMakeLists.txt                    |   4 +-
 src/atlas/library/defines.h.in              |   1 +
 src/atlas/trans/localopt/TransLocalopt.cc   |   2 +-
 src/atlas/trans/localopt2/TransLocalopt2.cc | 102 +++++++++++++++++++-
 src/tests/trans/test_transgeneral.cc        |  25 +++--
 7 files changed, 130 insertions(+), 16 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index cf31981d9..787c96898 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -99,6 +99,12 @@ else()
   ecbuild_enable_ompstubs()
 endif()
 
+### FFTW ...
+
+ecbuild_add_option( FEATURE FFTW
+                    DESCRIPTION "Support for fftw"
+                    REQUIRED_PACKAGES "FFTW COMPONENTS double" )
+
 ### trans ...
 
 ecbuild_add_option( FEATURE TRANS
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 2a48b5b55..98a5e1dd0 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -36,6 +36,12 @@ else()
   set( ATLAS_HAVE_TRANS 0 )
 endif()
 
+if( ATLAS_HAVE_FFTW )
+  set( ATLAS_HAVE_FFTW 1 )
+else()
+  set( ATLAS_HAVE_FFTW 0 )
+endif()
+
 if( ATLAS_HAVE_BOUNDSCHECKING )
   set( ATLAS_HAVE_BOUNDSCHECKING 1 )
 else()
diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt
index 8cb63ed1f..b55db3e40 100644
--- a/src/atlas/CMakeLists.txt
+++ b/src/atlas/CMakeLists.txt
@@ -576,6 +576,7 @@ ecbuild_add_library( TARGET atlas
     "${CGAL_INCLUDE_DIRS}"
     "${TRANSI_INCLUDE_DIRS}"
     "${MPI_CXX_INCLUDE_DIRS}"
+    "${FFTW_INCLUDES}"
   LIBS
     eckit_geometry
     eckit_linalg
@@ -584,9 +585,10 @@ ecbuild_add_library( TARGET atlas
     "${CGAL_LIBRARIES}"
     "${TRANSI_LIBRARIES}"
     "${FCKIT_LIBRARIES}"
+    "${FFTW_LIBRARIES}"
   DEFINITIONS
     ${ATLAS_DEFINITIONS}
- )
+)
 
 if( ATLAS_HAVE_GRIDTOOLS_STORAGE )
   target_link_libraries( atlas gridtools::storage )
diff --git a/src/atlas/library/defines.h.in b/src/atlas/library/defines.h.in
index 22ba76c0c..e644fc73e 100644
--- a/src/atlas/library/defines.h.in
+++ b/src/atlas/library/defines.h.in
@@ -10,6 +10,7 @@
 #define ATLAS_HAVE_TESSELATION               @ATLAS_HAVE_TESSELATION@
 #define ATLAS_HAVE_FORTRAN                   @ATLAS_HAVE_FORTRAN@
 #define ATLAS_HAVE_EIGEN                     @ATLAS_HAVE_EIGEN@
+#define ATLAS_HAVE_FFTW                      @ATLAS_HAVE_FFTW@
 #define ATLAS_BITS_GLOBAL                    @ATLAS_BITS_GLOBAL@
 #define ATLAS_ARRAYVIEW_BOUNDS_CHECKING      @ATLAS_HAVE_BOUNDSCHECKING@
 #define ATLAS_INDEXVIEW_BOUNDS_CHECKING      @ATLAS_HAVE_BOUNDSCHECKING@
diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index 907be3e24..7e157d80c 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -206,7 +206,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                     eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                 }
             }
-#if 1  // 1: better for small number of columns, large truncation; 0: better for large number of columns
+#if 0  // 1: better for small number of columns, large truncation; 0: better for large number of columns
 
             // Transposition in Fourier space:
             std::vector<double> scl_fourier_tp( size_fourier * ( truncation + 1 ) );
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index 745c6f065..1ccaffa5c 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -23,6 +23,9 @@
 #include "atlas/util/Constants.h"
 #include "eckit/linalg/LinearAlgebra.h"
 #include "eckit/linalg/Matrix.h"
+#if ATLAS_HAVE_FFTW
+#include <fftw3.h>
+#endif
 
 namespace atlas {
 namespace trans {
@@ -82,6 +85,11 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
     grid_( grid ),
     truncation_( truncation ),
     precompute_( config.getBool( "precompute", true ) ) {
+#if ATLAS_HAVE_FFTW
+    Log::info() << "Atlas has FFTW" << std::endl;
+#else
+    Log::info() << "Atlas has no FFTW" << std::endl;
+#endif
     ATLAS_TRACE( "Precompute legendre opt2" );
     int nlats   = 0;
     int nlons   = 0;
@@ -257,6 +265,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
         if ( grid::StructuredGrid g = grid_ ) {
             ATLAS_TRACE( "invtrans_uv structured opt2" );
             int nlats        = g.ny();
+            int nlons        = g.nxmax();
             int nlatsNH      = nlats_northernHemisphere( nlats );
             int size_fourier = nb_fields * 2 * g.ny();
             std::vector<double> scl_fourier( size_fourier * ( truncation + 1 ) );
@@ -265,7 +274,8 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
             {
                 ATLAS_TRACE( "opt2 Legendre dgemm" );
                 for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-#if 1  // 0: no symmetry, 1: use symmetry
+#if 1  // 0: no symmetry, 1: use symmetry \
+    // TODO: 0 is currently not working because it requires all latitudes to be included in legendre_ (which is currently not done)
                     int size_sym  = num_n( truncation_ + 1, jm, true );
                     int size_asym = num_n( truncation_ + 1, jm, false );
                     std::vector<double> scalar_sym( 2 * nb_fields * size_sym, -1234. );
@@ -329,6 +339,95 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
 #endif
                 }
             }
+#if ATLAS_HAVE_FFTW
+            std::vector<double> scl_fourier_tp( size_fourier * ( truncation + 1 ) );
+            {
+                // Transposition in Fourier space:
+                {
+                    ATLAS_TRACE( "opt2 transposition in Fourier" );
+                    int idx = 0;
+                    for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                        for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                            for ( int imag = 0; imag < 2; imag++ ) {
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                    int pos_tp = imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + g.ny() * ( jfld ) ) );
+                                    //int pos  = jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) );
+                                    scl_fourier_tp[pos_tp] = scl_fourier[idx++];  // = scl_fourier[pos]
+                                }
+                            }
+                        }
+                    }
+                }
+
+                // Fourier transformation:
+                std::vector<double> gp_opt2( nb_fields * grid_.size(), 0. );
+                {
+                    ATLAS_TRACE( "opt2 Fourier dgemm" );
+                    eckit::linalg::Matrix A( fouriertp_.data(), g.nxmax(), ( truncation_ + 1 ) * 2 );
+                    eckit::linalg::Matrix B( scl_fourier_tp.data(), ( truncation_ + 1 ) * 2, nb_fields * g.ny() );
+                    eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() );
+                    eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                }
+            }
+            {
+                auto position = [&]( int jfld, int imag, int jlat, int jm ) {
+                    return jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) );
+                };
+
+                {
+                    ATLAS_TRACE( "opt2 transposition in Fourier for FFTW" );
+                    int num_complex  = ( nlons / 2 ) + 1;
+                    fftw_complex* in = fftw_alloc_complex( num_complex );
+                    double* out      = fftw_alloc_real( nlons );
+                    fftw_plan plan   = fftw_plan_dft_c2r_1d( nlons, in, out, FFTW_ESTIMATE );
+                    int idx0 = 0, idx1 = 0;
+                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                            Log::info() << "scl_fourier_tp: " << std::endl;
+                            for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                                for ( int imag = 0; imag < 2; imag++, idx1++ ) {
+                                    Log::info() << scl_fourier_tp[idx1] << " ";
+                                }
+                            }
+                            Log::info() << std::endl;
+                            for ( int jm = 0; jm < num_complex; jm++ ) {
+                                for ( int imag = 0; imag < 2; imag++ ) {
+                                    if ( jm <= truncation_ ) {
+                                        in[jm][imag] = scl_fourier[position( jfld, imag, jlat, jm )] / 2.;
+                                    }
+                                    else {
+                                        in[jm][imag] = 0.;
+                                    }
+                                }
+                            }
+                            in[0][0] *= 2.;
+                            Log::info() << "fft:in: " << std::endl;
+                            for ( int jm = 0; jm < num_complex; jm++ ) {
+                                for ( int imag = 0; imag < 2; imag++ ) {
+                                    Log::info() << in[jm][imag] << " ";
+                                }
+                            }
+                            Log::info() << std::endl;
+                            fftw_execute( plan );
+                            Log::info() << "fft:out: " << std::endl;
+                            for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                                Log::info() << out[jlon] << " ";
+                            }
+                            Log::info() << std::endl;
+                            Log::info() << "gp_fields: old: " << std::endl;
+                            for ( int jlon = 0; jlon < nlons; jlon++, idx0++ ) {
+                                Log::info() << gp_fields[idx0] << " ";
+                                gp_fields[idx0] = out[jlon];
+                            }
+                            Log::info() << std::endl;
+                        }
+                    }
+                    fftw_destroy_plan( plan );
+                    fftw_free( in );
+                    fftw_free( out );
+                }
+            }
+#else
 #if 0  // 1: better for small number of columns, large truncation; 0: better for large number of columns
 
             // Transposition in Fourier space:
@@ -402,6 +501,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                 eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
             }
 
+#endif
 #endif
             // Computing u,v from U,V:
             {
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index a3c5285fe..2135ca994 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -712,7 +712,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
     double tolerance  = 1.e-13;
 
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F120" );
+    Grid g( "F3" );
 
     grid::StructuredGrid gs( g );
     int ndgl = gs.ny();
@@ -814,16 +814,15 @@ CASE( "test_trans_vordiv_with_translib" ) {
                                 double rms_diff =
                                     compute_rms( g.size(), rgp1.data() + pos * g.size(), gp.data() + pos * g.size() );
                                 EXPECT( rms_trans < tolerance );
-                                if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) {
-                                    Log::info()
-                                        << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
-                                        << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
-                                    ATLAS_DEBUG_VAR( rms_gen1 );
-                                    ATLAS_DEBUG_VAR( rms_gen2 );
-                                    ATLAS_DEBUG_VAR( rms_trans );
-                                    ATLAS_DEBUG_VAR( rms_diff );
-                                    ATLAS_DEBUG_VAR( tolerance );
-                                }
+                                //if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) {
+                                Log::info() << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
+                                            << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
+                                ATLAS_DEBUG_VAR( rms_gen1 );
+                                ATLAS_DEBUG_VAR( rms_gen2 );
+                                ATLAS_DEBUG_VAR( rms_trans );
+                                ATLAS_DEBUG_VAR( rms_diff );
+                                ATLAS_DEBUG_VAR( tolerance );
+                                //}
 #endif
                                 EXPECT( icase < 300 );
                             }
@@ -838,7 +837,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
 }
 
 //-----------------------------------------------------------------------------
-
+#if 0
 CASE( "test_trans_hires" ) {
     Log::info() << "test_trans_hires" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -972,7 +971,7 @@ CASE( "test_trans_hires" ) {
     }
     Log::info() << "Vordiv+scalar comparison with trans: all " << icase << " cases successfully passed!" << std::endl;
 }
-
+#endif
 //-----------------------------------------------------------------------------
 
 CASE( "test_trans_invtrans" ) {

From e3c8cacb9e1f8bc9e4e2e903163e0f44327fd7c3 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Thu, 8 Mar 2018 18:42:59 +0000
Subject: [PATCH 015/123] fftw works with all latitudes in one execution

---
 src/atlas/trans/localopt2/TransLocalopt2.cc | 54 ++++++++++++---------
 1 file changed, 32 insertions(+), 22 deletions(-)

diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index 1ccaffa5c..9f3c4f92f 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -373,58 +373,68 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                 auto position = [&]( int jfld, int imag, int jlat, int jm ) {
                     return jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) );
                 };
+                auto factor = [&]( int jm ) {
+                    if ( jm > 0 ) { return 2.; }
+                    else {
+                        return 1.;
+                    }
+                };
 
                 {
                     ATLAS_TRACE( "opt2 transposition in Fourier for FFTW" );
-                    int num_complex  = ( nlons / 2 ) + 1;
-                    fftw_complex* in = fftw_alloc_complex( num_complex );
-                    double* out      = fftw_alloc_real( nlons );
-                    fftw_plan plan   = fftw_plan_dft_c2r_1d( nlons, in, out, FFTW_ESTIMATE );
-                    int idx0 = 0, idx1 = 0;
+                    int num_complex      = ( nlons / 2 ) + 1;
+                    fftw_complex* fft_in = fftw_alloc_complex( nlats * num_complex );
+                    double* fft_out      = fftw_alloc_real( nlats * nlons );
+                    fftw_plan plan = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in, NULL, 1, num_complex, fft_out,
+                                                             NULL, 1, nlons, FFTW_ESTIMATE );
                     for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        int idx = 0;
                         for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                            Log::info() << "scl_fourier_tp: " << std::endl;
-                            for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                                for ( int imag = 0; imag < 2; imag++, idx1++ ) {
-                                    Log::info() << scl_fourier_tp[idx1] << " ";
-                                }
-                            }
-                            Log::info() << std::endl;
-                            for ( int jm = 0; jm < num_complex; jm++ ) {
+                            for ( int jm = 0; jm < num_complex; jm++, idx++ ) {
                                 for ( int imag = 0; imag < 2; imag++ ) {
                                     if ( jm <= truncation_ ) {
-                                        in[jm][imag] = scl_fourier[position( jfld, imag, jlat, jm )] / 2.;
+                                        fft_in[idx][imag] =
+                                            scl_fourier[position( jfld, imag, jlat, jm )] / factor( jm );
                                     }
                                     else {
-                                        in[jm][imag] = 0.;
+                                        fft_in[idx][imag] = 0.;
                                     }
                                 }
                             }
-                            in[0][0] *= 2.;
+                        }
+                        fftw_execute( plan );
+                        int idx0 = 0, idx1 = 0;
+                        for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                            Log::info() << "scl_fourier_tp: " << std::endl;
+                            for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                                for ( int imag = 0; imag < 2; imag++, idx1++ ) {
+                                    Log::info() << scl_fourier_tp[idx1] << " ";
+                                }
+                            }
+                            Log::info() << std::endl;
                             Log::info() << "fft:in: " << std::endl;
                             for ( int jm = 0; jm < num_complex; jm++ ) {
                                 for ( int imag = 0; imag < 2; imag++ ) {
-                                    Log::info() << in[jm][imag] << " ";
+                                    Log::info() << fft_in[jm + num_complex * jlat][imag] << " ";
                                 }
                             }
                             Log::info() << std::endl;
-                            fftw_execute( plan );
                             Log::info() << "fft:out: " << std::endl;
                             for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                                Log::info() << out[jlon] << " ";
+                                Log::info() << fft_out[jlon + nlons * jlat] << " ";
                             }
                             Log::info() << std::endl;
                             Log::info() << "gp_fields: old: " << std::endl;
                             for ( int jlon = 0; jlon < nlons; jlon++, idx0++ ) {
                                 Log::info() << gp_fields[idx0] << " ";
-                                gp_fields[idx0] = out[jlon];
+                                //gp_fields[idx0] = fft_out[jlon + nlons * jlat];
                             }
                             Log::info() << std::endl;
                         }
                     }
                     fftw_destroy_plan( plan );
-                    fftw_free( in );
-                    fftw_free( out );
+                    fftw_free( fft_in );
+                    fftw_free( fft_out );
                 }
             }
 #else

From 3800fa0c88c3e2268476f4070bcef8799aab524f Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Thu, 8 Mar 2018 19:19:52 +0000
Subject: [PATCH 016/123] opt2 is now reusing the FFTW plan. Opt is now using
 symmetry (no FFTW)

---
 src/atlas/trans/localopt/TransLocalopt.cc   | 146 ++++++++++++++++++--
 src/atlas/trans/localopt/TransLocalopt.h    |   4 +
 src/atlas/trans/localopt2/TransLocalopt2.cc | 101 ++++----------
 src/atlas/trans/localopt2/TransLocalopt2.h  |   9 ++
 src/tests/trans/test_transgeneral.cc        |  21 +--
 5 files changed, 183 insertions(+), 98 deletions(-)

diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index 7e157d80c..d5281db77 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -9,6 +9,7 @@
  */
 
 #include "atlas/trans/localopt/TransLocalopt.h"
+#include <math.h>
 #include "atlas/array.h"
 #include "atlas/option.h"
 #include "atlas/parallel/mpi/mpi.h"
@@ -39,6 +40,37 @@ size_t legendre_size( const size_t truncation ) {
     return ( truncation + 2 ) * ( truncation + 1 ) / 2;
 }
 
+int nlats_northernHemisphere( const int nlats ) {
+    return ceil( nlats / 2. );
+    // using ceil here should make it possible to have odd number of latitudes (with the centre latitude being the equator)
+}
+
+int num_n( const int truncation, const int m, const bool symmetric ) {
+    int len = 0;
+    if ( symmetric ) { len = ( truncation - m + 2 ) / 2; }
+    else {
+        len = ( truncation - m + 1 ) / 2;
+    }
+    return len;
+}
+
+std::vector<int> n_indices( const int truncation, const int m, const bool symmetric ) {
+    int len = num_n( truncation, m, symmetric ), jn0 = 0;
+    if ( !symmetric ) { jn0 = 1; }
+    std::vector<int> jns( len );
+    int ia = 0, id = len - 1;
+    for ( int jn = jn0; jn <= truncation - m; jn += 2, ia++, id-- ) {
+#if 1  // 1: ascending, 0: descending
+        int idx = ia;
+#else
+        int idx = id;
+#endif
+        jns[idx] = jn;
+        ASSERT( idx < len && idx >= 0 );
+    }
+    return jns;
+}
+
 }  // namespace
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -51,22 +83,26 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
     truncation_( truncation ),
     precompute_( config.getBool( "precompute", true ) ) {
     ATLAS_TRACE( "Precompute legendre opt" );
-    int nlats, nlons;
+    int nlats   = 0;
+    int nlons   = 0;
+    int nlatsNH = nlats_northernHemisphere( nlats );
     if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
         grid::StructuredGrid g( grid_ );
-        nlats = g.ny();
-        nlons = g.nxmax();
+        nlats   = g.ny();
+        nlons   = g.nxmax();
+        nlatsNH = nlats_northernHemisphere( nlats );
     }
     else {
-        nlats = grid_.size();
-        nlons = grid_.size();
+        nlats   = grid_.size();
+        nlons   = grid_.size();
+        nlatsNH = nlats;
     }
-    std::vector<double> lats( nlats );
+    std::vector<double> lats( nlatsNH );
     std::vector<double> lons( nlons );
     if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
         grid::StructuredGrid g( grid_ );
         // TODO: remove legendre_begin and legendre_data (only legendre_ should be needed)
-        for ( size_t j = 0; j < nlats; ++j ) {
+        for ( size_t j = 0; j < nlatsNH; ++j ) {
             lats[j] = g.y( j ) * util::Constants::degreesToRadians();
         }
         for ( size_t j = 0; j < nlons; ++j ) {
@@ -83,8 +119,37 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
     // precomputations for Legendre polynomials:
     {
         ATLAS_TRACE( "opt precomp Legendre" );
-        legendre_.resize( legendre_size( truncation_ + 1 ) * nlats );
-        compute_legendre_polynomialsopt( truncation_ + 1, nlats, lats.data(), legendre_.data() );
+        legendre_.resize( legendre_size( truncation_ + 1 ) * nlatsNH );
+        compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre_.data() );
+    }
+    {
+        ATLAS_TRACE( "opt split Legendre" );
+        int size_sym  = 0;
+        int size_asym = 0;
+        legendre_sym_begin_.resize( truncation_ + 3 );
+        legendre_asym_begin_.resize( truncation_ + 3 );
+        legendre_sym_begin_[0]  = 0;
+        legendre_asym_begin_[0] = 0;
+        for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
+            size_sym += num_n( truncation_ + 1, jm, true );
+            size_asym += num_n( truncation_ + 1, jm, false );
+            legendre_sym_begin_[jm + 1]  = size_sym;
+            legendre_asym_begin_[jm + 1] = size_asym;
+        }
+        legendre_sym_.resize( size_sym * nlatsNH );
+        legendre_asym_.resize( size_asym * nlatsNH );
+        int idx = 0, is = 0, ia = 0;
+        for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
+            for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                for ( int jn = 0; jn <= truncation_ - jm + 1; jn++, idx++ ) {
+                    if ( jn % 2 == 0 ) { legendre_sym_[is++] = legendre_[idx]; }
+                    else {
+                        legendre_asym_[ia++] = legendre_[idx];
+                    }
+                }
+            }
+        }
+        ASSERT( ia == size_asym * nlatsNH && is == size_sym * nlatsNH );
     }
 
     // precomputations for Fourier transformations:
@@ -191,19 +256,78 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
         if ( grid::StructuredGrid g = grid_ ) {
             ATLAS_TRACE( "invtrans_uv structured opt" );
             int nlats        = g.ny();
+            int nlons        = g.nxmax();
+            int nlatsNH      = nlats_northernHemisphere( nlats );
             int size_fourier = nb_fields * 2 * g.ny();
             std::vector<double> scl_fourier( size_fourier * ( truncation + 1 ) );
 
             // Legendre transform:
             {
                 ATLAS_TRACE( "opt Legendre dgemm" );
-                for ( int jm = 0; jm <= truncation; jm++ ) {
+                for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
+#if 1  // 0: no symmetry, 1: use symmetry \
+    // TODO: 0 is currently not working because it requires all latitudes to be included in legendre_ (which is currently not done)
+                    int size_sym  = num_n( truncation_ + 1, jm, true );
+                    int size_asym = num_n( truncation_ + 1, jm, false );
+                    std::vector<double> scalar_sym( 2 * nb_fields * size_sym, -1234. );
+                    std::vector<double> scalar_asym( 2 * nb_fields * size_asym, -1234. );
+                    std::vector<double> scl_fourier_sym( size_fourier );
+                    std::vector<double> scl_fourier_asym( size_fourier );
+                    {
+                        //ATLAS_TRACE( "opt Legendre split" );
+                        int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2;
+                        for ( int jn = 0; jn <= truncation_ - jm + 1; jn++ ) {
+                            for ( int imag = 0; imag < 2; imag++ ) {
+                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                    if ( jn % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
+                                    else {
+                                        scalar_asym[ia++] = scalar_spectra[idx + ioff];
+                                    }
+                                }
+                            }
+                        }
+                        ASSERT( ia == 2 * nb_fields * size_asym && is == 2 * nb_fields * size_sym );
+                    }
+                    {
+                        eckit::linalg::Matrix A( scalar_sym.data(), nb_fields * 2, size_sym );
+                        eckit::linalg::Matrix B( legendre_sym_.data() + legendre_sym_begin_[jm] * nlatsNH, size_sym,
+                                                 nlatsNH );
+                        eckit::linalg::Matrix C( scl_fourier_sym.data(), nb_fields * 2, nlatsNH );
+                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                    }
+                    if ( size_asym > 0 ) {
+                        eckit::linalg::Matrix A( scalar_asym.data(), nb_fields * 2, size_asym );
+                        eckit::linalg::Matrix B( legendre_asym_.data() + legendre_asym_begin_[jm] * nlatsNH, size_asym,
+                                                 nlatsNH );
+                        eckit::linalg::Matrix C( scl_fourier_asym.data(), nb_fields * 2, nlatsNH );
+                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                        {
+                            //ATLAS_TRACE( "opt merge spheres" );
+                            // northern hemisphere:
+                            int ioff = jm * size_fourier;
+                            for ( int j = 0; j < 2 * nb_fields * nlatsNH; j++ ) {
+                                scl_fourier[j + ioff] = scl_fourier_sym[j] + scl_fourier_asym[j];
+                            }
+                            // southern hemisphere:
+                            int idx = 0;
+                            for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                                for ( int imag = 0; imag < 2; imag++ ) {
+                                    for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                        int pos = jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) );
+                                        scl_fourier[pos + ioff] = scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                                    }
+                                }
+                            }
+                        }
+                    }
+#else
                     int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
                     eckit::linalg::Matrix A( eckit::linalg::Matrix(
                         const_cast<double*>( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) );
                     eckit::linalg::Matrix B( legendre_.data() + noff * g.ny(), ns, g.ny() );
                     eckit::linalg::Matrix C( scl_fourier.data() + jm * size_fourier, nb_fields * 2, g.ny() );
                     eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+#endif
                 }
             }
 #if 0  // 1: better for small number of columns, large truncation; 0: better for large number of columns
@@ -322,7 +446,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
             }
         }
     }
-}
+}  // namespace trans
 
 // --------------------------------------------------------------------------------------------------------------------
 
diff --git a/src/atlas/trans/localopt/TransLocalopt.h b/src/atlas/trans/localopt/TransLocalopt.h
index ff77db4a9..dca01cd2f 100644
--- a/src/atlas/trans/localopt/TransLocalopt.h
+++ b/src/atlas/trans/localopt/TransLocalopt.h
@@ -112,9 +112,13 @@ class TransLocalopt : public trans::TransImpl {
     int truncation_;
     bool precompute_;
     mutable std::vector<double> legendre_;
+    mutable std::vector<double> legendre_sym_;
+    mutable std::vector<double> legendre_asym_;
     mutable std::vector<double> fourier_;
     mutable std::vector<double> fouriertp_;
     std::vector<size_t> legendre_begin_;
+    std::vector<size_t> legendre_sym_begin_;
+    std::vector<size_t> legendre_asym_begin_;
 };
 
 //-----------------------------------------------------------------------------
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index 9f3c4f92f..3f4edafb2 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -23,9 +23,6 @@
 #include "atlas/util/Constants.h"
 #include "eckit/linalg/LinearAlgebra.h"
 #include "eckit/linalg/Matrix.h"
-#if ATLAS_HAVE_FFTW
-#include <fftw3.h>
-#endif
 
 namespace atlas {
 namespace trans {
@@ -85,11 +82,6 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
     grid_( grid ),
     truncation_( truncation ),
     precompute_( config.getBool( "precompute", true ) ) {
-#if ATLAS_HAVE_FFTW
-    Log::info() << "Atlas has FFTW" << std::endl;
-#else
-    Log::info() << "Atlas has no FFTW" << std::endl;
-#endif
     ATLAS_TRACE( "Precompute legendre opt2" );
     int nlats   = 0;
     int nlons   = 0;
@@ -185,6 +177,16 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
             }
         }
     }
+#if ATLAS_HAVE_FFTW
+    {
+        ATLAS_TRACE( "opt2 precomp FFTW" );
+        int num_complex = ( nlons / 2 ) + 1;
+        fft_in_         = fftw_alloc_complex( nlats * num_complex );
+        fft_out_        = fftw_alloc_real( nlats * nlons );
+        plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons,
+                                        FFTW_ESTIMATE );
+    }
+#endif
 }
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -194,7 +196,13 @@ TransLocalopt2::TransLocalopt2( const Grid& grid, const long truncation, const e
 
 // --------------------------------------------------------------------------------------------------------------------
 
-TransLocalopt2::~TransLocalopt2() {}
+TransLocalopt2::~TransLocalopt2() {
+#if ATLAS_HAVE_FFTW
+    fftw_destroy_plan( plan_ );
+    fftw_free( fft_in_ );
+    fftw_free( fft_out_ );
+#endif
+}
 
 // --------------------------------------------------------------------------------------------------------------------
 
@@ -340,35 +348,6 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                 }
             }
 #if ATLAS_HAVE_FFTW
-            std::vector<double> scl_fourier_tp( size_fourier * ( truncation + 1 ) );
-            {
-                // Transposition in Fourier space:
-                {
-                    ATLAS_TRACE( "opt2 transposition in Fourier" );
-                    int idx = 0;
-                    for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                        for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                            for ( int imag = 0; imag < 2; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                    int pos_tp = imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + g.ny() * ( jfld ) ) );
-                                    //int pos  = jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) );
-                                    scl_fourier_tp[pos_tp] = scl_fourier[idx++];  // = scl_fourier[pos]
-                                }
-                            }
-                        }
-                    }
-                }
-
-                // Fourier transformation:
-                std::vector<double> gp_opt2( nb_fields * grid_.size(), 0. );
-                {
-                    ATLAS_TRACE( "opt2 Fourier dgemm" );
-                    eckit::linalg::Matrix A( fouriertp_.data(), g.nxmax(), ( truncation_ + 1 ) * 2 );
-                    eckit::linalg::Matrix B( scl_fourier_tp.data(), ( truncation_ + 1 ) * 2, nb_fields * g.ny() );
-                    eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() );
-                    eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                }
-            }
             {
                 auto position = [&]( int jfld, int imag, int jlat, int jm ) {
                     return jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) );
@@ -380,61 +359,29 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                     }
                 };
 
+                int num_complex = ( nlons / 2 ) + 1;
                 {
-                    ATLAS_TRACE( "opt2 transposition in Fourier for FFTW" );
-                    int num_complex      = ( nlons / 2 ) + 1;
-                    fftw_complex* fft_in = fftw_alloc_complex( nlats * num_complex );
-                    double* fft_out      = fftw_alloc_real( nlats * nlons );
-                    fftw_plan plan = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in, NULL, 1, num_complex, fft_out,
-                                                             NULL, 1, nlons, FFTW_ESTIMATE );
+                    ATLAS_TRACE( "opt2 FFTW" );
                     for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                         int idx = 0;
                         for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
                             for ( int jm = 0; jm < num_complex; jm++, idx++ ) {
                                 for ( int imag = 0; imag < 2; imag++ ) {
                                     if ( jm <= truncation_ ) {
-                                        fft_in[idx][imag] =
+                                        fft_in_[idx][imag] =
                                             scl_fourier[position( jfld, imag, jlat, jm )] / factor( jm );
                                     }
                                     else {
-                                        fft_in[idx][imag] = 0.;
+                                        fft_in_[idx][imag] = 0.;
                                     }
                                 }
                             }
                         }
-                        fftw_execute( plan );
-                        int idx0 = 0, idx1 = 0;
-                        for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                            Log::info() << "scl_fourier_tp: " << std::endl;
-                            for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                                for ( int imag = 0; imag < 2; imag++, idx1++ ) {
-                                    Log::info() << scl_fourier_tp[idx1] << " ";
-                                }
-                            }
-                            Log::info() << std::endl;
-                            Log::info() << "fft:in: " << std::endl;
-                            for ( int jm = 0; jm < num_complex; jm++ ) {
-                                for ( int imag = 0; imag < 2; imag++ ) {
-                                    Log::info() << fft_in[jm + num_complex * jlat][imag] << " ";
-                                }
-                            }
-                            Log::info() << std::endl;
-                            Log::info() << "fft:out: " << std::endl;
-                            for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                                Log::info() << fft_out[jlon + nlons * jlat] << " ";
-                            }
-                            Log::info() << std::endl;
-                            Log::info() << "gp_fields: old: " << std::endl;
-                            for ( int jlon = 0; jlon < nlons; jlon++, idx0++ ) {
-                                Log::info() << gp_fields[idx0] << " ";
-                                //gp_fields[idx0] = fft_out[jlon + nlons * jlat];
-                            }
-                            Log::info() << std::endl;
+                        fftw_execute( plan_ );
+                        for ( int j = 0; j < nlats * nlons; j++ ) {
+                            gp_fields[j + jfld * nlats * nlons] = fft_out_[j];
                         }
                     }
-                    fftw_destroy_plan( plan );
-                    fftw_free( fft_in );
-                    fftw_free( fft_out );
                 }
             }
 #else
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.h b/src/atlas/trans/localopt2/TransLocalopt2.h
index 92fb292f1..0cec4e84a 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.h
+++ b/src/atlas/trans/localopt2/TransLocalopt2.h
@@ -12,8 +12,12 @@
 
 #include <vector>
 
+#include "atlas/array.h"
 #include "atlas/grid/Grid.h"
 #include "atlas/trans/Trans.h"
+#if ATLAS_HAVE_FFTW
+#include <fftw3.h>
+#endif
 
 //-----------------------------------------------------------------------------
 // Forward declarations
@@ -120,6 +124,11 @@ class TransLocalopt2 : public trans::TransImpl {
     std::vector<size_t> legendre_begin_;
     std::vector<size_t> legendre_sym_begin_;
     std::vector<size_t> legendre_asym_begin_;
+#if ATLAS_HAVE_FFTW
+    fftw_complex* fft_in_;
+    double* fft_out_;
+    fftw_plan plan_;
+#endif
 };
 
 //-----------------------------------------------------------------------------
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 2135ca994..22d874416 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -712,7 +712,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
     double tolerance  = 1.e-13;
 
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F3" );
+    Grid g( "F640" );
 
     grid::StructuredGrid gs( g );
     int ndgl = gs.ny();
@@ -814,15 +814,16 @@ CASE( "test_trans_vordiv_with_translib" ) {
                                 double rms_diff =
                                     compute_rms( g.size(), rgp1.data() + pos * g.size(), gp.data() + pos * g.size() );
                                 EXPECT( rms_trans < tolerance );
-                                //if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) {
-                                Log::info() << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
-                                            << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
-                                ATLAS_DEBUG_VAR( rms_gen1 );
-                                ATLAS_DEBUG_VAR( rms_gen2 );
-                                ATLAS_DEBUG_VAR( rms_trans );
-                                ATLAS_DEBUG_VAR( rms_diff );
-                                ATLAS_DEBUG_VAR( tolerance );
-                                //}
+                                if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) {
+                                    Log::info()
+                                        << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
+                                        << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
+                                    ATLAS_DEBUG_VAR( rms_gen1 );
+                                    ATLAS_DEBUG_VAR( rms_gen2 );
+                                    ATLAS_DEBUG_VAR( rms_trans );
+                                    ATLAS_DEBUG_VAR( rms_diff );
+                                    ATLAS_DEBUG_VAR( tolerance );
+                                }
 #endif
                                 EXPECT( icase < 300 );
                             }

From 60886e09cb3e35dd6ba8d81bc61cae2f48118ba3 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Fri, 9 Mar 2018 15:33:27 +0000
Subject: [PATCH 017/123] choosing the eckit backend explicitly

---
 src/atlas/trans/localopt/TransLocalopt.cc   | 1 +
 src/atlas/trans/localopt2/TransLocalopt2.cc | 3 +--
 src/tests/trans/test_transgeneral.cc        | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index d5281db77..4b7bb6bc5 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -83,6 +83,7 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
     truncation_( truncation ),
     precompute_( config.getBool( "precompute", true ) ) {
     ATLAS_TRACE( "Precompute legendre opt" );
+    eckit::linalg::LinearAlgebra::backend( "generic" );  // might want to choose backend with this command
     int nlats   = 0;
     int nlons   = 0;
     int nlatsNH = nlats_northernHemisphere( nlats );
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index 3f4edafb2..87b7dc0ee 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -83,6 +83,7 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
     truncation_( truncation ),
     precompute_( config.getBool( "precompute", true ) ) {
     ATLAS_TRACE( "Precompute legendre opt2" );
+    eckit::linalg::LinearAlgebra::backend( "generic" );  // might want to choose backend with this command
     int nlats   = 0;
     int nlons   = 0;
     int nlatsNH = nlats_northernHemisphere( nlats );
@@ -267,8 +268,6 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
     if ( nb_scalar_fields > 0 ) {
         int nb_fields = nb_scalar_fields;
 
-        //eckit::linalg::LinearAlgebra::backend( "string" ) // might want to choose backend with this command
-
         // Transform
         if ( grid::StructuredGrid g = grid_ ) {
             ATLAS_TRACE( "invtrans_uv structured opt2" );
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 22d874416..ddf85054a 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -712,7 +712,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
     double tolerance  = 1.e-13;
 
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F640" );
+    Grid g( "F120" );
 
     grid::StructuredGrid gs( g );
     int ndgl = gs.ny();

From e006fb421c7fdeb6af9db3f5d85982c5692666ac Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Mon, 12 Mar 2018 19:28:23 +0000
Subject: [PATCH 018/123] optimised precomputation of Legendre polynomials;
 should now be about as fast as trans library; peak RAM consumption should be
 two times lower than in trans library when both use Belousov. This allows to
 run a few scalar fields on cubic F1280 with about 10GB RAM

---
 cmake/CompileFlags.cmake                      |   4 +
 .../trans/localopt/LegendrePolynomialsopt.cc  |  54 ++---
 src/atlas/trans/localopt/TransLocalopt.cc     |  56 ++---
 .../localopt2/LegendrePolynomialsopt2.cc      | 225 ++++++++++--------
 .../trans/localopt2/LegendrePolynomialsopt2.h |   9 +-
 src/atlas/trans/localopt2/TransLocalopt2.cc   |  21 +-
 src/atlas/trans/localopt2/TransLocalopt2.h    |   4 -
 src/tests/trans/test_transgeneral.cc          | 191 +++++++--------
 8 files changed, 281 insertions(+), 283 deletions(-)

diff --git a/cmake/CompileFlags.cmake b/cmake/CompileFlags.cmake
index 522e4a6cb..c02df915c 100644
--- a/cmake/CompileFlags.cmake
+++ b/cmake/CompileFlags.cmake
@@ -8,3 +8,7 @@ if( CMAKE_CXX_COMPILER_ID MATCHES Cray )
 #          directives, ACC directives, or ASM intrinsics.
 
 endif()
+
+#ecbuild_add_cxx_flags("-fsanitize=address")
+#ecbuild_add_cxx_flags("-fsanitize=thread")
+#ecbuild_add_cxx_flags("-fsanitize=memory")
diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
index 25fda7d04..e872eb20b 100644
--- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
+++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
@@ -13,6 +13,7 @@
 #include <limits>
 
 #include "atlas/array.h"
+#include "atlas/parallel/mpi/mpi.h"
 #include "atlas/trans/localopt/LegendrePolynomialsopt.h"
 
 namespace atlas {
@@ -26,42 +27,37 @@ void compute_legendre_polynomialsopt(
     const double lats[],  // latitudes in radians (in)
     double legpol[] )     // values of associated Legendre functions, size (trc+1)*trc/2*nlats (out)
 {
-    array::ArrayT<int> idxmn_( trc + 1, trc + 1, nlats );
-    array::ArrayView<int, 3> idxmn = array::make_view<int, 3>( idxmn_ );
-
-    int j = 0;
-    for ( int jm = 0; jm <= trc; ++jm ) {
-        for ( int jlat = 0; jlat < nlats; ++jlat ) {
-            for ( int jn = jm; jn <= trc; ++jn ) {
-                idxmn( jm, jn, jlat ) = j++;
-            }
-        }
-    }
-
     array::ArrayT<double> zfn_( trc + 1, trc + 1 );
     array::ArrayView<double, 2> zfn = array::make_view<double, 2>( zfn_ );
 
-    int iodd;
+    auto idxmn = [&]( int jm, int jn, int jlat ) {
+        return ( 2 * trc + 3 - jm ) * jm / 2 * nlats + jlat * ( trc - jm + 1 ) + jn - jm;
+    };
 
-    // Compute coefficients for Taylor series in Belousov (19) and (21)
-    // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.)
-    // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1
-    zfn( 0, 0 ) = 2.;
-    for ( int jn = 1; jn <= trc; ++jn ) {
-        double zfnn = zfn( 0, 0 );
-        for ( int jgl = 1; jgl <= jn; ++jgl ) {
-            zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) );
-        }
-        iodd          = jn % 2;
-        zfn( jn, jn ) = zfnn;
-        for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) {
-            double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) );  // new factor numerator
-            double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) );           // new factor denominator
+    {
+        ATLAS_TRACE( "init arrays" );
+
+        int iodd;
+
+        // Compute coefficients for Taylor series in Belousov (19) and (21)
+        // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.)
+        // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1
+        zfn( 0, 0 ) = 2.;
+        for ( int jn = 1; jn <= trc; ++jn ) {
+            double zfnn = zfn( 0, 0 );
+            for ( int jgl = 1; jgl <= jn; ++jgl ) {
+                zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) );
+            }
+            iodd          = jn % 2;
+            zfn( jn, jn ) = zfnn;
+            for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) {
+                double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) );  // new factor numerator
+                double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) );           // new factor denominator
 
-            zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd;
+                zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd;
+            }
         }
     }
-
     for ( int jlat = 0; jlat < nlats; ++jlat ) {
         // --------------------
         // 1. First two columns
diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index 4b7bb6bc5..5b31b75ad 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -119,38 +119,40 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
     }
     // precomputations for Legendre polynomials:
     {
-        ATLAS_TRACE( "opt precomp Legendre" );
-        legendre_.resize( legendre_size( truncation_ + 1 ) * nlatsNH );
-        compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre_.data() );
-    }
-    {
-        ATLAS_TRACE( "opt split Legendre" );
-        int size_sym  = 0;
-        int size_asym = 0;
-        legendre_sym_begin_.resize( truncation_ + 3 );
-        legendre_asym_begin_.resize( truncation_ + 3 );
-        legendre_sym_begin_[0]  = 0;
-        legendre_asym_begin_[0] = 0;
-        for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-            size_sym += num_n( truncation_ + 1, jm, true );
-            size_asym += num_n( truncation_ + 1, jm, false );
-            legendre_sym_begin_[jm + 1]  = size_sym;
-            legendre_asym_begin_[jm + 1] = size_asym;
+        std::vector<double> legendre( legendre_size( truncation_ + 1 ) * nlatsNH );
+        {
+            ATLAS_TRACE( "opt precomp Legendre" );
+            compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre.data() );
         }
-        legendre_sym_.resize( size_sym * nlatsNH );
-        legendre_asym_.resize( size_asym * nlatsNH );
-        int idx = 0, is = 0, ia = 0;
-        for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-            for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                for ( int jn = 0; jn <= truncation_ - jm + 1; jn++, idx++ ) {
-                    if ( jn % 2 == 0 ) { legendre_sym_[is++] = legendre_[idx]; }
-                    else {
-                        legendre_asym_[ia++] = legendre_[idx];
+        {
+            ATLAS_TRACE( "opt split Legendre" );
+            int size_sym  = 0;
+            int size_asym = 0;
+            legendre_sym_begin_.resize( truncation_ + 3 );
+            legendre_asym_begin_.resize( truncation_ + 3 );
+            legendre_sym_begin_[0]  = 0;
+            legendre_asym_begin_[0] = 0;
+            for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
+                size_sym += num_n( truncation_ + 1, jm, true );
+                size_asym += num_n( truncation_ + 1, jm, false );
+                legendre_sym_begin_[jm + 1]  = size_sym;
+                legendre_asym_begin_[jm + 1] = size_asym;
+            }
+            legendre_sym_.resize( size_sym * nlatsNH );
+            legendre_asym_.resize( size_asym * nlatsNH );
+            int idx = 0, is = 0, ia = 0;
+            for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
+                for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                    for ( int jn = 0; jn <= truncation_ - jm + 1; jn++, idx++ ) {
+                        if ( jn % 2 == 0 ) { legendre_sym_[is++] = legendre[idx]; }
+                        else {
+                            legendre_asym_[ia++] = legendre[idx];
+                        }
                     }
                 }
             }
+            ASSERT( ia == size_asym * nlatsNH && is == size_sym * nlatsNH );
         }
-        ASSERT( ia == size_asym * nlatsNH && is == size_sym * nlatsNH );
     }
 
     // precomputations for Fourier transformations:
diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
index 8f55231d5..bbff0a1a8 100644
--- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
+++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
@@ -13,6 +13,7 @@
 #include <limits>
 
 #include "atlas/array.h"
+#include "atlas/parallel/mpi/mpi.h"
 #include "atlas/trans/localopt2/LegendrePolynomialsopt2.h"
 
 namespace atlas {
@@ -20,27 +21,20 @@ namespace trans {
 
 //-----------------------------------------------------------------------------
 
-void compute_legendre_polynomialsopt2(
-    const size_t trc,     // truncation (in)
-    const int nlats,      // number of latitudes
-    const double lats[],  // latitudes in radians (in)
-    double legpol[] )     // values of associated Legendre functions, size (trc+1)*trc/2*nlats (out)
-{
-    array::ArrayT<int> idxmn_( trc + 1, trc + 1, nlats );
-    array::ArrayView<int, 3> idxmn = array::make_view<int, 3>( idxmn_ );
-
-    int j = 0;
-    for ( int jm = 0; jm <= trc; ++jm ) {
-        for ( int jlat = 0; jlat < nlats; ++jlat ) {
-            for ( int jn = jm; jn <= trc; ++jn ) {
-                idxmn( jm, jn, jlat ) = j++;
-            }
-        }
-    }
+size_t legendre_size( const size_t truncation ) {
+    return ( truncation + 2 ) * ( truncation + 1 ) / 2;
+}
 
+void compute_legendre_polynomialsopt2( const size_t trc,     // truncation (in)
+                                       const int nlats,      // number of latitudes
+                                       const double lats[],  // latitudes in radians (in)
+                                       double leg_sym[],     // values of associated Legendre functions, symmetric part
+                                       double leg_asym[] )   // values of associated Legendre functions, asymmetric part
+{
     array::ArrayT<double> zfn_( trc + 1, trc + 1 );
     array::ArrayView<double, 2> zfn = array::make_view<double, 2>( zfn_ );
-
+    std::vector<double> legpol( legendre_size( trc ) );
+    auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; };
     int iodd;
 
     // Compute coefficients for Taylor series in Belousov (19) and (21)
@@ -62,99 +56,132 @@ void compute_legendre_polynomialsopt2(
         }
     }
 
+    // Loop over latitudes:
     for ( int jlat = 0; jlat < nlats; ++jlat ) {
-        // --------------------
-        // 1. First two columns
-        // --------------------
-        double lat     = lats[jlat];
-        double zdlx1   = ( M_PI_2 - lat );               // theta
-        double zdlx    = std::cos( zdlx1 );              // cos(theta)
-        double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
-
-        legpol[idxmn( 0, 0, jlat )] = 1.;
-
-        double zdl1sita = 0.;
-        // if we are less than 1 meter from the pole,
-        if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits<double>::epsilon() ) ) {
-            zdlx    = 1.;
-            zdlsita = 0.;
-        }
-        else {
-            zdl1sita = 1. / zdlsita;
-        }
+        {
+            //ATLAS_TRACE( "compute Legendre polynomials" );
+            // --------------------
+            // 1. First two columns
+            // --------------------
+            double lat     = lats[jlat];
+            double zdlx1   = ( M_PI_2 - lat );               // theta
+            double zdlx    = std::cos( zdlx1 );              // cos(theta)
+            double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
+
+            legpol[idxmn( 0, 0 )] = 1.;
+
+            double zdl1sita = 0.;
+            // if we are less than 1 meter from the pole,
+            if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits<double>::epsilon() ) ) {
+                zdlx    = 1.;
+                zdlsita = 0.;
+            }
+            else {
+                zdl1sita = 1. / zdlsita;
+            }
 
-        // ordinary Legendre polynomials from series expansion
-        // ---------------------------------------------------
-
-        // even N
-        for ( int jn = 2; jn <= trc; jn += 2 ) {
-            double zdlk   = 0.5 * zfn( jn, 0 );
-            double zdlldn = 0.0;
-            double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
-            // represented by only even k
-            for ( int jk = 2; jk <= jn; jk += 2 ) {
-                // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-                zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
-                // normalised associated Legendre polynomial == \overbar{P_n}^1
-                zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+            // ordinary Legendre polynomials from series expansion
+            // ---------------------------------------------------
+
+            // even N
+            for ( int jn = 2; jn <= trc; jn += 2 ) {
+                double zdlk   = 0.5 * zfn( jn, 0 );
+                double zdlldn = 0.0;
+                double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
+                // represented by only even k
+                for ( int jk = 2; jk <= jn; jk += 2 ) {
+                    // normalised ordinary Legendre polynomial == \overbar{P_n}^0
+                    zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
+                    // normalised associated Legendre polynomial == \overbar{P_n}^1
+                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+                }
+                legpol[idxmn( 0, jn )] = zdlk;
+                legpol[idxmn( 1, jn )] = zdlldn;
             }
-            legpol[idxmn( 0, jn, jlat )] = zdlk;
-            legpol[idxmn( 1, jn, jlat )] = zdlldn;
-        }
 
-        // odd N
-        for ( int jn = 1; jn <= trc; jn += 2 ) {
-            zfn( jn, 0 )  = 0.;
-            double zdlk   = 0.;
-            double zdlldn = 0.0;
-            double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
-            // represented by only even k
-            for ( int jk = 1; jk <= jn; jk += 2 ) {
-                // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-                zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
-                // normalised associated Legendre polynomial == \overbar{P_n}^1
-                zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+            // odd N
+            for ( int jn = 1; jn <= trc; jn += 2 ) {
+                zfn( jn, 0 )  = 0.;
+                double zdlk   = 0.;
+                double zdlldn = 0.0;
+                double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
+                // represented by only even k
+                for ( int jk = 1; jk <= jn; jk += 2 ) {
+                    // normalised ordinary Legendre polynomial == \overbar{P_n}^0
+                    zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
+                    // normalised associated Legendre polynomial == \overbar{P_n}^1
+                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+                }
+                legpol[idxmn( 0, jn )] = zdlk;
+                legpol[idxmn( 1, jn )] = zdlldn;
             }
-            legpol[idxmn( 0, jn, jlat )] = zdlk;
-            legpol[idxmn( 1, jn, jlat )] = zdlldn;
-        }
 
-        // --------------------------------------------------------------
-        // 2. Diagonal (the terms 0,0 and 1,1 have already been computed)
-        //    Belousov, equation (23)
-        // --------------------------------------------------------------
+            // --------------------------------------------------------------
+            // 2. Diagonal (the terms 0,0 and 1,1 have already been computed)
+            //    Belousov, equation (23)
+            // --------------------------------------------------------------
 
-        double zdls = zdl1sita * std::numeric_limits<double>::min();
-        for ( int jn = 2; jn <= trc; ++jn ) {
-            double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) );
+            double zdls = zdl1sita * std::numeric_limits<double>::min();
+            for ( int jn = 2; jn <= trc; ++jn ) {
+                double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) );
 
-            legpol[idxmn( jn, jn, jlat )] = legpol[idxmn( jn - 1, jn - 1, jlat )] * zdlsita * sq;
-            if ( std::abs( legpol[idxmn( jn, jn, jlat )] ) < zdls ) legpol[idxmn( jn, jn, jlat )] = 0.0;
-        }
+                legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq;
+                if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0;
+            }
 
-        // ---------------------------------------------
-        // 3. General recurrence (Belousov, equation 17)
-        // ---------------------------------------------
-
-        for ( int jn = 3; jn <= trc; ++jn ) {
-            for ( int jm = 2; jm < jn; ++jm ) {
-                double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) );  // numerator of c in Belousov
-                double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of c in Belousov
-                double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) );  // numerator of d in Belousov
-                double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of d in Belousov
-                double en = ( ( 2. * jn + 1. ) * ( jn - jm ) );                          // numerator of e in Belousov
-                double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) );                          // denominator of e in Belousov
-
-                legpol[idxmn( jm, jn, jlat )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2, jlat )] -
-                                                std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1, jlat )] * zdlx +
-                                                std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1, jlat )] * zdlx;
+            // ---------------------------------------------
+            // 3. General recurrence (Belousov, equation 17)
+            // ---------------------------------------------
+
+            for ( int jn = 3; jn <= trc; ++jn ) {
+                for ( int jm = 2; jm < jn; ++jm ) {
+                    double cn =
+                        ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) );     // numerator of c in Belousov
+                    double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) );  // denominator of c in Belousov
+                    double dn =
+                        ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) );     // numerator of d in Belousov
+                    double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) );  // denominator of d in Belousov
+                    double en = ( ( 2. * jn + 1. ) * ( jn - jm ) );                     // numerator of e in Belousov
+                    double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) );                     // denominator of e in Belousov
+
+                    legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] -
+                                              std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx +
+                                              std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx;
+                }
             }
         }
 
-        // take factor 2 for m > 0 into account:
-        for ( int jm = 1; jm <= trc; ++jm ) {
-            for ( int jn = jm; jn <= trc; ++jn ) {
-                legpol[idxmn( jm, jn, jlat )] *= 2.;
+        {
+            //ATLAS_TRACE( "add to global arrays" );
+
+            // take factor 2 for m > 0 into account:
+            for ( int jm = 1; jm <= trc; ++jm ) {
+                for ( int jn = jm; jn <= trc; ++jn ) {
+                    legpol[idxmn( jm, jn )] *= 2.;
+                }
+            }
+            int is0 = 0, ia0 = 0;
+            for ( int jm = 0; jm <= trc; jm++ ) {
+                int is1 = 0, ia1 = 0;
+                for ( int jn = jm; jn <= trc; jn++ ) {
+                    if ( ( jn - jm ) % 2 == 0 ) { is1++; }
+                    else {
+                        ia1++;
+                    }
+                }
+                int is2 = 0, ia2 = 0;
+                for ( int jn = jm; jn <= trc; jn++ ) {
+                    if ( ( jn - jm ) % 2 == 0 ) {
+                        int is      = is0 * nlats + is1 * jlat + is2++;
+                        leg_sym[is] = legpol[idxmn( jm, jn )];
+                    }
+                    else {
+                        int ia       = ia0 * nlats + ia1 * jlat + ia2++;
+                        leg_asym[ia] = legpol[idxmn( jm, jn )];
+                    }
+                }
+                is0 += is2;
+                ia0 += ia2;
             }
         }
     }
diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h
index ae550d30d..0de2a5f69 100644
--- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h
+++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h
@@ -33,10 +33,11 @@ namespace trans {
 // Andreas Mueller *ECMWF*
 //
 void compute_legendre_polynomialsopt2(
-    const size_t trc,     // truncation (in)
-    const int nlats,      // number of latitudes
-    const double lats[],  // latitudes in radians (in)
-    double legpol[] );    // values of associated Legendre functions, size (trc+1)*trc/2*nlats (out)
+    const size_t trc,          // truncation (in)
+    const int nlats,           // number of latitudes
+    const double lats[],       // latitudes in radians (in)
+    double legendre_sym[],     // values of associated Legendre functions, symmetric part
+    double legendre_asym[] );  // values of associated Legendre functions, asymmetric part
 
 // --------------------------------------------------------------------------------------------------------------------
 
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index 87b7dc0ee..ea6724472 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -120,11 +120,6 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
     // precomputations for Legendre polynomials:
     {
         ATLAS_TRACE( "opt2 precomp Legendre" );
-        legendre_.resize( legendre_size( truncation_ + 1 ) * nlatsNH );
-        compute_legendre_polynomialsopt2( truncation_ + 1, nlatsNH, lats.data(), legendre_.data() );
-    }
-    {
-        ATLAS_TRACE( "opt2 split Legendre" );
         int size_sym  = 0;
         int size_asym = 0;
         legendre_sym_begin_.resize( truncation_ + 3 );
@@ -139,18 +134,8 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
         }
         legendre_sym_.resize( size_sym * nlatsNH );
         legendre_asym_.resize( size_asym * nlatsNH );
-        int idx = 0, is = 0, ia = 0;
-        for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-            for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                for ( int jn = 0; jn <= truncation_ - jm + 1; jn++, idx++ ) {
-                    if ( jn % 2 == 0 ) { legendre_sym_[is++] = legendre_[idx]; }
-                    else {
-                        legendre_asym_[ia++] = legendre_[idx];
-                    }
-                }
-            }
-        }
-        ASSERT( ia == size_asym * nlatsNH && is == size_sym * nlatsNH );
+        compute_legendre_polynomialsopt2( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_.data(),
+                                          legendre_asym_.data() );
     }
 
     // precomputations for Fourier transformations:
@@ -188,7 +173,7 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
                                         FFTW_ESTIMATE );
     }
 #endif
-}
+}  // namespace atlas
 
 // --------------------------------------------------------------------------------------------------------------------
 
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.h b/src/atlas/trans/localopt2/TransLocalopt2.h
index 0cec4e84a..51644430d 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.h
+++ b/src/atlas/trans/localopt2/TransLocalopt2.h
@@ -105,9 +105,6 @@ class TransLocalopt2 : public trans::TransImpl {
                            double divergence_spectra[], const eckit::Configuration& = util::NoConfig() ) const override;
 
 private:
-    const double* legendre_data( int j ) const { return legendre_.data() + legendre_begin_[j]; }
-    double* legendre_data( int j ) { return legendre_.data() + legendre_begin_[j]; }
-
     void invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
                       const double scalar_spectra[], double gp_fields[],
                       const eckit::Configuration& = util::NoConfig() ) const;
@@ -116,7 +113,6 @@ class TransLocalopt2 : public trans::TransImpl {
     Grid grid_;
     int truncation_;
     bool precompute_;
-    mutable std::vector<double> legendre_;
     mutable std::vector<double> legendre_sym_;
     mutable std::vector<double> legendre_asym_;
     mutable std::vector<double> fourier_;
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index ddf85054a..f5dc61f1e 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -569,6 +569,7 @@ CASE( "test_transgeneral_legendrepolynomials" )
 }
 #endif
 //-----------------------------------------------------------------------------
+#if 0
 #if 1
 CASE( "test_transgeneral_point" ) {
     std::ostream& out = Log::info();
@@ -700,9 +701,10 @@ CASE( "test_transgeneral_with_translib" ) {
         }
     }
 }
-
+#endif
+#endif
 //-----------------------------------------------------------------------------
-
+#if 0
 CASE( "test_trans_vordiv_with_translib" ) {
     Log::info() << "test_trans_vordiv_with_translib" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -727,7 +729,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
     functionspace::Spectral spectral( trc );
     functionspace::StructuredColumns gridpoints( g );
 
-    int nb_scalar = 1, nb_vordiv = 0;
+    int nb_scalar = 2, nb_vordiv = 0;
     int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
     std::vector<double> sp( 2 * N * nb_scalar );
     std::vector<double> vor( 2 * N * nb_vordiv );
@@ -836,9 +838,9 @@ CASE( "test_trans_vordiv_with_translib" ) {
     }
     Log::info() << "Vordiv+scalar comparison with trans: all " << icase << " cases successfully passed!" << std::endl;
 }
-
+#endif
 //-----------------------------------------------------------------------------
-#if 0
+#if 1
 CASE( "test_trans_hires" ) {
     Log::info() << "test_trans_hires" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -848,17 +850,17 @@ CASE( "test_trans_hires" ) {
     double tolerance  = 1.e-13;
 
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F128" );
-
+    Grid g( "F1280" );
+#if ATLAS_HAVE_TRANS
+    //std::string transTypes[1] = {"localopt2"};
+    std::string transTypes[2] = {"localopt2", "ifs"};
+#else
+    std::string transTypes[1] = {"localopt2"};
+#endif
     grid::StructuredGrid gs( g );
     int ndgl = gs.ny();
     //int trc  = ndgl - 1;  // linear
     int trc = ndgl / 2. - 1;  // cubic
-#if 0
-    trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) );
-#endif
-    trans::Trans transLocal1( g, trc, util::Config( "type", "localopt2" ) );
-    //trans::Trans transLocal2( g, trc, util::Config( "type", "localopt2" ) );
 
     functionspace::Spectral spectral( trc );
     functionspace::StructuredColumns gridpoints( g );
@@ -874,107 +876,92 @@ CASE( "test_trans_hires" ) {
     //std::vector<double> rgp2( nb_all * g.size() );
     std::vector<double> rgp_analytic( g.size() );
 
-    int icase = 0;
-    for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) {         // vorticity, divergence, scalar
-        for ( int ivar_out = 2; ivar_out < 3; ivar_out++ ) {  // u, v, scalar
-            int nb_fld = 1;
-            if ( ivar_out == 2 ) {
-                tolerance = 1.e-13;
-                nb_fld    = nb_scalar;
-            }
-            else {
-                tolerance = 2.e-6;
-                nb_fld    = nb_vordiv;
-            }
-            for ( int jfld = 0; jfld < nb_fld; jfld++ ) {  // multiple fields
-                int k = 0;
-                for ( int m = 0; m <= trc; m++ ) {                 // zonal wavenumber
-                    for ( int n = m; n <= trc; n++ ) {             // total wavenumber
-                        for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
-
-                            if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. ) {
-                                for ( int j = 0; j < 2 * N * nb_scalar; j++ ) {
-                                    sp[j] = 0.;
-                                }
-                                for ( int j = 0; j < 2 * N * nb_vordiv; j++ ) {
-                                    vor[j] = 0.;
-                                    div[j] = 0.;
-                                }
-                                if ( ivar_in == 0 ) vor[k * nb_vordiv + jfld] = 1.;
-                                if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.;
-                                if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.;
-
-                                for ( int j = 0; j < nb_all * g.size(); j++ ) {
-                                    gp[j]   = 0.;
-                                    rgp1[j] = 0.;
-                                    //rgp2[j] = 0.;
-                                }
-                                for ( int j = 0; j < g.size(); j++ ) {
-                                    rgp_analytic[j] = 0.;
-                                }
-
-                                spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(),
-                                                                  rgp_analytic.data(), ivar_in, ivar_out );
-
-                                EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
-                                                                       div.data(), rgp1.data() ) );
-
-                                //EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
-                                //                                       div.data(), rgp2.data() ) );
-
-                                int pos = ( ivar_out * nb_vordiv + jfld );
-
-                                double rms_gen1 =
-                                    compute_rms( g.size(), rgp1.data() + pos * g.size(), rgp_analytic.data() );
-
-                                //double rms_gen2 =
-                                //    compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() );
-
-                                if ( !( rms_gen1 < tolerance ) ) {  // || !( rms_gen2 < tolerance ) ) {
-                                    Log::info()
-                                        << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
-                                        << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
-                                    ATLAS_DEBUG_VAR( rms_gen1 );
-                                    //ATLAS_DEBUG_VAR( rms_gen2 );
-                                    ATLAS_DEBUG_VAR( tolerance );
-                                }
-                                EXPECT( rms_gen1 < tolerance );
-                                //EXPECT( rms_gen2 < tolerance );
-                                icase++;
-
-#if 0
-                                EXPECT_NO_THROW( transIFS.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
-                                                                    div.data(), gp.data() ) );
-                                double rms_trans =
-                                    compute_rms( g.size(), gp.data() + pos * g.size(), rgp_analytic.data() );
-                                double rms_diff =
-                                    compute_rms( g.size(), rgp1.data() + pos * g.size(), gp.data() + pos * g.size() );
-                                EXPECT( rms_trans < tolerance );
-                                if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) {
-                                    Log::info()
-                                        << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
-                                        << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
-                                    ATLAS_DEBUG_VAR( rms_gen1 );
-                                    //ATLAS_DEBUG_VAR( rms_gen2 );
-                                    ATLAS_DEBUG_VAR( rms_trans );
-                                    ATLAS_DEBUG_VAR( rms_diff );
-                                    ATLAS_DEBUG_VAR( tolerance );
+    for ( auto transType : transTypes ) {
+        int icase = 0;
+        trans::Trans trans( g, trc, util::Config( "type", transType ) );
+        for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) {         // vorticity, divergence, scalar
+            for ( int ivar_out = 2; ivar_out < 3; ivar_out++ ) {  // u, v, scalar
+                int nb_fld = 1;
+                if ( ivar_out == 2 ) {
+                    tolerance = 1.e-13;
+                    nb_fld    = nb_scalar;
+                }
+                else {
+                    tolerance = 2.e-6;
+                    nb_fld    = nb_vordiv;
+                }
+                for ( int jfld = 0; jfld < nb_fld; jfld++ ) {  // multiple fields
+                    int k = 0;
+                    for ( int m = 0; m <= trc; m++ ) {                 // zonal wavenumber
+                        for ( int n = m; n <= trc; n++ ) {             // total wavenumber
+                            for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
+
+                                if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. ) {
+                                    for ( int j = 0; j < 2 * N * nb_scalar; j++ ) {
+                                        sp[j] = 0.;
+                                    }
+                                    for ( int j = 0; j < 2 * N * nb_vordiv; j++ ) {
+                                        vor[j] = 0.;
+                                        div[j] = 0.;
+                                    }
+                                    if ( ivar_in == 0 ) vor[k * nb_vordiv + jfld] = 1.;
+                                    if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.;
+                                    if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.;
+
+                                    for ( int j = 0; j < nb_all * g.size(); j++ ) {
+                                        gp[j]   = 0.;
+                                        rgp1[j] = 0.;
+                                        //rgp2[j] = 0.;
+                                    }
+                                    for ( int j = 0; j < g.size(); j++ ) {
+                                        rgp_analytic[j] = 0.;
+                                    }
+
+                                    spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(),
+                                                                      rgp_analytic.data(), ivar_in, ivar_out );
+
+                                    EXPECT_NO_THROW( trans.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
+                                                                     div.data(), rgp1.data() ) );
+
+                                    //EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
+                                    //                                       div.data(), rgp2.data() ) );
+
+                                    int pos = ( ivar_out * nb_vordiv + jfld );
+
+                                    double rms_gen1 =
+                                        compute_rms( g.size(), rgp1.data() + pos * g.size(), rgp_analytic.data() );
+
+                                    //double rms_gen2 =
+                                    //    compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() );
+
+                                    if ( !( rms_gen1 < tolerance ) ) {  // || !( rms_gen2 < tolerance ) ) {
+                                        Log::info()
+                                            << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
+                                            << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
+                                        ATLAS_DEBUG_VAR( rms_gen1 );
+                                        //ATLAS_DEBUG_VAR( rms_gen2 );
+                                        ATLAS_DEBUG_VAR( tolerance );
+                                    }
+                                    EXPECT( rms_gen1 < tolerance );
+                                    //EXPECT( rms_gen2 < tolerance );
+                                    icase++;
+
+                                    EXPECT( icase < 300 );
                                 }
-#endif
-                                EXPECT( icase < 300 );
+                                k++;
                             }
-                            k++;
                         }
                     }
                 }
             }
         }
+        Log::info() << "Vordiv+scalar comparison with trans::" << transType << ": all " << icase
+                    << " cases successfully passed!" << std::endl;
     }
-    Log::info() << "Vordiv+scalar comparison with trans: all " << icase << " cases successfully passed!" << std::endl;
 }
 #endif
 //-----------------------------------------------------------------------------
-
+#if 0
 CASE( "test_trans_invtrans" ) {
     trans::Trans trans( Grid( "O64" ), 63, util::Config( "type", "local" ) );
 

From 8ccfbd112b7e5b2782139e19f95431c4827076c4 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Mon, 12 Mar 2018 22:59:43 +0000
Subject: [PATCH 019/123] testing scaling with nb_scalar

---
 src/tests/trans/test_transgeneral.cc | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index f5dc61f1e..941b86664 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -704,7 +704,7 @@ CASE( "test_transgeneral_with_translib" ) {
 #endif
 #endif
 //-----------------------------------------------------------------------------
-#if 0
+#if 1
 CASE( "test_trans_vordiv_with_translib" ) {
     Log::info() << "test_trans_vordiv_with_translib" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -714,7 +714,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
     double tolerance  = 1.e-13;
 
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F120" );
+    Grid g( "F640" );
 
     grid::StructuredGrid gs( g );
     int ndgl = gs.ny();
@@ -729,7 +729,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
     functionspace::Spectral spectral( trc );
     functionspace::StructuredColumns gridpoints( g );
 
-    int nb_scalar = 2, nb_vordiv = 0;
+    int nb_scalar = 100, nb_vordiv = 0;
     int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
     std::vector<double> sp( 2 * N * nb_scalar );
     std::vector<double> vor( 2 * N * nb_vordiv );
@@ -827,7 +827,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
                                     ATLAS_DEBUG_VAR( tolerance );
                                 }
 #endif
-                                EXPECT( icase < 300 );
+                                EXPECT( icase < 100 );
                             }
                             k++;
                         }
@@ -840,7 +840,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
 }
 #endif
 //-----------------------------------------------------------------------------
-#if 1
+#if 0
 CASE( "test_trans_hires" ) {
     Log::info() << "test_trans_hires" << std::endl;
     // test transgeneral by comparing its result with the trans library

From 79fdd110a9a4a1263acf241464b5068070877076 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Tue, 13 Mar 2018 11:00:56 +0000
Subject: [PATCH 020/123] reversed order of total wavenumbers in Legendre
 transformation (the same is done in trans library for improving accuracy of
 summation); also added average RMS errors to test_trans_vordiv_with_translib

---
 .../localopt2/LegendrePolynomialsopt2.cc      |  9 ++++++-
 src/atlas/trans/localopt2/TransLocalopt2.cc   | 14 +++++++---
 src/tests/trans/test_transgeneral.cc          | 27 ++++++++++++++-----
 3 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
index bbff0a1a8..99f54586a 100644
--- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
+++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
@@ -170,7 +170,14 @@ void compute_legendre_polynomialsopt2( const size_t trc,     // truncation (in)
                     }
                 }
                 int is2 = 0, ia2 = 0;
-                for ( int jn = jm; jn <= trc; jn++ ) {
+                // the choice between the following two code lines determines whether
+                // total wavenumbers are summed in an ascending or descending order.
+                // The trans library in IFS uses descending order because it should
+                // be more accurate (higher wavenumbers have smaller contributions).
+                // This also needs to be changed when splitting the spectral data in
+                // TransLocalopt2::invtrans_uv!
+                //for ( int jn = jm; jn <= trc; jn++ ) {
+                for ( int jn = trc; jn >= jm; jn-- ) {
                     if ( ( jn - jm ) % 2 == 0 ) {
                         int is      = is0 * nlats + is1 * jlat + is2++;
                         leg_sym[is] = legpol[idxmn( jm, jn )];
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index ea6724472..084eca68f 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -277,10 +277,18 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                     {
                         //ATLAS_TRACE( "opt2 Legendre split" );
                         int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2;
-                        for ( int jn = 0; jn <= truncation_ - jm + 1; jn++ ) {
+                        // the choice between the following two code lines determines whether
+                        // total wavenumbers are summed in an ascending or descending order.
+                        // The trans library in IFS uses descending order because it should
+                        // be more accurate (higher wavenumbers have smaller contributions).
+                        // This also needs to be changed when splitting the spectral data in
+                        // compute_legendre_polynomialsopt2!
+                        //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) {
+                        for ( int jn = truncation_ + 1; jn >= jm; jn-- ) {
                             for ( int imag = 0; imag < 2; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
-                                    if ( jn % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                    idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) );
+                                    if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
                                     else {
                                         scalar_asym[ia++] = scalar_spectra[idx + ioff];
                                     }
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 941b86664..cd1b21478 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -704,7 +704,7 @@ CASE( "test_transgeneral_with_translib" ) {
 #endif
 #endif
 //-----------------------------------------------------------------------------
-#if 1
+#if 0
 CASE( "test_trans_vordiv_with_translib" ) {
     Log::info() << "test_trans_vordiv_with_translib" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -714,7 +714,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
     double tolerance  = 1.e-13;
 
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F640" );
+    Grid g( "F120" );
 
     grid::StructuredGrid gs( g );
     int ndgl = gs.ny();
@@ -722,14 +722,16 @@ CASE( "test_trans_vordiv_with_translib" ) {
     int trc = ndgl / 2. - 1;  // cubic
 #if ATLAS_HAVE_TRANS
     trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) );
+    double rav = 0.;  // compute average rms error of trans library in rav
 #endif
     trans::Trans transLocal1( g, trc, util::Config( "type", "localopt" ) );
     trans::Trans transLocal2( g, trc, util::Config( "type", "localopt2" ) );
+    double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
 
     functionspace::Spectral spectral( trc );
     functionspace::StructuredColumns gridpoints( g );
 
-    int nb_scalar = 100, nb_vordiv = 0;
+    int nb_scalar = 1, nb_vordiv = 0;
     int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
     std::vector<double> sp( 2 * N * nb_scalar );
     std::vector<double> vor( 2 * N * nb_vordiv );
@@ -796,6 +798,8 @@ CASE( "test_trans_vordiv_with_translib" ) {
                                 double rms_gen2 =
                                     compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() );
 
+                                rav1 += rms_gen1;
+                                rav2 += rms_gen2;
                                 if ( !( rms_gen1 < tolerance ) || !( rms_gen2 < tolerance ) ) {
                                     Log::info()
                                         << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
@@ -813,6 +817,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
                                                                     div.data(), gp.data() ) );
                                 double rms_trans =
                                     compute_rms( g.size(), gp.data() + pos * g.size(), rgp_analytic.data() );
+                                rav += rms_trans;
                                 double rms_diff =
                                     compute_rms( g.size(), rgp1.data() + pos * g.size(), gp.data() + pos * g.size() );
                                 EXPECT( rms_trans < tolerance );
@@ -827,7 +832,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
                                     ATLAS_DEBUG_VAR( tolerance );
                                 }
 #endif
-                                EXPECT( icase < 100 );
+                                EXPECT( icase < 300 );
                             }
                             k++;
                         }
@@ -837,10 +842,18 @@ CASE( "test_trans_vordiv_with_translib" ) {
         }
     }
     Log::info() << "Vordiv+scalar comparison with trans: all " << icase << " cases successfully passed!" << std::endl;
+    rav1 /= icase;
+    Log::info() << "average RMS error of transLocal1: " << rav1 << std::endl;
+    rav2 /= icase;
+    Log::info() << "average RMS error of transLocal2: " << rav2 << std::endl;
+#if ATLAS_HAVE_TRANS
+    rav /= icase;
+    Log::info() << "average RMS error of transIFS: " << rav << std::endl;
+#endif
 }
 #endif
 //-----------------------------------------------------------------------------
-#if 0
+#if 1
 CASE( "test_trans_hires" ) {
     Log::info() << "test_trans_hires" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -865,7 +878,7 @@ CASE( "test_trans_hires" ) {
     functionspace::Spectral spectral( trc );
     functionspace::StructuredColumns gridpoints( g );
 
-    int nb_scalar = 1, nb_vordiv = 0;
+    int nb_scalar = 100, nb_vordiv = 0;
     int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
     std::vector<double> sp( 2 * N * nb_scalar );
     std::vector<double> vor( 2 * N * nb_vordiv );
@@ -946,7 +959,7 @@ CASE( "test_trans_hires" ) {
                                     //EXPECT( rms_gen2 < tolerance );
                                     icase++;
 
-                                    EXPECT( icase < 300 );
+                                    EXPECT( icase < 50 );
                                 }
                                 k++;
                             }

From 7dc00ff369e57791c9e3eee22c64a7445d29d3ac Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Tue, 13 Mar 2018 11:42:18 +0000
Subject: [PATCH 021/123] printing case number to make hires test a little more
 entertaining

---
 src/tests/trans/test_transgeneral.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index cd1b21478..51a0d5796 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -958,8 +958,8 @@ CASE( "test_trans_hires" ) {
                                     EXPECT( rms_gen1 < tolerance );
                                     //EXPECT( rms_gen2 < tolerance );
                                     icase++;
-
-                                    EXPECT( icase < 50 );
+                                    Log::info() << transType << ": case " << icase << std::endl;
+                                    EXPECT( icase < 25 );
                                 }
                                 k++;
                             }

From aa31ad62776279be644afa54443232eb982e2902 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Thu, 15 Mar 2018 15:50:40 +0000
Subject: [PATCH 022/123] updated localopt with changes from localopt2; removed
 scl_fourier_sym but seems to make code slower; switch to
 fftw_execute_dft_c2r, doesn't seem to affect performance

---
 .../trans/localopt/LegendrePolynomialsopt.cc  | 259 ++++++++++--------
 .../trans/localopt/LegendrePolynomialsopt.h   |   9 +-
 src/atlas/trans/localopt/TransLocalopt.cc     | 128 ++++++---
 src/atlas/trans/localopt/TransLocalopt.h      |  13 +-
 .../localopt2/LegendrePolynomialsopt2.cc      |  16 +-
 src/atlas/trans/localopt2/TransLocalopt2.cc   |  38 +--
 src/tests/trans/test_transgeneral.cc          |  94 ++-----
 7 files changed, 300 insertions(+), 257 deletions(-)

diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
index e872eb20b..3ed9e7544 100644
--- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
+++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
@@ -21,136 +21,171 @@ namespace trans {
 
 //-----------------------------------------------------------------------------
 
-void compute_legendre_polynomialsopt(
-    const size_t trc,     // truncation (in)
-    const int nlats,      // number of latitudes
-    const double lats[],  // latitudes in radians (in)
-    double legpol[] )     // values of associated Legendre functions, size (trc+1)*trc/2*nlats (out)
+void compute_legendre_polynomialsopt( const size_t trc,     // truncation (in)
+                                      const int nlats,      // number of latitudes
+                                      const double lats[],  // latitudes in radians (in)
+                                      double leg_sym[],     // values of associated Legendre functions, symmetric part
+                                      double leg_asym[] )   // values of associated Legendre functions, asymmetric part
 {
+    auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; };
     array::ArrayT<double> zfn_( trc + 1, trc + 1 );
     array::ArrayView<double, 2> zfn = array::make_view<double, 2>( zfn_ );
+    std::vector<double> legpol( legendre_size( trc ) );
+    auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; };
+    int iodd;
+
+    // Compute coefficients for Taylor series in Belousov (19) and (21)
+    // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.)
+    // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1
+    zfn( 0, 0 ) = 2.;
+    for ( int jn = 1; jn <= trc; ++jn ) {
+        double zfnn = zfn( 0, 0 );
+        for ( int jgl = 1; jgl <= jn; ++jgl ) {
+            zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) );
+        }
+        iodd          = jn % 2;
+        zfn( jn, jn ) = zfnn;
+        for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) {
+            double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) );  // new factor numerator
+            double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) );           // new factor denominator
 
-    auto idxmn = [&]( int jm, int jn, int jlat ) {
-        return ( 2 * trc + 3 - jm ) * jm / 2 * nlats + jlat * ( trc - jm + 1 ) + jn - jm;
-    };
-
-    {
-        ATLAS_TRACE( "init arrays" );
-
-        int iodd;
-
-        // Compute coefficients for Taylor series in Belousov (19) and (21)
-        // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.)
-        // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1
-        zfn( 0, 0 ) = 2.;
-        for ( int jn = 1; jn <= trc; ++jn ) {
-            double zfnn = zfn( 0, 0 );
-            for ( int jgl = 1; jgl <= jn; ++jgl ) {
-                zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) );
-            }
-            iodd          = jn % 2;
-            zfn( jn, jn ) = zfnn;
-            for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) {
-                double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) );  // new factor numerator
-                double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) );           // new factor denominator
-
-                zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd;
-            }
+            zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd;
         }
     }
+
+    // Loop over latitudes:
     for ( int jlat = 0; jlat < nlats; ++jlat ) {
-        // --------------------
-        // 1. First two columns
-        // --------------------
-        double lat     = lats[jlat];
-        double zdlx1   = ( M_PI_2 - lat );               // theta
-        double zdlx    = std::cos( zdlx1 );              // cos(theta)
-        double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
-
-        legpol[idxmn( 0, 0, jlat )] = 1.;
-
-        double zdl1sita = 0.;
-        // if we are less than 1 meter from the pole,
-        if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits<double>::epsilon() ) ) {
-            zdlx    = 1.;
-            zdlsita = 0.;
-        }
-        else {
-            zdl1sita = 1. / zdlsita;
-        }
+        {
+            //ATLAS_TRACE( "compute Legendre polynomials" );
+            // --------------------
+            // 1. First two columns
+            // --------------------
+            double lat     = lats[jlat];
+            double zdlx1   = ( M_PI_2 - lat );               // theta
+            double zdlx    = std::cos( zdlx1 );              // cos(theta)
+            double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
+
+            legpol[idxmn( 0, 0 )] = 1.;
+
+            double zdl1sita = 0.;
+            // if we are less than 1 meter from the pole,
+            if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits<double>::epsilon() ) ) {
+                zdlx    = 1.;
+                zdlsita = 0.;
+            }
+            else {
+                zdl1sita = 1. / zdlsita;
+            }
 
-        // ordinary Legendre polynomials from series expansion
-        // ---------------------------------------------------
-
-        // even N
-        for ( int jn = 2; jn <= trc; jn += 2 ) {
-            double zdlk   = 0.5 * zfn( jn, 0 );
-            double zdlldn = 0.0;
-            double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
-            // represented by only even k
-            for ( int jk = 2; jk <= jn; jk += 2 ) {
-                // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-                zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
-                // normalised associated Legendre polynomial == \overbar{P_n}^1
-                zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+            // ordinary Legendre polynomials from series expansion
+            // ---------------------------------------------------
+
+            // even N
+            for ( int jn = 2; jn <= trc; jn += 2 ) {
+                double zdlk   = 0.5 * zfn( jn, 0 );
+                double zdlldn = 0.0;
+                double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
+                // represented by only even k
+                for ( int jk = 2; jk <= jn; jk += 2 ) {
+                    // normalised ordinary Legendre polynomial == \overbar{P_n}^0
+                    zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
+                    // normalised associated Legendre polynomial == \overbar{P_n}^1
+                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+                }
+                legpol[idxmn( 0, jn )] = zdlk;
+                legpol[idxmn( 1, jn )] = zdlldn;
             }
-            legpol[idxmn( 0, jn, jlat )] = zdlk;
-            legpol[idxmn( 1, jn, jlat )] = zdlldn;
-        }
 
-        // odd N
-        for ( int jn = 1; jn <= trc; jn += 2 ) {
-            zfn( jn, 0 )  = 0.;
-            double zdlk   = 0.;
-            double zdlldn = 0.0;
-            double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
-            // represented by only even k
-            for ( int jk = 1; jk <= jn; jk += 2 ) {
-                // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-                zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
-                // normalised associated Legendre polynomial == \overbar{P_n}^1
-                zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+            // odd N
+            for ( int jn = 1; jn <= trc; jn += 2 ) {
+                zfn( jn, 0 )  = 0.;
+                double zdlk   = 0.;
+                double zdlldn = 0.0;
+                double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
+                // represented by only even k
+                for ( int jk = 1; jk <= jn; jk += 2 ) {
+                    // normalised ordinary Legendre polynomial == \overbar{P_n}^0
+                    zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
+                    // normalised associated Legendre polynomial == \overbar{P_n}^1
+                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+                }
+                legpol[idxmn( 0, jn )] = zdlk;
+                legpol[idxmn( 1, jn )] = zdlldn;
             }
-            legpol[idxmn( 0, jn, jlat )] = zdlk;
-            legpol[idxmn( 1, jn, jlat )] = zdlldn;
-        }
 
-        // --------------------------------------------------------------
-        // 2. Diagonal (the terms 0,0 and 1,1 have already been computed)
-        //    Belousov, equation (23)
-        // --------------------------------------------------------------
+            // --------------------------------------------------------------
+            // 2. Diagonal (the terms 0,0 and 1,1 have already been computed)
+            //    Belousov, equation (23)
+            // --------------------------------------------------------------
 
-        double zdls = zdl1sita * std::numeric_limits<double>::min();
-        for ( int jn = 2; jn <= trc; ++jn ) {
-            double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) );
+            double zdls = zdl1sita * std::numeric_limits<double>::min();
+            for ( int jn = 2; jn <= trc; ++jn ) {
+                double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) );
 
-            legpol[idxmn( jn, jn, jlat )] = legpol[idxmn( jn - 1, jn - 1, jlat )] * zdlsita * sq;
-            if ( std::abs( legpol[idxmn( jn, jn, jlat )] ) < zdls ) legpol[idxmn( jn, jn, jlat )] = 0.0;
-        }
+                legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq;
+                if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0;
+            }
 
-        // ---------------------------------------------
-        // 3. General recurrence (Belousov, equation 17)
-        // ---------------------------------------------
-
-        for ( int jn = 3; jn <= trc; ++jn ) {
-            for ( int jm = 2; jm < jn; ++jm ) {
-                double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) );  // numerator of c in Belousov
-                double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of c in Belousov
-                double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) );  // numerator of d in Belousov
-                double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of d in Belousov
-                double en = ( ( 2. * jn + 1. ) * ( jn - jm ) );                          // numerator of e in Belousov
-                double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) );                          // denominator of e in Belousov
-
-                legpol[idxmn( jm, jn, jlat )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2, jlat )] -
-                                                std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1, jlat )] * zdlx +
-                                                std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1, jlat )] * zdlx;
+            // ---------------------------------------------
+            // 3. General recurrence (Belousov, equation 17)
+            // ---------------------------------------------
+
+            for ( int jn = 3; jn <= trc; ++jn ) {
+                for ( int jm = 2; jm < jn; ++jm ) {
+                    double cn =
+                        ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) );     // numerator of c in Belousov
+                    double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) );  // denominator of c in Belousov
+                    double dn =
+                        ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) );     // numerator of d in Belousov
+                    double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) );  // denominator of d in Belousov
+                    double en = ( ( 2. * jn + 1. ) * ( jn - jm ) );                     // numerator of e in Belousov
+                    double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) );                     // denominator of e in Belousov
+
+                    legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] -
+                                              std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx +
+                                              std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx;
+                }
             }
         }
 
-        // take factor 2 for m > 0 into account:
-        for ( int jm = 1; jm <= trc; ++jm ) {
-            for ( int jn = jm; jn <= trc; ++jn ) {
-                legpol[idxmn( jm, jn, jlat )] *= 2.;
+        {
+            //ATLAS_TRACE( "add to global arrays" );
+
+            // take factor 2 for m > 0 into account:
+            for ( int jm = 1; jm <= trc; ++jm ) {
+                for ( int jn = jm; jn <= trc; ++jn ) {
+                    legpol[idxmn( jm, jn )] *= 2.;
+                }
+            }
+            int is0 = 0, ia0 = 0;
+            for ( int jm = 0; jm <= trc; jm++ ) {
+                int is1 = 0, ia1 = 0;
+                for ( int jn = jm; jn <= trc; jn++ ) {
+                    if ( ( jn - jm ) % 2 == 0 ) { is1++; }
+                    else {
+                        ia1++;
+                    }
+                }
+                int is2 = 0, ia2 = 0;
+                // the choice between the following two code lines determines whether
+                // total wavenumbers are summed in an ascending or descending order.
+                // The trans library in IFS uses descending order because it should
+                // be more accurate (higher wavenumbers have smaller contributions).
+                // This also needs to be changed when splitting the spectral data in
+                // TransLocalopt::invtrans_uv!
+                //for ( int jn = jm; jn <= trc; jn++ ) {
+                for ( int jn = trc; jn >= jm; jn-- ) {
+                    if ( ( jn - jm ) % 2 == 0 ) {
+                        int is      = is0 * nlats + is1 * jlat + is2++;
+                        leg_sym[is] = legpol[idxmn( jm, jn )];
+                    }
+                    else {
+                        int ia       = ia0 * nlats + ia1 * jlat + ia2++;
+                        leg_asym[ia] = legpol[idxmn( jm, jn )];
+                    }
+                }
+                is0 += is2;
+                ia0 += ia2;
             }
         }
     }
diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.h b/src/atlas/trans/localopt/LegendrePolynomialsopt.h
index e2fd7db8f..4dc3ce2e5 100644
--- a/src/atlas/trans/localopt/LegendrePolynomialsopt.h
+++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.h
@@ -33,10 +33,11 @@ namespace trans {
 // Andreas Mueller *ECMWF*
 //
 void compute_legendre_polynomialsopt(
-    const size_t trc,     // truncation (in)
-    const int nlats,      // number of latitudes
-    const double lats[],  // latitudes in radians (in)
-    double legpol[] );    // values of associated Legendre functions, size (trc+1)*trc/2*nlats (out)
+    const size_t trc,          // truncation (in)
+    const int nlats,           // number of latitudes
+    const double lats[],       // latitudes in radians (in)
+    double legendre_sym[],     // values of associated Legendre functions, symmetric part
+    double legendre_asym[] );  // values of associated Legendre functions, asymmetric part
 
 // --------------------------------------------------------------------------------------------------------------------
 
diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index 5b31b75ad..b34b8f837 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -119,40 +119,23 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
     }
     // precomputations for Legendre polynomials:
     {
-        std::vector<double> legendre( legendre_size( truncation_ + 1 ) * nlatsNH );
-        {
-            ATLAS_TRACE( "opt precomp Legendre" );
-            compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre.data() );
-        }
-        {
-            ATLAS_TRACE( "opt split Legendre" );
-            int size_sym  = 0;
-            int size_asym = 0;
-            legendre_sym_begin_.resize( truncation_ + 3 );
-            legendre_asym_begin_.resize( truncation_ + 3 );
-            legendre_sym_begin_[0]  = 0;
-            legendre_asym_begin_[0] = 0;
-            for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-                size_sym += num_n( truncation_ + 1, jm, true );
-                size_asym += num_n( truncation_ + 1, jm, false );
-                legendre_sym_begin_[jm + 1]  = size_sym;
-                legendre_asym_begin_[jm + 1] = size_asym;
-            }
-            legendre_sym_.resize( size_sym * nlatsNH );
-            legendre_asym_.resize( size_asym * nlatsNH );
-            int idx = 0, is = 0, ia = 0;
-            for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-                for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                    for ( int jn = 0; jn <= truncation_ - jm + 1; jn++, idx++ ) {
-                        if ( jn % 2 == 0 ) { legendre_sym_[is++] = legendre[idx]; }
-                        else {
-                            legendre_asym_[ia++] = legendre[idx];
-                        }
-                    }
-                }
-            }
-            ASSERT( ia == size_asym * nlatsNH && is == size_sym * nlatsNH );
+        ATLAS_TRACE( "opt precomp Legendre" );
+        int size_sym  = 0;
+        int size_asym = 0;
+        legendre_sym_begin_.resize( truncation_ + 3 );
+        legendre_asym_begin_.resize( truncation_ + 3 );
+        legendre_sym_begin_[0]  = 0;
+        legendre_asym_begin_[0] = 0;
+        for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
+            size_sym += num_n( truncation_ + 1, jm, true );
+            size_asym += num_n( truncation_ + 1, jm, false );
+            legendre_sym_begin_[jm + 1]  = size_sym;
+            legendre_asym_begin_[jm + 1] = size_asym;
         }
+        legendre_sym_.resize( size_sym * nlatsNH );
+        legendre_asym_.resize( size_asym * nlatsNH );
+        compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_.data(),
+                                         legendre_asym_.data() );
     }
 
     // precomputations for Fourier transformations:
@@ -180,7 +163,17 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
             }
         }
     }
-}
+#if ATLAS_HAVE_FFTW
+    {
+        ATLAS_TRACE( "opt precomp FFTW" );
+        int num_complex = ( nlons / 2 ) + 1;
+        fft_in_         = fftw_alloc_complex( nlats * num_complex );
+        fft_out_        = fftw_alloc_real( nlats * nlons );
+        plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons,
+                                        FFTW_ESTIMATE );
+    }
+#endif
+}  // namespace atlas
 
 // --------------------------------------------------------------------------------------------------------------------
 
@@ -189,7 +182,13 @@ TransLocalopt::TransLocalopt( const Grid& grid, const long truncation, const eck
 
 // --------------------------------------------------------------------------------------------------------------------
 
-TransLocalopt::~TransLocalopt() {}
+TransLocalopt::~TransLocalopt() {
+#if ATLAS_HAVE_FFTW
+    fftw_destroy_plan( plan_ );
+    fftw_free( fft_in_ );
+    fftw_free( fft_out_ );
+#endif
+}
 
 // --------------------------------------------------------------------------------------------------------------------
 
@@ -253,8 +252,6 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
     if ( nb_scalar_fields > 0 ) {
         int nb_fields = nb_scalar_fields;
 
-        //eckit::linalg::LinearAlgebra::backend( "string" ) // might want to choose backend with this command
-
         // Transform
         if ( grid::StructuredGrid g = grid_ ) {
             ATLAS_TRACE( "invtrans_uv structured opt" );
@@ -272,17 +269,25 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
     // TODO: 0 is currently not working because it requires all latitudes to be included in legendre_ (which is currently not done)
                     int size_sym  = num_n( truncation_ + 1, jm, true );
                     int size_asym = num_n( truncation_ + 1, jm, false );
-                    std::vector<double> scalar_sym( 2 * nb_fields * size_sym, -1234. );
-                    std::vector<double> scalar_asym( 2 * nb_fields * size_asym, -1234. );
+                    std::vector<double> scalar_sym( 2 * nb_fields * size_sym );
+                    std::vector<double> scalar_asym( 2 * nb_fields * size_asym );
                     std::vector<double> scl_fourier_sym( size_fourier );
                     std::vector<double> scl_fourier_asym( size_fourier );
                     {
                         //ATLAS_TRACE( "opt Legendre split" );
                         int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2;
-                        for ( int jn = 0; jn <= truncation_ - jm + 1; jn++ ) {
+                        // the choice between the following two code lines determines whether
+                        // total wavenumbers are summed in an ascending or descending order.
+                        // The trans library in IFS uses descending order because it should
+                        // be more accurate (higher wavenumbers have smaller contributions).
+                        // This also needs to be changed when splitting the spectral data in
+                        // compute_legendre_polynomialsopt!
+                        //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) {
+                        for ( int jn = truncation_ + 1; jn >= jm; jn-- ) {
                             for ( int imag = 0; imag < 2; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
-                                    if ( jn % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                    idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) );
+                                    if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
                                     else {
                                         scalar_asym[ia++] = scalar_spectra[idx + ioff];
                                     }
@@ -333,6 +338,44 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
 #endif
                 }
             }
+#if ATLAS_HAVE_FFTW
+            {
+                auto position = [&]( int jfld, int imag, int jlat, int jm ) {
+                    return jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) );
+                };
+                auto factor = [&]( int jm ) {
+                    if ( jm > 0 ) { return 2.; }
+                    else {
+                        return 1.;
+                    }
+                };
+
+                int num_complex = ( nlons / 2 ) + 1;
+                {
+                    ATLAS_TRACE( "opt FFTW" );
+                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        int idx = 0;
+                        for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                            for ( int jm = 0; jm < num_complex; jm++, idx++ ) {
+                                for ( int imag = 0; imag < 2; imag++ ) {
+                                    if ( jm <= truncation_ ) {
+                                        fft_in_[idx][imag] =
+                                            scl_fourier[position( jfld, imag, jlat, jm )] / factor( jm );
+                                    }
+                                    else {
+                                        fft_in_[idx][imag] = 0.;
+                                    }
+                                }
+                            }
+                        }
+                        fftw_execute( plan_ );
+                        for ( int j = 0; j < nlats * nlons; j++ ) {
+                            gp_fields[j + jfld * nlats * nlons] = fft_out_[j];
+                        }
+                    }
+                }
+            }
+#else
 #if 0  // 1: better for small number of columns, large truncation; 0: better for large number of columns
 
             // Transposition in Fourier space:
@@ -406,6 +449,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                 eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
             }
 
+#endif
 #endif
             // Computing u,v from U,V:
             {
diff --git a/src/atlas/trans/localopt/TransLocalopt.h b/src/atlas/trans/localopt/TransLocalopt.h
index dca01cd2f..b9f89ef08 100644
--- a/src/atlas/trans/localopt/TransLocalopt.h
+++ b/src/atlas/trans/localopt/TransLocalopt.h
@@ -12,8 +12,12 @@
 
 #include <vector>
 
+#include "atlas/array.h"
 #include "atlas/grid/Grid.h"
 #include "atlas/trans/Trans.h"
+#if ATLAS_HAVE_FFTW
+#include <fftw3.h>
+#endif
 
 //-----------------------------------------------------------------------------
 // Forward declarations
@@ -100,9 +104,6 @@ class TransLocalopt : public trans::TransImpl {
                            double divergence_spectra[], const eckit::Configuration& = util::NoConfig() ) const override;
 
 private:
-    const double* legendre_data( int j ) const { return legendre_.data() + legendre_begin_[j]; }
-    double* legendre_data( int j ) { return legendre_.data() + legendre_begin_[j]; }
-
     void invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
                       const double scalar_spectra[], double gp_fields[],
                       const eckit::Configuration& = util::NoConfig() ) const;
@@ -111,7 +112,6 @@ class TransLocalopt : public trans::TransImpl {
     Grid grid_;
     int truncation_;
     bool precompute_;
-    mutable std::vector<double> legendre_;
     mutable std::vector<double> legendre_sym_;
     mutable std::vector<double> legendre_asym_;
     mutable std::vector<double> fourier_;
@@ -119,6 +119,11 @@ class TransLocalopt : public trans::TransImpl {
     std::vector<size_t> legendre_begin_;
     std::vector<size_t> legendre_sym_begin_;
     std::vector<size_t> legendre_asym_begin_;
+#if ATLAS_HAVE_FFTW
+    fftw_complex* fft_in_;
+    double* fft_out_;
+    fftw_plan plan_;
+#endif
 };
 
 //-----------------------------------------------------------------------------
diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
index 99f54586a..4f786370d 100644
--- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
+++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
@@ -21,16 +21,14 @@ namespace trans {
 
 //-----------------------------------------------------------------------------
 
-size_t legendre_size( const size_t truncation ) {
-    return ( truncation + 2 ) * ( truncation + 1 ) / 2;
-}
-
-void compute_legendre_polynomialsopt2( const size_t trc,     // truncation (in)
-                                       const int nlats,      // number of latitudes
-                                       const double lats[],  // latitudes in radians (in)
-                                       double leg_sym[],     // values of associated Legendre functions, symmetric part
-                                       double leg_asym[] )   // values of associated Legendre functions, asymmetric part
+void compute_legendre_polynomialsopt2(
+    const size_t trc,               // truncation (in)
+    const int nlats,                // number of latitudes
+    const double lats[],            // latitudes in radians (in)
+    double __restrict leg_sym[],    // values of associated Legendre functions, symmetric part
+    double __restrict leg_asym[] )  // values of associated Legendre functions, asymmetric part
 {
+    auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; };
     array::ArrayT<double> zfn_( trc + 1, trc + 1 );
     array::ArrayView<double, 2> zfn = array::make_view<double, 2>( zfn_ );
     std::vector<double> legpol( legendre_size( trc ) );
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index 084eca68f..2c2ffdee4 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -167,10 +167,17 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
     {
         ATLAS_TRACE( "opt2 precomp FFTW" );
         int num_complex = ( nlons / 2 ) + 1;
-        fft_in_         = fftw_alloc_complex( nlats * num_complex );
-        fft_out_        = fftw_alloc_real( nlats * nlons );
-        plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons,
-                                        FFTW_ESTIMATE );
+
+        fftw_complex* tmp_in = (fftw_complex*)malloc( sizeof( fftw_complex ) );
+        double* tmp_out      = (double*)malloc( sizeof( double ) );
+        fft_in_              = fftw_alloc_complex( nlats * num_complex );
+        fft_out_             = fftw_alloc_real( nlats * nlons );
+
+        plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, tmp_in, NULL, 1, num_complex, tmp_out, NULL, 1, nlons,
+                                        FFTW_ESTIMATE + FFTW_NO_SIMD );
+
+        free( tmp_in );
+        free( tmp_out );
     }
 #endif
 }  // namespace atlas
@@ -270,9 +277,8 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
     // TODO: 0 is currently not working because it requires all latitudes to be included in legendre_ (which is currently not done)
                     int size_sym  = num_n( truncation_ + 1, jm, true );
                     int size_asym = num_n( truncation_ + 1, jm, false );
-                    std::vector<double> scalar_sym( 2 * nb_fields * size_sym, -1234. );
-                    std::vector<double> scalar_asym( 2 * nb_fields * size_asym, -1234. );
-                    std::vector<double> scl_fourier_sym( size_fourier );
+                    std::vector<double> scalar_sym( 2 * nb_fields * size_sym );
+                    std::vector<double> scalar_asym( 2 * nb_fields * size_asym );
                     std::vector<double> scl_fourier_asym( size_fourier );
                     {
                         //ATLAS_TRACE( "opt2 Legendre split" );
@@ -301,7 +307,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                         eckit::linalg::Matrix A( scalar_sym.data(), nb_fields * 2, size_sym );
                         eckit::linalg::Matrix B( legendre_sym_.data() + legendre_sym_begin_[jm] * nlatsNH, size_sym,
                                                  nlatsNH );
-                        eckit::linalg::Matrix C( scl_fourier_sym.data(), nb_fields * 2, nlatsNH );
+                        eckit::linalg::Matrix C( scl_fourier.data() + jm * size_fourier, nb_fields * 2, nlatsNH );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
                     if ( size_asym > 0 ) {
@@ -312,21 +318,21 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                         {
                             //ATLAS_TRACE( "opt2 merge spheres" );
-                            // northern hemisphere:
-                            int ioff = jm * size_fourier;
-                            for ( int j = 0; j < 2 * nb_fields * nlatsNH; j++ ) {
-                                scl_fourier[j + ioff] = scl_fourier_sym[j] + scl_fourier_asym[j];
-                            }
                             // southern hemisphere:
-                            int idx = 0;
+                            int ioff = jm * size_fourier;
+                            int idx  = 0;
                             for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
                                 for ( int imag = 0; imag < 2; imag++ ) {
                                     for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
                                         int pos = jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) );
-                                        scl_fourier[pos + ioff] = scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                                        scl_fourier[pos + ioff] = scl_fourier[idx + ioff] - scl_fourier_asym[idx];
                                     }
                                 }
                             }
+                            // northern hemisphere:
+                            for ( int j = 0; j < 2 * nb_fields * nlatsNH; j++ ) {
+                                scl_fourier[j + ioff] += scl_fourier_asym[j];
+                            }
                         }
                     }
 #else
@@ -369,7 +375,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                                 }
                             }
                         }
-                        fftw_execute( plan_ );
+                        fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ );
                         for ( int j = 0; j < nlats * nlons; j++ ) {
                             gp_fields[j + jfld * nlats * nlons] = fft_out_[j];
                         }
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 51a0d5796..cca428385 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -704,7 +704,7 @@ CASE( "test_transgeneral_with_translib" ) {
 #endif
 #endif
 //-----------------------------------------------------------------------------
-#if 0
+#if 1
 CASE( "test_trans_vordiv_with_translib" ) {
     Log::info() << "test_trans_vordiv_with_translib" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -714,7 +714,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
     double tolerance  = 1.e-13;
 
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F120" );
+    Grid g( "F320" );
 
     grid::StructuredGrid gs( g );
     int ndgl = gs.ny();
@@ -853,7 +853,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
 }
 #endif
 //-----------------------------------------------------------------------------
-#if 1
+#if 0
 CASE( "test_trans_hires" ) {
     Log::info() << "test_trans_hires" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -863,10 +863,10 @@ CASE( "test_trans_hires" ) {
     double tolerance  = 1.e-13;
 
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F1280" );
+    Grid g( "F640" );
 #if ATLAS_HAVE_TRANS
-    //std::string transTypes[1] = {"localopt2"};
-    std::string transTypes[2] = {"localopt2", "ifs"};
+    std::string transTypes[1] = {"localopt2"};
+    //std::string transTypes[3] = {"localopt", "localopt2", "ifs"};
 #else
     std::string transTypes[1] = {"localopt2"};
 #endif
@@ -880,14 +880,6 @@ CASE( "test_trans_hires" ) {
 
     int nb_scalar = 100, nb_vordiv = 0;
     int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
-    std::vector<double> sp( 2 * N * nb_scalar );
-    std::vector<double> vor( 2 * N * nb_vordiv );
-    std::vector<double> div( 2 * N * nb_vordiv );
-    std::vector<double> rspecg( 2 * N );
-    std::vector<double> gp( nb_all * g.size() );
-    std::vector<double> rgp1( nb_all * g.size() );
-    //std::vector<double> rgp2( nb_all * g.size() );
-    std::vector<double> rgp_analytic( g.size() );
 
     for ( auto transType : transTypes ) {
         int icase = 0;
@@ -895,71 +887,33 @@ CASE( "test_trans_hires" ) {
         for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) {         // vorticity, divergence, scalar
             for ( int ivar_out = 2; ivar_out < 3; ivar_out++ ) {  // u, v, scalar
                 int nb_fld = 1;
-                if ( ivar_out == 2 ) {
-                    tolerance = 1.e-13;
-                    nb_fld    = nb_scalar;
-                }
+                if ( ivar_out == 2 ) { nb_fld = nb_scalar; }
                 else {
-                    tolerance = 2.e-6;
-                    nb_fld    = nb_vordiv;
+                    nb_fld = nb_vordiv;
                 }
-                for ( int jfld = 0; jfld < nb_fld; jfld++ ) {  // multiple fields
+                for ( int jfld = 0; jfld < 1; jfld++ ) {  // multiple fields
                     int k = 0;
                     for ( int m = 0; m <= trc; m++ ) {                 // zonal wavenumber
                         for ( int n = m; n <= trc; n++ ) {             // total wavenumber
                             for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
 
-                                if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. ) {
-                                    for ( int j = 0; j < 2 * N * nb_scalar; j++ ) {
-                                        sp[j] = 0.;
-                                    }
-                                    for ( int j = 0; j < 2 * N * nb_vordiv; j++ ) {
-                                        vor[j] = 0.;
-                                        div[j] = 0.;
-                                    }
-                                    if ( ivar_in == 0 ) vor[k * nb_vordiv + jfld] = 1.;
-                                    if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.;
+                                if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. &&
+                                     icase < 25 ) {
+                                    auto start = std::chrono::system_clock::now();
+                                    std::vector<double> sp( 2 * N * nb_scalar );
+                                    std::vector<double> gp( nb_all * g.size() );
                                     if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.;
-
-                                    for ( int j = 0; j < nb_all * g.size(); j++ ) {
-                                        gp[j]   = 0.;
-                                        rgp1[j] = 0.;
-                                        //rgp2[j] = 0.;
-                                    }
-                                    for ( int j = 0; j < g.size(); j++ ) {
-                                        rgp_analytic[j] = 0.;
-                                    }
-
-                                    spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(),
-                                                                      rgp_analytic.data(), ivar_in, ivar_out );
-
-                                    EXPECT_NO_THROW( trans.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
-                                                                     div.data(), rgp1.data() ) );
-
-                                    //EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
-                                    //                                       div.data(), rgp2.data() ) );
-
-                                    int pos = ( ivar_out * nb_vordiv + jfld );
-
-                                    double rms_gen1 =
-                                        compute_rms( g.size(), rgp1.data() + pos * g.size(), rgp_analytic.data() );
-
-                                    //double rms_gen2 =
-                                    //    compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() );
-
-                                    if ( !( rms_gen1 < tolerance ) ) {  // || !( rms_gen2 < tolerance ) ) {
-                                        Log::info()
-                                            << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
-                                            << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
-                                        ATLAS_DEBUG_VAR( rms_gen1 );
-                                        //ATLAS_DEBUG_VAR( rms_gen2 );
-                                        ATLAS_DEBUG_VAR( tolerance );
-                                    }
-                                    EXPECT( rms_gen1 < tolerance );
-                                    //EXPECT( rms_gen2 < tolerance );
+                                    EXPECT_NO_THROW( trans.invtrans( nb_scalar, sp.data(), nb_vordiv, nullptr, nullptr,
+                                                                     gp.data() ) );
                                     icase++;
-                                    Log::info() << transType << ": case " << icase << std::endl;
-                                    EXPECT( icase < 25 );
+                                    auto end = std::chrono::system_clock::now();  //
+                                    std::chrono::duration<double> elapsed_seconds = end - start;
+                                    std::time_t end_time = std::chrono::system_clock::to_time_t( end );
+                                    std::string time_str = std::ctime( &end_time );
+                                    Log::info()
+                                        << transType << ": case " << icase
+                                        << ", elapsed time: " << elapsed_seconds.count()
+                                        << "s. Now: " << time_str.substr( 0, time_str.length() - 1 ) << std::endl;
                                 }
                                 k++;
                             }

From 40bae9c645b8244c169865912fb8f3db2666754b Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Thu, 15 Mar 2018 17:42:07 +0000
Subject: [PATCH 023/123] not using fft_in_ and fft_out_ when creating the plan
 is slower; removing scl_fourier_sym doesn't seem to affect the speed

---
 src/atlas/trans/localopt/TransLocalopt.cc   | 17 ---------
 src/atlas/trans/localopt2/TransLocalopt2.cc | 39 +++++----------------
 2 files changed, 9 insertions(+), 47 deletions(-)

diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index b34b8f837..9c4469725 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -54,23 +54,6 @@ int num_n( const int truncation, const int m, const bool symmetric ) {
     return len;
 }
 
-std::vector<int> n_indices( const int truncation, const int m, const bool symmetric ) {
-    int len = num_n( truncation, m, symmetric ), jn0 = 0;
-    if ( !symmetric ) { jn0 = 1; }
-    std::vector<int> jns( len );
-    int ia = 0, id = len - 1;
-    for ( int jn = jn0; jn <= truncation - m; jn += 2, ia++, id-- ) {
-#if 1  // 1: ascending, 0: descending
-        int idx = ia;
-#else
-        int idx = id;
-#endif
-        jns[idx] = jn;
-        ASSERT( idx < len && idx >= 0 );
-    }
-    return jns;
-}
-
 }  // namespace
 
 // --------------------------------------------------------------------------------------------------------------------
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index 2c2ffdee4..d6d4b4012 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -54,23 +54,6 @@ int num_n( const int truncation, const int m, const bool symmetric ) {
     return len;
 }
 
-std::vector<int> n_indices( const int truncation, const int m, const bool symmetric ) {
-    int len = num_n( truncation, m, symmetric ), jn0 = 0;
-    if ( !symmetric ) { jn0 = 1; }
-    std::vector<int> jns( len );
-    int ia = 0, id = len - 1;
-    for ( int jn = jn0; jn <= truncation - m; jn += 2, ia++, id-- ) {
-#if 1  // 1: ascending, 0: descending
-        int idx = ia;
-#else
-        int idx = id;
-#endif
-        jns[idx] = jn;
-        ASSERT( idx < len && idx >= 0 );
-    }
-    return jns;
-}
-
 }  // namespace
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -167,17 +150,10 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
     {
         ATLAS_TRACE( "opt2 precomp FFTW" );
         int num_complex = ( nlons / 2 ) + 1;
-
-        fftw_complex* tmp_in = (fftw_complex*)malloc( sizeof( fftw_complex ) );
-        double* tmp_out      = (double*)malloc( sizeof( double ) );
-        fft_in_              = fftw_alloc_complex( nlats * num_complex );
-        fft_out_             = fftw_alloc_real( nlats * nlons );
-
-        plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, tmp_in, NULL, 1, num_complex, tmp_out, NULL, 1, nlons,
-                                        FFTW_ESTIMATE + FFTW_NO_SIMD );
-
-        free( tmp_in );
-        free( tmp_out );
+        fft_in_         = fftw_alloc_complex( nlats * num_complex );
+        fft_out_        = fftw_alloc_real( nlats * nlons );
+        plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons,
+                                        FFTW_ESTIMATE );
     }
 #endif
 }  // namespace atlas
@@ -320,12 +296,15 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                             //ATLAS_TRACE( "opt2 merge spheres" );
                             // southern hemisphere:
                             int ioff = jm * size_fourier;
+                            int pos0 = 2 * ( nlats - 1 ) + ioff;
                             int idx  = 0;
                             for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                                int poslat = pos0 - 2 * jlat;
                                 for ( int imag = 0; imag < 2; imag++ ) {
+                                    int posimag = nb_fields * ( imag + poslat );
                                     for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
-                                        int pos = jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) );
-                                        scl_fourier[pos + ioff] = scl_fourier[idx + ioff] - scl_fourier_asym[idx];
+                                        int pos          = jfld + posimag;
+                                        scl_fourier[pos] = scl_fourier[idx + ioff] - scl_fourier_asym[idx];
                                     }
                                 }
                             }

From 2ef0a02ca61baaa87c1359d8df74bbd6c1270abb Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Thu, 15 Mar 2018 17:48:56 +0000
Subject: [PATCH 024/123] removed restrict again because gcc doesn't allow
 restrict with double

---
 src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
index 4f786370d..90e398229 100644
--- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
+++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
@@ -21,12 +21,11 @@ namespace trans {
 
 //-----------------------------------------------------------------------------
 
-void compute_legendre_polynomialsopt2(
-    const size_t trc,               // truncation (in)
-    const int nlats,                // number of latitudes
-    const double lats[],            // latitudes in radians (in)
-    double __restrict leg_sym[],    // values of associated Legendre functions, symmetric part
-    double __restrict leg_asym[] )  // values of associated Legendre functions, asymmetric part
+void compute_legendre_polynomialsopt2( const size_t trc,     // truncation (in)
+                                       const int nlats,      // number of latitudes
+                                       const double lats[],  // latitudes in radians (in)
+                                       double leg_sym[],     // values of associated Legendre functions, symmetric part
+                                       double leg_asym[] )   // values of associated Legendre functions, asymmetric part
 {
     auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; };
     array::ArrayT<double> zfn_( trc + 1, trc + 1 );

From fc6346213ac3e059c46951ad69a9ff071bbd8ddf Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Fri, 16 Mar 2018 11:19:50 +0000
Subject: [PATCH 025/123] removed imaginary part from Legendre transform for
 zonal wavenumber 0; running hires-test with truncation-1 to be more
 comparable with trans

---
 src/atlas/trans/localopt2/TransLocalopt2.cc | 92 +++++++++++----------
 src/tests/trans/test_transgeneral.cc        | 15 ++--
 2 files changed, 58 insertions(+), 49 deletions(-)

diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index d6d4b4012..2f29c7acb 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -239,11 +239,11 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
         // Transform
         if ( grid::StructuredGrid g = grid_ ) {
             ATLAS_TRACE( "invtrans_uv structured opt2" );
-            int nlats        = g.ny();
-            int nlons        = g.nxmax();
-            int nlatsNH      = nlats_northernHemisphere( nlats );
-            int size_fourier = nb_fields * 2 * g.ny();
-            std::vector<double> scl_fourier( size_fourier * ( truncation + 1 ) );
+            int nlats            = g.ny();
+            int nlons            = g.nxmax();
+            int nlatsNH          = nlats_northernHemisphere( nlats );
+            int size_fourier_max = nb_fields * 2 * nlats;
+            std::vector<double> scl_fourier( size_fourier_max * ( truncation + 1 ) );
 
             // Legendre transform:
             {
@@ -253,9 +253,13 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
     // TODO: 0 is currently not working because it requires all latitudes to be included in legendre_ (which is currently not done)
                     int size_sym  = num_n( truncation_ + 1, jm, true );
                     int size_asym = num_n( truncation_ + 1, jm, false );
-                    std::vector<double> scalar_sym( 2 * nb_fields * size_sym );
-                    std::vector<double> scalar_asym( 2 * nb_fields * size_asym );
-                    std::vector<double> scl_fourier_asym( size_fourier );
+                    int n_imag    = 2;
+                    if ( jm == 0 ) { n_imag = 1; }
+                    int size_fourier = nb_fields * n_imag * nlatsNH;
+                    std::vector<double> scalar_sym( n_imag * nb_fields * size_sym );
+                    std::vector<double> scalar_asym( n_imag * nb_fields * size_asym );
+                    std::vector<double> scl_fourier_sym( size_fourier );
+                    std::vector<double> scl_fourier_asym( size_fourier, 0. );
                     {
                         //ATLAS_TRACE( "opt2 Legendre split" );
                         int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2;
@@ -267,7 +271,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                         // compute_legendre_polynomialsopt2!
                         //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) {
                         for ( int jn = truncation_ + 1; jn >= jm; jn-- ) {
-                            for ( int imag = 0; imag < 2; imag++ ) {
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
                                 for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                                     idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) );
                                     if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
@@ -277,40 +281,51 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                                 }
                             }
                         }
-                        ASSERT( ia == 2 * nb_fields * size_asym && is == 2 * nb_fields * size_sym );
+                        ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym );
                     }
                     {
-                        eckit::linalg::Matrix A( scalar_sym.data(), nb_fields * 2, size_sym );
+                        //Log::info() << "jm=" << jm << " symmetric - ";
+                        eckit::linalg::Matrix A( scalar_sym.data(), nb_fields * n_imag, size_sym );
                         eckit::linalg::Matrix B( legendre_sym_.data() + legendre_sym_begin_[jm] * nlatsNH, size_sym,
                                                  nlatsNH );
-                        eckit::linalg::Matrix C( scl_fourier.data() + jm * size_fourier, nb_fields * 2, nlatsNH );
+                        eckit::linalg::Matrix C( scl_fourier_sym.data(), nb_fields * n_imag, nlatsNH );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
                     if ( size_asym > 0 ) {
-                        eckit::linalg::Matrix A( scalar_asym.data(), nb_fields * 2, size_asym );
+                        //Log::info() << "jm=" << jm << " antisymmetric - ";
+                        eckit::linalg::Matrix A( scalar_asym.data(), nb_fields * n_imag, size_asym );
                         eckit::linalg::Matrix B( legendre_asym_.data() + legendre_asym_begin_[jm] * nlatsNH, size_asym,
                                                  nlatsNH );
-                        eckit::linalg::Matrix C( scl_fourier_asym.data(), nb_fields * 2, nlatsNH );
+                        eckit::linalg::Matrix C( scl_fourier_asym.data(), nb_fields * n_imag, nlatsNH );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                        {
-                            //ATLAS_TRACE( "opt2 merge spheres" );
-                            // southern hemisphere:
-                            int ioff = jm * size_fourier;
-                            int pos0 = 2 * ( nlats - 1 ) + ioff;
-                            int idx  = 0;
-                            for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                                int poslat = pos0 - 2 * jlat;
-                                for ( int imag = 0; imag < 2; imag++ ) {
-                                    int posimag = nb_fields * ( imag + poslat );
-                                    for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
-                                        int pos          = jfld + posimag;
-                                        scl_fourier[pos] = scl_fourier[idx + ioff] - scl_fourier_asym[idx];
-                                    }
+                    }
+                    {
+                        //ATLAS_TRACE( "opt2 merge spheres" );
+                        // northern hemisphere:
+                        int ioff = jm * size_fourier_max;
+                        int pos0 = ioff;
+                        int idx  = 0;
+                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                            int poslat = pos0 + 2 * jlat;
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                int posimag = nb_fields * ( imag + poslat );
+                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                    int pos          = jfld + posimag;
+                                    scl_fourier[pos] = scl_fourier_sym[idx] + scl_fourier_asym[idx];
                                 }
                             }
-                            // northern hemisphere:
-                            for ( int j = 0; j < 2 * nb_fields * nlatsNH; j++ ) {
-                                scl_fourier[j + ioff] += scl_fourier_asym[j];
+                        }
+                        // southern hemisphere:
+                        idx  = 0;
+                        pos0 = 2 * ( nlats - 1 ) + ioff;
+                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                            int poslat = pos0 - 2 * jlat;
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                int posimag = nb_fields * ( imag + poslat );
+                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                    int pos          = jfld + posimag;
+                                    scl_fourier[pos] = scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                                }
                             }
                         }
                     }
@@ -327,26 +342,19 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
 #if ATLAS_HAVE_FFTW
             {
                 auto position = [&]( int jfld, int imag, int jlat, int jm ) {
-                    return jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) );
+                    return jfld + nb_fields * ( imag + 2 * ( jlat + nlats * ( jm ) ) );
                 };
-                auto factor = [&]( int jm ) {
-                    if ( jm > 0 ) { return 2.; }
-                    else {
-                        return 1.;
-                    }
-                };
-
                 int num_complex = ( nlons / 2 ) + 1;
                 {
                     ATLAS_TRACE( "opt2 FFTW" );
                     for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                         int idx = 0;
                         for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                            for ( int jm = 0; jm < num_complex; jm++, idx++ ) {
+                            fft_in_[idx++][0] = scl_fourier[position( jfld, 0, jlat, 0 )];
+                            for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
                                 for ( int imag = 0; imag < 2; imag++ ) {
                                     if ( jm <= truncation_ ) {
-                                        fft_in_[idx][imag] =
-                                            scl_fourier[position( jfld, imag, jlat, jm )] / factor( jm );
+                                        fft_in_[idx][imag] = scl_fourier[position( jfld, imag, jlat, jm )] / 2.;
                                     }
                                     else {
                                         fft_in_[idx][imag] = 0.;
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index cca428385..a4ab92c26 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -714,7 +714,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
     double tolerance  = 1.e-13;
 
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F320" );
+    Grid g( "F3" );
 
     grid::StructuredGrid gs( g );
     int ndgl = gs.ny();
@@ -865,7 +865,7 @@ CASE( "test_trans_hires" ) {
     // Grid: (Adjust the following line if the test takes too long!)
     Grid g( "F640" );
 #if ATLAS_HAVE_TRANS
-    std::string transTypes[1] = {"localopt2"};
+    std::string transTypes[2] = {"localopt2", "ifs"};
     //std::string transTypes[3] = {"localopt", "localopt2", "ifs"};
 #else
     std::string transTypes[1] = {"localopt2"};
@@ -875,13 +875,14 @@ CASE( "test_trans_hires" ) {
     //int trc  = ndgl - 1;  // linear
     int trc = ndgl / 2. - 1;  // cubic
 
-    functionspace::Spectral spectral( trc );
-    functionspace::StructuredColumns gridpoints( g );
-
-    int nb_scalar = 100, nb_vordiv = 0;
-    int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
+    int nb_scalar = 1, nb_vordiv = 0;
 
     for ( auto transType : transTypes ) {
+        if ( transType == "localopt2" ) { trc = ndgl / 2. - 2; }
+        else {
+            trc = ndgl / 2. - 1;
+        }
+        int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
         int icase = 0;
         trans::Trans trans( g, trc, util::Config( "type", transType ) );
         for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) {         // vorticity, divergence, scalar

From 5d7d5e3ef510e12213a4525faf1f142d484d3c1e Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Sat, 17 Mar 2018 13:37:06 +0000
Subject: [PATCH 026/123] added alignment and padding for Legendre
 transformations; works on Mac OS, still need to test on other platforms and
 with MKL

---
 CMakeLists.txt                                |  6 ++
 cmake/CompileFlags.cmake                      |  2 +
 src/CMakeLists.txt                            |  6 ++
 src/atlas/CMakeLists.txt                      |  2 +
 src/atlas/library/defines.h.in                |  1 +
 .../localopt2/LegendrePolynomialsopt2.cc      | 20 ++---
 .../trans/localopt2/LegendrePolynomialsopt2.h | 12 +--
 src/atlas/trans/localopt2/TransLocalopt2.cc   | 82 +++++++++++++------
 src/atlas/trans/localopt2/TransLocalopt2.h    |  8 +-
 src/tests/trans/test_transgeneral.cc          |  2 +-
 10 files changed, 98 insertions(+), 43 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 787c96898..e71fcc03f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -105,6 +105,12 @@ ecbuild_add_option( FEATURE FFTW
                     DESCRIPTION "Support for fftw"
                     REQUIRED_PACKAGES "FFTW COMPONENTS double" )
 
+### MKL ...
+
+ecbuild_add_option( FEATURE MKL
+                    DESCRIPTION "MKL linear algebra library"
+                    REQUIRED_PACKAGES MKL )
+
 ### trans ...
 
 ecbuild_add_option( FEATURE TRANS
diff --git a/cmake/CompileFlags.cmake b/cmake/CompileFlags.cmake
index c02df915c..c92b7d581 100644
--- a/cmake/CompileFlags.cmake
+++ b/cmake/CompileFlags.cmake
@@ -9,6 +9,8 @@ if( CMAKE_CXX_COMPILER_ID MATCHES Cray )
 
 endif()
 
+ecbuild_add_cxx_flags("-Wl,-ydgemm_")
+ecbuild_add_fortran_flags("-Wl,-ydgemm_")
 #ecbuild_add_cxx_flags("-fsanitize=address")
 #ecbuild_add_cxx_flags("-fsanitize=thread")
 #ecbuild_add_cxx_flags("-fsanitize=memory")
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 98a5e1dd0..48175e157 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -42,6 +42,12 @@ else()
   set( ATLAS_HAVE_FFTW 0 )
 endif()
 
+if( ATLAS_HAVE_MKL )
+  set( ATLAS_HAVE_MKL 1 )
+else()
+  set( ATLAS_HAVE_MKL 0 )
+endif()
+
 if( ATLAS_HAVE_BOUNDSCHECKING )
   set( ATLAS_HAVE_BOUNDSCHECKING 1 )
 else()
diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt
index b55db3e40..26915243e 100644
--- a/src/atlas/CMakeLists.txt
+++ b/src/atlas/CMakeLists.txt
@@ -577,6 +577,7 @@ ecbuild_add_library( TARGET atlas
     "${TRANSI_INCLUDE_DIRS}"
     "${MPI_CXX_INCLUDE_DIRS}"
     "${FFTW_INCLUDES}"
+    "${MKL_INCLUDE_DIRS}"
   LIBS
     eckit_geometry
     eckit_linalg
@@ -586,6 +587,7 @@ ecbuild_add_library( TARGET atlas
     "${TRANSI_LIBRARIES}"
     "${FCKIT_LIBRARIES}"
     "${FFTW_LIBRARIES}"
+    "${MKL_LIBRARIES}"
   DEFINITIONS
     ${ATLAS_DEFINITIONS}
 )
diff --git a/src/atlas/library/defines.h.in b/src/atlas/library/defines.h.in
index e644fc73e..75f91f2b5 100644
--- a/src/atlas/library/defines.h.in
+++ b/src/atlas/library/defines.h.in
@@ -11,6 +11,7 @@
 #define ATLAS_HAVE_FORTRAN                   @ATLAS_HAVE_FORTRAN@
 #define ATLAS_HAVE_EIGEN                     @ATLAS_HAVE_EIGEN@
 #define ATLAS_HAVE_FFTW                      @ATLAS_HAVE_FFTW@
+#define ATLAS_HAVE_MKL                       @ATLAS_HAVE_MKL@
 #define ATLAS_BITS_GLOBAL                    @ATLAS_BITS_GLOBAL@
 #define ATLAS_ARRAYVIEW_BOUNDS_CHECKING      @ATLAS_HAVE_BOUNDSCHECKING@
 #define ATLAS_INDEXVIEW_BOUNDS_CHECKING      @ATLAS_HAVE_BOUNDSCHECKING@
diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
index 90e398229..9d96b1ac0 100644
--- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
+++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
@@ -21,11 +21,14 @@ namespace trans {
 
 //-----------------------------------------------------------------------------
 
-void compute_legendre_polynomialsopt2( const size_t trc,     // truncation (in)
-                                       const int nlats,      // number of latitudes
-                                       const double lats[],  // latitudes in radians (in)
-                                       double leg_sym[],     // values of associated Legendre functions, symmetric part
-                                       double leg_asym[] )   // values of associated Legendre functions, asymmetric part
+void compute_legendre_polynomialsopt2(
+    const size_t trc,          // truncation (in)
+    const int nlats,           // number of latitudes
+    const double lats[],       // latitudes in radians (in)
+    double leg_sym[],          // values of associated Legendre functions, symmetric part
+    double leg_asym[],         // values of associated Legendre functions, asymmetric part
+    size_t leg_start_sym[],    // start indices for different zonal wave numbers, symmetric part
+    size_t leg_start_asym[] )  // start indices for different zonal wave numbers, asymmetric part
 {
     auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; };
     array::ArrayT<double> zfn_( trc + 1, trc + 1 );
@@ -157,7 +160,6 @@ void compute_legendre_polynomialsopt2( const size_t trc,     // truncation (in)
                     legpol[idxmn( jm, jn )] *= 2.;
                 }
             }
-            int is0 = 0, ia0 = 0;
             for ( int jm = 0; jm <= trc; jm++ ) {
                 int is1 = 0, ia1 = 0;
                 for ( int jn = jm; jn <= trc; jn++ ) {
@@ -176,16 +178,14 @@ void compute_legendre_polynomialsopt2( const size_t trc,     // truncation (in)
                 //for ( int jn = jm; jn <= trc; jn++ ) {
                 for ( int jn = trc; jn >= jm; jn-- ) {
                     if ( ( jn - jm ) % 2 == 0 ) {
-                        int is      = is0 * nlats + is1 * jlat + is2++;
+                        int is      = leg_start_sym[jm] + is1 * jlat + is2++;
                         leg_sym[is] = legpol[idxmn( jm, jn )];
                     }
                     else {
-                        int ia       = ia0 * nlats + ia1 * jlat + ia2++;
+                        int ia       = leg_start_asym[jm] + ia1 * jlat + ia2++;
                         leg_asym[ia] = legpol[idxmn( jm, jn )];
                     }
                 }
-                is0 += is2;
-                ia0 += ia2;
             }
         }
     }
diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h
index 0de2a5f69..7e97dec7e 100644
--- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h
+++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h
@@ -33,11 +33,13 @@ namespace trans {
 // Andreas Mueller *ECMWF*
 //
 void compute_legendre_polynomialsopt2(
-    const size_t trc,          // truncation (in)
-    const int nlats,           // number of latitudes
-    const double lats[],       // latitudes in radians (in)
-    double legendre_sym[],     // values of associated Legendre functions, symmetric part
-    double legendre_asym[] );  // values of associated Legendre functions, asymmetric part
+    const size_t trc,           // truncation (in)
+    const int nlats,            // number of latitudes
+    const double lats[],        // latitudes in radians (in)
+    double legendre_sym[],      // values of associated Legendre functions, symmetric part
+    double legendre_asym[],     // values of associated Legendre functions, asymmetric part
+    size_t leg_start_sym[],     // start indices for different zonal wave numbers, symmetric part
+    size_t leg_start_asym[] );  // start indices for different zonal wave numbers, asymmetric part
 
 // --------------------------------------------------------------------------------------------------------------------
 
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index 2f29c7acb..71aeeb6e0 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -23,6 +23,9 @@
 #include "atlas/util/Constants.h"
 #include "eckit/linalg/LinearAlgebra.h"
 #include "eckit/linalg/Matrix.h"
+#ifdef ATLAS_HAVE_MKL
+#include "mkl.h"
+#endif
 
 namespace atlas {
 namespace trans {
@@ -54,6 +57,28 @@ int num_n( const int truncation, const int m, const bool symmetric ) {
     return len;
 }
 
+void alloc_aligned( double*& ptr, size_t n ) {
+#ifdef ATLAS_HAVE_MKL
+    int al = 64;
+    ptr    = mkl_malloc( sizeof( double ) * n, al );
+#else
+    posix_memalign( (void**)&ptr, sizeof( double ) * 64, sizeof( double ) * n );
+    //ptr = (double*)malloc( sizeof( double ) * n );
+    //ptr = new double[n];
+#endif
+}
+
+void free_aligned( double*& ptr ) {
+#ifdef ATLAS_HAVE_MKL
+    mkl_free( ptr );
+#else
+    free( ptr );
+#endif
+}
+
+int add_padding( int n ) {
+    return std::ceil( n / 8. ) * 8;
+}
 }  // namespace
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -110,21 +135,21 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
         legendre_sym_begin_[0]  = 0;
         legendre_asym_begin_[0] = 0;
         for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-            size_sym += num_n( truncation_ + 1, jm, true );
-            size_asym += num_n( truncation_ + 1, jm, false );
+            size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsNH );
+            size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsNH );
             legendre_sym_begin_[jm + 1]  = size_sym;
             legendre_asym_begin_[jm + 1] = size_asym;
         }
-        legendre_sym_.resize( size_sym * nlatsNH );
-        legendre_asym_.resize( size_asym * nlatsNH );
-        compute_legendre_polynomialsopt2( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_.data(),
-                                          legendre_asym_.data() );
+        alloc_aligned( legendre_sym_, size_sym );
+        alloc_aligned( legendre_asym_, size_asym );
+        compute_legendre_polynomialsopt2( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_,
+                                          legendre_sym_begin_.data(), legendre_asym_begin_.data() );
     }
 
     // precomputations for Fourier transformations:
     {
         ATLAS_TRACE( "opt2 precomp Fourier" );
-        fourier_.resize( 2 * ( truncation_ + 1 ) * nlons );
+        alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons );
         int idx = 0;
         for ( int jlon = 0; jlon < nlons; jlon++ ) {
             for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
@@ -135,7 +160,7 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
     }
     {
         ATLAS_TRACE( "opt2 precomp Fourier tp" );
-        fouriertp_.resize( 2 * ( truncation_ + 1 ) * nlons );
+        alloc_aligned( fouriertp_, 2 * ( truncation_ + 1 ) * nlons );
         int idx = 0;
         for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
             for ( int jlon = 0; jlon < nlons; jlon++ ) {
@@ -166,6 +191,10 @@ TransLocalopt2::TransLocalopt2( const Grid& grid, const long truncation, const e
 // --------------------------------------------------------------------------------------------------------------------
 
 TransLocalopt2::~TransLocalopt2() {
+    free_aligned( legendre_sym_ );
+    free_aligned( legendre_asym_ );
+    free_aligned( fourier_ );
+    free_aligned( fouriertp_ );
 #if ATLAS_HAVE_FFTW
     fftw_destroy_plan( plan_ );
     fftw_free( fft_in_ );
@@ -243,7 +272,8 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
             int nlons            = g.nxmax();
             int nlatsNH          = nlats_northernHemisphere( nlats );
             int size_fourier_max = nb_fields * 2 * nlats;
-            std::vector<double> scl_fourier( size_fourier_max * ( truncation + 1 ) );
+            double* scl_fourier;
+            alloc_aligned( scl_fourier, size_fourier_max * ( truncation + 1 ) );
 
             // Legendre transform:
             {
@@ -256,10 +286,14 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                     int n_imag    = 2;
                     if ( jm == 0 ) { n_imag = 1; }
                     int size_fourier = nb_fields * n_imag * nlatsNH;
-                    std::vector<double> scalar_sym( n_imag * nb_fields * size_sym );
-                    std::vector<double> scalar_asym( n_imag * nb_fields * size_asym );
-                    std::vector<double> scl_fourier_sym( size_fourier );
-                    std::vector<double> scl_fourier_asym( size_fourier, 0. );
+                    double* scalar_sym;
+                    double* scalar_asym;
+                    double* scl_fourier_sym;
+                    double* scl_fourier_asym;
+                    alloc_aligned( scalar_sym, n_imag * nb_fields * size_sym );
+                    alloc_aligned( scalar_asym, n_imag * nb_fields * size_asym );
+                    alloc_aligned( scl_fourier_sym, size_fourier );
+                    alloc_aligned( scl_fourier_asym, size_fourier );
                     {
                         //ATLAS_TRACE( "opt2 Legendre split" );
                         int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2;
@@ -284,19 +318,15 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                         ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym );
                     }
                     {
-                        //Log::info() << "jm=" << jm << " symmetric - ";
-                        eckit::linalg::Matrix A( scalar_sym.data(), nb_fields * n_imag, size_sym );
-                        eckit::linalg::Matrix B( legendre_sym_.data() + legendre_sym_begin_[jm] * nlatsNH, size_sym,
-                                                 nlatsNH );
-                        eckit::linalg::Matrix C( scl_fourier_sym.data(), nb_fields * n_imag, nlatsNH );
+                        eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym );
+                        eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsNH );
+                        eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsNH );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
                     if ( size_asym > 0 ) {
-                        //Log::info() << "jm=" << jm << " antisymmetric - ";
-                        eckit::linalg::Matrix A( scalar_asym.data(), nb_fields * n_imag, size_asym );
-                        eckit::linalg::Matrix B( legendre_asym_.data() + legendre_asym_begin_[jm] * nlatsNH, size_asym,
-                                                 nlatsNH );
-                        eckit::linalg::Matrix C( scl_fourier_asym.data(), nb_fields * n_imag, nlatsNH );
+                        eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym );
+                        eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsNH );
+                        eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
                     {
@@ -329,6 +359,11 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                             }
                         }
                     }
+                    free_aligned( scalar_sym );
+                    free_aligned( scalar_asym );
+                    free_aligned( scl_fourier_sym );
+                    free_aligned( scl_fourier_asym );
+
 #else
                     int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
                     eckit::linalg::Matrix A( eckit::linalg::Matrix(
@@ -464,6 +499,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                     }
                 }
             }
+            free_aligned( scl_fourier );
         }
         else {
             ATLAS_TRACE( "invtrans_uv unstructured opt2" );
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.h b/src/atlas/trans/localopt2/TransLocalopt2.h
index 51644430d..c5f5f2aa4 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.h
+++ b/src/atlas/trans/localopt2/TransLocalopt2.h
@@ -113,10 +113,10 @@ class TransLocalopt2 : public trans::TransImpl {
     Grid grid_;
     int truncation_;
     bool precompute_;
-    mutable std::vector<double> legendre_sym_;
-    mutable std::vector<double> legendre_asym_;
-    mutable std::vector<double> fourier_;
-    mutable std::vector<double> fouriertp_;
+    double* legendre_sym_;
+    double* legendre_asym_;
+    double* fourier_;
+    double* fouriertp_;
     std::vector<size_t> legendre_begin_;
     std::vector<size_t> legendre_sym_begin_;
     std::vector<size_t> legendre_asym_begin_;
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index a4ab92c26..7ad11b882 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -714,7 +714,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
     double tolerance  = 1.e-13;
 
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F3" );
+    Grid g( "F120" );
 
     grid::StructuredGrid gs( g );
     int ndgl = gs.ny();

From a5289bbb7693092d8e30262ffb9cf4e32d012423 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Sat, 17 Mar 2018 13:46:33 +0000
Subject: [PATCH 027/123] small fix for MKL

---
 src/atlas/trans/localopt2/TransLocalopt2.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index 71aeeb6e0..d06a7d435 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -60,7 +60,7 @@ int num_n( const int truncation, const int m, const bool symmetric ) {
 void alloc_aligned( double*& ptr, size_t n ) {
 #ifdef ATLAS_HAVE_MKL
     int al = 64;
-    ptr    = mkl_malloc( sizeof( double ) * n, al );
+    ptr    = (double*)mkl_malloc( sizeof( double ) * n, al );
 #else
     posix_memalign( (void**)&ptr, sizeof( double ) * 64, sizeof( double ) * n );
     //ptr = (double*)malloc( sizeof( double ) * n );

From b50570dcfe2f8365ada5042289f8d5aa242e281c Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Sat, 17 Mar 2018 16:05:39 +0000
Subject: [PATCH 028/123] added alignment to dgemm-Fourier transformation;
 three versions of localopt (1: FFTW, 2+3: no FFTW, different transp.)

---
 src/atlas/CMakeLists.txt                      |  10 +
 src/atlas/trans/Trans.cc                      |   2 +
 src/atlas/trans/VorDivToUV.cc                 |   2 +
 .../trans/localopt/LegendrePolynomialsopt.cc  |  20 +-
 .../trans/localopt/LegendrePolynomialsopt.h   |  12 +-
 src/atlas/trans/localopt/TransLocalopt.cc     | 552 ++++++++------
 src/atlas/trans/localopt/TransLocalopt.h      |   8 +-
 src/atlas/trans/localopt2/TransLocalopt2.cc   | 435 ++++++-----
 .../trans/localopt3/FourierTransformsopt3.cc  |  78 ++
 .../trans/localopt3/FourierTransformsopt3.h   |  38 +
 .../localopt3/LegendrePolynomialsopt3.cc      | 197 +++++
 .../trans/localopt3/LegendrePolynomialsopt3.h |  47 ++
 .../trans/localopt3/LegendreTransformsopt3.cc |  62 ++
 .../trans/localopt3/LegendreTransformsopt3.h  |  37 +
 src/atlas/trans/localopt3/TransLocalopt3.cc   | 690 ++++++++++++++++++
 src/atlas/trans/localopt3/TransLocalopt3.h    | 133 ++++
 .../trans/localopt3/VorDivToUVLocalopt3.cc    | 184 +++++
 .../trans/localopt3/VorDivToUVLocalopt3.h     |  67 ++
 src/tests/trans/test_transgeneral.cc          |   6 +-
 19 files changed, 2133 insertions(+), 447 deletions(-)
 create mode 100644 src/atlas/trans/localopt3/FourierTransformsopt3.cc
 create mode 100644 src/atlas/trans/localopt3/FourierTransformsopt3.h
 create mode 100644 src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
 create mode 100644 src/atlas/trans/localopt3/LegendrePolynomialsopt3.h
 create mode 100644 src/atlas/trans/localopt3/LegendreTransformsopt3.cc
 create mode 100644 src/atlas/trans/localopt3/LegendreTransformsopt3.h
 create mode 100644 src/atlas/trans/localopt3/TransLocalopt3.cc
 create mode 100644 src/atlas/trans/localopt3/TransLocalopt3.h
 create mode 100644 src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc
 create mode 100644 src/atlas/trans/localopt3/VorDivToUVLocalopt3.h

diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt
index 26915243e..bf6f686c3 100644
--- a/src/atlas/CMakeLists.txt
+++ b/src/atlas/CMakeLists.txt
@@ -351,6 +351,16 @@ trans/localopt2/FourierTransformsopt2.h
 trans/localopt2/FourierTransformsopt2.cc
 trans/localopt2/VorDivToUVLocalopt2.h
 trans/localopt2/VorDivToUVLocalopt2.cc
+trans/localopt3/TransLocalopt3.h
+trans/localopt3/TransLocalopt3.cc
+trans/localopt3/LegendrePolynomialsopt3.h
+trans/localopt3/LegendrePolynomialsopt3.cc
+trans/localopt3/LegendreTransformsopt3.h
+trans/localopt3/LegendreTransformsopt3.cc
+trans/localopt3/FourierTransformsopt3.h
+trans/localopt3/FourierTransformsopt3.cc
+trans/localopt3/VorDivToUVLocalopt3.h
+trans/localopt3/VorDivToUVLocalopt3.cc
 
 )
 if( ATLAS_HAVE_TRANS )
diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc
index efd8eabe9..b264cc7ee 100644
--- a/src/atlas/trans/Trans.cc
+++ b/src/atlas/trans/Trans.cc
@@ -30,6 +30,7 @@
 #include "atlas/trans/local/TransLocal.h"
 #include "atlas/trans/localopt/TransLocalopt.h"
 #include "atlas/trans/localopt2/TransLocalopt2.h"
+#include "atlas/trans/localopt3/TransLocalopt3.h"
 
 namespace atlas {
 namespace trans {
@@ -66,6 +67,7 @@ struct force_link {
         load_builder_grid<TransLocal>();
         load_builder_grid<TransLocalopt>();
         load_builder_grid<TransLocalopt2>();
+        load_builder_grid<TransLocalopt3>();
     }
 };
 
diff --git a/src/atlas/trans/VorDivToUV.cc b/src/atlas/trans/VorDivToUV.cc
index 566303c52..727ead312 100644
--- a/src/atlas/trans/VorDivToUV.cc
+++ b/src/atlas/trans/VorDivToUV.cc
@@ -29,6 +29,7 @@
 #include "atlas/trans/local/VorDivToUVLocal.h"
 #include "atlas/trans/localopt/VorDivToUVLocalopt.h"
 #include "atlas/trans/localopt2/VorDivToUVLocalopt2.h"
+#include "atlas/trans/localopt3/VorDivToUVLocalopt3.h"
 
 namespace atlas {
 namespace trans {
@@ -59,6 +60,7 @@ struct force_link {
         load_builder<VorDivToUVLocal>();
         load_builder<VorDivToUVLocalopt>();
         load_builder<VorDivToUVLocalopt2>();
+        load_builder<VorDivToUVLocalopt3>();
     }
 };
 
diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
index 3ed9e7544..a31d893c6 100644
--- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
+++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
@@ -21,11 +21,14 @@ namespace trans {
 
 //-----------------------------------------------------------------------------
 
-void compute_legendre_polynomialsopt( const size_t trc,     // truncation (in)
-                                      const int nlats,      // number of latitudes
-                                      const double lats[],  // latitudes in radians (in)
-                                      double leg_sym[],     // values of associated Legendre functions, symmetric part
-                                      double leg_asym[] )   // values of associated Legendre functions, asymmetric part
+void compute_legendre_polynomialsopt(
+    const size_t trc,          // truncation (in)
+    const int nlats,           // number of latitudes
+    const double lats[],       // latitudes in radians (in)
+    double leg_sym[],          // values of associated Legendre functions, symmetric part
+    double leg_asym[],         // values of associated Legendre functions, asymmetric part
+    size_t leg_start_sym[],    // start indices for different zonal wave numbers, symmetric part
+    size_t leg_start_asym[] )  // start indices for different zonal wave numbers, asymmetric part
 {
     auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; };
     array::ArrayT<double> zfn_( trc + 1, trc + 1 );
@@ -157,7 +160,6 @@ void compute_legendre_polynomialsopt( const size_t trc,     // truncation (in)
                     legpol[idxmn( jm, jn )] *= 2.;
                 }
             }
-            int is0 = 0, ia0 = 0;
             for ( int jm = 0; jm <= trc; jm++ ) {
                 int is1 = 0, ia1 = 0;
                 for ( int jn = jm; jn <= trc; jn++ ) {
@@ -176,16 +178,14 @@ void compute_legendre_polynomialsopt( const size_t trc,     // truncation (in)
                 //for ( int jn = jm; jn <= trc; jn++ ) {
                 for ( int jn = trc; jn >= jm; jn-- ) {
                     if ( ( jn - jm ) % 2 == 0 ) {
-                        int is      = is0 * nlats + is1 * jlat + is2++;
+                        int is      = leg_start_sym[jm] + is1 * jlat + is2++;
                         leg_sym[is] = legpol[idxmn( jm, jn )];
                     }
                     else {
-                        int ia       = ia0 * nlats + ia1 * jlat + ia2++;
+                        int ia       = leg_start_asym[jm] + ia1 * jlat + ia2++;
                         leg_asym[ia] = legpol[idxmn( jm, jn )];
                     }
                 }
-                is0 += is2;
-                ia0 += ia2;
             }
         }
     }
diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.h b/src/atlas/trans/localopt/LegendrePolynomialsopt.h
index 4dc3ce2e5..4a2f004c7 100644
--- a/src/atlas/trans/localopt/LegendrePolynomialsopt.h
+++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.h
@@ -33,11 +33,13 @@ namespace trans {
 // Andreas Mueller *ECMWF*
 //
 void compute_legendre_polynomialsopt(
-    const size_t trc,          // truncation (in)
-    const int nlats,           // number of latitudes
-    const double lats[],       // latitudes in radians (in)
-    double legendre_sym[],     // values of associated Legendre functions, symmetric part
-    double legendre_asym[] );  // values of associated Legendre functions, asymmetric part
+    const size_t trc,           // truncation (in)
+    const int nlats,            // number of latitudes
+    const double lats[],        // latitudes in radians (in)
+    double legendre_sym[],      // values of associated Legendre functions, symmetric part
+    double legendre_asym[],     // values of associated Legendre functions, asymmetric part
+    size_t leg_start_sym[],     // start indices for different zonal wave numbers, symmetric part
+    size_t leg_start_asym[] );  // start indices for different zonal wave numbers, asymmetric part
 
 // --------------------------------------------------------------------------------------------------------------------
 
diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index 9c4469725..0b9e1bf5f 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -23,6 +23,9 @@
 #include "atlas/util/Constants.h"
 #include "eckit/linalg/LinearAlgebra.h"
 #include "eckit/linalg/Matrix.h"
+#if ATLAS_HAVE_MKL
+#include "mkl.h"
+#endif
 
 namespace atlas {
 namespace trans {
@@ -54,6 +57,28 @@ int num_n( const int truncation, const int m, const bool symmetric ) {
     return len;
 }
 
+void alloc_aligned( double*& ptr, size_t n ) {
+#if ATLAS_HAVE_MKL
+    int al = 64;
+    ptr    = (double*)mkl_malloc( sizeof( double ) * n, al );
+#else
+    posix_memalign( (void**)&ptr, sizeof( double ) * 64, sizeof( double ) * n );
+    //ptr = (double*)malloc( sizeof( double ) * n );
+    //ptr = new double[n];
+#endif
+}
+
+void free_aligned( double*& ptr ) {
+#if ATLAS_HAVE_MKL
+    mkl_free( ptr );
+#else
+    free( ptr );
+#endif
+}
+
+int add_padding( int n ) {
+    return std::ceil( n / 8. ) * 8;
+}
 }  // namespace
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -66,7 +91,11 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
     truncation_( truncation ),
     precompute_( config.getBool( "precompute", true ) ) {
     ATLAS_TRACE( "Precompute legendre opt" );
+#if ATLAS_HAVE_MKL
+    eckit::linalg::LinearAlgebra::backend( "mkl" );  // might want to choose backend with this command
+#else
     eckit::linalg::LinearAlgebra::backend( "generic" );  // might want to choose backend with this command
+#endif
     int nlats   = 0;
     int nlons   = 0;
     int nlatsNH = nlats_northernHemisphere( nlats );
@@ -110,21 +139,21 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
         legendre_sym_begin_[0]  = 0;
         legendre_asym_begin_[0] = 0;
         for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-            size_sym += num_n( truncation_ + 1, jm, true );
-            size_asym += num_n( truncation_ + 1, jm, false );
+            size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsNH );
+            size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsNH );
             legendre_sym_begin_[jm + 1]  = size_sym;
             legendre_asym_begin_[jm + 1] = size_asym;
         }
-        legendre_sym_.resize( size_sym * nlatsNH );
-        legendre_asym_.resize( size_asym * nlatsNH );
-        compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_.data(),
-                                         legendre_asym_.data() );
+        alloc_aligned( legendre_sym_, size_sym );
+        alloc_aligned( legendre_asym_, size_asym );
+        compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_,
+                                         legendre_sym_begin_.data(), legendre_asym_begin_.data() );
     }
 
     // precomputations for Fourier transformations:
     {
         ATLAS_TRACE( "opt precomp Fourier" );
-        fourier_.resize( 2 * ( truncation_ + 1 ) * nlons );
+        alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons );
         int idx = 0;
         for ( int jlon = 0; jlon < nlons; jlon++ ) {
             for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
@@ -135,7 +164,7 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
     }
     {
         ATLAS_TRACE( "opt precomp Fourier tp" );
-        fouriertp_.resize( 2 * ( truncation_ + 1 ) * nlons );
+        alloc_aligned( fouriertp_, 2 * ( truncation_ + 1 ) * nlons );
         int idx = 0;
         for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
             for ( int jlon = 0; jlon < nlons; jlon++ ) {
@@ -166,6 +195,10 @@ TransLocalopt::TransLocalopt( const Grid& grid, const long truncation, const eck
 // --------------------------------------------------------------------------------------------------------------------
 
 TransLocalopt::~TransLocalopt() {
+    free_aligned( legendre_sym_ );
+    free_aligned( legendre_asym_ );
+    free_aligned( fourier_ );
+    free_aligned( fouriertp_ );
 #if ATLAS_HAVE_FFTW
     fftw_destroy_plan( plan_ );
     fftw_free( fft_in_ );
@@ -238,24 +271,42 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
         // Transform
         if ( grid::StructuredGrid g = grid_ ) {
             ATLAS_TRACE( "invtrans_uv structured opt" );
-            int nlats        = g.ny();
-            int nlons        = g.nxmax();
-            int nlatsNH      = nlats_northernHemisphere( nlats );
-            int size_fourier = nb_fields * 2 * g.ny();
-            std::vector<double> scl_fourier( size_fourier * ( truncation + 1 ) );
+            int nlats    = g.ny();
+            int nlons    = g.nxmax();
+            int nlatsNH  = nlats_northernHemisphere( nlats );
+            auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) {
+                return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
+            };
+            /*auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) {
+                return jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) + jm * nb_fields * 2 * nlats );
+            };*/
+            auto posGemm1 = [&]( int jfld, int imag, int jlat, int jm ) {
+                return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) );
+            };
+            auto posGemm2 = [&]( int jfld, int imag, int jlat, int jm ) {
+                return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
+            };
+            int size_fourier_max = nb_fields * 2 * nlats;
+            double* scl_fourier;
+            alloc_aligned( scl_fourier, size_fourier_max * ( truncation + 1 ) );
 
             // Legendre transform:
             {
                 ATLAS_TRACE( "opt Legendre dgemm" );
-                for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-#if 1  // 0: no symmetry, 1: use symmetry \
-    // TODO: 0 is currently not working because it requires all latitudes to be included in legendre_ (which is currently not done)
+                for ( int jm = 0; jm <= truncation_; jm++ ) {
                     int size_sym  = num_n( truncation_ + 1, jm, true );
                     int size_asym = num_n( truncation_ + 1, jm, false );
-                    std::vector<double> scalar_sym( 2 * nb_fields * size_sym );
-                    std::vector<double> scalar_asym( 2 * nb_fields * size_asym );
-                    std::vector<double> scl_fourier_sym( size_fourier );
-                    std::vector<double> scl_fourier_asym( size_fourier );
+                    int n_imag    = 2;
+                    if ( jm == 0 ) { n_imag = 1; }
+                    int size_fourier = nb_fields * n_imag * nlatsNH;
+                    double* scalar_sym;
+                    double* scalar_asym;
+                    double* scl_fourier_sym;
+                    double* scl_fourier_asym;
+                    alloc_aligned( scalar_sym, n_imag * nb_fields * size_sym );
+                    alloc_aligned( scalar_asym, n_imag * nb_fields * size_asym );
+                    alloc_aligned( scl_fourier_sym, size_fourier );
+                    alloc_aligned( scl_fourier_asym, size_fourier );
                     {
                         //ATLAS_TRACE( "opt Legendre split" );
                         int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2;
@@ -267,7 +318,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                         // compute_legendre_polynomialsopt!
                         //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) {
                         for ( int jn = truncation_ + 1; jn >= jm; jn-- ) {
-                            for ( int imag = 0; imag < 2; imag++ ) {
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
                                 for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                                     idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) );
                                     if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
@@ -277,73 +328,70 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                                 }
                             }
                         }
-                        ASSERT( ia == 2 * nb_fields * size_asym && is == 2 * nb_fields * size_sym );
+                        ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym );
                     }
                     {
-                        eckit::linalg::Matrix A( scalar_sym.data(), nb_fields * 2, size_sym );
-                        eckit::linalg::Matrix B( legendre_sym_.data() + legendre_sym_begin_[jm] * nlatsNH, size_sym,
-                                                 nlatsNH );
-                        eckit::linalg::Matrix C( scl_fourier_sym.data(), nb_fields * 2, nlatsNH );
+                        eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym );
+                        eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsNH );
+                        eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsNH );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
                     if ( size_asym > 0 ) {
-                        eckit::linalg::Matrix A( scalar_asym.data(), nb_fields * 2, size_asym );
-                        eckit::linalg::Matrix B( legendre_asym_.data() + legendre_asym_begin_[jm] * nlatsNH, size_asym,
-                                                 nlatsNH );
-                        eckit::linalg::Matrix C( scl_fourier_asym.data(), nb_fields * 2, nlatsNH );
+                        eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym );
+                        eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsNH );
+                        eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                        {
-                            //ATLAS_TRACE( "opt merge spheres" );
-                            // northern hemisphere:
-                            int ioff = jm * size_fourier;
-                            for ( int j = 0; j < 2 * nb_fields * nlatsNH; j++ ) {
-                                scl_fourier[j + ioff] = scl_fourier_sym[j] + scl_fourier_asym[j];
+                    }
+#if 1  //ATLAS_HAVE_FFTW
+                    {
+                        //ATLAS_TRACE( "opt merge spheres" );
+                        // northern hemisphere:
+                        int ioff = jm * size_fourier_max;
+                        int pos0 = ioff;
+                        int idx  = 0;
+                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                            int poslat = pos0 + 2 * jlat;
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                int posimag = nb_fields * ( imag + poslat );
+                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                    scl_fourier[posFFTW( jfld, imag, jlat, jm )] =
+                                        scl_fourier_sym[idx] + scl_fourier_asym[idx];
+                                }
                             }
-                            // southern hemisphere:
-                            int idx = 0;
-                            for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                                for ( int imag = 0; imag < 2; imag++ ) {
-                                    for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
-                                        int pos = jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) );
-                                        scl_fourier[pos + ioff] = scl_fourier_sym[idx] - scl_fourier_asym[idx];
-                                    }
+                        }
+                        // southern hemisphere:
+                        idx  = 0;
+                        pos0 = 2 * ( nlats - 1 ) + ioff;
+                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                            int poslat = pos0 - 2 * jlat;
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                int posimag = nb_fields * ( imag + poslat );
+                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                    int jslat = nlats - jlat - 1;
+                                    scl_fourier[posFFTW( jfld, imag, jslat, jm )] =
+                                        scl_fourier_sym[idx] - scl_fourier_asym[idx];
                                 }
                             }
                         }
                     }
-#else
-                    int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
-                    eckit::linalg::Matrix A( eckit::linalg::Matrix(
-                        const_cast<double*>( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) );
-                    eckit::linalg::Matrix B( legendre_.data() + noff * g.ny(), ns, g.ny() );
-                    eckit::linalg::Matrix C( scl_fourier.data() + jm * size_fourier, nb_fields * 2, g.ny() );
-                    eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-#endif
+                    free_aligned( scalar_sym );
+                    free_aligned( scalar_asym );
+                    free_aligned( scl_fourier_sym );
+                    free_aligned( scl_fourier_asym );
                 }
             }
-#if ATLAS_HAVE_FFTW
             {
-                auto position = [&]( int jfld, int imag, int jlat, int jm ) {
-                    return jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) );
-                };
-                auto factor = [&]( int jm ) {
-                    if ( jm > 0 ) { return 2.; }
-                    else {
-                        return 1.;
-                    }
-                };
-
                 int num_complex = ( nlons / 2 ) + 1;
                 {
                     ATLAS_TRACE( "opt FFTW" );
                     for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                         int idx = 0;
-                        for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                            for ( int jm = 0; jm < num_complex; jm++, idx++ ) {
+                        for ( int jlat = 0; jlat < nlats; jlat++ ) {
+                            fft_in_[idx++][0] = scl_fourier[posFFTW( jfld, 0, jlat, 0 )];
+                            for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
                                 for ( int imag = 0; imag < 2; imag++ ) {
                                     if ( jm <= truncation_ ) {
-                                        fft_in_[idx][imag] =
-                                            scl_fourier[position( jfld, imag, jlat, jm )] / factor( jm );
+                                        fft_in_[idx][imag] = scl_fourier[posFFTW( jfld, imag, jlat, jm )] / 2.;
                                     }
                                     else {
                                         fft_in_[idx][imag] = 0.;
@@ -351,7 +399,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                                 }
                             }
                         }
-                        fftw_execute( plan_ );
+                        fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ );
                         for ( int j = 0; j < nlats * nlons; j++ ) {
                             gp_fields[j + jfld * nlats * nlons] = fft_out_[j];
                         }
@@ -360,32 +408,52 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
             }
 #else
 #if 0  // 1: better for small number of columns, large truncation; 0: better for large number of columns
-
-            // Transposition in Fourier space:
-            std::vector<double> scl_fourier_tp( size_fourier * ( truncation + 1 ) );
-            {
-                ATLAS_TRACE( "opt transposition in Fourier" );
-                int idx = 0;
-                for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                    for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                        for ( int imag = 0; imag < 2; imag++ ) {
-                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                int pos_tp = jfld + nb_fields * ( jlat + g.ny() * ( imag + 2 * ( jm ) ) );
-                                //int pos  = jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) );
-                                scl_fourier_tp[pos_tp] = scl_fourier[idx++];  // = scl_fourier[pos]
+                    {
+                        //ATLAS_TRACE( "opt merge spheres" );
+                        // northern hemisphere:
+                        int ioff = jm * size_fourier_max;
+                        int pos0 = ioff;
+                        int idx  = 0;
+                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                            int poslat = pos0 + 2 * jlat;
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                int posimag = nb_fields * ( imag + poslat );
+                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                    scl_fourier[posGemm1( jfld, imag, jlat, jm )] =
+                                        scl_fourier_sym[idx] + scl_fourier_asym[idx];
+                                }
+                            }
+                        }
+                        // southern hemisphere:
+                        idx  = 0;
+                        pos0 = 2 * ( nlats - 1 ) + ioff;
+                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                            int poslat = pos0 - 2 * jlat;
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                int posimag = nb_fields * ( imag + poslat );
+                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                    int jslat = nlats - jlat - 1;
+                                    scl_fourier[posGemm1( jfld, imag, jslat, jm )] =
+                                        scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                                }
                             }
                         }
                     }
+                    free_aligned( scalar_sym );
+                    free_aligned( scalar_asym );
+                    free_aligned( scl_fourier_sym );
+                    free_aligned( scl_fourier_asym );
                 }
             }
 
             // Fourier transformation:
-            std::vector<double> gp_opt( nb_fields * grid_.size(), 0. );
+            double* gp_opt;
+            alloc_aligned( gp_opt, nb_fields * grid_.size() );
             {
                 ATLAS_TRACE( "opt Fourier dgemm" );
-                eckit::linalg::Matrix A( scl_fourier_tp.data(), nb_fields * g.ny(), ( truncation_ + 1 ) * 2 );
-                eckit::linalg::Matrix B( fourier_.data(), ( truncation_ + 1 ) * 2, g.nxmax() );
-                eckit::linalg::Matrix C( gp_opt.data(), nb_fields * g.ny(), g.nxmax() );
+                eckit::linalg::Matrix A( scl_fourier, nb_fields * g.ny(), ( truncation_ + 1 ) * 2 );
+                eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, g.nxmax() );
+                eckit::linalg::Matrix C( gp_opt, nb_fields * g.ny(), g.nxmax() );
                 eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
             }
 
@@ -403,194 +471,218 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                     }
                 }
             }
+            free_aligned( gp_opt );
 #else
-            // Transposition in Fourier space:
-            std::vector<double> scl_fourier_tp( size_fourier * ( truncation + 1 ) );
-            {
-                ATLAS_TRACE( "opt transposition in Fourier" );
-                int idx = 0;
-                for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                    for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                        for ( int imag = 0; imag < 2; imag++ ) {
-                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                int pos_tp = imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + g.ny() * ( jfld ) ) );
-                                //int pos  = jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) );
-                                scl_fourier_tp[pos_tp] = scl_fourier[idx++];  // = scl_fourier[pos]
+                    {
+                        //ATLAS_TRACE( "opt merge spheres" );
+                        // northern hemisphere:
+                        int ioff = jm * size_fourier_max;
+                        int pos0 = ioff;
+                        int idx  = 0;
+                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                            int poslat = pos0 + 2 * jlat;
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                int posimag = nb_fields * ( imag + poslat );
+                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                    scl_fourier[posGemm2( jfld, imag, jlat, jm )] =
+                                        scl_fourier_sym[idx] + scl_fourier_asym[idx];
+                                }
+                            }
+                        }
+                        // southern hemisphere:
+                        idx  = 0;
+                        pos0 = 2 * ( nlats - 1 ) + ioff;
+                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                            int poslat = pos0 - 2 * jlat;
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                int posimag = nb_fields * ( imag + poslat );
+                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                    int jslat = nlats - jlat - 1;
+                                    scl_fourier[posGemm2( jfld, imag, jslat, jm )] =
+                                        scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                                }
                             }
                         }
                     }
+                    free_aligned( scalar_sym );
+                    free_aligned( scalar_asym );
+                    free_aligned( scl_fourier_sym );
+                    free_aligned( scl_fourier_asym );
                 }
             }
 
             // Fourier transformation:
-            std::vector<double> gp_opt( nb_fields * grid_.size(), 0. );
             {
                 ATLAS_TRACE( "opt Fourier dgemm" );
-                eckit::linalg::Matrix A( fouriertp_.data(), g.nxmax(), ( truncation_ + 1 ) * 2 );
-                eckit::linalg::Matrix B( scl_fourier_tp.data(), ( truncation_ + 1 ) * 2, nb_fields * g.ny() );
+                eckit::linalg::Matrix A( fouriertp_, g.nxmax(), ( truncation_ + 1 ) * 2 );
+                eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * g.ny() );
                 eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() );
                 eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
             }
 
 #endif
 #endif
-            // Computing u,v from U,V:
-            {
-                if ( nb_vordiv_fields > 0 ) {
-                    ATLAS_TRACE( "opt u,v from U,V" );
-                    std::vector<double> coslats( nlats );
-                    for ( size_t j = 0; j < nlats; ++j ) {
-                        coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
-                    }
-                    int idx = 0;
-                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                        for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                            for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
-                                gp_fields[idx] /= coslats[jlat];
-                                idx++;
+                    // Computing u,v from U,V:
+                    {
+                        if ( nb_vordiv_fields > 0 ) {
+                            ATLAS_TRACE( "opt u,v from U,V" );
+                            std::vector<double> coslats( nlats );
+                            for ( size_t j = 0; j < nlats; ++j ) {
+                                coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
+                            }
+                            int idx = 0;
+                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                                    for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
+                                        gp_fields[idx] /= coslats[jlat];
+                                        idx++;
+                                    }
+                                }
                             }
                         }
                     }
+                    free_aligned( scl_fourier );
                 }
-            }
-        }
-        else {
-            ATLAS_TRACE( "invtrans_uv unstructured opt" );
-            int idx = 0;
-            for ( PointXY p : grid_.xy() ) {
-                double lon   = p.x() * util::Constants::degreesToRadians();
-                double lat   = p.y() * util::Constants::degreesToRadians();
-                double trcFT = truncation;
-
-                // Legendre transform:
-                //invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
-                //                      legReal.data(), legImag.data() );
-
-                // Fourier transform:
-                //invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
-                //                     gp_tmp.data() + ( nb_fields * idx ) );
-                for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-                    //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                else {
+                    ATLAS_TRACE( "invtrans_uv unstructured opt" );
+                    int idx = 0;
+                    for ( PointXY p : grid_.xy() ) {
+                        double lon   = p.x() * util::Constants::degreesToRadians();
+                        double lat   = p.y() * util::Constants::degreesToRadians();
+                        double trcFT = truncation;
+
+                        // Legendre transform:
+                        //invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
+                        //                      legReal.data(), legImag.data() );
+
+                        // Fourier transform:
+                        //invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
+                        //                     gp_tmp.data() + ( nb_fields * idx ) );
+                        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+                            //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                        }
+                        ++idx;
+                    }
                 }
-                ++idx;
             }
-        }
-    }
-}  // namespace trans
+        }  // namespace trans
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
-                              const double divergence_spectra[], double gp_fields[],
-                              const eckit::Configuration& config ) const {
-    invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
-}
+        void TransLocalopt::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
+                                      const double divergence_spectra[], double gp_fields[],
+                                      const eckit::Configuration& config ) const {
+            invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
+        }
 
-void extend_truncationopt( const int old_truncation, const int nb_fields, const double old_spectra[],
-                           double new_spectra[] ) {
-    int k = 0, k_old = 0;
-    for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
-        for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
-            for ( int imag = 0; imag < 2; imag++ ) {              // imaginary/real part
-                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {  // field
-                    if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; }
-                    else {
-                        new_spectra[k++] = old_spectra[k_old++];
+        void extend_truncationopt( const int old_truncation, const int nb_fields, const double old_spectra[],
+                                   double new_spectra[] ) {
+            int k = 0, k_old = 0;
+            for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
+                for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
+                    for ( int imag = 0; imag < 2; imag++ ) {              // imaginary/real part
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {  // field
+                            if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; }
+                            else {
+                                new_spectra[k++] = old_spectra[k_old++];
+                            }
+                        }
                     }
                 }
             }
         }
-    }
-}
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
+
+        void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_spectra[],
+                                      const int nb_vordiv_fields, const double vorticity_spectra[],
+                                      const double divergence_spectra[], double gp_fields[],
+                                      const eckit::Configuration& config ) const {
+            ATLAS_TRACE( "TransLocalopt::invtrans" );
+            int nb_gp              = grid_.size();
+            int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
+            if ( nb_vordiv_fields > 0 ) {
+                std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
+                std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
+                std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
+                std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
 
-void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
-                              const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
-                              const eckit::Configuration& config ) const {
-    ATLAS_TRACE( "TransLocalopt::invtrans" );
-    int nb_gp              = grid_.size();
-    int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
-    if ( nb_vordiv_fields > 0 ) {
-        std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
-        std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
-        std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
-        std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
-
-        {
-            ATLAS_TRACE( "opt extend vordiv" );
-            // increase truncation in vorticity_spectra and divergence_spectra:
-            extend_truncationopt( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() );
-            extend_truncationopt( truncation_, nb_vordiv_fields, divergence_spectra,
-                                  divergence_spectra_extended.data() );
-        }
+                {
+                    ATLAS_TRACE( "opt extend vordiv" );
+                    // increase truncation in vorticity_spectra and divergence_spectra:
+                    extend_truncationopt( truncation_, nb_vordiv_fields, vorticity_spectra,
+                                          vorticity_spectra_extended.data() );
+                    extend_truncationopt( truncation_, nb_vordiv_fields, divergence_spectra,
+                                          divergence_spectra_extended.data() );
+                }
 
-        {
-            ATLAS_TRACE( "vordiv to UV opt" );
-            // call vd2uv to compute u and v in spectral space
-            trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) );
-            vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
-                                      divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
-        }
+                {
+                    ATLAS_TRACE( "vordiv to UV opt" );
+                    // call vd2uv to compute u and v in spectral space
+                    trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) );
+                    vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
+                                              divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
+                }
 
-        // perform spectral transform to compute all fields in grid point space
-        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
-        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
-                     gp_fields + nb_gp * nb_vordiv_fields, config );
-    }
-    if ( nb_scalar_fields > 0 ) {
-        int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
-        std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
-        extend_truncationopt( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
-        invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
-                     gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
-    }
-}
+                // perform spectral transform to compute all fields in grid point space
+                invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
+                invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
+                             gp_fields + nb_gp * nb_vordiv_fields, config );
+            }
+            if ( nb_scalar_fields > 0 ) {
+                int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
+                std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
+                extend_truncationopt( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
+                invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
+                             gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
+            }
+        }
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
+        void TransLocalopt::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const {
+            NOTIMP;
+            // Not implemented and not planned.
+            // Use the TransIFS implementation instead.
+        }
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt::dirtrans( const FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
+        void TransLocalopt::dirtrans( const FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config )
+            const {
+            NOTIMP;
+            // Not implemented and not planned.
+            // Use the TransIFS implementation instead.
+        }
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
-                                          const eckit::Configuration& config ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
+        void TransLocalopt::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
+                                                  const eckit::Configuration& config ) const {
+            NOTIMP;
+            // Not implemented and not planned.
+            // Use the TransIFS implementation instead.
+        }
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
-                              const eckit::Configuration& ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
+        void TransLocalopt::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
+                                      const eckit::Configuration& ) const {
+            NOTIMP;
+            // Not implemented and not planned.
+            // Use the TransIFS implementation instead.
+        }
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
-                              double divergence_spectra[], const eckit::Configuration& ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
+        void TransLocalopt::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
+                                      double divergence_spectra[], const eckit::Configuration& ) const {
+            NOTIMP;
+            // Not implemented and not planned.
+            // Use the TransIFS implementation instead.
+        }
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-}  // namespace trans
+    }  // namespace trans
 }  // namespace atlas
diff --git a/src/atlas/trans/localopt/TransLocalopt.h b/src/atlas/trans/localopt/TransLocalopt.h
index b9f89ef08..fce71261a 100644
--- a/src/atlas/trans/localopt/TransLocalopt.h
+++ b/src/atlas/trans/localopt/TransLocalopt.h
@@ -112,10 +112,10 @@ class TransLocalopt : public trans::TransImpl {
     Grid grid_;
     int truncation_;
     bool precompute_;
-    mutable std::vector<double> legendre_sym_;
-    mutable std::vector<double> legendre_asym_;
-    mutable std::vector<double> fourier_;
-    mutable std::vector<double> fouriertp_;
+    double* legendre_sym_;
+    double* legendre_asym_;
+    double* fourier_;
+    double* fouriertp_;
     std::vector<size_t> legendre_begin_;
     std::vector<size_t> legendre_sym_begin_;
     std::vector<size_t> legendre_asym_begin_;
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index d06a7d435..f4672becc 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -23,7 +23,7 @@
 #include "atlas/util/Constants.h"
 #include "eckit/linalg/LinearAlgebra.h"
 #include "eckit/linalg/Matrix.h"
-#ifdef ATLAS_HAVE_MKL
+#if ATLAS_HAVE_MKL
 #include "mkl.h"
 #endif
 
@@ -58,7 +58,7 @@ int num_n( const int truncation, const int m, const bool symmetric ) {
 }
 
 void alloc_aligned( double*& ptr, size_t n ) {
-#ifdef ATLAS_HAVE_MKL
+#if ATLAS_HAVE_MKL
     int al = 64;
     ptr    = (double*)mkl_malloc( sizeof( double ) * n, al );
 #else
@@ -69,7 +69,7 @@ void alloc_aligned( double*& ptr, size_t n ) {
 }
 
 void free_aligned( double*& ptr ) {
-#ifdef ATLAS_HAVE_MKL
+#if ATLAS_HAVE_MKL
     mkl_free( ptr );
 #else
     free( ptr );
@@ -91,7 +91,11 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
     truncation_( truncation ),
     precompute_( config.getBool( "precompute", true ) ) {
     ATLAS_TRACE( "Precompute legendre opt2" );
+#if ATLAS_HAVE_MKL
+    eckit::linalg::LinearAlgebra::backend( "mkl" );  // might want to choose backend with this command
+#else
     eckit::linalg::LinearAlgebra::backend( "generic" );  // might want to choose backend with this command
+#endif
     int nlats   = 0;
     int nlons   = 0;
     int nlatsNH = nlats_northernHemisphere( nlats );
@@ -268,9 +272,21 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
         // Transform
         if ( grid::StructuredGrid g = grid_ ) {
             ATLAS_TRACE( "invtrans_uv structured opt2" );
-            int nlats            = g.ny();
-            int nlons            = g.nxmax();
-            int nlatsNH          = nlats_northernHemisphere( nlats );
+            int nlats    = g.ny();
+            int nlons    = g.nxmax();
+            int nlatsNH  = nlats_northernHemisphere( nlats );
+            auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) {
+                return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
+            };
+            /*auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) {
+                return jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) + jm * nb_fields * 2 * nlats );
+            };*/
+            auto posGemm1 = [&]( int jfld, int imag, int jlat, int jm ) {
+                return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) );
+            };
+            auto posGemm2 = [&]( int jfld, int imag, int jlat, int jm ) {
+                return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
+            };
             int size_fourier_max = nb_fields * 2 * nlats;
             double* scl_fourier;
             alloc_aligned( scl_fourier, size_fourier_max * ( truncation + 1 ) );
@@ -278,9 +294,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
             // Legendre transform:
             {
                 ATLAS_TRACE( "opt2 Legendre dgemm" );
-                for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-#if 1  // 0: no symmetry, 1: use symmetry \
-    // TODO: 0 is currently not working because it requires all latitudes to be included in legendre_ (which is currently not done)
+                for ( int jm = 0; jm <= truncation_; jm++ ) {
                     int size_sym  = num_n( truncation_ + 1, jm, true );
                     int size_asym = num_n( truncation_ + 1, jm, false );
                     int n_imag    = 2;
@@ -329,6 +343,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                         eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
+#if 0  //ATLAS_HAVE_FFTW
                     {
                         //ATLAS_TRACE( "opt2 merge spheres" );
                         // northern hemisphere:
@@ -340,8 +355,8 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                             for ( int imag = 0; imag < n_imag; imag++ ) {
                                 int posimag = nb_fields * ( imag + poslat );
                                 for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
-                                    int pos          = jfld + posimag;
-                                    scl_fourier[pos] = scl_fourier_sym[idx] + scl_fourier_asym[idx];
+                                    scl_fourier[posFFTW( jfld, imag, jlat, jm )] =
+                                        scl_fourier_sym[idx] + scl_fourier_asym[idx];
                                 }
                             }
                         }
@@ -353,8 +368,9 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                             for ( int imag = 0; imag < n_imag; imag++ ) {
                                 int posimag = nb_fields * ( imag + poslat );
                                 for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
-                                    int pos          = jfld + posimag;
-                                    scl_fourier[pos] = scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                                    int jslat = nlats - jlat - 1;
+                                    scl_fourier[posFFTW( jfld, imag, jslat, jm )] =
+                                        scl_fourier_sym[idx] - scl_fourier_asym[idx];
                                 }
                             }
                         }
@@ -363,33 +379,20 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                     free_aligned( scalar_asym );
                     free_aligned( scl_fourier_sym );
                     free_aligned( scl_fourier_asym );
-
-#else
-                    int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
-                    eckit::linalg::Matrix A( eckit::linalg::Matrix(
-                        const_cast<double*>( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) );
-                    eckit::linalg::Matrix B( legendre_.data() + noff * g.ny(), ns, g.ny() );
-                    eckit::linalg::Matrix C( scl_fourier.data() + jm * size_fourier, nb_fields * 2, g.ny() );
-                    eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-#endif
                 }
             }
-#if ATLAS_HAVE_FFTW
             {
-                auto position = [&]( int jfld, int imag, int jlat, int jm ) {
-                    return jfld + nb_fields * ( imag + 2 * ( jlat + nlats * ( jm ) ) );
-                };
                 int num_complex = ( nlons / 2 ) + 1;
                 {
                     ATLAS_TRACE( "opt2 FFTW" );
                     for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                         int idx = 0;
-                        for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                            fft_in_[idx++][0] = scl_fourier[position( jfld, 0, jlat, 0 )];
+                        for ( int jlat = 0; jlat < nlats; jlat++ ) {
+                            fft_in_[idx++][0] = scl_fourier[posFFTW( jfld, 0, jlat, 0 )];
                             for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
                                 for ( int imag = 0; imag < 2; imag++ ) {
                                     if ( jm <= truncation_ ) {
-                                        fft_in_[idx][imag] = scl_fourier[position( jfld, imag, jlat, jm )] / 2.;
+                                        fft_in_[idx][imag] = scl_fourier[posFFTW( jfld, imag, jlat, jm )] / 2.;
                                     }
                                     else {
                                         fft_in_[idx][imag] = 0.;
@@ -405,33 +408,53 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                 }
             }
 #else
-#if 0  // 1: better for small number of columns, large truncation; 0: better for large number of columns
-
-            // Transposition in Fourier space:
-            std::vector<double> scl_fourier_tp( size_fourier * ( truncation + 1 ) );
-            {
-                ATLAS_TRACE( "opt2 transposition in Fourier" );
-                int idx = 0;
-                for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                    for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                        for ( int imag = 0; imag < 2; imag++ ) {
-                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                int pos_tp = jfld + nb_fields * ( jlat + g.ny() * ( imag + 2 * ( jm ) ) );
-                                //int pos  = jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) );
-                                scl_fourier_tp[pos_tp] = scl_fourier[idx++];  // = scl_fourier[pos]
+#if 1  // 1: better for small number of columns, large truncation; 0: better for large number of columns
+                    {
+                        //ATLAS_TRACE( "opt2 merge spheres" );
+                        // northern hemisphere:
+                        int ioff = jm * size_fourier_max;
+                        int pos0 = ioff;
+                        int idx  = 0;
+                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                            int poslat = pos0 + 2 * jlat;
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                int posimag = nb_fields * ( imag + poslat );
+                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                    scl_fourier[posGemm1( jfld, imag, jlat, jm )] =
+                                        scl_fourier_sym[idx] + scl_fourier_asym[idx];
+                                }
+                            }
+                        }
+                        // southern hemisphere:
+                        idx  = 0;
+                        pos0 = 2 * ( nlats - 1 ) + ioff;
+                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                            int poslat = pos0 - 2 * jlat;
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                int posimag = nb_fields * ( imag + poslat );
+                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                    int jslat = nlats - jlat - 1;
+                                    scl_fourier[posGemm1( jfld, imag, jslat, jm )] =
+                                        scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                                }
                             }
                         }
                     }
+                    free_aligned( scalar_sym );
+                    free_aligned( scalar_asym );
+                    free_aligned( scl_fourier_sym );
+                    free_aligned( scl_fourier_asym );
                 }
             }
 
             // Fourier transformation:
-            std::vector<double> gp_opt2( nb_fields * grid_.size(), 0. );
+            double* gp_opt2;
+            alloc_aligned( gp_opt2, nb_fields * grid_.size() );
             {
                 ATLAS_TRACE( "opt2 Fourier dgemm" );
-                eckit::linalg::Matrix A( scl_fourier_tp.data(), nb_fields * g.ny(), ( truncation_ + 1 ) * 2 );
-                eckit::linalg::Matrix B( fourier_.data(), ( truncation_ + 1 ) * 2, g.nxmax() );
-                eckit::linalg::Matrix C( gp_opt2.data(), nb_fields * g.ny(), g.nxmax() );
+                eckit::linalg::Matrix A( scl_fourier, nb_fields * g.ny(), ( truncation_ + 1 ) * 2 );
+                eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, g.nxmax() );
+                eckit::linalg::Matrix C( gp_opt2, nb_fields * g.ny(), g.nxmax() );
                 eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
             }
 
@@ -449,197 +472,219 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                     }
                 }
             }
+            free_aligned( gp_opt2 );
 #else
-            // Transposition in Fourier space:
-            std::vector<double> scl_fourier_tp( size_fourier * ( truncation + 1 ) );
-            {
-                ATLAS_TRACE( "opt2 transposition in Fourier" );
-                int idx = 0;
-                for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                    for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                        for ( int imag = 0; imag < 2; imag++ ) {
-                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                int pos_tp = imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + g.ny() * ( jfld ) ) );
-                                //int pos  = jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) );
-                                scl_fourier_tp[pos_tp] = scl_fourier[idx++];  // = scl_fourier[pos]
+                    {
+                        //ATLAS_TRACE( "opt2 merge spheres" );
+                        // northern hemisphere:
+                        int ioff = jm * size_fourier_max;
+                        int pos0 = ioff;
+                        int idx  = 0;
+                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                            int poslat = pos0 + 2 * jlat;
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                int posimag = nb_fields * ( imag + poslat );
+                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                    scl_fourier[posGemm2( jfld, imag, jlat, jm )] =
+                                        scl_fourier_sym[idx] + scl_fourier_asym[idx];
+                                }
+                            }
+                        }
+                        // southern hemisphere:
+                        idx  = 0;
+                        pos0 = 2 * ( nlats - 1 ) + ioff;
+                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                            int poslat = pos0 - 2 * jlat;
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                int posimag = nb_fields * ( imag + poslat );
+                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                    int jslat = nlats - jlat - 1;
+                                    scl_fourier[posGemm2( jfld, imag, jslat, jm )] =
+                                        scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                                }
                             }
                         }
                     }
+                    free_aligned( scalar_sym );
+                    free_aligned( scalar_asym );
+                    free_aligned( scl_fourier_sym );
+                    free_aligned( scl_fourier_asym );
                 }
             }
 
             // Fourier transformation:
-            std::vector<double> gp_opt2( nb_fields * grid_.size(), 0. );
             {
                 ATLAS_TRACE( "opt2 Fourier dgemm" );
-                eckit::linalg::Matrix A( fouriertp_.data(), g.nxmax(), ( truncation_ + 1 ) * 2 );
-                eckit::linalg::Matrix B( scl_fourier_tp.data(), ( truncation_ + 1 ) * 2, nb_fields * g.ny() );
+                eckit::linalg::Matrix A( fouriertp_, g.nxmax(), ( truncation_ + 1 ) * 2 );
+                eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * g.ny() );
                 eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() );
                 eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
             }
 
 #endif
 #endif
-            // Computing u,v from U,V:
-            {
-                if ( nb_vordiv_fields > 0 ) {
-                    ATLAS_TRACE( "opt2 u,v from U,V" );
-                    std::vector<double> coslats( nlats );
-                    for ( size_t j = 0; j < nlats; ++j ) {
-                        coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
-                    }
-                    int idx = 0;
-                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                        for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                            for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
-                                gp_fields[idx] /= coslats[jlat];
-                                idx++;
+                    // Computing u,v from U,V:
+                    {
+                        if ( nb_vordiv_fields > 0 ) {
+                            ATLAS_TRACE( "opt2 u,v from U,V" );
+                            std::vector<double> coslats( nlats );
+                            for ( size_t j = 0; j < nlats; ++j ) {
+                                coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
+                            }
+                            int idx = 0;
+                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                                    for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
+                                        gp_fields[idx] /= coslats[jlat];
+                                        idx++;
+                                    }
+                                }
                             }
                         }
                     }
+                    free_aligned( scl_fourier );
                 }
-            }
-            free_aligned( scl_fourier );
-        }
-        else {
-            ATLAS_TRACE( "invtrans_uv unstructured opt2" );
-            int idx = 0;
-            for ( PointXY p : grid_.xy() ) {
-                double lon   = p.x() * util::Constants::degreesToRadians();
-                double lat   = p.y() * util::Constants::degreesToRadians();
-                double trcFT = truncation;
-
-                // Legendre transform:
-                //invtrans_legendreopt2( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
-                //                      legReal.data(), legImag.data() );
-
-                // Fourier transform:
-                //invtrans_fourieropt2( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
-                //                     gp_tmp.data() + ( nb_fields * idx ) );
-                for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-                    //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                else {
+                    ATLAS_TRACE( "invtrans_uv unstructured opt2" );
+                    int idx = 0;
+                    for ( PointXY p : grid_.xy() ) {
+                        double lon   = p.x() * util::Constants::degreesToRadians();
+                        double lat   = p.y() * util::Constants::degreesToRadians();
+                        double trcFT = truncation;
+
+                        // Legendre transform:
+                        //invtrans_legendreopt2( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
+                        //                      legReal.data(), legImag.data() );
+
+                        // Fourier transform:
+                        //invtrans_fourieropt2( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
+                        //                     gp_tmp.data() + ( nb_fields * idx ) );
+                        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+                            //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                        }
+                        ++idx;
+                    }
                 }
-                ++idx;
             }
-        }
-    }
-}  // namespace trans
+        }  // namespace trans
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt2::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
-                               const double divergence_spectra[], double gp_fields[],
-                               const eckit::Configuration& config ) const {
-    invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
-}
+        void TransLocalopt2::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
+                                       const double divergence_spectra[], double gp_fields[],
+                                       const eckit::Configuration& config ) const {
+            invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
+        }
 
-void extend_truncationopt2( const int old_truncation, const int nb_fields, const double old_spectra[],
-                            double new_spectra[] ) {
-    int k = 0, k_old = 0;
-    for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
-        for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
-            for ( int imag = 0; imag < 2; imag++ ) {              // imaginary/real part
-                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {  // field
-                    if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; }
-                    else {
-                        new_spectra[k++] = old_spectra[k_old++];
+        void extend_truncationopt2( const int old_truncation, const int nb_fields, const double old_spectra[],
+                                    double new_spectra[] ) {
+            int k = 0, k_old = 0;
+            for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
+                for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
+                    for ( int imag = 0; imag < 2; imag++ ) {              // imaginary/real part
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {  // field
+                            if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; }
+                            else {
+                                new_spectra[k++] = old_spectra[k_old++];
+                            }
+                        }
                     }
                 }
             }
         }
-    }
-}
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
+
+        void TransLocalopt2::invtrans( const int nb_scalar_fields, const double scalar_spectra[],
+                                       const int nb_vordiv_fields, const double vorticity_spectra[],
+                                       const double divergence_spectra[], double gp_fields[],
+                                       const eckit::Configuration& config ) const {
+            ATLAS_TRACE( "TransLocalopt2::invtrans" );
+            int nb_gp              = grid_.size();
+            int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
+            if ( nb_vordiv_fields > 0 ) {
+                std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
+                std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
+                std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
+                std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
 
-void TransLocalopt2::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
-                               const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
-                               const eckit::Configuration& config ) const {
-    ATLAS_TRACE( "TransLocalopt2::invtrans" );
-    int nb_gp              = grid_.size();
-    int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
-    if ( nb_vordiv_fields > 0 ) {
-        std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
-        std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
-        std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
-        std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
-
-        {
-            ATLAS_TRACE( "opt2 extend vordiv" );
-            // increase truncation in vorticity_spectra and divergence_spectra:
-            extend_truncationopt2( truncation_, nb_vordiv_fields, vorticity_spectra,
-                                   vorticity_spectra_extended.data() );
-            extend_truncationopt2( truncation_, nb_vordiv_fields, divergence_spectra,
-                                   divergence_spectra_extended.data() );
-        }
+                {
+                    ATLAS_TRACE( "opt2 extend vordiv" );
+                    // increase truncation in vorticity_spectra and divergence_spectra:
+                    extend_truncationopt2( truncation_, nb_vordiv_fields, vorticity_spectra,
+                                           vorticity_spectra_extended.data() );
+                    extend_truncationopt2( truncation_, nb_vordiv_fields, divergence_spectra,
+                                           divergence_spectra_extended.data() );
+                }
 
-        {
-            ATLAS_TRACE( "vordiv to UV opt2" );
-            // call vd2uv to compute u and v in spectral space
-            trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt2" ) );
-            vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
-                                      divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
-        }
+                {
+                    ATLAS_TRACE( "vordiv to UV opt2" );
+                    // call vd2uv to compute u and v in spectral space
+                    trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt2" ) );
+                    vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
+                                              divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
+                }
 
-        // perform spectral transform to compute all fields in grid point space
-        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
-        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
-                     gp_fields + nb_gp * nb_vordiv_fields, config );
-    }
-    if ( nb_scalar_fields > 0 ) {
-        int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
-        std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
-        extend_truncationopt2( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
-        invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
-                     gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
-    }
-}
+                // perform spectral transform to compute all fields in grid point space
+                invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
+                invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
+                             gp_fields + nb_gp * nb_vordiv_fields, config );
+            }
+            if ( nb_scalar_fields > 0 ) {
+                int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
+                std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
+                extend_truncationopt2( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
+                invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
+                             gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
+            }
+        }
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt2::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
+        void TransLocalopt2::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config )
+            const {
+            NOTIMP;
+            // Not implemented and not planned.
+            // Use the TransIFS implementation instead.
+        }
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt2::dirtrans( const FieldSet& gpfields, FieldSet& spfields,
-                               const eckit::Configuration& config ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
+        void TransLocalopt2::dirtrans( const FieldSet& gpfields, FieldSet& spfields,
+                                       const eckit::Configuration& config ) const {
+            NOTIMP;
+            // Not implemented and not planned.
+            // Use the TransIFS implementation instead.
+        }
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt2::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
-                                           const eckit::Configuration& config ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
+        void TransLocalopt2::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
+                                                   const eckit::Configuration& config ) const {
+            NOTIMP;
+            // Not implemented and not planned.
+            // Use the TransIFS implementation instead.
+        }
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt2::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
-                               const eckit::Configuration& ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
+        void TransLocalopt2::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
+                                       const eckit::Configuration& ) const {
+            NOTIMP;
+            // Not implemented and not planned.
+            // Use the TransIFS implementation instead.
+        }
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt2::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
-                               double divergence_spectra[], const eckit::Configuration& ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
+        void TransLocalopt2::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
+                                       double divergence_spectra[], const eckit::Configuration& ) const {
+            NOTIMP;
+            // Not implemented and not planned.
+            // Use the TransIFS implementation instead.
+        }
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-}  // namespace trans
+    }  // namespace trans
 }  // namespace atlas
diff --git a/src/atlas/trans/localopt3/FourierTransformsopt3.cc b/src/atlas/trans/localopt3/FourierTransformsopt3.cc
new file mode 100644
index 000000000..c02b57ada
--- /dev/null
+++ b/src/atlas/trans/localopt3/FourierTransformsopt3.cc
@@ -0,0 +1,78 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor
+ * does it submit to any jurisdiction.
+ */
+
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+
+#include "atlas/trans/localopt3/FourierTransformsopt3.h"
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+
+void invtrans_fourieropt3( const size_t trcFT,
+                           const double lon,         // longitude in radians (in)
+                           const int nb_fields,      // Number of fields
+                           const double rlegReal[],  // associated Legendre functions, size (trc+1)*trc/2 (in)
+                           const double rlegImag[],  // associated Legendre functions, size (trc+1)*trc/2 (in)
+                           double rgp[] )            // gridpoint
+{
+    for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
+        rgp[jfld] = 0.;
+    }
+    // local Fourier transformation:
+    for ( int jm = 0; jm <= trcFT; ++jm ) {
+        const double cos = std::cos( jm * lon );
+        const double sin = std::sin( jm * lon );
+        for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
+            double real = cos * rlegReal[jm * nb_fields + jfld];
+            double imag = sin * rlegImag[jm * nb_fields + jfld];
+            rgp[jfld] += real - imag;
+        }
+    }
+}
+
+int fourier_truncationopt3( const int truncation,    // truncation
+                            const int nx,            // number of longitudes
+                            const int nxmax,         // maximum nx
+                            const int ndgl,          // number of latitudes
+                            const double lat,        // latitude in radian
+                            const bool fullgrid ) {  // regular grid
+    int trc     = truncation;
+    int trclin  = ndgl - 1;
+    int trcquad = ndgl * 2 / 3 - 1;
+    if ( truncation >= trclin || fullgrid ) {
+        // linear
+        trc = ( nx - 1 ) / 2;
+    }
+    else if ( truncation >= trcquad ) {
+        // quadratic
+        double weight = 3 * ( trclin - truncation ) / ndgl;
+        double sqcos  = std::pow( std::cos( lat ), 2 );
+
+        trc = ( nx - 1 ) / ( 2 + weight * sqcos );
+    }
+    else {
+        // cubic
+        double sqcos = std::pow( std::cos( lat ), 2 );
+
+        trc = ( nx - 1 ) / ( 2 + sqcos ) - 1;
+    }
+    trc = std::min( truncation, trc );
+    return trc;
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt3/FourierTransformsopt3.h b/src/atlas/trans/localopt3/FourierTransformsopt3.h
new file mode 100644
index 000000000..b735ed145
--- /dev/null
+++ b/src/atlas/trans/localopt3/FourierTransformsopt3.h
@@ -0,0 +1,38 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#pragma once
+
+#include <cstddef>
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+// Routine to compute the local Fourier transformation
+//
+// Author:
+// Andreas Mueller *ECMWF*
+//
+
+void invtrans_fourieropt3( const size_t trcFT,
+                           const double lon,         // longitude in radians (in)
+                           const int nb_fields,      // Number of fields
+                           const double rlegReal[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
+                           const double rlegImag[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
+                           double rgp[] );           // gridpoint
+
+int fourier_truncationopt3( const int truncation, const int nx, const int nxmax, const int ndgl, const double lat,
+                            const bool fullgrid );
+
+// --------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
new file mode 100644
index 000000000..c6fbcad68
--- /dev/null
+++ b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
@@ -0,0 +1,197 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor
+ * does it submit to any jurisdiction.
+ */
+
+#include <cmath>
+#include <limits>
+
+#include "atlas/array.h"
+#include "atlas/parallel/mpi/mpi.h"
+#include "atlas/trans/localopt3/LegendrePolynomialsopt3.h"
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+
+void compute_legendre_polynomialsopt3(
+    const size_t trc,          // truncation (in)
+    const int nlats,           // number of latitudes
+    const double lats[],       // latitudes in radians (in)
+    double leg_sym[],          // values of associated Legendre functions, symmetric part
+    double leg_asym[],         // values of associated Legendre functions, asymmetric part
+    size_t leg_start_sym[],    // start indices for different zonal wave numbers, symmetric part
+    size_t leg_start_asym[] )  // start indices for different zonal wave numbers, asymmetric part
+{
+    auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; };
+    array::ArrayT<double> zfn_( trc + 1, trc + 1 );
+    array::ArrayView<double, 2> zfn = array::make_view<double, 2>( zfn_ );
+    std::vector<double> legpol( legendre_size( trc ) );
+    auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; };
+    int iodd;
+
+    // Compute coefficients for Taylor series in Belousov (19) and (21)
+    // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.)
+    // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1
+    zfn( 0, 0 ) = 2.;
+    for ( int jn = 1; jn <= trc; ++jn ) {
+        double zfnn = zfn( 0, 0 );
+        for ( int jgl = 1; jgl <= jn; ++jgl ) {
+            zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) );
+        }
+        iodd          = jn % 2;
+        zfn( jn, jn ) = zfnn;
+        for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) {
+            double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) );  // new factor numerator
+            double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) );           // new factor denominator
+
+            zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd;
+        }
+    }
+
+    // Loop over latitudes:
+    for ( int jlat = 0; jlat < nlats; ++jlat ) {
+        {
+            //ATLAS_TRACE( "compute Legendre polynomials" );
+            // --------------------
+            // 1. First two columns
+            // --------------------
+            double lat     = lats[jlat];
+            double zdlx1   = ( M_PI_2 - lat );               // theta
+            double zdlx    = std::cos( zdlx1 );              // cos(theta)
+            double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
+
+            legpol[idxmn( 0, 0 )] = 1.;
+
+            double zdl1sita = 0.;
+            // if we are less than 1 meter from the pole,
+            if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits<double>::epsilon() ) ) {
+                zdlx    = 1.;
+                zdlsita = 0.;
+            }
+            else {
+                zdl1sita = 1. / zdlsita;
+            }
+
+            // ordinary Legendre polynomials from series expansion
+            // ---------------------------------------------------
+
+            // even N
+            for ( int jn = 2; jn <= trc; jn += 2 ) {
+                double zdlk   = 0.5 * zfn( jn, 0 );
+                double zdlldn = 0.0;
+                double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
+                // represented by only even k
+                for ( int jk = 2; jk <= jn; jk += 2 ) {
+                    // normalised ordinary Legendre polynomial == \overbar{P_n}^0
+                    zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
+                    // normalised associated Legendre polynomial == \overbar{P_n}^1
+                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+                }
+                legpol[idxmn( 0, jn )] = zdlk;
+                legpol[idxmn( 1, jn )] = zdlldn;
+            }
+
+            // odd N
+            for ( int jn = 1; jn <= trc; jn += 2 ) {
+                zfn( jn, 0 )  = 0.;
+                double zdlk   = 0.;
+                double zdlldn = 0.0;
+                double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
+                // represented by only even k
+                for ( int jk = 1; jk <= jn; jk += 2 ) {
+                    // normalised ordinary Legendre polynomial == \overbar{P_n}^0
+                    zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
+                    // normalised associated Legendre polynomial == \overbar{P_n}^1
+                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+                }
+                legpol[idxmn( 0, jn )] = zdlk;
+                legpol[idxmn( 1, jn )] = zdlldn;
+            }
+
+            // --------------------------------------------------------------
+            // 2. Diagonal (the terms 0,0 and 1,1 have already been computed)
+            //    Belousov, equation (23)
+            // --------------------------------------------------------------
+
+            double zdls = zdl1sita * std::numeric_limits<double>::min();
+            for ( int jn = 2; jn <= trc; ++jn ) {
+                double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) );
+
+                legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq;
+                if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0;
+            }
+
+            // ---------------------------------------------
+            // 3. General recurrence (Belousov, equation 17)
+            // ---------------------------------------------
+
+            for ( int jn = 3; jn <= trc; ++jn ) {
+                for ( int jm = 2; jm < jn; ++jm ) {
+                    double cn =
+                        ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) );     // numerator of c in Belousov
+                    double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) );  // denominator of c in Belousov
+                    double dn =
+                        ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) );     // numerator of d in Belousov
+                    double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) );  // denominator of d in Belousov
+                    double en = ( ( 2. * jn + 1. ) * ( jn - jm ) );                     // numerator of e in Belousov
+                    double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) );                     // denominator of e in Belousov
+
+                    legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] -
+                                              std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx +
+                                              std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx;
+                }
+            }
+        }
+
+        {
+            //ATLAS_TRACE( "add to global arrays" );
+
+            // take factor 2 for m > 0 into account:
+            for ( int jm = 1; jm <= trc; ++jm ) {
+                for ( int jn = jm; jn <= trc; ++jn ) {
+                    legpol[idxmn( jm, jn )] *= 2.;
+                }
+            }
+            for ( int jm = 0; jm <= trc; jm++ ) {
+                int is1 = 0, ia1 = 0;
+                for ( int jn = jm; jn <= trc; jn++ ) {
+                    if ( ( jn - jm ) % 2 == 0 ) { is1++; }
+                    else {
+                        ia1++;
+                    }
+                }
+                int is2 = 0, ia2 = 0;
+                // the choice between the following two code lines determines whether
+                // total wavenumbers are summed in an ascending or descending order.
+                // The trans library in IFS uses descending order because it should
+                // be more accurate (higher wavenumbers have smaller contributions).
+                // This also needs to be changed when splitting the spectral data in
+                // TransLocalopt3::invtrans_uv!
+                //for ( int jn = jm; jn <= trc; jn++ ) {
+                for ( int jn = trc; jn >= jm; jn-- ) {
+                    if ( ( jn - jm ) % 2 == 0 ) {
+                        int is      = leg_start_sym[jm] + is1 * jlat + is2++;
+                        leg_sym[is] = legpol[idxmn( jm, jn )];
+                    }
+                    else {
+                        int ia       = leg_start_asym[jm] + ia1 * jlat + ia2++;
+                        leg_asym[ia] = legpol[idxmn( jm, jn )];
+                    }
+                }
+            }
+        }
+    }
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h
new file mode 100644
index 000000000..1698fb80c
--- /dev/null
+++ b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h
@@ -0,0 +1,47 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#pragma once
+
+#include <cstddef>
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+// Routine to compute the Legendre polynomials in serial according to Belousov
+// (using correction by Swarztrauber)
+//
+// Reference:
+// S.L. Belousov, Tables of normalized associated Legendre Polynomials, Pergamon
+// Press (1962)
+// P.N. Swarztrauber, On computing the points and weights for Gauss-Legendre
+// quadrature,
+//      SIAM J. Sci. Comput. Vol. 24 (3) pp. 945-954 (2002)
+//
+// Author of Fortran version:
+// Mats Hamrud, Philippe Courtier, Nils Wedi *ECMWF*
+//
+// Ported to C++ by:
+// Andreas Mueller *ECMWF*
+//
+void compute_legendre_polynomialsopt3(
+    const size_t trc,           // truncation (in)
+    const int nlats,            // number of latitudes
+    const double lats[],        // latitudes in radians (in)
+    double legendre_sym[],      // values of associated Legendre functions, symmetric part
+    double legendre_asym[],     // values of associated Legendre functions, asymmetric part
+    size_t leg_start_sym[],     // start indices for different zonal wave numbers, symmetric part
+    size_t leg_start_asym[] );  // start indices for different zonal wave numbers, asymmetric part
+
+// --------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt3/LegendreTransformsopt3.cc b/src/atlas/trans/localopt3/LegendreTransformsopt3.cc
new file mode 100644
index 000000000..919e9246c
--- /dev/null
+++ b/src/atlas/trans/localopt3/LegendreTransformsopt3.cc
@@ -0,0 +1,62 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#include <cstddef>
+
+#include "atlas/trans/localopt3/LegendreTransformsopt3.h"
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+
+void invtrans_legendreopt3(
+    const size_t trc,       // truncation (in)
+    const size_t trcFT,     // truncation for Fourier transformation (in)
+    const size_t trcLP,     // truncation of Legendre polynomials data legpol. Needs to be >= trc (in)
+    const double legpol[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
+    const int nb_fields,    // number of fields
+    const double spec[],    // spectral data, size (trc+1)*trc (in)
+    double leg_real[],      // values of associated Legendre functions, size (trc+1)*trc/2 (out)
+    double leg_imag[] )     // values of associated Legendre functions, size (trc+1)*trc/2 (out)
+{
+    // Legendre transformation:
+    int k = 0, klp = 0;
+    for ( int jm = 0; jm <= trcFT; ++jm ) {
+        for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
+            leg_real[jm * nb_fields + jfld] = 0.;
+            leg_imag[jm * nb_fields + jfld] = 0.;
+        }
+        for ( int jn = jm; jn <= trcLP; ++jn, ++klp ) {
+            if ( jn <= trc ) {
+                for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
+                    // not completely sure where this factor 2 comes from. One possible
+                    // explanation:
+                    // normalization of trigonometric functions in the spherical harmonics
+                    // integral over square of trig function is 1 for m=0 and 0.5 (?) for
+                    // m>0
+                    leg_real[jm * nb_fields + jfld] += 2. * spec[( 2 * k ) * nb_fields + jfld] * legpol[klp];
+                    leg_imag[jm * nb_fields + jfld] += 2. * spec[( 2 * k + 1 ) * nb_fields + jfld] * legpol[klp];
+                }
+                ++k;
+            }
+        }
+    }
+    // Undo factor 2 for (jm == 0)
+    for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
+        leg_real[jfld] /= 2.;
+        leg_imag[jfld] /= 2.;
+    }
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt3/LegendreTransformsopt3.h b/src/atlas/trans/localopt3/LegendreTransformsopt3.h
new file mode 100644
index 000000000..7205b3bf0
--- /dev/null
+++ b/src/atlas/trans/localopt3/LegendreTransformsopt3.h
@@ -0,0 +1,37 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#pragma once
+
+#include <cstddef>
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+// Routine to compute the Legendre transformation
+//
+// Author:
+// Andreas Mueller *ECMWF*
+//
+void invtrans_legendreopt3(
+    const size_t trc,       // truncation (in)
+    const size_t trcFT,     // truncation for Fourier transformation (in)
+    const size_t trcLP,     // truncation of Legendre polynomials data legpol. Needs to be >= trc (in)
+    const double legpol[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
+    const int nb_fields,    // number of fields
+    const double spec[],    // spectral data, size (trc+1)*trc (in)
+    double leg_real[],      // values of associated Legendre functions, size (trc+1)*trc/2 (out)
+    double leg_imag[] );    // values of associated Legendre functions, size (trc+1)*trc/2 (out)
+
+// --------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
new file mode 100644
index 000000000..e65ac2e63
--- /dev/null
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -0,0 +1,690 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#include "atlas/trans/localopt3/TransLocalopt3.h"
+#include <math.h>
+#include "atlas/array.h"
+#include "atlas/option.h"
+#include "atlas/parallel/mpi/mpi.h"
+#include "atlas/runtime/ErrorHandling.h"
+#include "atlas/runtime/Log.h"
+#include "atlas/trans/VorDivToUV.h"
+#include "atlas/trans/local/LegendrePolynomials.h"
+#include "atlas/trans/localopt3/FourierTransformsopt3.h"
+#include "atlas/trans/localopt3/LegendrePolynomialsopt3.h"
+#include "atlas/trans/localopt3/LegendreTransformsopt3.h"
+#include "atlas/util/Constants.h"
+#include "eckit/linalg/LinearAlgebra.h"
+#include "eckit/linalg/Matrix.h"
+#if ATLAS_HAVE_MKL
+#include "mkl.h"
+#endif
+
+namespace atlas {
+namespace trans {
+
+namespace {
+static TransBuilderGrid<TransLocalopt3> builder( "localopt3" );
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+// Helper functions
+// --------------------------------------------------------------------------------------------------------------------
+namespace {  // anonymous
+
+size_t legendre_size( const size_t truncation ) {
+    return ( truncation + 2 ) * ( truncation + 1 ) / 2;
+}
+
+int nlats_northernHemisphere( const int nlats ) {
+    return ceil( nlats / 2. );
+    // using ceil here should make it possible to have odd number of latitudes (with the centre latitude being the equator)
+}
+
+int num_n( const int truncation, const int m, const bool symmetric ) {
+    int len = 0;
+    if ( symmetric ) { len = ( truncation - m + 2 ) / 2; }
+    else {
+        len = ( truncation - m + 1 ) / 2;
+    }
+    return len;
+}
+
+void alloc_aligned( double*& ptr, size_t n ) {
+#if ATLAS_HAVE_MKL
+    int al = 64;
+    ptr    = (double*)mkl_malloc( sizeof( double ) * n, al );
+#else
+    posix_memalign( (void**)&ptr, sizeof( double ) * 64, sizeof( double ) * n );
+    //ptr = (double*)malloc( sizeof( double ) * n );
+    //ptr = new double[n];
+#endif
+}
+
+void free_aligned( double*& ptr ) {
+#if ATLAS_HAVE_MKL
+    mkl_free( ptr );
+#else
+    free( ptr );
+#endif
+}
+
+int add_padding( int n ) {
+    return std::ceil( n / 8. ) * 8;
+}
+}  // namespace
+
+// --------------------------------------------------------------------------------------------------------------------
+// Class TransLocalopt3
+// --------------------------------------------------------------------------------------------------------------------
+
+TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long truncation,
+                                const eckit::Configuration& config ) :
+    grid_( grid ),
+    truncation_( truncation ),
+    precompute_( config.getBool( "precompute", true ) ) {
+    ATLAS_TRACE( "Precompute legendre opt3" );
+#if ATLAS_HAVE_MKL
+    eckit::linalg::LinearAlgebra::backend( "mkl" );  // might want to choose backend with this command
+#else
+    eckit::linalg::LinearAlgebra::backend( "generic" );  // might want to choose backend with this command
+#endif
+    int nlats   = 0;
+    int nlons   = 0;
+    int nlatsNH = nlats_northernHemisphere( nlats );
+    if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
+        grid::StructuredGrid g( grid_ );
+        nlats   = g.ny();
+        nlons   = g.nxmax();
+        nlatsNH = nlats_northernHemisphere( nlats );
+    }
+    else {
+        nlats   = grid_.size();
+        nlons   = grid_.size();
+        nlatsNH = nlats;
+    }
+    std::vector<double> lats( nlatsNH );
+    std::vector<double> lons( nlons );
+    if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
+        grid::StructuredGrid g( grid_ );
+        // TODO: remove legendre_begin and legendre_data (only legendre_ should be needed)
+        for ( size_t j = 0; j < nlatsNH; ++j ) {
+            lats[j] = g.y( j ) * util::Constants::degreesToRadians();
+        }
+        for ( size_t j = 0; j < nlons; ++j ) {
+            lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians();
+        }
+    }
+    else {
+        int j( 0 );
+        for ( PointXY p : grid_.xy() ) {
+            lats[j++] = p.y() * util::Constants::degreesToRadians();
+            lons[j++] = p.x() * util::Constants::degreesToRadians();
+        }
+    }
+    // precomputations for Legendre polynomials:
+    {
+        ATLAS_TRACE( "opt3 precomp Legendre" );
+        int size_sym  = 0;
+        int size_asym = 0;
+        legendre_sym_begin_.resize( truncation_ + 3 );
+        legendre_asym_begin_.resize( truncation_ + 3 );
+        legendre_sym_begin_[0]  = 0;
+        legendre_asym_begin_[0] = 0;
+        for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
+            size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsNH );
+            size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsNH );
+            legendre_sym_begin_[jm + 1]  = size_sym;
+            legendre_asym_begin_[jm + 1] = size_asym;
+        }
+        alloc_aligned( legendre_sym_, size_sym );
+        alloc_aligned( legendre_asym_, size_asym );
+        compute_legendre_polynomialsopt3( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_,
+                                          legendre_sym_begin_.data(), legendre_asym_begin_.data() );
+    }
+
+    // precomputations for Fourier transformations:
+    {
+        ATLAS_TRACE( "opt3 precomp Fourier" );
+        alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons );
+        int idx = 0;
+        for ( int jlon = 0; jlon < nlons; jlon++ ) {
+            for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                fourier_[idx++] = +std::cos( jm * lons[jlon] );  // real part
+                fourier_[idx++] = -std::sin( jm * lons[jlon] );  // imaginary part
+            }
+        }
+    }
+    {
+        ATLAS_TRACE( "opt3 precomp Fourier tp" );
+        alloc_aligned( fouriertp_, 2 * ( truncation_ + 1 ) * nlons );
+        int idx = 0;
+        for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+            for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                fouriertp_[idx++] = +std::cos( jm * lons[jlon] );  // real part
+            }
+            for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                fouriertp_[idx++] = -std::sin( jm * lons[jlon] );  // imaginary part
+            }
+        }
+    }
+#if ATLAS_HAVE_FFTW
+    {
+        ATLAS_TRACE( "opt3 precomp FFTW" );
+        int num_complex = ( nlons / 2 ) + 1;
+        fft_in_         = fftw_alloc_complex( nlats * num_complex );
+        fft_out_        = fftw_alloc_real( nlats * nlons );
+        plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons,
+                                        FFTW_ESTIMATE );
+    }
+#endif
+}  // namespace atlas
+
+// --------------------------------------------------------------------------------------------------------------------
+
+TransLocalopt3::TransLocalopt3( const Grid& grid, const long truncation, const eckit::Configuration& config ) :
+    TransLocalopt3( Cache(), grid, truncation, config ) {}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+TransLocalopt3::~TransLocalopt3() {
+    free_aligned( legendre_sym_ );
+    free_aligned( legendre_asym_ );
+    free_aligned( fourier_ );
+    free_aligned( fouriertp_ );
+#if ATLAS_HAVE_FFTW
+    fftw_destroy_plan( plan_ );
+    fftw_free( fft_in_ );
+    fftw_free( fft_out_ );
+#endif
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt3::invtrans( const Field& spfield, Field& gpfield, const eckit::Configuration& config ) const {
+    NOTIMP;
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt3::invtrans( const FieldSet& spfields, FieldSet& gpfields,
+                               const eckit::Configuration& config ) const {
+    NOTIMP;
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt3::invtrans_grad( const Field& spfield, Field& gradfield, const eckit::Configuration& config ) const {
+    NOTIMP;
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt3::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields,
+                                    const eckit::Configuration& config ) const {
+    NOTIMP;
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt3::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind,
+                                           const eckit::Configuration& config ) const {
+    NOTIMP;
+}
+
+void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[],
+                               const eckit::Configuration& config ) const {
+    invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields, config );
+}
+
+void gp_transposeopt3( const int nb_size, const int nb_fields, const double gp_tmp[], double gp_fields[] ) {
+    for ( int jgp = 0; jgp < nb_size; jgp++ ) {
+        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+            gp_fields[jfld * nb_size + jgp] = gp_tmp[jgp * nb_fields + jfld];
+        }
+    }
+}
+
+//-----------------------------------------------------------------------------
+// Routine to compute the spectral transform by using a localopt3 Fourier
+// transformation
+// for a grid (same latitude for all longitudes, allows to compute Legendre
+// functions
+// once for all longitudes). U and v components are divided by cos(latitude) for
+// nb_vordiv_fields > 0.
+//
+// Author:
+// Andreas Mueller *ECMWF*
+//
+void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
+                                  const double scalar_spectra[], double gp_fields[],
+                                  const eckit::Configuration& config ) const {
+    if ( nb_scalar_fields > 0 ) {
+        int nb_fields = nb_scalar_fields;
+
+        // Transform
+        if ( grid::StructuredGrid g = grid_ ) {
+            ATLAS_TRACE( "invtrans_uv structured opt3" );
+            int nlats    = g.ny();
+            int nlons    = g.nxmax();
+            int nlatsNH  = nlats_northernHemisphere( nlats );
+            auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) {
+                return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
+            };
+            /*auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) {
+                return jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) + jm * nb_fields * 2 * nlats );
+            };*/
+            auto posGemm1 = [&]( int jfld, int imag, int jlat, int jm ) {
+                return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) );
+            };
+            auto posGemm2 = [&]( int jfld, int imag, int jlat, int jm ) {
+                return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
+            };
+            int size_fourier_max = nb_fields * 2 * nlats;
+            double* scl_fourier;
+            alloc_aligned( scl_fourier, size_fourier_max * ( truncation + 1 ) );
+
+            // Legendre transform:
+            {
+                ATLAS_TRACE( "opt3 Legendre dgemm" );
+                for ( int jm = 0; jm <= truncation_; jm++ ) {
+                    int size_sym  = num_n( truncation_ + 1, jm, true );
+                    int size_asym = num_n( truncation_ + 1, jm, false );
+                    int n_imag    = 2;
+                    if ( jm == 0 ) { n_imag = 1; }
+                    int size_fourier = nb_fields * n_imag * nlatsNH;
+                    double* scalar_sym;
+                    double* scalar_asym;
+                    double* scl_fourier_sym;
+                    double* scl_fourier_asym;
+                    alloc_aligned( scalar_sym, n_imag * nb_fields * size_sym );
+                    alloc_aligned( scalar_asym, n_imag * nb_fields * size_asym );
+                    alloc_aligned( scl_fourier_sym, size_fourier );
+                    alloc_aligned( scl_fourier_asym, size_fourier );
+                    {
+                        //ATLAS_TRACE( "opt3 Legendre split" );
+                        int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2;
+                        // the choice between the following two code lines determines whether
+                        // total wavenumbers are summed in an ascending or descending order.
+                        // The trans library in IFS uses descending order because it should
+                        // be more accurate (higher wavenumbers have smaller contributions).
+                        // This also needs to be changed when splitting the spectral data in
+                        // compute_legendre_polynomialsopt3!
+                        //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) {
+                        for ( int jn = truncation_ + 1; jn >= jm; jn-- ) {
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                    idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) );
+                                    if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
+                                    else {
+                                        scalar_asym[ia++] = scalar_spectra[idx + ioff];
+                                    }
+                                }
+                            }
+                        }
+                        ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym );
+                    }
+                    {
+                        eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym );
+                        eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsNH );
+                        eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsNH );
+                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                    }
+                    if ( size_asym > 0 ) {
+                        eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym );
+                        eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsNH );
+                        eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH );
+                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                    }
+#if 0  //ATLAS_HAVE_FFTW
+                    {
+                        //ATLAS_TRACE( "opt3 merge spheres" );
+                        // northern hemisphere:
+                        int ioff = jm * size_fourier_max;
+                        int pos0 = ioff;
+                        int idx  = 0;
+                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                            int poslat = pos0 + 2 * jlat;
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                int posimag = nb_fields * ( imag + poslat );
+                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                    scl_fourier[posFFTW( jfld, imag, jlat, jm )] =
+                                        scl_fourier_sym[idx] + scl_fourier_asym[idx];
+                                }
+                            }
+                        }
+                        // southern hemisphere:
+                        idx  = 0;
+                        pos0 = 2 * ( nlats - 1 ) + ioff;
+                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                            int poslat = pos0 - 2 * jlat;
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                int posimag = nb_fields * ( imag + poslat );
+                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                    int jslat = nlats - jlat - 1;
+                                    scl_fourier[posFFTW( jfld, imag, jslat, jm )] =
+                                        scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                                }
+                            }
+                        }
+                    }
+                    free_aligned( scalar_sym );
+                    free_aligned( scalar_asym );
+                    free_aligned( scl_fourier_sym );
+                    free_aligned( scl_fourier_asym );
+                }
+            }
+            {
+                int num_complex = ( nlons / 2 ) + 1;
+                {
+                    ATLAS_TRACE( "opt3 FFTW" );
+                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        int idx = 0;
+                        for ( int jlat = 0; jlat < nlats; jlat++ ) {
+                            fft_in_[idx++][0] = scl_fourier[posFFTW( jfld, 0, jlat, 0 )];
+                            for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
+                                for ( int imag = 0; imag < 2; imag++ ) {
+                                    if ( jm <= truncation_ ) {
+                                        fft_in_[idx][imag] = scl_fourier[posFFTW( jfld, imag, jlat, jm )] / 2.;
+                                    }
+                                    else {
+                                        fft_in_[idx][imag] = 0.;
+                                    }
+                                }
+                            }
+                        }
+                        fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ );
+                        for ( int j = 0; j < nlats * nlons; j++ ) {
+                            gp_fields[j + jfld * nlats * nlons] = fft_out_[j];
+                        }
+                    }
+                }
+            }
+#else
+#if 0  // 1: better for small number of columns, large truncation; 0: better for large number of columns
+                    {
+                        //ATLAS_TRACE( "opt3 merge spheres" );
+                        // northern hemisphere:
+                        int ioff = jm * size_fourier_max;
+                        int pos0 = ioff;
+                        int idx  = 0;
+                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                            int poslat = pos0 + 2 * jlat;
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                int posimag = nb_fields * ( imag + poslat );
+                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                    scl_fourier[posGemm1( jfld, imag, jlat, jm )] =
+                                        scl_fourier_sym[idx] + scl_fourier_asym[idx];
+                                }
+                            }
+                        }
+                        // southern hemisphere:
+                        idx  = 0;
+                        pos0 = 2 * ( nlats - 1 ) + ioff;
+                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                            int poslat = pos0 - 2 * jlat;
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                int posimag = nb_fields * ( imag + poslat );
+                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                    int jslat = nlats - jlat - 1;
+                                    scl_fourier[posGemm1( jfld, imag, jslat, jm )] =
+                                        scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                                }
+                            }
+                        }
+                    }
+                    free_aligned( scalar_sym );
+                    free_aligned( scalar_asym );
+                    free_aligned( scl_fourier_sym );
+                    free_aligned( scl_fourier_asym );
+                }
+            }
+
+            // Fourier transformation:
+            double* gp_opt3;
+            alloc_aligned( gp_opt3, nb_fields * grid_.size() );
+            {
+                ATLAS_TRACE( "opt3 Fourier dgemm" );
+                eckit::linalg::Matrix A( scl_fourier, nb_fields * g.ny(), ( truncation_ + 1 ) * 2 );
+                eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, g.nxmax() );
+                eckit::linalg::Matrix C( gp_opt3, nb_fields * g.ny(), g.nxmax() );
+                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+            }
+
+            // Transposition in grid point space:
+            {
+                ATLAS_TRACE( "opt3 transposition in gp-space" );
+                int idx = 0;
+                for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
+                    for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                            int pos_tp = jlon + g.nxmax() * ( jlat + g.ny() * ( jfld ) );
+                            //int pos  = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) );
+                            gp_fields[pos_tp] = gp_opt3[idx++];  // = gp_opt3[pos]
+                        }
+                    }
+                }
+            }
+            free_aligned( gp_opt3 );
+#else
+                    {
+                        //ATLAS_TRACE( "opt3 merge spheres" );
+                        // northern hemisphere:
+                        int ioff = jm * size_fourier_max;
+                        int pos0 = ioff;
+                        int idx  = 0;
+                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                            int poslat = pos0 + 2 * jlat;
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                int posimag = nb_fields * ( imag + poslat );
+                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                    scl_fourier[posGemm2( jfld, imag, jlat, jm )] =
+                                        scl_fourier_sym[idx] + scl_fourier_asym[idx];
+                                }
+                            }
+                        }
+                        // southern hemisphere:
+                        idx  = 0;
+                        pos0 = 2 * ( nlats - 1 ) + ioff;
+                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                            int poslat = pos0 - 2 * jlat;
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                int posimag = nb_fields * ( imag + poslat );
+                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                    int jslat = nlats - jlat - 1;
+                                    scl_fourier[posGemm2( jfld, imag, jslat, jm )] =
+                                        scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                                }
+                            }
+                        }
+                    }
+                    free_aligned( scalar_sym );
+                    free_aligned( scalar_asym );
+                    free_aligned( scl_fourier_sym );
+                    free_aligned( scl_fourier_asym );
+                }
+            }
+
+            // Fourier transformation:
+            {
+                ATLAS_TRACE( "opt3 Fourier dgemm" );
+                eckit::linalg::Matrix A( fouriertp_, g.nxmax(), ( truncation_ + 1 ) * 2 );
+                eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * g.ny() );
+                eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() );
+                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+            }
+
+#endif
+#endif
+                    // Computing u,v from U,V:
+                    {
+                        if ( nb_vordiv_fields > 0 ) {
+                            ATLAS_TRACE( "opt3 u,v from U,V" );
+                            std::vector<double> coslats( nlats );
+                            for ( size_t j = 0; j < nlats; ++j ) {
+                                coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
+                            }
+                            int idx = 0;
+                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                                    for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
+                                        gp_fields[idx] /= coslats[jlat];
+                                        idx++;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    free_aligned( scl_fourier );
+                }
+                else {
+                    ATLAS_TRACE( "invtrans_uv unstructured opt3" );
+                    int idx = 0;
+                    for ( PointXY p : grid_.xy() ) {
+                        double lon   = p.x() * util::Constants::degreesToRadians();
+                        double lat   = p.y() * util::Constants::degreesToRadians();
+                        double trcFT = truncation;
+
+                        // Legendre transform:
+                        //invtrans_legendreopt3( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
+                        //                      legReal.data(), legImag.data() );
+
+                        // Fourier transform:
+                        //invtrans_fourieropt3( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
+                        //                     gp_tmp.data() + ( nb_fields * idx ) );
+                        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+                            //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                        }
+                        ++idx;
+                    }
+                }
+            }
+        }  // namespace trans
+
+        // --------------------------------------------------------------------------------------------------------------------
+
+        void TransLocalopt3::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
+                                       const double divergence_spectra[], double gp_fields[],
+                                       const eckit::Configuration& config ) const {
+            invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
+        }
+
+        void extend_truncationopt3( const int old_truncation, const int nb_fields, const double old_spectra[],
+                                    double new_spectra[] ) {
+            int k = 0, k_old = 0;
+            for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
+                for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
+                    for ( int imag = 0; imag < 2; imag++ ) {              // imaginary/real part
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {  // field
+                            if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; }
+                            else {
+                                new_spectra[k++] = old_spectra[k_old++];
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        // --------------------------------------------------------------------------------------------------------------------
+
+        void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[],
+                                       const int nb_vordiv_fields, const double vorticity_spectra[],
+                                       const double divergence_spectra[], double gp_fields[],
+                                       const eckit::Configuration& config ) const {
+            ATLAS_TRACE( "TransLocalopt3::invtrans" );
+            int nb_gp              = grid_.size();
+            int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
+            if ( nb_vordiv_fields > 0 ) {
+                std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
+                std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
+                std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
+                std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
+
+                {
+                    ATLAS_TRACE( "opt3 extend vordiv" );
+                    // increase truncation in vorticity_spectra and divergence_spectra:
+                    extend_truncationopt3( truncation_, nb_vordiv_fields, vorticity_spectra,
+                                           vorticity_spectra_extended.data() );
+                    extend_truncationopt3( truncation_, nb_vordiv_fields, divergence_spectra,
+                                           divergence_spectra_extended.data() );
+                }
+
+                {
+                    ATLAS_TRACE( "vordiv to UV opt3" );
+                    // call vd2uv to compute u and v in spectral space
+                    trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt3" ) );
+                    vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
+                                              divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
+                }
+
+                // perform spectral transform to compute all fields in grid point space
+                invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
+                invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
+                             gp_fields + nb_gp * nb_vordiv_fields, config );
+            }
+            if ( nb_scalar_fields > 0 ) {
+                int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
+                std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
+                extend_truncationopt3( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
+                invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
+                             gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
+            }
+        }
+
+        // --------------------------------------------------------------------------------------------------------------------
+
+        void TransLocalopt3::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config )
+            const {
+            NOTIMP;
+            // Not implemented and not planned.
+            // Use the TransIFS implementation instead.
+        }
+
+        // --------------------------------------------------------------------------------------------------------------------
+
+        void TransLocalopt3::dirtrans( const FieldSet& gpfields, FieldSet& spfields,
+                                       const eckit::Configuration& config ) const {
+            NOTIMP;
+            // Not implemented and not planned.
+            // Use the TransIFS implementation instead.
+        }
+
+        // --------------------------------------------------------------------------------------------------------------------
+
+        void TransLocalopt3::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
+                                                   const eckit::Configuration& config ) const {
+            NOTIMP;
+            // Not implemented and not planned.
+            // Use the TransIFS implementation instead.
+        }
+
+        // --------------------------------------------------------------------------------------------------------------------
+
+        void TransLocalopt3::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
+                                       const eckit::Configuration& ) const {
+            NOTIMP;
+            // Not implemented and not planned.
+            // Use the TransIFS implementation instead.
+        }
+
+        // --------------------------------------------------------------------------------------------------------------------
+
+        void TransLocalopt3::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
+                                       double divergence_spectra[], const eckit::Configuration& ) const {
+            NOTIMP;
+            // Not implemented and not planned.
+            // Use the TransIFS implementation instead.
+        }
+
+        // --------------------------------------------------------------------------------------------------------------------
+
+    }  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h
new file mode 100644
index 000000000..3673cd07c
--- /dev/null
+++ b/src/atlas/trans/localopt3/TransLocalopt3.h
@@ -0,0 +1,133 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#pragma once
+
+#include <vector>
+
+#include "atlas/array.h"
+#include "atlas/grid/Grid.h"
+#include "atlas/trans/Trans.h"
+#if ATLAS_HAVE_FFTW
+#include <fftw3.h>
+#endif
+
+//-----------------------------------------------------------------------------
+// Forward declarations
+
+namespace atlas {
+class Field;
+class FieldSet;
+}  // namespace atlas
+
+//-----------------------------------------------------------------------------
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+
+/// @class TransLocalopt3
+///
+/// Localopt3 spherical harmonics transformations to any grid
+/// Optimisations are present for structured grids
+/// For global grids, please consider using TransIFS instead.
+///
+/// @todo:
+///  - support multiple fields
+///  - support atlas::Field and atlas::FieldSet based on function spaces
+///
+/// @note: Direct transforms are not implemented and cannot be unless
+///        the grid is global. There are no plans to support this at the moment.
+class TransLocalopt3 : public trans::TransImpl {
+public:
+    TransLocalopt3( const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransLocalopt3( const Cache&, const Grid& g, const long truncation,
+                    const eckit::Configuration& = util::NoConfig() );
+
+    virtual ~TransLocalopt3();
+
+    virtual int truncation() const override { return truncation_; }
+    virtual size_t spectralCoefficients() const override { return ( truncation_ + 1 ) * ( truncation_ + 2 ); }
+
+    virtual const Grid& grid() const override { return grid_; }
+
+    virtual void invtrans( const Field& spfield, Field& gpfield,
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void invtrans( const FieldSet& spfields, FieldSet& gpfields,
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void invtrans_grad( const Field& spfield, Field& gradfield,
+                                const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void invtrans_grad( const FieldSet& spfields, FieldSet& gradfields,
+                                const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind,
+                                       const eckit::Configuration& = util::NoConfig() ) const override;
+
+    // -- IFS style API --
+
+    virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
+                           const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[],
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
+                           const double divergence_spectra[], double gp_fields[],
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    // -- NOT SUPPORTED -- //
+
+    virtual void dirtrans( const Field& gpfield, Field& spfield,
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void dirtrans( const FieldSet& gpfields, FieldSet& spfields,
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
+                                       const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
+                           const eckit::Configuration& = util::NoConfig() ) const override;
+
+    virtual void dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
+                           double divergence_spectra[], const eckit::Configuration& = util::NoConfig() ) const override;
+
+private:
+    void invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
+                      const double scalar_spectra[], double gp_fields[],
+                      const eckit::Configuration& = util::NoConfig() ) const;
+
+private:
+    Grid grid_;
+    int truncation_;
+    bool precompute_;
+    double* legendre_sym_;
+    double* legendre_asym_;
+    double* fourier_;
+    double* fouriertp_;
+    std::vector<size_t> legendre_begin_;
+    std::vector<size_t> legendre_sym_begin_;
+    std::vector<size_t> legendre_asym_begin_;
+#if ATLAS_HAVE_FFTW
+    fftw_complex* fft_in_;
+    double* fft_out_;
+    fftw_plan plan_;
+#endif
+};
+
+//-----------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc b/src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc
new file mode 100644
index 000000000..4e7267748
--- /dev/null
+++ b/src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc
@@ -0,0 +1,184 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#include "atlas/trans/localopt3/VorDivToUVLocalopt3.h"
+#include <cmath>  // for std::sqrt
+#include "atlas/functionspace/Spectral.h"
+#include "atlas/runtime/Log.h"
+#include "atlas/util/Earth.h"
+
+using atlas::FunctionSpace;
+using atlas::functionspace::Spectral;
+
+namespace atlas {
+namespace trans {
+
+namespace {
+static VorDivToUVBuilder<VorDivToUVLocalopt3> builder( "localopt3" );
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+// Routine to copy spectral data into internal storage form of IFS trans
+// Ported to C++ by: Andreas Mueller *ECMWF*
+void prfi1bopt3( const int truncation,
+                 const int km,          // zonal wavenumber
+                 const int nb_fields,   // number of fields
+                 const double rspec[],  // spectral data
+                 double pia[] )         // spectral components in data layout of trans library
+{
+    int ilcm = truncation + 1 - km, ioff = ( 2 * truncation - km + 3 ) * km,
+        nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2;
+    for ( int j = 1; j <= ilcm; j++ ) {
+        int inm = ioff + ( ilcm - j ) * 2;
+        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+            int ir = 2 * jfld, ii = ir + 1;
+            pia[ir * nlei1 + j + 1] = rspec[inm * nb_fields + jfld];
+            pia[ii * nlei1 + j + 1] = rspec[( inm + 1 ) * nb_fields + jfld];
+        }
+    }
+
+    for ( int jfld = 0; jfld < 2 * nb_fields; jfld++ ) {
+        pia[jfld * nlei1]            = 0.;
+        pia[jfld * nlei1 + 1]        = 0.;
+        pia[jfld * nlei1 + ilcm + 2] = 0.;
+    }
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+// Routine to compute spectral velocities (*cos(latitude)) out of spectral
+// vorticity and divergence
+// Reference:
+//        ECMWF Research Department documentation of the IFS
+//        Temperton, 1991, MWR 119 p1303
+// Ported to C++ by: Andreas Mueller *ECMWF*
+void vd2uvopt3( const int truncation,               // truncation
+                const int km,                       // zonal wavenumber
+                const int nb_vordiv_fields,         // number of vorticity and divergence fields
+                const double vorticity_spectra[],   // spectral data of vorticity
+                const double divergence_spectra[],  // spectral data of divergence
+                double U[],                         // spectral data of U
+                double V[],                         // spectral data of V
+                const eckit::Configuration& config ) {
+    int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2;
+
+    // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991]
+    std::vector<double> repsnm( ( truncation + 1 ) * ( truncation + 6 ) / 2 );
+    int idx = 0;
+    for ( int jm = 0; jm <= truncation; ++jm ) {
+        for ( int jn = jm; jn <= truncation + 2; ++jn, ++idx ) {
+            repsnm[idx] = std::sqrt( ( jn * jn - jm * jm ) / ( 4. * jn * jn - 1. ) );
+        }
+    }
+    repsnm[0] = 0.;
+
+    // rlapin: constant factor from eq.(2.2) and (2.3) in [Temperton 1991]
+    double ra = util::Earth::radius();
+    std::vector<double> rlapin( truncation + 3 );
+    for ( int jn = 1; jn <= truncation + 2; ++jn ) {
+        rlapin[jn] = -ra * ra / ( jn * ( jn + 1. ) );
+    }
+    rlapin[0] = 0.;
+
+    // inverse the order of repsnm and rlapin for improved accuracy
+    std::vector<double> zepsnm( truncation + 6 );
+    std::vector<double> zlapin( truncation + 6 );
+    std::vector<double> zn( truncation + 6 );
+    for ( int jn = km - 1; jn <= truncation + 2; ++jn ) {
+        int ij = truncation + 3 - jn;
+        if ( jn >= 0 ) {
+            zlapin[ij] = rlapin[jn];
+            if ( jn < km ) { zepsnm[ij] = 0.; }
+            else {
+                zepsnm[ij] = repsnm[jn + ( 2 * truncation - km + 5 ) * km / 2];
+            }
+        }
+        else {
+            zlapin[ij] = 0.;
+            zepsnm[ij] = 0.;
+        }
+        zn[ij] = jn;
+    }
+    zn[0] = truncation + 3;
+
+    // copy spectral data into internal trans storage:
+    std::vector<double> rvor( 2 * nb_vordiv_fields * nlei1 );
+    std::vector<double> rdiv( 2 * nb_vordiv_fields * nlei1 );
+    std::vector<double> ru( 2 * nb_vordiv_fields * nlei1 );
+    std::vector<double> rv( 2 * nb_vordiv_fields * nlei1 );
+    prfi1bopt3( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() );
+    prfi1bopt3( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() );
+
+    // compute eq.(2.12) and (2.13) in [Temperton 1991]:
+    if ( km == 0 ) {
+        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+            int ir = 2 * jfld * nlei1 - 1;
+            for ( int ji = 2; ji < truncation + 4 - km; ++ji ) {
+                double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1];
+                double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1];
+                ru[ir + ji]  = +psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1];
+                rv[ir + ji]  = -psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1];
+            }
+        }
+    }
+    else {
+        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+            int ir = 2 * jfld * nlei1 - 1, ii = ir + nlei1;
+            for ( int ji = 2; ji < truncation + 4 - km; ++ji ) {
+                double chiIm = km * zlapin[ji];
+                double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1];
+                double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1];
+                ru[ir + ji]  = -chiIm * rdiv[ii + ji] + psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1];
+                ru[ii + ji]  = +chiIm * rdiv[ir + ji] + psiM1 * rvor[ii + ji + 1] - psiP1 * rvor[ii + ji - 1];
+                rv[ir + ji]  = -chiIm * rvor[ii + ji] - psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1];
+                rv[ii + ji]  = +chiIm * rvor[ir + ji] - psiM1 * rdiv[ii + ji + 1] + psiP1 * rdiv[ii + ji - 1];
+            }
+        }
+    }
+
+    // copy data from internal storage back to external spectral data:
+    int ilcm = truncation - km;
+    int ioff = ( 2 * truncation - km + 3 ) * km;
+    // ioff: start index of zonal wavenumber km in spectral data
+    double za_r = 1. / util::Earth::radius();
+    for ( int j = 0; j <= ilcm; ++j ) {
+        // ilcm-j = total wavenumber
+        int inm = ioff + ( ilcm - j ) * 2;
+        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+            int ir = 2 * jfld * nlei1, ii = ir + nlei1;
+            int idx = inm * nb_vordiv_fields + jfld;
+            // real part:
+            U[idx] = ru[ir + j + 2] * za_r;
+            V[idx] = rv[ir + j + 2] * za_r;
+            idx += nb_vordiv_fields;
+            // imaginary part:
+            U[idx] = ru[ii + j + 2] * za_r;
+            V[idx] = rv[ii + j + 2] * za_r;
+        }
+    }
+}
+
+void VorDivToUVLocalopt3::execute( const int nb_coeff, const int nb_fields, const double vorticity[],
+                                   const double divergence[], double U[], double V[],
+                                   const eckit::Configuration& config ) const {
+    for ( int jm = 0; jm <= truncation_; ++jm ) {
+        vd2uvopt3( truncation_, jm, nb_fields, vorticity, divergence, U, V, config );
+    }
+}
+
+VorDivToUVLocalopt3::VorDivToUVLocalopt3( const int truncation, const eckit::Configuration& config ) :
+    truncation_( truncation ) {}
+
+VorDivToUVLocalopt3::VorDivToUVLocalopt3( const FunctionSpace& fs, const eckit::Configuration& config ) :
+    truncation_( Spectral( fs ).truncation() ) {}
+
+VorDivToUVLocalopt3::~VorDivToUVLocalopt3() {}
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt3/VorDivToUVLocalopt3.h b/src/atlas/trans/localopt3/VorDivToUVLocalopt3.h
new file mode 100644
index 000000000..44fdc98fe
--- /dev/null
+++ b/src/atlas/trans/localopt3/VorDivToUVLocalopt3.h
@@ -0,0 +1,67 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#pragma once
+
+#include "atlas/trans/VorDivToUV.h"
+
+//-----------------------------------------------------------------------------
+// Forward declarations
+
+namespace atlas {
+class FunctionSpace;
+}
+
+//-----------------------------------------------------------------------------
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+
+class VorDivToUVLocalopt3 : public trans::VorDivToUVImpl {
+public:
+    VorDivToUVLocalopt3( const FunctionSpace&, const eckit::Configuration& = util::NoConfig() );
+    VorDivToUVLocalopt3( int truncation, const eckit::Configuration& = util::NoConfig() );
+
+    virtual ~VorDivToUVLocalopt3();
+
+    virtual int truncation() const override { return truncation_; }
+
+    // pure virtual interface
+
+    // -- IFS style API --
+    // These fields have special interpretation required. You need to know what
+    // you're doing.
+    // See IFS trans library.
+
+    /*!
+ * @brief Compute spectral wind (U/V) from spectral vorticity/divergence
+ *
+ * U = u*cos(lat)
+ * V = v*cos(lat)
+ *
+ * @param nb_fields [in] Number of fields
+ * @param vorticity [in] Spectral vorticity
+ * @param divergence [in] Spectral divergence
+ * @param U [out] Spectral wind U = u*cos(lat)
+ * @param V [out] Spectral wind V = v*cos(lat)
+ */
+    virtual void execute( const int nb_coeff, const int nb_fields, const double vorticity[], const double divergence[],
+                          double U[], double V[], const eckit::Configuration& = util::NoConfig() ) const override;
+
+private:
+    int truncation_;
+};
+
+// ------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 7ad11b882..fd1693cef 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -704,7 +704,7 @@ CASE( "test_transgeneral_with_translib" ) {
 #endif
 #endif
 //-----------------------------------------------------------------------------
-#if 1
+#if 0
 CASE( "test_trans_vordiv_with_translib" ) {
     Log::info() << "test_trans_vordiv_with_translib" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -853,7 +853,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
 }
 #endif
 //-----------------------------------------------------------------------------
-#if 0
+#if 1
 CASE( "test_trans_hires" ) {
     Log::info() << "test_trans_hires" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -865,7 +865,7 @@ CASE( "test_trans_hires" ) {
     // Grid: (Adjust the following line if the test takes too long!)
     Grid g( "F640" );
 #if ATLAS_HAVE_TRANS
-    std::string transTypes[2] = {"localopt2", "ifs"};
+    std::string transTypes[4] = {"localopt", "localopt2", "localopt3", "ifs"};
     //std::string transTypes[3] = {"localopt", "localopt2", "ifs"};
 #else
     std::string transTypes[1] = {"localopt2"};

From 3c7070308a386c7b3a209c6f3294c70187bff02e Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Tue, 20 Mar 2018 11:53:19 +0000
Subject: [PATCH 029/123] got rid of intermediate variable in FFT. Creating the
 FFTW plan now in invtrans_uv.

---
 .../trans/localopt/LegendrePolynomialsopt.cc  |   2 +-
 src/atlas/trans/localopt/TransLocalopt.cc     | 342 +++++++++---------
 src/atlas/trans/localopt/TransLocalopt.h      |   8 -
 src/atlas/trans/localopt2/TransLocalopt2.cc   | 270 +++++++-------
 src/tests/trans/test_transgeneral.cc          |   4 +-
 5 files changed, 301 insertions(+), 325 deletions(-)

diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
index a31d893c6..fc0e3d879 100644
--- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
+++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
@@ -157,7 +157,7 @@ void compute_legendre_polynomialsopt(
             // take factor 2 for m > 0 into account:
             for ( int jm = 1; jm <= trc; ++jm ) {
                 for ( int jn = jm; jn <= trc; ++jn ) {
-                    legpol[idxmn( jm, jn )] *= 2.;
+                    //legpol[idxmn( jm, jn )] *= 2.;
                 }
             }
             for ( int jm = 0; jm <= trc; jm++ ) {
diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index 0b9e1bf5f..0be4d4fe7 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -26,6 +26,9 @@
 #if ATLAS_HAVE_MKL
 #include "mkl.h"
 #endif
+#if ATLAS_HAVE_FFTW
+#include <fftw3.h>
+#endif
 
 namespace atlas {
 namespace trans {
@@ -175,16 +178,6 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
             }
         }
     }
-#if ATLAS_HAVE_FFTW
-    {
-        ATLAS_TRACE( "opt precomp FFTW" );
-        int num_complex = ( nlons / 2 ) + 1;
-        fft_in_         = fftw_alloc_complex( nlats * num_complex );
-        fft_out_        = fftw_alloc_real( nlats * nlons );
-        plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons,
-                                        FFTW_ESTIMATE );
-    }
-#endif
 }  // namespace atlas
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -199,11 +192,6 @@ TransLocalopt::~TransLocalopt() {
     free_aligned( legendre_asym_ );
     free_aligned( fourier_ );
     free_aligned( fouriertp_ );
-#if ATLAS_HAVE_FFTW
-    fftw_destroy_plan( plan_ );
-    fftw_free( fft_in_ );
-    fftw_free( fft_out_ );
-#endif
 }
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -290,6 +278,21 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
             double* scl_fourier;
             alloc_aligned( scl_fourier, size_fourier_max * ( truncation + 1 ) );
 
+#if ATLAS_HAVE_FFTW
+            int num_complex      = ( nlons / 2 ) + 1;
+            fftw_complex* fft_in = fftw_alloc_complex( nlats * num_complex * nb_fields );
+            double* fft_out      = fftw_alloc_real( nlats * nlons * nb_fields );
+            fftw_plan plan       = fftw_plan_many_dft_c2r( 1, &nlons, nlats * nb_fields, fft_in, NULL, 1, num_complex,
+                                                     fft_out, NULL, 1, nlons, FFTW_ESTIMATE );
+            for ( int j = 0; j < nlats * num_complex * nb_fields; j++ ) {
+                for ( int imag = 0; imag < 2; imag++ ) {
+                    fft_in[j][imag] = 0.;
+                }
+            }
+            auto posFFTWin = [&]( int jfld, int jlat, int jm ) {
+                return jm + num_complex * ( jlat + nlats * ( jfld ) );
+            };
+#endif
             // Legendre transform:
             {
                 ATLAS_TRACE( "opt Legendre dgemm" );
@@ -346,29 +349,22 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                     {
                         //ATLAS_TRACE( "opt merge spheres" );
                         // northern hemisphere:
-                        int ioff = jm * size_fourier_max;
-                        int pos0 = ioff;
-                        int idx  = 0;
+                        int idx = 0;
                         for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                            int poslat = pos0 + 2 * jlat;
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                int posimag = nb_fields * ( imag + poslat );
                                 for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
-                                    scl_fourier[posFFTW( jfld, imag, jlat, jm )] =
+                                    fft_in[posFFTWin( jfld, jlat, jm )][imag] =
                                         scl_fourier_sym[idx] + scl_fourier_asym[idx];
                                 }
                             }
                         }
                         // southern hemisphere:
-                        idx  = 0;
-                        pos0 = 2 * ( nlats - 1 ) + ioff;
+                        idx = 0;
                         for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                            int poslat = pos0 - 2 * jlat;
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                int posimag = nb_fields * ( imag + poslat );
                                 for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
                                     int jslat = nlats - jlat - 1;
-                                    scl_fourier[posFFTW( jfld, imag, jslat, jm )] =
+                                    fft_in[posFFTWin( jfld, jslat, jm )][imag] =
                                         scl_fourier_sym[idx] - scl_fourier_asym[idx];
                                 }
                             }
@@ -384,28 +380,21 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                 int num_complex = ( nlons / 2 ) + 1;
                 {
                     ATLAS_TRACE( "opt FFTW" );
-                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                        int idx = 0;
-                        for ( int jlat = 0; jlat < nlats; jlat++ ) {
-                            fft_in_[idx++][0] = scl_fourier[posFFTW( jfld, 0, jlat, 0 )];
-                            for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
-                                for ( int imag = 0; imag < 2; imag++ ) {
-                                    if ( jm <= truncation_ ) {
-                                        fft_in_[idx][imag] = scl_fourier[posFFTW( jfld, imag, jlat, jm )] / 2.;
-                                    }
-                                    else {
-                                        fft_in_[idx][imag] = 0.;
-                                    }
-                                }
-                            }
-                        }
-                        fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ );
-                        for ( int j = 0; j < nlats * nlons; j++ ) {
-                            gp_fields[j + jfld * nlats * nlons] = fft_out_[j];
+                    {
+                        ATLAS_TRACE( "fftw_execute" );
+                        fftw_execute( plan );
+                    }
+                    {
+                        ATLAS_TRACE( "read fft_out" );
+                        for ( int j = 0; j < nlats * nlons * nb_fields; j++ ) {
+                            gp_fields[j] = fft_out[j];
                         }
                     }
                 }
             }
+            fftw_destroy_plan( plan );
+            fftw_free( fft_in );
+            fftw_free( fft_out );
 #else
 #if 0  // 1: better for small number of columns, large truncation; 0: better for large number of columns
                     {
@@ -522,167 +511,164 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
 
 #endif
 #endif
-                    // Computing u,v from U,V:
-                    {
-                        if ( nb_vordiv_fields > 0 ) {
-                            ATLAS_TRACE( "opt u,v from U,V" );
-                            std::vector<double> coslats( nlats );
-                            for ( size_t j = 0; j < nlats; ++j ) {
-                                coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
-                            }
-                            int idx = 0;
-                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                                    for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
-                                        gp_fields[idx] /= coslats[jlat];
-                                        idx++;
-                                    }
-                                }
-                            }
-                        }
+            // Computing u,v from U,V:
+            {
+                if ( nb_vordiv_fields > 0 ) {
+                    ATLAS_TRACE( "opt u,v from U,V" );
+                    std::vector<double> coslats( nlats );
+                    for ( size_t j = 0; j < nlats; ++j ) {
+                        coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
                     }
-                    free_aligned( scl_fourier );
-                }
-                else {
-                    ATLAS_TRACE( "invtrans_uv unstructured opt" );
                     int idx = 0;
-                    for ( PointXY p : grid_.xy() ) {
-                        double lon   = p.x() * util::Constants::degreesToRadians();
-                        double lat   = p.y() * util::Constants::degreesToRadians();
-                        double trcFT = truncation;
-
-                        // Legendre transform:
-                        //invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
-                        //                      legReal.data(), legImag.data() );
-
-                        // Fourier transform:
-                        //invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
-                        //                     gp_tmp.data() + ( nb_fields * idx ) );
-                        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-                            //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                            for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
+                                gp_fields[idx] /= coslats[jlat];
+                                idx++;
+                            }
                         }
-                        ++idx;
                     }
                 }
             }
-        }  // namespace trans
-
-        // --------------------------------------------------------------------------------------------------------------------
-
-        void TransLocalopt::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
-                                      const double divergence_spectra[], double gp_fields[],
-                                      const eckit::Configuration& config ) const {
-            invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
+            free_aligned( scl_fourier );
         }
-
-        void extend_truncationopt( const int old_truncation, const int nb_fields, const double old_spectra[],
-                                   double new_spectra[] ) {
-            int k = 0, k_old = 0;
-            for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
-                for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
-                    for ( int imag = 0; imag < 2; imag++ ) {              // imaginary/real part
-                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {  // field
-                            if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; }
-                            else {
-                                new_spectra[k++] = old_spectra[k_old++];
-                            }
-                        }
-                    }
+        else {
+            ATLAS_TRACE( "invtrans_uv unstructured opt" );
+            int idx = 0;
+            for ( PointXY p : grid_.xy() ) {
+                double lon   = p.x() * util::Constants::degreesToRadians();
+                double lat   = p.y() * util::Constants::degreesToRadians();
+                double trcFT = truncation;
+
+                // Legendre transform:
+                //invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
+                //                      legReal.data(), legImag.data() );
+
+                // Fourier transform:
+                //invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
+                //                     gp_tmp.data() + ( nb_fields * idx ) );
+                for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+                    //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
                 }
+                ++idx;
             }
         }
+    }
+}  // namespace trans
 
-        // --------------------------------------------------------------------------------------------------------------------
-
-        void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_spectra[],
-                                      const int nb_vordiv_fields, const double vorticity_spectra[],
-                                      const double divergence_spectra[], double gp_fields[],
-                                      const eckit::Configuration& config ) const {
-            ATLAS_TRACE( "TransLocalopt::invtrans" );
-            int nb_gp              = grid_.size();
-            int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
-            if ( nb_vordiv_fields > 0 ) {
-                std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
-                std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
-                std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
-                std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
+// --------------------------------------------------------------------------------------------------------------------
 
-                {
-                    ATLAS_TRACE( "opt extend vordiv" );
-                    // increase truncation in vorticity_spectra and divergence_spectra:
-                    extend_truncationopt( truncation_, nb_vordiv_fields, vorticity_spectra,
-                                          vorticity_spectra_extended.data() );
-                    extend_truncationopt( truncation_, nb_vordiv_fields, divergence_spectra,
-                                          divergence_spectra_extended.data() );
-                }
+void TransLocalopt::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
+                              const double divergence_spectra[], double gp_fields[],
+                              const eckit::Configuration& config ) const {
+    invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
+}
 
-                {
-                    ATLAS_TRACE( "vordiv to UV opt" );
-                    // call vd2uv to compute u and v in spectral space
-                    trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) );
-                    vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
-                                              divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
+void extend_truncationopt( const int old_truncation, const int nb_fields, const double old_spectra[],
+                           double new_spectra[] ) {
+    int k = 0, k_old = 0;
+    for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
+        for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
+            for ( int imag = 0; imag < 2; imag++ ) {              // imaginary/real part
+                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {  // field
+                    if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; }
+                    else {
+                        new_spectra[k++] = old_spectra[k_old++];
+                    }
                 }
-
-                // perform spectral transform to compute all fields in grid point space
-                invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
-                invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
-                             gp_fields + nb_gp * nb_vordiv_fields, config );
-            }
-            if ( nb_scalar_fields > 0 ) {
-                int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
-                std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
-                extend_truncationopt( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
-                invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
-                             gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
             }
         }
+    }
+}
 
-        // --------------------------------------------------------------------------------------------------------------------
+// --------------------------------------------------------------------------------------------------------------------
 
-        void TransLocalopt::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const {
-            NOTIMP;
-            // Not implemented and not planned.
-            // Use the TransIFS implementation instead.
+void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
+                              const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
+                              const eckit::Configuration& config ) const {
+    ATLAS_TRACE( "TransLocalopt::invtrans" );
+    int nb_gp              = grid_.size();
+    int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
+    if ( nb_vordiv_fields > 0 ) {
+        std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
+        std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
+        std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
+        std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
+
+        {
+            ATLAS_TRACE( "opt extend vordiv" );
+            // increase truncation in vorticity_spectra and divergence_spectra:
+            extend_truncationopt( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() );
+            extend_truncationopt( truncation_, nb_vordiv_fields, divergence_spectra,
+                                  divergence_spectra_extended.data() );
         }
 
-        // --------------------------------------------------------------------------------------------------------------------
-
-        void TransLocalopt::dirtrans( const FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config )
-            const {
-            NOTIMP;
-            // Not implemented and not planned.
-            // Use the TransIFS implementation instead.
+        {
+            ATLAS_TRACE( "vordiv to UV opt" );
+            // call vd2uv to compute u and v in spectral space
+            trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) );
+            vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
+                                      divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
         }
 
-        // --------------------------------------------------------------------------------------------------------------------
+        // perform spectral transform to compute all fields in grid point space
+        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
+        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
+                     gp_fields + nb_gp * nb_vordiv_fields, config );
+    }
+    if ( nb_scalar_fields > 0 ) {
+        int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
+        std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
+        extend_truncationopt( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
+        invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
+                     gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
+    }
+}
 
-        void TransLocalopt::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
-                                                  const eckit::Configuration& config ) const {
-            NOTIMP;
-            // Not implemented and not planned.
-            // Use the TransIFS implementation instead.
-        }
+// --------------------------------------------------------------------------------------------------------------------
 
-        // --------------------------------------------------------------------------------------------------------------------
+void TransLocalopt::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
 
-        void TransLocalopt::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
-                                      const eckit::Configuration& ) const {
-            NOTIMP;
-            // Not implemented and not planned.
-            // Use the TransIFS implementation instead.
-        }
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt::dirtrans( const FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
 
-        // --------------------------------------------------------------------------------------------------------------------
+// --------------------------------------------------------------------------------------------------------------------
 
-        void TransLocalopt::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
-                                      double divergence_spectra[], const eckit::Configuration& ) const {
-            NOTIMP;
-            // Not implemented and not planned.
-            // Use the TransIFS implementation instead.
-        }
+void TransLocalopt::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
+                                          const eckit::Configuration& config ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
 
-        // --------------------------------------------------------------------------------------------------------------------
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
+                              const eckit::Configuration& ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
+                              double divergence_spectra[], const eckit::Configuration& ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
+
+// --------------------------------------------------------------------------------------------------------------------
 
-    }  // namespace trans
+}  // namespace trans
 }  // namespace atlas
diff --git a/src/atlas/trans/localopt/TransLocalopt.h b/src/atlas/trans/localopt/TransLocalopt.h
index fce71261a..55b7a074e 100644
--- a/src/atlas/trans/localopt/TransLocalopt.h
+++ b/src/atlas/trans/localopt/TransLocalopt.h
@@ -15,9 +15,6 @@
 #include "atlas/array.h"
 #include "atlas/grid/Grid.h"
 #include "atlas/trans/Trans.h"
-#if ATLAS_HAVE_FFTW
-#include <fftw3.h>
-#endif
 
 //-----------------------------------------------------------------------------
 // Forward declarations
@@ -119,11 +116,6 @@ class TransLocalopt : public trans::TransImpl {
     std::vector<size_t> legendre_begin_;
     std::vector<size_t> legendre_sym_begin_;
     std::vector<size_t> legendre_asym_begin_;
-#if ATLAS_HAVE_FFTW
-    fftw_complex* fft_in_;
-    double* fft_out_;
-    fftw_plan plan_;
-#endif
 };
 
 //-----------------------------------------------------------------------------
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index f4672becc..d3bd6d45b 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -343,7 +343,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                         eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
-#if 0  //ATLAS_HAVE_FFTW
+#if 1  //ATLAS_HAVE_FFTW
                     {
                         //ATLAS_TRACE( "opt2 merge spheres" );
                         // northern hemisphere:
@@ -523,168 +523,166 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
 
 #endif
 #endif
-                    // Computing u,v from U,V:
-                    {
-                        if ( nb_vordiv_fields > 0 ) {
-                            ATLAS_TRACE( "opt2 u,v from U,V" );
-                            std::vector<double> coslats( nlats );
-                            for ( size_t j = 0; j < nlats; ++j ) {
-                                coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
-                            }
-                            int idx = 0;
-                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                                    for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
-                                        gp_fields[idx] /= coslats[jlat];
-                                        idx++;
-                                    }
-                                }
-                            }
-                        }
+            // Computing u,v from U,V:
+            {
+                if ( nb_vordiv_fields > 0 ) {
+                    ATLAS_TRACE( "opt2 u,v from U,V" );
+                    std::vector<double> coslats( nlats );
+                    for ( size_t j = 0; j < nlats; ++j ) {
+                        coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
                     }
-                    free_aligned( scl_fourier );
-                }
-                else {
-                    ATLAS_TRACE( "invtrans_uv unstructured opt2" );
                     int idx = 0;
-                    for ( PointXY p : grid_.xy() ) {
-                        double lon   = p.x() * util::Constants::degreesToRadians();
-                        double lat   = p.y() * util::Constants::degreesToRadians();
-                        double trcFT = truncation;
-
-                        // Legendre transform:
-                        //invtrans_legendreopt2( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
-                        //                      legReal.data(), legImag.data() );
-
-                        // Fourier transform:
-                        //invtrans_fourieropt2( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
-                        //                     gp_tmp.data() + ( nb_fields * idx ) );
-                        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-                            //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                            for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
+                                gp_fields[idx] /= coslats[jlat];
+                                idx++;
+                            }
                         }
-                        ++idx;
                     }
                 }
             }
-        }  // namespace trans
-
-        // --------------------------------------------------------------------------------------------------------------------
-
-        void TransLocalopt2::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
-                                       const double divergence_spectra[], double gp_fields[],
-                                       const eckit::Configuration& config ) const {
-            invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
+            free_aligned( scl_fourier );
         }
-
-        void extend_truncationopt2( const int old_truncation, const int nb_fields, const double old_spectra[],
-                                    double new_spectra[] ) {
-            int k = 0, k_old = 0;
-            for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
-                for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
-                    for ( int imag = 0; imag < 2; imag++ ) {              // imaginary/real part
-                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {  // field
-                            if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; }
-                            else {
-                                new_spectra[k++] = old_spectra[k_old++];
-                            }
-                        }
-                    }
+        else {
+            ATLAS_TRACE( "invtrans_uv unstructured opt2" );
+            int idx = 0;
+            for ( PointXY p : grid_.xy() ) {
+                double lon   = p.x() * util::Constants::degreesToRadians();
+                double lat   = p.y() * util::Constants::degreesToRadians();
+                double trcFT = truncation;
+
+                // Legendre transform:
+                //invtrans_legendreopt2( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
+                //                      legReal.data(), legImag.data() );
+
+                // Fourier transform:
+                //invtrans_fourieropt2( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
+                //                     gp_tmp.data() + ( nb_fields * idx ) );
+                for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+                    //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
                 }
+                ++idx;
             }
         }
+    }
+}  // namespace trans
 
-        // --------------------------------------------------------------------------------------------------------------------
-
-        void TransLocalopt2::invtrans( const int nb_scalar_fields, const double scalar_spectra[],
-                                       const int nb_vordiv_fields, const double vorticity_spectra[],
-                                       const double divergence_spectra[], double gp_fields[],
-                                       const eckit::Configuration& config ) const {
-            ATLAS_TRACE( "TransLocalopt2::invtrans" );
-            int nb_gp              = grid_.size();
-            int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
-            if ( nb_vordiv_fields > 0 ) {
-                std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
-                std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
-                std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
-                std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
+// --------------------------------------------------------------------------------------------------------------------
 
-                {
-                    ATLAS_TRACE( "opt2 extend vordiv" );
-                    // increase truncation in vorticity_spectra and divergence_spectra:
-                    extend_truncationopt2( truncation_, nb_vordiv_fields, vorticity_spectra,
-                                           vorticity_spectra_extended.data() );
-                    extend_truncationopt2( truncation_, nb_vordiv_fields, divergence_spectra,
-                                           divergence_spectra_extended.data() );
-                }
+void TransLocalopt2::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
+                               const double divergence_spectra[], double gp_fields[],
+                               const eckit::Configuration& config ) const {
+    invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
+}
 
-                {
-                    ATLAS_TRACE( "vordiv to UV opt2" );
-                    // call vd2uv to compute u and v in spectral space
-                    trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt2" ) );
-                    vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
-                                              divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
+void extend_truncationopt2( const int old_truncation, const int nb_fields, const double old_spectra[],
+                            double new_spectra[] ) {
+    int k = 0, k_old = 0;
+    for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
+        for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
+            for ( int imag = 0; imag < 2; imag++ ) {              // imaginary/real part
+                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {  // field
+                    if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; }
+                    else {
+                        new_spectra[k++] = old_spectra[k_old++];
+                    }
                 }
-
-                // perform spectral transform to compute all fields in grid point space
-                invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
-                invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
-                             gp_fields + nb_gp * nb_vordiv_fields, config );
-            }
-            if ( nb_scalar_fields > 0 ) {
-                int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
-                std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
-                extend_truncationopt2( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
-                invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
-                             gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
             }
         }
+    }
+}
 
-        // --------------------------------------------------------------------------------------------------------------------
+// --------------------------------------------------------------------------------------------------------------------
 
-        void TransLocalopt2::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config )
-            const {
-            NOTIMP;
-            // Not implemented and not planned.
-            // Use the TransIFS implementation instead.
+void TransLocalopt2::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
+                               const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
+                               const eckit::Configuration& config ) const {
+    ATLAS_TRACE( "TransLocalopt2::invtrans" );
+    int nb_gp              = grid_.size();
+    int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
+    if ( nb_vordiv_fields > 0 ) {
+        std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
+        std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
+        std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
+        std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
+
+        {
+            ATLAS_TRACE( "opt2 extend vordiv" );
+            // increase truncation in vorticity_spectra and divergence_spectra:
+            extend_truncationopt2( truncation_, nb_vordiv_fields, vorticity_spectra,
+                                   vorticity_spectra_extended.data() );
+            extend_truncationopt2( truncation_, nb_vordiv_fields, divergence_spectra,
+                                   divergence_spectra_extended.data() );
         }
 
-        // --------------------------------------------------------------------------------------------------------------------
-
-        void TransLocalopt2::dirtrans( const FieldSet& gpfields, FieldSet& spfields,
-                                       const eckit::Configuration& config ) const {
-            NOTIMP;
-            // Not implemented and not planned.
-            // Use the TransIFS implementation instead.
+        {
+            ATLAS_TRACE( "vordiv to UV opt2" );
+            // call vd2uv to compute u and v in spectral space
+            trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt2" ) );
+            vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
+                                      divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
         }
 
-        // --------------------------------------------------------------------------------------------------------------------
+        // perform spectral transform to compute all fields in grid point space
+        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
+        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
+                     gp_fields + nb_gp * nb_vordiv_fields, config );
+    }
+    if ( nb_scalar_fields > 0 ) {
+        int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
+        std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
+        extend_truncationopt2( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
+        invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
+                     gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
+    }
+}
 
-        void TransLocalopt2::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
-                                                   const eckit::Configuration& config ) const {
-            NOTIMP;
-            // Not implemented and not planned.
-            // Use the TransIFS implementation instead.
-        }
+// --------------------------------------------------------------------------------------------------------------------
 
-        // --------------------------------------------------------------------------------------------------------------------
+void TransLocalopt2::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
 
-        void TransLocalopt2::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
-                                       const eckit::Configuration& ) const {
-            NOTIMP;
-            // Not implemented and not planned.
-            // Use the TransIFS implementation instead.
-        }
+// --------------------------------------------------------------------------------------------------------------------
 
-        // --------------------------------------------------------------------------------------------------------------------
+void TransLocalopt2::dirtrans( const FieldSet& gpfields, FieldSet& spfields,
+                               const eckit::Configuration& config ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
 
-        void TransLocalopt2::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
-                                       double divergence_spectra[], const eckit::Configuration& ) const {
-            NOTIMP;
-            // Not implemented and not planned.
-            // Use the TransIFS implementation instead.
-        }
+// --------------------------------------------------------------------------------------------------------------------
 
-        // --------------------------------------------------------------------------------------------------------------------
+void TransLocalopt2::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
+                                           const eckit::Configuration& config ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt2::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
+                               const eckit::Configuration& ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt2::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
+                               double divergence_spectra[], const eckit::Configuration& ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
+
+// --------------------------------------------------------------------------------------------------------------------
 
-    }  // namespace trans
+}  // namespace trans
 }  // namespace atlas
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index fd1693cef..9e7540029 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -704,7 +704,7 @@ CASE( "test_transgeneral_with_translib" ) {
 #endif
 #endif
 //-----------------------------------------------------------------------------
-#if 0
+#if 1
 CASE( "test_trans_vordiv_with_translib" ) {
     Log::info() << "test_trans_vordiv_with_translib" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -853,7 +853,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
 }
 #endif
 //-----------------------------------------------------------------------------
-#if 1
+#if 0
 CASE( "test_trans_hires" ) {
     Log::info() << "test_trans_hires" << std::endl;
     // test transgeneral by comparing its result with the trans library

From 40454fe6bcd2f7156592478815fa95b647422a5a Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Tue, 20 Mar 2018 17:12:31 +0000
Subject: [PATCH 030/123] TO BE DELETED! Commented out Legendre polynomials for
 testing purposes

---
 src/atlas/trans/localopt/TransLocalopt.cc   | 12 +++++---
 src/atlas/trans/localopt2/TransLocalopt2.cc | 32 +++++++++++----------
 src/atlas/trans/localopt3/TransLocalopt3.cc | 27 ++++++++---------
 src/tests/trans/test_transgeneral.cc        | 15 +++++-----
 4 files changed, 47 insertions(+), 39 deletions(-)

diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index 0be4d4fe7..087dcee41 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -149,11 +149,12 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
         }
         alloc_aligned( legendre_sym_, size_sym );
         alloc_aligned( legendre_asym_, size_asym );
-        compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_,
-                                         legendre_sym_begin_.data(), legendre_asym_begin_.data() );
+        //compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_,
+        //                                 legendre_sym_begin_.data(), legendre_asym_begin_.data() );
     }
 
-    // precomputations for Fourier transformations:
+        // precomputations for Fourier transformations:
+#if !ATLAS_HAVE_FFTW
     {
         ATLAS_TRACE( "opt precomp Fourier" );
         alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons );
@@ -178,6 +179,7 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
             }
         }
     }
+#endif
 }  // namespace atlas
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -190,8 +192,10 @@ TransLocalopt::TransLocalopt( const Grid& grid, const long truncation, const eck
 TransLocalopt::~TransLocalopt() {
     free_aligned( legendre_sym_ );
     free_aligned( legendre_asym_ );
+#if !ATLAS_HAVE_FFTW
     free_aligned( fourier_ );
     free_aligned( fouriertp_ );
+#endif
 }
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -347,7 +351,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                     }
 #if 1  //ATLAS_HAVE_FFTW
                     {
-                        //ATLAS_TRACE( "opt merge spheres" );
+                        ATLAS_TRACE( "opt merge spheres" );
                         // northern hemisphere:
                         int idx = 0;
                         for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index d3bd6d45b..66b64d910 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -146,11 +146,21 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
         }
         alloc_aligned( legendre_sym_, size_sym );
         alloc_aligned( legendre_asym_, size_asym );
-        compute_legendre_polynomialsopt2( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_,
-                                          legendre_sym_begin_.data(), legendre_asym_begin_.data() );
+        //compute_legendre_polynomialsopt2( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_,
+        //                                  legendre_sym_begin_.data(), legendre_asym_begin_.data() );
     }
 
-    // precomputations for Fourier transformations:
+        // precomputations for Fourier transformations:
+#if ATLAS_HAVE_FFTW
+    {
+        ATLAS_TRACE( "opt2 precomp FFTW" );
+        int num_complex = ( nlons / 2 ) + 1;
+        fft_in_         = fftw_alloc_complex( nlats * num_complex );
+        fft_out_        = fftw_alloc_real( nlats * nlons );
+        plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons,
+                                        FFTW_ESTIMATE );
+    }
+#else
     {
         ATLAS_TRACE( "opt2 precomp Fourier" );
         alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons );
@@ -175,15 +185,6 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
             }
         }
     }
-#if ATLAS_HAVE_FFTW
-    {
-        ATLAS_TRACE( "opt2 precomp FFTW" );
-        int num_complex = ( nlons / 2 ) + 1;
-        fft_in_         = fftw_alloc_complex( nlats * num_complex );
-        fft_out_        = fftw_alloc_real( nlats * nlons );
-        plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons,
-                                        FFTW_ESTIMATE );
-    }
 #endif
 }  // namespace atlas
 
@@ -197,12 +198,13 @@ TransLocalopt2::TransLocalopt2( const Grid& grid, const long truncation, const e
 TransLocalopt2::~TransLocalopt2() {
     free_aligned( legendre_sym_ );
     free_aligned( legendre_asym_ );
-    free_aligned( fourier_ );
-    free_aligned( fouriertp_ );
 #if ATLAS_HAVE_FFTW
     fftw_destroy_plan( plan_ );
     fftw_free( fft_in_ );
     fftw_free( fft_out_ );
+#else
+    free_aligned( fourier_ );
+    free_aligned( fouriertp_ );
 #endif
 }
 
@@ -345,7 +347,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                     }
 #if 1  //ATLAS_HAVE_FFTW
                     {
-                        //ATLAS_TRACE( "opt2 merge spheres" );
+                        ATLAS_TRACE( "opt2 merge spheres" );
                         // northern hemisphere:
                         int ioff = jm * size_fourier_max;
                         int pos0 = ioff;
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index e65ac2e63..3cf7299bc 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -146,11 +146,21 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         }
         alloc_aligned( legendre_sym_, size_sym );
         alloc_aligned( legendre_asym_, size_asym );
-        compute_legendre_polynomialsopt3( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_,
-                                          legendre_sym_begin_.data(), legendre_asym_begin_.data() );
+        //compute_legendre_polynomialsopt3( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_,
+        //                                  legendre_sym_begin_.data(), legendre_asym_begin_.data() );
     }
 
-    // precomputations for Fourier transformations:
+        // precomputations for Fourier transformations:
+#if 0  //ATLAS_HAVE_FFTW
+    {
+        ATLAS_TRACE( "opt3 precomp FFTW" );
+        int num_complex = ( nlons / 2 ) + 1;
+        fft_in_         = fftw_alloc_complex( nlats * num_complex );
+        fft_out_        = fftw_alloc_real( nlats * nlons );
+        plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons,
+                                        FFTW_ESTIMATE );
+    }
+#else
     {
         ATLAS_TRACE( "opt3 precomp Fourier" );
         alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons );
@@ -175,15 +185,6 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
             }
         }
     }
-#if ATLAS_HAVE_FFTW
-    {
-        ATLAS_TRACE( "opt3 precomp FFTW" );
-        int num_complex = ( nlons / 2 ) + 1;
-        fft_in_         = fftw_alloc_complex( nlats * num_complex );
-        fft_out_        = fftw_alloc_real( nlats * nlons );
-        plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons,
-                                        FFTW_ESTIMATE );
-    }
 #endif
 }  // namespace atlas
 
@@ -199,7 +200,7 @@ TransLocalopt3::~TransLocalopt3() {
     free_aligned( legendre_asym_ );
     free_aligned( fourier_ );
     free_aligned( fouriertp_ );
-#if ATLAS_HAVE_FFTW
+#if 0  //ATLAS_HAVE_FFTW
     fftw_destroy_plan( plan_ );
     fftw_free( fft_in_ );
     fftw_free( fft_out_ );
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 9e7540029..2f17a4cd1 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -704,7 +704,7 @@ CASE( "test_transgeneral_with_translib" ) {
 #endif
 #endif
 //-----------------------------------------------------------------------------
-#if 1
+#if 0
 CASE( "test_trans_vordiv_with_translib" ) {
     Log::info() << "test_trans_vordiv_with_translib" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -853,7 +853,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
 }
 #endif
 //-----------------------------------------------------------------------------
-#if 0
+#if 1
 CASE( "test_trans_hires" ) {
     Log::info() << "test_trans_hires" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -863,9 +863,10 @@ CASE( "test_trans_hires" ) {
     double tolerance  = 1.e-13;
 
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F640" );
+    Grid g( "F1280" );
 #if ATLAS_HAVE_TRANS
-    std::string transTypes[4] = {"localopt", "localopt2", "localopt3", "ifs"};
+    //std::string transTypes[4] = {"localopt", "localopt2", "localopt3", "ifs"};
+    std::string transTypes[3] = {"localopt", "localopt2", "localopt3"};
     //std::string transTypes[3] = {"localopt", "localopt2", "ifs"};
 #else
     std::string transTypes[1] = {"localopt2"};
@@ -878,9 +879,9 @@ CASE( "test_trans_hires" ) {
     int nb_scalar = 1, nb_vordiv = 0;
 
     for ( auto transType : transTypes ) {
-        if ( transType == "localopt2" ) { trc = ndgl / 2. - 2; }
+        if ( transType == "ifs" ) { trc = ndgl / 2. - 1; }
         else {
-            trc = ndgl / 2. - 1;
+            trc = ndgl / 2. - 2;
         }
         int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
         int icase = 0;
@@ -899,7 +900,7 @@ CASE( "test_trans_hires" ) {
                             for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
 
                                 if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. &&
-                                     icase < 25 ) {
+                                     icase < 1 ) {
                                     auto start = std::chrono::system_clock::now();
                                     std::vector<double> sp( 2 * N * nb_scalar );
                                     std::vector<double> gp( nb_all * g.size() );

From c20fca7c55bc0004b1d1f10f5da40faf61ac2830 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Wed, 21 Mar 2018 16:01:00 +0000
Subject: [PATCH 031/123] opt and opt2 are now doing the Legendre dgemm in a
 transposed way; opt: Legendre is writing data streight to fft_in and fftw to
 gp_fields

---
 .../trans/localopt/LegendrePolynomialsopt.cc  |   4 +-
 src/atlas/trans/localopt/TransLocalopt.cc     |  54 ++--
 .../localopt2/LegendrePolynomialsopt2.cc      |   4 +-
 src/atlas/trans/localopt2/TransLocalopt2.cc   |  53 ++--
 src/atlas/trans/localopt3/TransLocalopt3.cc   | 285 +++++++++---------
 src/tests/trans/test_transgeneral.cc          |  10 +-
 6 files changed, 195 insertions(+), 215 deletions(-)

diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
index fc0e3d879..993936124 100644
--- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
+++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
@@ -178,11 +178,11 @@ void compute_legendre_polynomialsopt(
                 //for ( int jn = jm; jn <= trc; jn++ ) {
                 for ( int jn = trc; jn >= jm; jn-- ) {
                     if ( ( jn - jm ) % 2 == 0 ) {
-                        int is      = leg_start_sym[jm] + is1 * jlat + is2++;
+                        int is      = leg_start_sym[jm] + jlat + nlats * is2++;
                         leg_sym[is] = legpol[idxmn( jm, jn )];
                     }
                     else {
-                        int ia       = leg_start_asym[jm] + ia1 * jlat + ia2++;
+                        int ia       = leg_start_asym[jm] + jlat + nlats * ia2++;
                         leg_asym[ia] = legpol[idxmn( jm, jn )];
                     }
                 }
diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index 087dcee41..112b70fb9 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -149,8 +149,8 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
         }
         alloc_aligned( legendre_sym_, size_sym );
         alloc_aligned( legendre_asym_, size_asym );
-        //compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_,
-        //                                 legendre_sym_begin_.data(), legendre_asym_begin_.data() );
+        compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_,
+                                         legendre_sym_begin_.data(), legendre_asym_begin_.data() );
     }
 
         // precomputations for Fourier transformations:
@@ -285,9 +285,8 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
 #if ATLAS_HAVE_FFTW
             int num_complex      = ( nlons / 2 ) + 1;
             fftw_complex* fft_in = fftw_alloc_complex( nlats * num_complex * nb_fields );
-            double* fft_out      = fftw_alloc_real( nlats * nlons * nb_fields );
             fftw_plan plan       = fftw_plan_many_dft_c2r( 1, &nlons, nlats * nb_fields, fft_in, NULL, 1, num_complex,
-                                                     fft_out, NULL, 1, nlons, FFTW_ESTIMATE );
+                                                     gp_fields, NULL, 1, nlons, FFTW_ESTIMATE );
             for ( int j = 0; j < nlats * num_complex * nb_fields; j++ ) {
                 for ( int imag = 0; imag < 2; imag++ ) {
                     fft_in[j][imag] = 0.;
@@ -317,16 +316,10 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                     {
                         //ATLAS_TRACE( "opt Legendre split" );
                         int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2;
-                        // the choice between the following two code lines determines whether
-                        // total wavenumbers are summed in an ascending or descending order.
-                        // The trans library in IFS uses descending order because it should
-                        // be more accurate (higher wavenumbers have smaller contributions).
-                        // This also needs to be changed when splitting the spectral data in
-                        // compute_legendre_polynomialsopt!
-                        //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) {
-                        for ( int jn = truncation_ + 1; jn >= jm; jn-- ) {
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { //ascending
+                                for ( int jn = truncation_ + 1; jn >= jm; jn-- ) {  // descending
                                     idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) );
                                     if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
                                     else {
@@ -338,25 +331,25 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                         ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym );
                     }
                     {
-                        eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym );
-                        eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsNH );
-                        eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsNH );
+                        eckit::linalg::Matrix A( legendre_sym_ + legendre_sym_begin_[jm], nlatsNH, size_sym );
+                        eckit::linalg::Matrix B( scalar_sym, size_sym, nb_fields * n_imag );
+                        eckit::linalg::Matrix C( scl_fourier_sym, nlatsNH, nb_fields * n_imag );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
                     if ( size_asym > 0 ) {
-                        eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym );
-                        eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsNH );
-                        eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH );
+                        eckit::linalg::Matrix A( legendre_asym_ + legendre_asym_begin_[jm], nlatsNH, size_asym );
+                        eckit::linalg::Matrix B( scalar_asym, size_asym, nb_fields * n_imag );
+                        eckit::linalg::Matrix C( scl_fourier_asym, nlatsNH, nb_fields * n_imag );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
 #if 1  //ATLAS_HAVE_FFTW
                     {
-                        ATLAS_TRACE( "opt merge spheres" );
+                        //ATLAS_TRACE( "opt merge spheres" );
                         // northern hemisphere:
                         int idx = 0;
-                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                for ( int jlat = 0; jlat < nlatsNH; jlat++, idx++ ) {
                                     fft_in[posFFTWin( jfld, jlat, jm )][imag] =
                                         scl_fourier_sym[idx] + scl_fourier_asym[idx];
                                 }
@@ -364,9 +357,9 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                         }
                         // southern hemisphere:
                         idx = 0;
-                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                for ( int jlat = 0; jlat < nlatsNH; jlat++, idx++ ) {
                                     int jslat = nlats - jlat - 1;
                                     fft_in[posFFTWin( jfld, jslat, jm )][imag] =
                                         scl_fourier_sym[idx] - scl_fourier_asym[idx];
@@ -381,24 +374,13 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                 }
             }
             {
-                int num_complex = ( nlons / 2 ) + 1;
                 {
                     ATLAS_TRACE( "opt FFTW" );
-                    {
-                        ATLAS_TRACE( "fftw_execute" );
-                        fftw_execute( plan );
-                    }
-                    {
-                        ATLAS_TRACE( "read fft_out" );
-                        for ( int j = 0; j < nlats * nlons * nb_fields; j++ ) {
-                            gp_fields[j] = fft_out[j];
-                        }
-                    }
+                    { fftw_execute( plan ); }
                 }
             }
             fftw_destroy_plan( plan );
             fftw_free( fft_in );
-            fftw_free( fft_out );
 #else
 #if 0  // 1: better for small number of columns, large truncation; 0: better for large number of columns
                     {
diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
index 9d96b1ac0..3221c3936 100644
--- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
+++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
@@ -178,11 +178,11 @@ void compute_legendre_polynomialsopt2(
                 //for ( int jn = jm; jn <= trc; jn++ ) {
                 for ( int jn = trc; jn >= jm; jn-- ) {
                     if ( ( jn - jm ) % 2 == 0 ) {
-                        int is      = leg_start_sym[jm] + is1 * jlat + is2++;
+                        int is      = leg_start_sym[jm] + jlat + nlats * is2++;
                         leg_sym[is] = legpol[idxmn( jm, jn )];
                     }
                     else {
-                        int ia       = leg_start_asym[jm] + ia1 * jlat + ia2++;
+                        int ia       = leg_start_asym[jm] + jlat + nlats * ia2++;
                         leg_asym[ia] = legpol[idxmn( jm, jn )];
                     }
                 }
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index 66b64d910..ac1da815b 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -146,8 +146,8 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
         }
         alloc_aligned( legendre_sym_, size_sym );
         alloc_aligned( legendre_asym_, size_asym );
-        //compute_legendre_polynomialsopt2( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_,
-        //                                  legendre_sym_begin_.data(), legendre_asym_begin_.data() );
+        compute_legendre_polynomialsopt2( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_,
+                                          legendre_sym_begin_.data(), legendre_asym_begin_.data() );
     }
 
         // precomputations for Fourier transformations:
@@ -313,16 +313,10 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                     {
                         //ATLAS_TRACE( "opt2 Legendre split" );
                         int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2;
-                        // the choice between the following two code lines determines whether
-                        // total wavenumbers are summed in an ascending or descending order.
-                        // The trans library in IFS uses descending order because it should
-                        // be more accurate (higher wavenumbers have smaller contributions).
-                        // This also needs to be changed when splitting the spectral data in
-                        // compute_legendre_polynomialsopt2!
-                        //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) {
-                        for ( int jn = truncation_ + 1; jn >= jm; jn-- ) {
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { //ascending
+                                for ( int jn = truncation_ + 1; jn >= jm; jn-- ) {  // descending
                                     idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) );
                                     if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
                                     else {
@@ -333,7 +327,19 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                         }
                         ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym );
                     }
-                    {
+                    {  // transposed
+                        eckit::linalg::Matrix A( legendre_sym_ + legendre_sym_begin_[jm], nlatsNH, size_sym );
+                        eckit::linalg::Matrix B( scalar_sym, size_sym, nb_fields * n_imag );
+                        eckit::linalg::Matrix C( scl_fourier_sym, nlatsNH, nb_fields * n_imag );
+                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                    }
+                    if ( size_asym > 0 ) {
+                        eckit::linalg::Matrix A( legendre_asym_ + legendre_asym_begin_[jm], nlatsNH, size_asym );
+                        eckit::linalg::Matrix B( scalar_asym, size_asym, nb_fields * n_imag );
+                        eckit::linalg::Matrix C( scl_fourier_asym, nlatsNH, nb_fields * n_imag );
+                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                    }
+                        /*{ // non-transposed
                         eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym );
                         eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsNH );
                         eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsNH );
@@ -344,32 +350,25 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                         eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsNH );
                         eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                    }
+                    }*/
 #if 1  //ATLAS_HAVE_FFTW
                     {
-                        ATLAS_TRACE( "opt2 merge spheres" );
+                        //ATLAS_TRACE( "opt2 merge spheres" );
                         // northern hemisphere:
-                        int ioff = jm * size_fourier_max;
-                        int pos0 = ioff;
-                        int idx  = 0;
-                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                            int poslat = pos0 + 2 * jlat;
+                        int idx = 0;
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                int posimag = nb_fields * ( imag + poslat );
-                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                for ( int jlat = 0; jlat < nlatsNH; jlat++, idx++ ) {
                                     scl_fourier[posFFTW( jfld, imag, jlat, jm )] =
                                         scl_fourier_sym[idx] + scl_fourier_asym[idx];
                                 }
                             }
                         }
                         // southern hemisphere:
-                        idx  = 0;
-                        pos0 = 2 * ( nlats - 1 ) + ioff;
-                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                            int poslat = pos0 - 2 * jlat;
+                        idx = 0;
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                int posimag = nb_fields * ( imag + poslat );
-                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                for ( int jlat = 0; jlat < nlatsNH; jlat++, idx++ ) {
                                     int jslat = nlats - jlat - 1;
                                     scl_fourier[posFFTW( jfld, imag, jslat, jm )] =
                                         scl_fourier_sym[idx] - scl_fourier_asym[idx];
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index 3cf7299bc..273af7082 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -146,12 +146,12 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         }
         alloc_aligned( legendre_sym_, size_sym );
         alloc_aligned( legendre_asym_, size_asym );
-        //compute_legendre_polynomialsopt3( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_,
-        //                                  legendre_sym_begin_.data(), legendre_asym_begin_.data() );
+        compute_legendre_polynomialsopt3( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_,
+                                          legendre_sym_begin_.data(), legendre_asym_begin_.data() );
     }
 
         // precomputations for Fourier transformations:
-#if 0  //ATLAS_HAVE_FFTW
+#if ATLAS_HAVE_FFTW
     {
         ATLAS_TRACE( "opt3 precomp FFTW" );
         int num_complex = ( nlons / 2 ) + 1;
@@ -198,12 +198,13 @@ TransLocalopt3::TransLocalopt3( const Grid& grid, const long truncation, const e
 TransLocalopt3::~TransLocalopt3() {
     free_aligned( legendre_sym_ );
     free_aligned( legendre_asym_ );
-    free_aligned( fourier_ );
-    free_aligned( fouriertp_ );
-#if 0  //ATLAS_HAVE_FFTW
+#if ATLAS_HAVE_FFTW
     fftw_destroy_plan( plan_ );
     fftw_free( fft_in_ );
     fftw_free( fft_out_ );
+#else
+    free_aligned( fourier_ );
+    free_aligned( fouriertp_ );
 #endif
 }
 
@@ -344,7 +345,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                         eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
-#if 0  //ATLAS_HAVE_FFTW
+#if 1  //ATLAS_HAVE_FFTW
                     {
                         //ATLAS_TRACE( "opt3 merge spheres" );
                         // northern hemisphere:
@@ -409,7 +410,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                 }
             }
 #else
-#if 0  // 1: better for small number of columns, large truncation; 0: better for large number of columns
+#if 1  // 1: better for small number of columns, large truncation; 0: better for large number of columns
                     {
                         //ATLAS_TRACE( "opt3 merge spheres" );
                         // northern hemisphere:
@@ -524,168 +525,166 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
 
 #endif
 #endif
-                    // Computing u,v from U,V:
-                    {
-                        if ( nb_vordiv_fields > 0 ) {
-                            ATLAS_TRACE( "opt3 u,v from U,V" );
-                            std::vector<double> coslats( nlats );
-                            for ( size_t j = 0; j < nlats; ++j ) {
-                                coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
-                            }
-                            int idx = 0;
-                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                                    for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
-                                        gp_fields[idx] /= coslats[jlat];
-                                        idx++;
-                                    }
-                                }
-                            }
-                        }
+            // Computing u,v from U,V:
+            {
+                if ( nb_vordiv_fields > 0 ) {
+                    ATLAS_TRACE( "opt3 u,v from U,V" );
+                    std::vector<double> coslats( nlats );
+                    for ( size_t j = 0; j < nlats; ++j ) {
+                        coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
                     }
-                    free_aligned( scl_fourier );
-                }
-                else {
-                    ATLAS_TRACE( "invtrans_uv unstructured opt3" );
                     int idx = 0;
-                    for ( PointXY p : grid_.xy() ) {
-                        double lon   = p.x() * util::Constants::degreesToRadians();
-                        double lat   = p.y() * util::Constants::degreesToRadians();
-                        double trcFT = truncation;
-
-                        // Legendre transform:
-                        //invtrans_legendreopt3( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
-                        //                      legReal.data(), legImag.data() );
-
-                        // Fourier transform:
-                        //invtrans_fourieropt3( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
-                        //                     gp_tmp.data() + ( nb_fields * idx ) );
-                        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-                            //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                            for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
+                                gp_fields[idx] /= coslats[jlat];
+                                idx++;
+                            }
                         }
-                        ++idx;
                     }
                 }
             }
-        }  // namespace trans
-
-        // --------------------------------------------------------------------------------------------------------------------
-
-        void TransLocalopt3::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
-                                       const double divergence_spectra[], double gp_fields[],
-                                       const eckit::Configuration& config ) const {
-            invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
+            free_aligned( scl_fourier );
         }
-
-        void extend_truncationopt3( const int old_truncation, const int nb_fields, const double old_spectra[],
-                                    double new_spectra[] ) {
-            int k = 0, k_old = 0;
-            for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
-                for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
-                    for ( int imag = 0; imag < 2; imag++ ) {              // imaginary/real part
-                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {  // field
-                            if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; }
-                            else {
-                                new_spectra[k++] = old_spectra[k_old++];
-                            }
-                        }
-                    }
+        else {
+            ATLAS_TRACE( "invtrans_uv unstructured opt3" );
+            int idx = 0;
+            for ( PointXY p : grid_.xy() ) {
+                double lon   = p.x() * util::Constants::degreesToRadians();
+                double lat   = p.y() * util::Constants::degreesToRadians();
+                double trcFT = truncation;
+
+                // Legendre transform:
+                //invtrans_legendreopt3( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
+                //                      legReal.data(), legImag.data() );
+
+                // Fourier transform:
+                //invtrans_fourieropt3( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
+                //                     gp_tmp.data() + ( nb_fields * idx ) );
+                for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+                    //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
                 }
+                ++idx;
             }
         }
+    }
+}  // namespace trans
 
-        // --------------------------------------------------------------------------------------------------------------------
-
-        void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[],
-                                       const int nb_vordiv_fields, const double vorticity_spectra[],
-                                       const double divergence_spectra[], double gp_fields[],
-                                       const eckit::Configuration& config ) const {
-            ATLAS_TRACE( "TransLocalopt3::invtrans" );
-            int nb_gp              = grid_.size();
-            int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
-            if ( nb_vordiv_fields > 0 ) {
-                std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
-                std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
-                std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
-                std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
+// --------------------------------------------------------------------------------------------------------------------
 
-                {
-                    ATLAS_TRACE( "opt3 extend vordiv" );
-                    // increase truncation in vorticity_spectra and divergence_spectra:
-                    extend_truncationopt3( truncation_, nb_vordiv_fields, vorticity_spectra,
-                                           vorticity_spectra_extended.data() );
-                    extend_truncationopt3( truncation_, nb_vordiv_fields, divergence_spectra,
-                                           divergence_spectra_extended.data() );
-                }
+void TransLocalopt3::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
+                               const double divergence_spectra[], double gp_fields[],
+                               const eckit::Configuration& config ) const {
+    invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
+}
 
-                {
-                    ATLAS_TRACE( "vordiv to UV opt3" );
-                    // call vd2uv to compute u and v in spectral space
-                    trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt3" ) );
-                    vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
-                                              divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
+void extend_truncationopt3( const int old_truncation, const int nb_fields, const double old_spectra[],
+                            double new_spectra[] ) {
+    int k = 0, k_old = 0;
+    for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
+        for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
+            for ( int imag = 0; imag < 2; imag++ ) {              // imaginary/real part
+                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {  // field
+                    if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; }
+                    else {
+                        new_spectra[k++] = old_spectra[k_old++];
+                    }
                 }
-
-                // perform spectral transform to compute all fields in grid point space
-                invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
-                invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
-                             gp_fields + nb_gp * nb_vordiv_fields, config );
-            }
-            if ( nb_scalar_fields > 0 ) {
-                int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
-                std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
-                extend_truncationopt3( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
-                invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
-                             gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
             }
         }
+    }
+}
 
-        // --------------------------------------------------------------------------------------------------------------------
+// --------------------------------------------------------------------------------------------------------------------
 
-        void TransLocalopt3::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config )
-            const {
-            NOTIMP;
-            // Not implemented and not planned.
-            // Use the TransIFS implementation instead.
+void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
+                               const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
+                               const eckit::Configuration& config ) const {
+    ATLAS_TRACE( "TransLocalopt3::invtrans" );
+    int nb_gp              = grid_.size();
+    int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
+    if ( nb_vordiv_fields > 0 ) {
+        std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
+        std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
+        std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
+        std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
+
+        {
+            ATLAS_TRACE( "opt3 extend vordiv" );
+            // increase truncation in vorticity_spectra and divergence_spectra:
+            extend_truncationopt3( truncation_, nb_vordiv_fields, vorticity_spectra,
+                                   vorticity_spectra_extended.data() );
+            extend_truncationopt3( truncation_, nb_vordiv_fields, divergence_spectra,
+                                   divergence_spectra_extended.data() );
         }
 
-        // --------------------------------------------------------------------------------------------------------------------
-
-        void TransLocalopt3::dirtrans( const FieldSet& gpfields, FieldSet& spfields,
-                                       const eckit::Configuration& config ) const {
-            NOTIMP;
-            // Not implemented and not planned.
-            // Use the TransIFS implementation instead.
+        {
+            ATLAS_TRACE( "vordiv to UV opt3" );
+            // call vd2uv to compute u and v in spectral space
+            trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt3" ) );
+            vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
+                                      divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
         }
 
-        // --------------------------------------------------------------------------------------------------------------------
+        // perform spectral transform to compute all fields in grid point space
+        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
+        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
+                     gp_fields + nb_gp * nb_vordiv_fields, config );
+    }
+    if ( nb_scalar_fields > 0 ) {
+        int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
+        std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
+        extend_truncationopt3( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
+        invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
+                     gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
+    }
+}
 
-        void TransLocalopt3::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
-                                                   const eckit::Configuration& config ) const {
-            NOTIMP;
-            // Not implemented and not planned.
-            // Use the TransIFS implementation instead.
-        }
+// --------------------------------------------------------------------------------------------------------------------
 
-        // --------------------------------------------------------------------------------------------------------------------
+void TransLocalopt3::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
 
-        void TransLocalopt3::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
-                                       const eckit::Configuration& ) const {
-            NOTIMP;
-            // Not implemented and not planned.
-            // Use the TransIFS implementation instead.
-        }
+// --------------------------------------------------------------------------------------------------------------------
 
-        // --------------------------------------------------------------------------------------------------------------------
+void TransLocalopt3::dirtrans( const FieldSet& gpfields, FieldSet& spfields,
+                               const eckit::Configuration& config ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
 
-        void TransLocalopt3::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
-                                       double divergence_spectra[], const eckit::Configuration& ) const {
-            NOTIMP;
-            // Not implemented and not planned.
-            // Use the TransIFS implementation instead.
-        }
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt3::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
+                                           const eckit::Configuration& config ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt3::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
+                               const eckit::Configuration& ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
+
+// --------------------------------------------------------------------------------------------------------------------
 
-        // --------------------------------------------------------------------------------------------------------------------
+void TransLocalopt3::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
+                               double divergence_spectra[], const eckit::Configuration& ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
+
+// --------------------------------------------------------------------------------------------------------------------
 
-    }  // namespace trans
+}  // namespace trans
 }  // namespace atlas
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 2f17a4cd1..9aa9eb6a6 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -724,8 +724,8 @@ CASE( "test_trans_vordiv_with_translib" ) {
     trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) );
     double rav = 0.;  // compute average rms error of trans library in rav
 #endif
-    trans::Trans transLocal1( g, trc, util::Config( "type", "localopt" ) );
-    trans::Trans transLocal2( g, trc, util::Config( "type", "localopt2" ) );
+    trans::Trans transLocal1( g, trc, util::Config( "type", "localopt2" ) );
+    trans::Trans transLocal2( g, trc, util::Config( "type", "localopt3" ) );
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
 
     functionspace::Spectral spectral( trc );
@@ -863,10 +863,10 @@ CASE( "test_trans_hires" ) {
     double tolerance  = 1.e-13;
 
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F1280" );
+    Grid g( "F640" );
 #if ATLAS_HAVE_TRANS
-    //std::string transTypes[4] = {"localopt", "localopt2", "localopt3", "ifs"};
-    std::string transTypes[3] = {"localopt", "localopt2", "localopt3"};
+    std::string transTypes[4] = {"localopt", "localopt2", "localopt3", "ifs"};
+    //std::string transTypes[2] = {"localopt", "localopt2"};
     //std::string transTypes[3] = {"localopt", "localopt2", "ifs"};
 #else
     std::string transTypes[1] = {"localopt2"};

From 2a3a977dbc2df310193be4f10df4b847f00042ab Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Wed, 21 Mar 2018 19:12:34 +0000
Subject: [PATCH 032/123] optimised computation of Legendre polynomials

---
 .../trans/localopt/LegendrePolynomialsopt.cc  | 15 ++++++++---
 .../localopt2/LegendrePolynomialsopt2.cc      | 15 ++++++++---
 .../localopt3/LegendrePolynomialsopt3.cc      | 21 ++++++++--------
 src/atlas/trans/localopt3/TransLocalopt3.cc   | 25 ++++++++-----------
 src/tests/trans/test_transgeneral.cc          |  7 +++---
 5 files changed, 48 insertions(+), 35 deletions(-)

diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
index 993936124..413620301 100644
--- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
+++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
@@ -69,6 +69,13 @@ void compute_legendre_polynomialsopt(
             double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
 
             legpol[idxmn( 0, 0 )] = 1.;
+            double vsin[trc + 1], vcos[trc + 1];
+            for ( int j = 1; j <= trc; j++ ) {
+                vsin[j] = std::sin( j * zdlx1 );
+            }
+            for ( int j = 1; j <= trc; j++ ) {
+                vcos[j] = std::cos( j * zdlx1 );
+            }
 
             double zdl1sita = 0.;
             // if we are less than 1 meter from the pole,
@@ -91,9 +98,9 @@ void compute_legendre_polynomialsopt(
                 // represented by only even k
                 for ( int jk = 2; jk <= jn; jk += 2 ) {
                     // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-                    zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
+                    zdlk = zdlk + zfn( jn, jk ) * vcos[jk];
                     // normalised associated Legendre polynomial == \overbar{P_n}^1
-                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk];
                 }
                 legpol[idxmn( 0, jn )] = zdlk;
                 legpol[idxmn( 1, jn )] = zdlldn;
@@ -108,9 +115,9 @@ void compute_legendre_polynomialsopt(
                 // represented by only even k
                 for ( int jk = 1; jk <= jn; jk += 2 ) {
                     // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-                    zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
+                    zdlk = zdlk + zfn( jn, jk ) * vcos[jk];
                     // normalised associated Legendre polynomial == \overbar{P_n}^1
-                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk];
                 }
                 legpol[idxmn( 0, jn )] = zdlk;
                 legpol[idxmn( 1, jn )] = zdlldn;
diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
index 3221c3936..1d9f86daa 100644
--- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
+++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
@@ -69,6 +69,13 @@ void compute_legendre_polynomialsopt2(
             double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
 
             legpol[idxmn( 0, 0 )] = 1.;
+            double vsin[trc + 1], vcos[trc + 1];
+            for ( int j = 1; j <= trc; j++ ) {
+                vsin[j] = std::sin( j * zdlx1 );
+            }
+            for ( int j = 1; j <= trc; j++ ) {
+                vcos[j] = std::cos( j * zdlx1 );
+            }
 
             double zdl1sita = 0.;
             // if we are less than 1 meter from the pole,
@@ -91,9 +98,9 @@ void compute_legendre_polynomialsopt2(
                 // represented by only even k
                 for ( int jk = 2; jk <= jn; jk += 2 ) {
                     // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-                    zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
+                    zdlk = zdlk + zfn( jn, jk ) * vcos[jk];
                     // normalised associated Legendre polynomial == \overbar{P_n}^1
-                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk];
                 }
                 legpol[idxmn( 0, jn )] = zdlk;
                 legpol[idxmn( 1, jn )] = zdlldn;
@@ -108,9 +115,9 @@ void compute_legendre_polynomialsopt2(
                 // represented by only even k
                 for ( int jk = 1; jk <= jn; jk += 2 ) {
                     // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-                    zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
+                    zdlk = zdlk + zfn( jn, jk ) * vcos[jk];
                     // normalised associated Legendre polynomial == \overbar{P_n}^1
-                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk];
                 }
                 legpol[idxmn( 0, jn )] = zdlk;
                 legpol[idxmn( 1, jn )] = zdlldn;
diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
index c6fbcad68..3ea2b41ef 100644
--- a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
+++ b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
@@ -69,6 +69,13 @@ void compute_legendre_polynomialsopt3(
             double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
 
             legpol[idxmn( 0, 0 )] = 1.;
+            double vsin[trc + 1], vcos[trc + 1];
+            for ( int j = 1; j <= trc; j++ ) {
+                vsin[j] = std::sin( j * zdlx1 );
+            }
+            for ( int j = 1; j <= trc; j++ ) {
+                vcos[j] = std::cos( j * zdlx1 );
+            }
 
             double zdl1sita = 0.;
             // if we are less than 1 meter from the pole,
@@ -91,9 +98,9 @@ void compute_legendre_polynomialsopt3(
                 // represented by only even k
                 for ( int jk = 2; jk <= jn; jk += 2 ) {
                     // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-                    zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
+                    zdlk = zdlk + zfn( jn, jk ) * vcos[jk];
                     // normalised associated Legendre polynomial == \overbar{P_n}^1
-                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk];
                 }
                 legpol[idxmn( 0, jn )] = zdlk;
                 legpol[idxmn( 1, jn )] = zdlldn;
@@ -108,9 +115,9 @@ void compute_legendre_polynomialsopt3(
                 // represented by only even k
                 for ( int jk = 1; jk <= jn; jk += 2 ) {
                     // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-                    zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
+                    zdlk = zdlk + zfn( jn, jk ) * vcos[jk];
                     // normalised associated Legendre polynomial == \overbar{P_n}^1
-                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
+                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk];
                 }
                 legpol[idxmn( 0, jn )] = zdlk;
                 legpol[idxmn( 1, jn )] = zdlldn;
@@ -154,12 +161,6 @@ void compute_legendre_polynomialsopt3(
         {
             //ATLAS_TRACE( "add to global arrays" );
 
-            // take factor 2 for m > 0 into account:
-            for ( int jm = 1; jm <= trc; ++jm ) {
-                for ( int jn = jm; jn <= trc; ++jn ) {
-                    legpol[idxmn( jm, jn )] *= 2.;
-                }
-            }
             for ( int jm = 0; jm <= trc; jm++ ) {
                 int is1 = 0, ia1 = 0;
                 for ( int jn = jm; jn <= trc; jn++ ) {
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index 273af7082..ebab9bf06 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -166,9 +166,11 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons );
         int idx = 0;
         for ( int jlon = 0; jlon < nlons; jlon++ ) {
+            double factor = 1.;
+            if ( jm > 0 ) { factor = 0.5; }
             for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                fourier_[idx++] = +std::cos( jm * lons[jlon] );  // real part
-                fourier_[idx++] = -std::sin( jm * lons[jlon] );  // imaginary part
+                fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
+                fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
             }
         }
     }
@@ -177,11 +179,13 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         alloc_aligned( fouriertp_, 2 * ( truncation_ + 1 ) * nlons );
         int idx = 0;
         for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+            double factor = 1.;
+            if ( jm > 0 ) { factor = 0.5; }
             for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                fouriertp_[idx++] = +std::cos( jm * lons[jlon] );  // real part
+                fouriertp_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
             }
             for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                fouriertp_[idx++] = -std::sin( jm * lons[jlon] );  // imaginary part
+                fouriertp_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
             }
         }
     }
@@ -349,13 +353,9 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                     {
                         //ATLAS_TRACE( "opt3 merge spheres" );
                         // northern hemisphere:
-                        int ioff = jm * size_fourier_max;
-                        int pos0 = ioff;
-                        int idx  = 0;
+                        int idx = 0;
                         for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                            int poslat = pos0 + 2 * jlat;
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                int posimag = nb_fields * ( imag + poslat );
                                 for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
                                     scl_fourier[posFFTW( jfld, imag, jlat, jm )] =
                                         scl_fourier_sym[idx] + scl_fourier_asym[idx];
@@ -363,12 +363,9 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                             }
                         }
                         // southern hemisphere:
-                        idx  = 0;
-                        pos0 = 2 * ( nlats - 1 ) + ioff;
+                        idx = 0;
                         for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                            int poslat = pos0 - 2 * jlat;
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                int posimag = nb_fields * ( imag + poslat );
                                 for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
                                     int jslat = nlats - jlat - 1;
                                     scl_fourier[posFFTW( jfld, imag, jslat, jm )] =
@@ -394,7 +391,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                             for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
                                 for ( int imag = 0; imag < 2; imag++ ) {
                                     if ( jm <= truncation_ ) {
-                                        fft_in_[idx][imag] = scl_fourier[posFFTW( jfld, imag, jlat, jm )] / 2.;
+                                        fft_in_[idx][imag] = scl_fourier[posFFTW( jfld, imag, jlat, jm )];
                                     }
                                     else {
                                         fft_in_[idx][imag] = 0.;
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 9aa9eb6a6..db9188c5a 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -724,8 +724,8 @@ CASE( "test_trans_vordiv_with_translib" ) {
     trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) );
     double rav = 0.;  // compute average rms error of trans library in rav
 #endif
-    trans::Trans transLocal1( g, trc, util::Config( "type", "localopt2" ) );
-    trans::Trans transLocal2( g, trc, util::Config( "type", "localopt3" ) );
+    trans::Trans transLocal1( g, trc, util::Config( "type", "localopt" ) );
+    trans::Trans transLocal2( g, trc, util::Config( "type", "localopt2" ) );
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
 
     functionspace::Spectral spectral( trc );
@@ -863,11 +863,12 @@ CASE( "test_trans_hires" ) {
     double tolerance  = 1.e-13;
 
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F640" );
+    Grid g( "F1280" );
 #if ATLAS_HAVE_TRANS
     std::string transTypes[4] = {"localopt", "localopt2", "localopt3", "ifs"};
     //std::string transTypes[2] = {"localopt", "localopt2"};
     //std::string transTypes[3] = {"localopt", "localopt2", "ifs"};
+    //std::string transTypes[1] = {"localopt3"};
 #else
     std::string transTypes[1] = {"localopt2"};
 #endif

From ba42bb8600c5cd1f2babef3fea47412b34a67a54 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Fri, 23 Mar 2018 18:21:25 +0000
Subject: [PATCH 033/123] added test for limited domain; fixed Fourier
 transformation with dgemm

---
 src/atlas/trans/localopt3/TransLocalopt3.cc | 289 ++++++++++----------
 src/tests/trans/test_transgeneral.cc        | 139 +++++++++-
 2 files changed, 277 insertions(+), 151 deletions(-)

diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index ebab9bf06..d595f500a 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -151,7 +151,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
     }
 
         // precomputations for Fourier transformations:
-#if ATLAS_HAVE_FFTW
+#if 0  //ATLAS_HAVE_FFTW
     {
         ATLAS_TRACE( "opt3 precomp FFTW" );
         int num_complex = ( nlons / 2 ) + 1;
@@ -167,8 +167,8 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         int idx = 0;
         for ( int jlon = 0; jlon < nlons; jlon++ ) {
             double factor = 1.;
-            if ( jm > 0 ) { factor = 0.5; }
             for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                if ( jm > 0 ) { factor = 2.; }
                 fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
                 fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
             }
@@ -180,7 +180,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         int idx = 0;
         for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
             double factor = 1.;
-            if ( jm > 0 ) { factor = 0.5; }
+            if ( jm > 0 ) { factor = 2.; }
             for ( int jlon = 0; jlon < nlons; jlon++ ) {
                 fouriertp_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
             }
@@ -202,7 +202,7 @@ TransLocalopt3::TransLocalopt3( const Grid& grid, const long truncation, const e
 TransLocalopt3::~TransLocalopt3() {
     free_aligned( legendre_sym_ );
     free_aligned( legendre_asym_ );
-#if ATLAS_HAVE_FFTW
+#if 0  //ATLAS_HAVE_FFTW
     fftw_destroy_plan( plan_ );
     fftw_free( fft_in_ );
     fftw_free( fft_out_ );
@@ -349,7 +349,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                         eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
-#if 1  //ATLAS_HAVE_FFTW
+#if 0  //ATLAS_HAVE_FFTW
                     {
                         //ATLAS_TRACE( "opt3 merge spheres" );
                         // northern hemisphere:
@@ -411,13 +411,9 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                     {
                         //ATLAS_TRACE( "opt3 merge spheres" );
                         // northern hemisphere:
-                        int ioff = jm * size_fourier_max;
-                        int pos0 = ioff;
-                        int idx  = 0;
+                        int idx = 0;
                         for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                            int poslat = pos0 + 2 * jlat;
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                int posimag = nb_fields * ( imag + poslat );
                                 for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
                                     scl_fourier[posGemm1( jfld, imag, jlat, jm )] =
                                         scl_fourier_sym[idx] + scl_fourier_asym[idx];
@@ -425,12 +421,9 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                             }
                         }
                         // southern hemisphere:
-                        idx  = 0;
-                        pos0 = 2 * ( nlats - 1 ) + ioff;
+                        idx = 0;
                         for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                            int poslat = pos0 - 2 * jlat;
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                int posimag = nb_fields * ( imag + poslat );
                                 for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
                                     int jslat = nlats - jlat - 1;
                                     scl_fourier[posGemm1( jfld, imag, jslat, jm )] =
@@ -522,166 +515,168 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
 
 #endif
 #endif
-            // Computing u,v from U,V:
-            {
-                if ( nb_vordiv_fields > 0 ) {
-                    ATLAS_TRACE( "opt3 u,v from U,V" );
-                    std::vector<double> coslats( nlats );
-                    for ( size_t j = 0; j < nlats; ++j ) {
-                        coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
-                    }
-                    int idx = 0;
-                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                        for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                            for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
-                                gp_fields[idx] /= coslats[jlat];
-                                idx++;
+                    // Computing u,v from U,V:
+                    {
+                        if ( nb_vordiv_fields > 0 ) {
+                            ATLAS_TRACE( "opt3 u,v from U,V" );
+                            std::vector<double> coslats( nlats );
+                            for ( size_t j = 0; j < nlats; ++j ) {
+                                coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
+                            }
+                            int idx = 0;
+                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                                    for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
+                                        gp_fields[idx] /= coslats[jlat];
+                                        idx++;
+                                    }
+                                }
                             }
                         }
                     }
+                    free_aligned( scl_fourier );
                 }
-            }
-            free_aligned( scl_fourier );
-        }
-        else {
-            ATLAS_TRACE( "invtrans_uv unstructured opt3" );
-            int idx = 0;
-            for ( PointXY p : grid_.xy() ) {
-                double lon   = p.x() * util::Constants::degreesToRadians();
-                double lat   = p.y() * util::Constants::degreesToRadians();
-                double trcFT = truncation;
-
-                // Legendre transform:
-                //invtrans_legendreopt3( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
-                //                      legReal.data(), legImag.data() );
-
-                // Fourier transform:
-                //invtrans_fourieropt3( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
-                //                     gp_tmp.data() + ( nb_fields * idx ) );
-                for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-                    //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                else {
+                    ATLAS_TRACE( "invtrans_uv unstructured opt3" );
+                    int idx = 0;
+                    for ( PointXY p : grid_.xy() ) {
+                        double lon   = p.x() * util::Constants::degreesToRadians();
+                        double lat   = p.y() * util::Constants::degreesToRadians();
+                        double trcFT = truncation;
+
+                        // Legendre transform:
+                        //invtrans_legendreopt3( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
+                        //                      legReal.data(), legImag.data() );
+
+                        // Fourier transform:
+                        //invtrans_fourieropt3( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
+                        //                     gp_tmp.data() + ( nb_fields * idx ) );
+                        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+                            //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                        }
+                        ++idx;
+                    }
                 }
-                ++idx;
             }
-        }
-    }
-}  // namespace trans
+        }  // namespace trans
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
-                               const double divergence_spectra[], double gp_fields[],
-                               const eckit::Configuration& config ) const {
-    invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
-}
+        void TransLocalopt3::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
+                                       const double divergence_spectra[], double gp_fields[],
+                                       const eckit::Configuration& config ) const {
+            invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
+        }
 
-void extend_truncationopt3( const int old_truncation, const int nb_fields, const double old_spectra[],
-                            double new_spectra[] ) {
-    int k = 0, k_old = 0;
-    for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
-        for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
-            for ( int imag = 0; imag < 2; imag++ ) {              // imaginary/real part
-                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {  // field
-                    if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; }
-                    else {
-                        new_spectra[k++] = old_spectra[k_old++];
+        void extend_truncationopt3( const int old_truncation, const int nb_fields, const double old_spectra[],
+                                    double new_spectra[] ) {
+            int k = 0, k_old = 0;
+            for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
+                for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
+                    for ( int imag = 0; imag < 2; imag++ ) {              // imaginary/real part
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {  // field
+                            if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; }
+                            else {
+                                new_spectra[k++] = old_spectra[k_old++];
+                            }
+                        }
                     }
                 }
             }
         }
-    }
-}
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
+
+        void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[],
+                                       const int nb_vordiv_fields, const double vorticity_spectra[],
+                                       const double divergence_spectra[], double gp_fields[],
+                                       const eckit::Configuration& config ) const {
+            ATLAS_TRACE( "TransLocalopt3::invtrans" );
+            int nb_gp              = grid_.size();
+            int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
+            if ( nb_vordiv_fields > 0 ) {
+                std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
+                std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
+                std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
+                std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
 
-void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
-                               const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
-                               const eckit::Configuration& config ) const {
-    ATLAS_TRACE( "TransLocalopt3::invtrans" );
-    int nb_gp              = grid_.size();
-    int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
-    if ( nb_vordiv_fields > 0 ) {
-        std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
-        std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
-        std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
-        std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
-
-        {
-            ATLAS_TRACE( "opt3 extend vordiv" );
-            // increase truncation in vorticity_spectra and divergence_spectra:
-            extend_truncationopt3( truncation_, nb_vordiv_fields, vorticity_spectra,
-                                   vorticity_spectra_extended.data() );
-            extend_truncationopt3( truncation_, nb_vordiv_fields, divergence_spectra,
-                                   divergence_spectra_extended.data() );
-        }
+                {
+                    ATLAS_TRACE( "opt3 extend vordiv" );
+                    // increase truncation in vorticity_spectra and divergence_spectra:
+                    extend_truncationopt3( truncation_, nb_vordiv_fields, vorticity_spectra,
+                                           vorticity_spectra_extended.data() );
+                    extend_truncationopt3( truncation_, nb_vordiv_fields, divergence_spectra,
+                                           divergence_spectra_extended.data() );
+                }
 
-        {
-            ATLAS_TRACE( "vordiv to UV opt3" );
-            // call vd2uv to compute u and v in spectral space
-            trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt3" ) );
-            vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
-                                      divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
-        }
+                {
+                    ATLAS_TRACE( "vordiv to UV opt3" );
+                    // call vd2uv to compute u and v in spectral space
+                    trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt3" ) );
+                    vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
+                                              divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
+                }
 
-        // perform spectral transform to compute all fields in grid point space
-        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
-        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
-                     gp_fields + nb_gp * nb_vordiv_fields, config );
-    }
-    if ( nb_scalar_fields > 0 ) {
-        int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
-        std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
-        extend_truncationopt3( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
-        invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
-                     gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
-    }
-}
+                // perform spectral transform to compute all fields in grid point space
+                invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
+                invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
+                             gp_fields + nb_gp * nb_vordiv_fields, config );
+            }
+            if ( nb_scalar_fields > 0 ) {
+                int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
+                std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
+                extend_truncationopt3( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
+                invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
+                             gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
+            }
+        }
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
+        void TransLocalopt3::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config )
+            const {
+            NOTIMP;
+            // Not implemented and not planned.
+            // Use the TransIFS implementation instead.
+        }
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::dirtrans( const FieldSet& gpfields, FieldSet& spfields,
-                               const eckit::Configuration& config ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
+        void TransLocalopt3::dirtrans( const FieldSet& gpfields, FieldSet& spfields,
+                                       const eckit::Configuration& config ) const {
+            NOTIMP;
+            // Not implemented and not planned.
+            // Use the TransIFS implementation instead.
+        }
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
-                                           const eckit::Configuration& config ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
+        void TransLocalopt3::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
+                                                   const eckit::Configuration& config ) const {
+            NOTIMP;
+            // Not implemented and not planned.
+            // Use the TransIFS implementation instead.
+        }
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
-                               const eckit::Configuration& ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
+        void TransLocalopt3::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
+                                       const eckit::Configuration& ) const {
+            NOTIMP;
+            // Not implemented and not planned.
+            // Use the TransIFS implementation instead.
+        }
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
-                               double divergence_spectra[], const eckit::Configuration& ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
+        void TransLocalopt3::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
+                                       double divergence_spectra[], const eckit::Configuration& ) const {
+            NOTIMP;
+            // Not implemented and not planned.
+            // Use the TransIFS implementation instead.
+        }
 
-// --------------------------------------------------------------------------------------------------------------------
+        // --------------------------------------------------------------------------------------------------------------------
 
-}  // namespace trans
+    }  // namespace trans
 }  // namespace atlas
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index db9188c5a..8a1034b86 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -704,7 +704,7 @@ CASE( "test_transgeneral_with_translib" ) {
 #endif
 #endif
 //-----------------------------------------------------------------------------
-#if 0
+#if 1
 CASE( "test_trans_vordiv_with_translib" ) {
     Log::info() << "test_trans_vordiv_with_translib" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -724,8 +724,8 @@ CASE( "test_trans_vordiv_with_translib" ) {
     trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) );
     double rav = 0.;  // compute average rms error of trans library in rav
 #endif
-    trans::Trans transLocal1( g, trc, util::Config( "type", "localopt" ) );
-    trans::Trans transLocal2( g, trc, util::Config( "type", "localopt2" ) );
+    trans::Trans transLocal1( g, trc, util::Config( "type", "localopt3" ) );
+    trans::Trans transLocal2( g, trc, util::Config( "type", "localopt3" ) );
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
 
     functionspace::Spectral spectral( trc );
@@ -853,7 +853,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
 }
 #endif
 //-----------------------------------------------------------------------------
-#if 1
+#if 0
 CASE( "test_trans_hires" ) {
     Log::info() << "test_trans_hires" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -932,6 +932,137 @@ CASE( "test_trans_hires" ) {
 #endif
 //-----------------------------------------------------------------------------
 #if 0
+CASE( "test_trans_domain" ) {
+    Log::info() << "test_trans_domain" << std::endl;
+    // test transgeneral by comparing with analytic solution on a cropped domain
+
+    std::ostream& out = Log::info();
+    double tolerance  = 1.e-13;
+
+    //Domain testdomain = ZonalBandDomain( {-90., 90.} );
+    //Domain testdomain = ZonalBandDomain( {-.5, .5} );
+    Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} );
+    // Grid: (Adjust the following line if the test takes too long!)
+    Grid g( "F8000", testdomain );
+    Grid g_global( g.name() );
+
+    grid::StructuredGrid gs( g );
+    grid::StructuredGrid gs_global( g_global );
+    int ndgl = gs_global.ny();
+    //int trc  = ndgl - 1;  // linear
+    int trc = ndgl / 2. - 1;  // cubic
+    trans::Trans transLocal1( g, trc, util::Config( "type", "localopt3" ) );
+    trans::Trans transLocal2( g, trc, util::Config( "type", "localopt3" ) );
+    double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
+
+    functionspace::Spectral spectral( trc );
+    functionspace::StructuredColumns gridpoints( g );
+
+    int nb_scalar = 1, nb_vordiv = 0;
+    int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
+    std::vector<double> sp( 2 * N * nb_scalar );
+    std::vector<double> vor( 2 * N * nb_vordiv );
+    std::vector<double> div( 2 * N * nb_vordiv );
+    std::vector<double> rspecg( 2 * N );
+    std::vector<double> gp( nb_all * g.size() );
+    std::vector<double> rgp1( nb_all * g.size() );
+    std::vector<double> rgp2( nb_all * g.size() );
+    std::vector<double> rgp_analytic( g.size() );
+
+    int icase = 0;
+    for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) {         // vorticity, divergence, scalar
+        for ( int ivar_out = 2; ivar_out < 3; ivar_out++ ) {  // u, v, scalar
+            int nb_fld = 1;
+            if ( ivar_out == 2 ) {
+                tolerance = 1.e-13;
+                nb_fld    = nb_scalar;
+            }
+            else {
+                tolerance = 2.e-6;
+                nb_fld    = nb_vordiv;
+            }
+            for ( int jfld = 0; jfld < nb_fld; jfld++ ) {  // multiple fields
+                int k = 0;
+                for ( int m = 0; m <= trc; m++ ) {                 // zonal wavenumber
+                    for ( int n = m; n <= trc; n++ ) {             // total wavenumber
+                        for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
+
+                            if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. &&
+                                 icase < 1 ) {
+                                auto start = std::chrono::system_clock::now();
+                                for ( int j = 0; j < 2 * N * nb_scalar; j++ ) {
+                                    sp[j] = 0.;
+                                }
+                                for ( int j = 0; j < 2 * N * nb_vordiv; j++ ) {
+                                    vor[j] = 0.;
+                                    div[j] = 0.;
+                                }
+                                if ( ivar_in == 0 ) vor[k * nb_vordiv + jfld] = 1.;
+                                if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.;
+                                if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.;
+
+                                for ( int j = 0; j < nb_all * g.size(); j++ ) {
+                                    gp[j]   = 0.;
+                                    rgp1[j] = 0.;
+                                    rgp2[j] = 0.;
+                                }
+                                for ( int j = 0; j < g.size(); j++ ) {
+                                    rgp_analytic[j] = 0.;
+                                }
+
+                                spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(),
+                                                                  rgp_analytic.data(), ivar_in, ivar_out );
+
+                                EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
+                                                                       div.data(), rgp1.data() ) );
+
+                                EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
+                                                                       div.data(), rgp2.data() ) );
+
+                                int pos = ( ivar_out * nb_vordiv + jfld );
+
+                                double rms_gen1 =
+                                    compute_rms( g.size(), rgp1.data() + pos * g.size(), rgp_analytic.data() );
+
+                                double rms_gen2 =
+                                    compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() );
+
+                                rav1 += rms_gen1;
+                                rav2 += rms_gen2;
+                                if ( !( rms_gen1 < tolerance ) || !( rms_gen2 < tolerance ) ) {
+                                    Log::info()
+                                        << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
+                                        << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
+                                    ATLAS_DEBUG_VAR( rms_gen1 );
+                                    ATLAS_DEBUG_VAR( rms_gen2 );
+                                    ATLAS_DEBUG_VAR( tolerance );
+                                }
+                                EXPECT( rms_gen1 < tolerance );
+                                EXPECT( rms_gen2 < tolerance );
+                                icase++;
+                                auto end                                      = std::chrono::system_clock::now();  //
+                                std::chrono::duration<double> elapsed_seconds = end - start;
+                                std::time_t end_time = std::chrono::system_clock::to_time_t( end );
+                                std::string time_str = std::ctime( &end_time );
+                                Log::info() << "case " << icase << ", elapsed time: " << elapsed_seconds.count()
+                                            << "s. Now: " << time_str.substr( 0, time_str.length() - 1 ) << std::endl;
+                            }
+                            k++;
+                        }
+                    }
+                }
+            }
+        }
+    }
+    Log::info() << "Vordiv+scalar comparison with trans: all " << icase << " cases successfully passed!" << std::endl;
+    rav1 /= icase;
+    Log::info() << "average RMS error of transLocal1: " << rav1 << std::endl;
+    rav2 /= icase;
+    Log::info() << "average RMS error of transLocal2: " << rav2 << std::endl;
+}
+#endif
+//-----------------------------------------------------------------------------
+#if 0
 CASE( "test_trans_invtrans" ) {
     trans::Trans trans( Grid( "O64" ), 63, util::Config( "type", "local" ) );
 

From a82999018a92f0ad498b5450ec7b383f4ef3b2c0 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Mon, 26 Mar 2018 18:29:16 +0100
Subject: [PATCH 034/123] localopt3 can now handle arbitrary domains with
 regular grids; FFT still requires zonal band

---
 src/atlas/trans/localopt3/TransLocalopt3.cc | 116 ++++++++++++--------
 src/atlas/trans/localopt3/TransLocalopt3.h  |   3 +
 src/tests/trans/test_transgeneral.cc        |  16 +--
 3 files changed, 82 insertions(+), 53 deletions(-)

diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index d595f500a..d57b2810d 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -98,25 +98,52 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
 #endif
     int nlats   = 0;
     int nlons   = 0;
+    int neqtr   = 0;
     int nlatsNH = nlats_northernHemisphere( nlats );
+    nlatsNH_    = 0;
+    nlatsSH_    = 0;
+    nlatsLeg_   = 0;
+    double sign = 1.;
     if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
         grid::StructuredGrid g( grid_ );
-        nlats   = g.ny();
-        nlons   = g.nxmax();
-        nlatsNH = nlats_northernHemisphere( nlats );
+        nlats = g.ny();
+        nlons = g.nxmax();
+        for ( size_t j = 0; j < nlats; ++j ) {
+            // assumptions: latitudes in g.y(j) are monotone and decreasing
+            // no assumption on whether we have 0, 1 or 2 latitudes at the equator
+            double lat = g.y( j );
+            if ( lat > 0. ) { nlatsNH_++; }
+            if ( lat == 0. ) { neqtr++; }
+            if ( lat < 0. ) { nlatsSH_++; }
+        }
+        if ( neqtr > 0 ) {
+            nlatsNH_++;
+            nlatsSH_++;
+        }
+        if ( nlatsNH_ >= nlatsSH_ ) { nlatsLeg_ = nlatsNH_; }
+        else {
+            nlatsLeg_ = nlatsSH_;
+        }
     }
     else {
-        nlats   = grid_.size();
-        nlons   = grid_.size();
-        nlatsNH = nlats;
+        nlats     = grid_.size();
+        nlons     = grid_.size();
+        nlatsNH_  = nlats;
+        nlatsLeg_ = nlats;
     }
-    std::vector<double> lats( nlatsNH );
+    std::vector<double> lats( nlatsLeg_ );
     std::vector<double> lons( nlons );
     if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
         grid::StructuredGrid g( grid_ );
-        // TODO: remove legendre_begin and legendre_data (only legendre_ should be needed)
-        for ( size_t j = 0; j < nlatsNH; ++j ) {
-            lats[j] = g.y( j ) * util::Constants::degreesToRadians();
+        if ( nlatsNH_ >= nlatsSH_ ) {
+            for ( size_t j = 0; j < nlatsLeg_; ++j ) {
+                lats[j] = g.y( j ) * util::Constants::degreesToRadians();
+            }
+        }
+        else {
+            for ( size_t j = nlats - 1, idx = 0; idx < nlatsLeg_; --j, ++idx ) {
+                lats[idx] = -g.y( j ) * util::Constants::degreesToRadians();
+            }
         }
         for ( size_t j = 0; j < nlons; ++j ) {
             lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians();
@@ -139,14 +166,14 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         legendre_sym_begin_[0]  = 0;
         legendre_asym_begin_[0] = 0;
         for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-            size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsNH );
-            size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsNH );
+            size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ );
+            size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ );
             legendre_sym_begin_[jm + 1]  = size_sym;
             legendre_asym_begin_[jm + 1] = size_asym;
         }
         alloc_aligned( legendre_sym_, size_sym );
         alloc_aligned( legendre_asym_, size_asym );
-        compute_legendre_polynomialsopt3( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_,
+        compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_,
                                           legendre_sym_begin_.data(), legendre_asym_begin_.data() );
     }
 
@@ -162,6 +189,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
     }
 #else
     {
+        // todo: only compute fourier_ if needed
         ATLAS_TRACE( "opt3 precomp Fourier" );
         alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons );
         int idx = 0;
@@ -175,6 +203,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         }
     }
     {
+        // todo: only compute fouriertp_ if needed
         ATLAS_TRACE( "opt3 precomp Fourier tp" );
         alloc_aligned( fouriertp_, 2 * ( truncation_ + 1 ) * nlons );
         int idx = 0;
@@ -190,7 +219,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         }
     }
 #endif
-}  // namespace atlas
+}  // namespace trans
 
 // --------------------------------------------------------------------------------------------------------------------
 
@@ -280,7 +309,6 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
             ATLAS_TRACE( "invtrans_uv structured opt3" );
             int nlats    = g.ny();
             int nlons    = g.nxmax();
-            int nlatsNH  = nlats_northernHemisphere( nlats );
             auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) {
                 return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
             };
@@ -305,7 +333,10 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                     int size_asym = num_n( truncation_ + 1, jm, false );
                     int n_imag    = 2;
                     if ( jm == 0 ) { n_imag = 1; }
-                    int size_fourier = nb_fields * n_imag * nlatsNH;
+                    int size_fourier = nb_fields * n_imag * nlatsLeg_;
+                    auto posFourier  = [&]( int jfld, int imag, int jlat, int jm, int nlatsH ) {
+                        return jfld + nb_fields * ( imag + n_imag * ( nlatsLeg_ - nlatsH + jlat ) );
+                    };
                     double* scalar_sym;
                     double* scalar_asym;
                     double* scl_fourier_sym;
@@ -339,34 +370,34 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                     }
                     {
                         eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym );
-                        eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsNH );
-                        eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsNH );
+                        eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsLeg_ );
+                        eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLeg_ );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
                     if ( size_asym > 0 ) {
                         eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym );
-                        eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsNH );
-                        eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH );
+                        eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsLeg_ );
+                        eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
 #if 0  //ATLAS_HAVE_FFTW
                     {
                         //ATLAS_TRACE( "opt3 merge spheres" );
                         // northern hemisphere:
-                        int idx = 0;
-                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                        for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) {
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                    int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ );
                                     scl_fourier[posFFTW( jfld, imag, jlat, jm )] =
                                         scl_fourier_sym[idx] + scl_fourier_asym[idx];
                                 }
                             }
                         }
                         // southern hemisphere:
-                        idx = 0;
-                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                        for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) {
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                    int idx   = posFourier( jfld, imag, jlat, jm, nlatsSH_ );
                                     int jslat = nlats - jlat - 1;
                                     scl_fourier[posFFTW( jfld, imag, jslat, jm )] =
                                         scl_fourier_sym[idx] - scl_fourier_asym[idx];
@@ -407,24 +438,24 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                 }
             }
 #else
-#if 1  // 1: better for small number of columns, large truncation; 0: better for large number of columns
+#if 0  // 1: better for small number of columns, large truncation; 0: better for large number of columns
                     {
                         //ATLAS_TRACE( "opt3 merge spheres" );
                         // northern hemisphere:
-                        int idx = 0;
-                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                        for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) {
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                    int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ );
                                     scl_fourier[posGemm1( jfld, imag, jlat, jm )] =
                                         scl_fourier_sym[idx] + scl_fourier_asym[idx];
                                 }
                             }
                         }
                         // southern hemisphere:
-                        idx = 0;
-                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
+                        for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) {
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                    int idx   = posFourier( jfld, imag, jlat, jm, nlatsSH_ );
                                     int jslat = nlats - jlat - 1;
                                     scl_fourier[posGemm1( jfld, imag, jslat, jm )] =
                                         scl_fourier_sym[idx] - scl_fourier_asym[idx];
@@ -469,27 +500,20 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                     {
                         //ATLAS_TRACE( "opt3 merge spheres" );
                         // northern hemisphere:
-                        int ioff = jm * size_fourier_max;
-                        int pos0 = ioff;
-                        int idx  = 0;
-                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                            int poslat = pos0 + 2 * jlat;
+                        for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) {
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                int posimag = nb_fields * ( imag + poslat );
-                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                    int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ );
                                     scl_fourier[posGemm2( jfld, imag, jlat, jm )] =
                                         scl_fourier_sym[idx] + scl_fourier_asym[idx];
                                 }
                             }
                         }
                         // southern hemisphere:
-                        idx  = 0;
-                        pos0 = 2 * ( nlats - 1 ) + ioff;
-                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                            int poslat = pos0 - 2 * jlat;
+                        for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) {
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                int posimag = nb_fields * ( imag + poslat );
-                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                    int idx   = posFourier( jfld, imag, jlat, jm, nlatsSH_ );
                                     int jslat = nlats - jlat - 1;
                                     scl_fourier[posGemm2( jfld, imag, jslat, jm )] =
                                         scl_fourier_sym[idx] - scl_fourier_asym[idx];
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h
index 3673cd07c..286ebc9d5 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.h
+++ b/src/atlas/trans/localopt3/TransLocalopt3.h
@@ -112,6 +112,9 @@ class TransLocalopt3 : public trans::TransImpl {
 private:
     Grid grid_;
     int truncation_;
+    int nlatsNH_;
+    int nlatsSH_;
+    int nlatsLeg_;
     bool precompute_;
     double* legendre_sym_;
     double* legendre_asym_;
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 8a1034b86..6b556f663 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -704,7 +704,7 @@ CASE( "test_transgeneral_with_translib" ) {
 #endif
 #endif
 //-----------------------------------------------------------------------------
-#if 1
+#if 0
 CASE( "test_trans_vordiv_with_translib" ) {
     Log::info() << "test_trans_vordiv_with_translib" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -931,7 +931,7 @@ CASE( "test_trans_hires" ) {
 }
 #endif
 //-----------------------------------------------------------------------------
-#if 0
+#if 1
 CASE( "test_trans_domain" ) {
     Log::info() << "test_trans_domain" << std::endl;
     // test transgeneral by comparing with analytic solution on a cropped domain
@@ -941,9 +941,11 @@ CASE( "test_trans_domain" ) {
 
     //Domain testdomain = ZonalBandDomain( {-90., 90.} );
     //Domain testdomain = ZonalBandDomain( {-.5, .5} );
-    Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} );
+    //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} );
+    //Domain testdomain = ZonalBandDomain( {-85., -86.} );
+    Domain testdomain = RectangularDomain( {-5., 10.}, {5., 6.} );
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F8000", testdomain );
+    Grid g( "F1280", testdomain );
     Grid g_global( g.name() );
 
     grid::StructuredGrid gs( g );
@@ -988,7 +990,7 @@ CASE( "test_trans_domain" ) {
                         for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
 
                             if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. &&
-                                 icase < 1 ) {
+                                 icase < 1000 ) {
                                 auto start = std::chrono::system_clock::now();
                                 for ( int j = 0; j < 2 * N * nb_scalar; j++ ) {
                                     sp[j] = 0.;
@@ -1044,8 +1046,8 @@ CASE( "test_trans_domain" ) {
                                 std::chrono::duration<double> elapsed_seconds = end - start;
                                 std::time_t end_time = std::chrono::system_clock::to_time_t( end );
                                 std::string time_str = std::ctime( &end_time );
-                                Log::info() << "case " << icase << ", elapsed time: " << elapsed_seconds.count()
-                                            << "s. Now: " << time_str.substr( 0, time_str.length() - 1 ) << std::endl;
+                                //Log::info() << "case " << icase << ", elapsed time: " << elapsed_seconds.count()
+                                //            << "s. Now: " << time_str.substr( 0, time_str.length() - 1 ) << std::endl;
                             }
                             k++;
                         }

From d253ac929e5630c9d0bf2acda153a503d0fd1c5b Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Mon, 26 Mar 2018 19:15:57 +0100
Subject: [PATCH 035/123] FFT is now used for zonal bands and dgemm for smaller
 domains

---
 src/atlas/trans/localopt3/TransLocalopt3.cc | 548 +++++++++-----------
 src/atlas/trans/localopt3/TransLocalopt3.h  |   2 +
 2 files changed, 254 insertions(+), 296 deletions(-)

diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index d57b2810d..796f627ac 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -96,14 +96,15 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
 #else
     eckit::linalg::LinearAlgebra::backend( "generic" );  // might want to choose backend with this command
 #endif
-    int nlats   = 0;
-    int nlons   = 0;
-    int neqtr   = 0;
-    int nlatsNH = nlats_northernHemisphere( nlats );
-    nlatsNH_    = 0;
-    nlatsSH_    = 0;
-    nlatsLeg_   = 0;
-    double sign = 1.;
+    int nlats     = 0;
+    int nlons     = 0;
+    int neqtr     = 0;
+    int nlatsNH   = nlats_northernHemisphere( nlats );
+    useFFT_       = true;
+    dgemmMethod1_ = false;
+    nlatsNH_      = 0;
+    nlatsSH_      = 0;
+    nlatsLeg_     = 0;
     if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
         grid::StructuredGrid g( grid_ );
         nlats = g.ny();
@@ -124,8 +125,12 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         else {
             nlatsLeg_ = nlatsSH_;
         }
+        Grid g_global( grid.name() );
+        grid::StructuredGrid gs_global( g_global );
+        if ( nlons < 1.0 * gs_global.nxmax() ) { useFFT_ = false; }
     }
     else {
+        useFFT_   = false;
         nlats     = grid_.size();
         nlons     = grid_.size();
         nlatsNH_  = nlats;
@@ -177,48 +182,55 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
                                           legendre_sym_begin_.data(), legendre_asym_begin_.data() );
     }
 
-        // precomputations for Fourier transformations:
-#if 0  //ATLAS_HAVE_FFTW
-    {
-        ATLAS_TRACE( "opt3 precomp FFTW" );
-        int num_complex = ( nlons / 2 ) + 1;
-        fft_in_         = fftw_alloc_complex( nlats * num_complex );
-        fft_out_        = fftw_alloc_real( nlats * nlons );
-        plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons,
-                                        FFTW_ESTIMATE );
-    }
+    // precomputations for Fourier transformations:
+    if ( useFFT_ ) {
+#if ATLAS_HAVE_FFTW
+        {
+            ATLAS_TRACE( "opt3 precomp FFTW" );
+            int num_complex = ( nlons / 2 ) + 1;
+            fft_in_         = fftw_alloc_complex( nlats * num_complex );
+            fft_out_        = fftw_alloc_real( nlats * nlons );
+            plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons,
+                                            FFTW_ESTIMATE );
+        }
+            // other FFT implementations should be added with #elif statements
 #else
-    {
-        // todo: only compute fourier_ if needed
-        ATLAS_TRACE( "opt3 precomp Fourier" );
+        useFFT_ = false;                                 // no FFT implemented => default to dgemm
+#endif
+    }
+    if ( !useFFT_ ) {
         alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons );
-        int idx = 0;
-        for ( int jlon = 0; jlon < nlons; jlon++ ) {
-            double factor = 1.;
-            for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                if ( jm > 0 ) { factor = 2.; }
-                fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
-                fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
+        if ( dgemmMethod1_ ) {
+            {
+                ATLAS_TRACE( "opt3 precomp Fourier" );
+                int idx = 0;
+                for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                    double factor = 1.;
+                    for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                        if ( jm > 0 ) { factor = 2.; }
+                        fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
+                        fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
+                    }
+                }
             }
         }
-    }
-    {
-        // todo: only compute fouriertp_ if needed
-        ATLAS_TRACE( "opt3 precomp Fourier tp" );
-        alloc_aligned( fouriertp_, 2 * ( truncation_ + 1 ) * nlons );
-        int idx = 0;
-        for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-            double factor = 1.;
-            if ( jm > 0 ) { factor = 2.; }
-            for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                fouriertp_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
-            }
-            for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                fouriertp_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
+        else {
+            {
+                ATLAS_TRACE( "opt3 precomp Fourier tp" );
+                int idx = 0;
+                for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                    double factor = 1.;
+                    if ( jm > 0 ) { factor = 2.; }
+                    for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                        fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
+                    }
+                    for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                        fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
+                    }
+                }
             }
         }
     }
-#endif
 }  // namespace trans
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -231,14 +243,16 @@ TransLocalopt3::TransLocalopt3( const Grid& grid, const long truncation, const e
 TransLocalopt3::~TransLocalopt3() {
     free_aligned( legendre_sym_ );
     free_aligned( legendre_asym_ );
-#if 0  //ATLAS_HAVE_FFTW
-    fftw_destroy_plan( plan_ );
-    fftw_free( fft_in_ );
-    fftw_free( fft_out_ );
-#else
-    free_aligned( fourier_ );
-    free_aligned( fouriertp_ );
+    if ( useFFT_ ) {
+#if ATLAS_HAVE_FFTW
+        fftw_destroy_plan( plan_ );
+        fftw_free( fft_in_ );
+        fftw_free( fft_out_ );
 #endif
+    }
+    else {
+        free_aligned( fourier_ );
+    }
 }
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -321,6 +335,8 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
             auto posGemm2 = [&]( int jfld, int imag, int jlat, int jm ) {
                 return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
             };
+            auto posMethod = posGemm2;
+            if ( useFFT_ ) { auto posMethod = posFFTW; }
             int size_fourier_max = nb_fields * 2 * nlats;
             double* scl_fourier;
             alloc_aligned( scl_fourier, size_fourier_max * ( truncation + 1 ) );
@@ -380,7 +396,6 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                         eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
-#if 0  //ATLAS_HAVE_FFTW
                     {
                         //ATLAS_TRACE( "opt3 merge spheres" );
                         // northern hemisphere:
@@ -388,7 +403,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                             for ( int imag = 0; imag < n_imag; imag++ ) {
                                 for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                                     int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ );
-                                    scl_fourier[posFFTW( jfld, imag, jlat, jm )] =
+                                    scl_fourier[posMethod( jfld, imag, jlat, jm )] =
                                         scl_fourier_sym[idx] + scl_fourier_asym[idx];
                                 }
                             }
@@ -399,7 +414,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                                 for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                                     int idx   = posFourier( jfld, imag, jlat, jm, nlatsSH_ );
                                     int jslat = nlats - jlat - 1;
-                                    scl_fourier[posFFTW( jfld, imag, jslat, jm )] =
+                                    scl_fourier[posMethod( jfld, imag, jslat, jm )] =
                                         scl_fourier_sym[idx] - scl_fourier_asym[idx];
                                 }
                             }
@@ -411,296 +426,237 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                     free_aligned( scl_fourier_asym );
                 }
             }
-            {
-                int num_complex = ( nlons / 2 ) + 1;
+            // Fourier transformation:
+            if ( useFFT_ ) {
+#if ATLAS_HAVE_FFTW
                 {
-                    ATLAS_TRACE( "opt3 FFTW" );
-                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                        int idx = 0;
-                        for ( int jlat = 0; jlat < nlats; jlat++ ) {
-                            fft_in_[idx++][0] = scl_fourier[posFFTW( jfld, 0, jlat, 0 )];
-                            for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
-                                for ( int imag = 0; imag < 2; imag++ ) {
-                                    if ( jm <= truncation_ ) {
-                                        fft_in_[idx][imag] = scl_fourier[posFFTW( jfld, imag, jlat, jm )];
-                                    }
-                                    else {
-                                        fft_in_[idx][imag] = 0.;
+                    int num_complex = ( nlons / 2 ) + 1;
+                    {
+                        ATLAS_TRACE( "opt3 FFTW" );
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                            int idx = 0;
+                            for ( int jlat = 0; jlat < nlats; jlat++ ) {
+                                fft_in_[idx++][0] = scl_fourier[posFFTW( jfld, 0, jlat, 0 )];
+                                for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
+                                    for ( int imag = 0; imag < 2; imag++ ) {
+                                        if ( jm <= truncation_ ) {
+                                            fft_in_[idx][imag] = scl_fourier[posFFTW( jfld, imag, jlat, jm )];
+                                        }
+                                        else {
+                                            fft_in_[idx][imag] = 0.;
+                                        }
                                     }
                                 }
                             }
-                        }
-                        fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ );
-                        for ( int j = 0; j < nlats * nlons; j++ ) {
-                            gp_fields[j + jfld * nlats * nlons] = fft_out_[j];
+                            fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ );
+                            for ( int j = 0; j < nlats * nlons; j++ ) {
+                                gp_fields[j + jfld * nlats * nlons] = fft_out_[j];
+                            }
                         }
                     }
                 }
+#endif
             }
-#else
-#if 0  // 1: better for small number of columns, large truncation; 0: better for large number of columns
+            else {
+                if ( dgemmMethod1_ ) {
+                    // dgemm-method 1
+                    double* gp_opt3;
+                    alloc_aligned( gp_opt3, nb_fields * grid_.size() );
                     {
-                        //ATLAS_TRACE( "opt3 merge spheres" );
-                        // northern hemisphere:
-                        for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) {
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                    int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ );
-                                    scl_fourier[posGemm1( jfld, imag, jlat, jm )] =
-                                        scl_fourier_sym[idx] + scl_fourier_asym[idx];
-                                }
-                            }
-                        }
-                        // southern hemisphere:
-                        for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) {
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                    int idx   = posFourier( jfld, imag, jlat, jm, nlatsSH_ );
-                                    int jslat = nlats - jlat - 1;
-                                    scl_fourier[posGemm1( jfld, imag, jslat, jm )] =
-                                        scl_fourier_sym[idx] - scl_fourier_asym[idx];
-                                }
-                            }
-                        }
+                        ATLAS_TRACE( "opt3 Fourier dgemm" );
+                        eckit::linalg::Matrix A( scl_fourier, nb_fields * g.ny(), ( truncation_ + 1 ) * 2 );
+                        eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, g.nxmax() );
+                        eckit::linalg::Matrix C( gp_opt3, nb_fields * g.ny(), g.nxmax() );
+                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
-                    free_aligned( scalar_sym );
-                    free_aligned( scalar_asym );
-                    free_aligned( scl_fourier_sym );
-                    free_aligned( scl_fourier_asym );
-                }
-            }
 
-            // Fourier transformation:
-            double* gp_opt3;
-            alloc_aligned( gp_opt3, nb_fields * grid_.size() );
-            {
-                ATLAS_TRACE( "opt3 Fourier dgemm" );
-                eckit::linalg::Matrix A( scl_fourier, nb_fields * g.ny(), ( truncation_ + 1 ) * 2 );
-                eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, g.nxmax() );
-                eckit::linalg::Matrix C( gp_opt3, nb_fields * g.ny(), g.nxmax() );
-                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-            }
-
-            // Transposition in grid point space:
-            {
-                ATLAS_TRACE( "opt3 transposition in gp-space" );
-                int idx = 0;
-                for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
-                    for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                            int pos_tp = jlon + g.nxmax() * ( jlat + g.ny() * ( jfld ) );
-                            //int pos  = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) );
-                            gp_fields[pos_tp] = gp_opt3[idx++];  // = gp_opt3[pos]
-                        }
-                    }
-                }
-            }
-            free_aligned( gp_opt3 );
-#else
+                    // Transposition in grid point space:
                     {
-                        //ATLAS_TRACE( "opt3 merge spheres" );
-                        // northern hemisphere:
-                        for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) {
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                    int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ );
-                                    scl_fourier[posGemm2( jfld, imag, jlat, jm )] =
-                                        scl_fourier_sym[idx] + scl_fourier_asym[idx];
-                                }
-                            }
-                        }
-                        // southern hemisphere:
-                        for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) {
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                        ATLAS_TRACE( "opt3 transposition in gp-space" );
+                        int idx = 0;
+                        for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
+                            for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
                                 for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                    int idx   = posFourier( jfld, imag, jlat, jm, nlatsSH_ );
-                                    int jslat = nlats - jlat - 1;
-                                    scl_fourier[posGemm2( jfld, imag, jslat, jm )] =
-                                        scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                                    int pos_tp = jlon + g.nxmax() * ( jlat + g.ny() * ( jfld ) );
+                                    //int pos  = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) );
+                                    gp_fields[pos_tp] = gp_opt3[idx++];  // = gp_opt3[pos]
                                 }
                             }
                         }
                     }
-                    free_aligned( scalar_sym );
-                    free_aligned( scalar_asym );
-                    free_aligned( scl_fourier_sym );
-                    free_aligned( scl_fourier_asym );
+                    free_aligned( gp_opt3 );
                 }
-            }
-
-            // Fourier transformation:
-            {
-                ATLAS_TRACE( "opt3 Fourier dgemm" );
-                eckit::linalg::Matrix A( fouriertp_, g.nxmax(), ( truncation_ + 1 ) * 2 );
-                eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * g.ny() );
-                eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() );
-                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-            }
-
-#endif
-#endif
-                    // Computing u,v from U,V:
+                else {
+                    // dgemm-method 2
                     {
-                        if ( nb_vordiv_fields > 0 ) {
-                            ATLAS_TRACE( "opt3 u,v from U,V" );
-                            std::vector<double> coslats( nlats );
-                            for ( size_t j = 0; j < nlats; ++j ) {
-                                coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
-                            }
-                            int idx = 0;
-                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                                    for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
-                                        gp_fields[idx] /= coslats[jlat];
-                                        idx++;
-                                    }
-                                }
-                            }
-                        }
+                        ATLAS_TRACE( "opt3 Fourier dgemm" );
+                        eckit::linalg::Matrix A( fourier_, g.nxmax(), ( truncation_ + 1 ) * 2 );
+                        eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * g.ny() );
+                        eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() );
+                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
-                    free_aligned( scl_fourier );
                 }
-                else {
-                    ATLAS_TRACE( "invtrans_uv unstructured opt3" );
+            }  // namespace trans
+            // Computing u,v from U,V:
+            {
+                if ( nb_vordiv_fields > 0 ) {
+                    ATLAS_TRACE( "opt3 u,v from U,V" );
+                    std::vector<double> coslats( nlats );
+                    for ( size_t j = 0; j < nlats; ++j ) {
+                        coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
+                    }
                     int idx = 0;
-                    for ( PointXY p : grid_.xy() ) {
-                        double lon   = p.x() * util::Constants::degreesToRadians();
-                        double lat   = p.y() * util::Constants::degreesToRadians();
-                        double trcFT = truncation;
-
-                        // Legendre transform:
-                        //invtrans_legendreopt3( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
-                        //                      legReal.data(), legImag.data() );
-
-                        // Fourier transform:
-                        //invtrans_fourieropt3( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
-                        //                     gp_tmp.data() + ( nb_fields * idx ) );
-                        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-                            //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                            for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
+                                gp_fields[idx] /= coslats[jlat];
+                                idx++;
+                            }
                         }
-                        ++idx;
                     }
                 }
             }
-        }  // namespace trans
+            free_aligned( scl_fourier );
+        }  // namespace atlas
+        else {
+            ATLAS_TRACE( "invtrans_uv unstructured opt3" );
+            int idx = 0;
+            for ( PointXY p : grid_.xy() ) {
+                double lon   = p.x() * util::Constants::degreesToRadians();
+                double lat   = p.y() * util::Constants::degreesToRadians();
+                double trcFT = truncation;
+
+                // Legendre transform:
+                //invtrans_legendreopt3( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
+                //                      legReal.data(), legImag.data() );
+
+                // Fourier transform:
+                //invtrans_fourieropt3( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
+                //                     gp_tmp.data() + ( nb_fields * idx ) );
+                for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+                    //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                }
+                ++idx;
+            }
+        }
+    }  // namespace trans
+}  // namespace atlas
 
-        // --------------------------------------------------------------------------------------------------------------------
+// --------------------------------------------------------------------------------------------------------------------
 
-        void TransLocalopt3::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
-                                       const double divergence_spectra[], double gp_fields[],
-                                       const eckit::Configuration& config ) const {
-            invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
-        }
+void TransLocalopt3::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
+                               const double divergence_spectra[], double gp_fields[],
+                               const eckit::Configuration& config ) const {
+    invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
+}
 
-        void extend_truncationopt3( const int old_truncation, const int nb_fields, const double old_spectra[],
-                                    double new_spectra[] ) {
-            int k = 0, k_old = 0;
-            for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
-                for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
-                    for ( int imag = 0; imag < 2; imag++ ) {              // imaginary/real part
-                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {  // field
-                            if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; }
-                            else {
-                                new_spectra[k++] = old_spectra[k_old++];
-                            }
-                        }
+void extend_truncationopt3( const int old_truncation, const int nb_fields, const double old_spectra[],
+                            double new_spectra[] ) {
+    int k = 0, k_old = 0;
+    for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
+        for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
+            for ( int imag = 0; imag < 2; imag++ ) {              // imaginary/real part
+                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {  // field
+                    if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; }
+                    else {
+                        new_spectra[k++] = old_spectra[k_old++];
                     }
                 }
             }
         }
+    }
+}
 
-        // --------------------------------------------------------------------------------------------------------------------
-
-        void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[],
-                                       const int nb_vordiv_fields, const double vorticity_spectra[],
-                                       const double divergence_spectra[], double gp_fields[],
-                                       const eckit::Configuration& config ) const {
-            ATLAS_TRACE( "TransLocalopt3::invtrans" );
-            int nb_gp              = grid_.size();
-            int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
-            if ( nb_vordiv_fields > 0 ) {
-                std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
-                std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
-                std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
-                std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
-
-                {
-                    ATLAS_TRACE( "opt3 extend vordiv" );
-                    // increase truncation in vorticity_spectra and divergence_spectra:
-                    extend_truncationopt3( truncation_, nb_vordiv_fields, vorticity_spectra,
-                                           vorticity_spectra_extended.data() );
-                    extend_truncationopt3( truncation_, nb_vordiv_fields, divergence_spectra,
-                                           divergence_spectra_extended.data() );
-                }
+// --------------------------------------------------------------------------------------------------------------------
 
-                {
-                    ATLAS_TRACE( "vordiv to UV opt3" );
-                    // call vd2uv to compute u and v in spectral space
-                    trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt3" ) );
-                    vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
-                                              divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
-                }
+void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
+                               const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
+                               const eckit::Configuration& config ) const {
+    ATLAS_TRACE( "TransLocalopt3::invtrans" );
+    int nb_gp              = grid_.size();
+    int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
+    if ( nb_vordiv_fields > 0 ) {
+        std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
+        std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
+        std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
+        std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
+
+        {
+            ATLAS_TRACE( "opt3 extend vordiv" );
+            // increase truncation in vorticity_spectra and divergence_spectra:
+            extend_truncationopt3( truncation_, nb_vordiv_fields, vorticity_spectra,
+                                   vorticity_spectra_extended.data() );
+            extend_truncationopt3( truncation_, nb_vordiv_fields, divergence_spectra,
+                                   divergence_spectra_extended.data() );
+        }
 
-                // perform spectral transform to compute all fields in grid point space
-                invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
-                invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
-                             gp_fields + nb_gp * nb_vordiv_fields, config );
-            }
-            if ( nb_scalar_fields > 0 ) {
-                int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
-                std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
-                extend_truncationopt3( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
-                invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
-                             gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
-            }
+        {
+            ATLAS_TRACE( "vordiv to UV opt3" );
+            // call vd2uv to compute u and v in spectral space
+            trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt3" ) );
+            vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
+                                      divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
         }
 
-        // --------------------------------------------------------------------------------------------------------------------
+        // perform spectral transform to compute all fields in grid point space
+        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
+        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
+                     gp_fields + nb_gp * nb_vordiv_fields, config );
+    }
+    if ( nb_scalar_fields > 0 ) {
+        int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
+        std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
+        extend_truncationopt3( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
+        invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
+                     gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
+    }
+}
 
-        void TransLocalopt3::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config )
-            const {
-            NOTIMP;
-            // Not implemented and not planned.
-            // Use the TransIFS implementation instead.
-        }
+// --------------------------------------------------------------------------------------------------------------------
 
-        // --------------------------------------------------------------------------------------------------------------------
+void TransLocalopt3::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
 
-        void TransLocalopt3::dirtrans( const FieldSet& gpfields, FieldSet& spfields,
-                                       const eckit::Configuration& config ) const {
-            NOTIMP;
-            // Not implemented and not planned.
-            // Use the TransIFS implementation instead.
-        }
+// --------------------------------------------------------------------------------------------------------------------
 
-        // --------------------------------------------------------------------------------------------------------------------
+void TransLocalopt3::dirtrans( const FieldSet& gpfields, FieldSet& spfields,
+                               const eckit::Configuration& config ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
 
-        void TransLocalopt3::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
-                                                   const eckit::Configuration& config ) const {
-            NOTIMP;
-            // Not implemented and not planned.
-            // Use the TransIFS implementation instead.
-        }
+// --------------------------------------------------------------------------------------------------------------------
 
-        // --------------------------------------------------------------------------------------------------------------------
+void TransLocalopt3::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
+                                           const eckit::Configuration& config ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
 
-        void TransLocalopt3::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
-                                       const eckit::Configuration& ) const {
-            NOTIMP;
-            // Not implemented and not planned.
-            // Use the TransIFS implementation instead.
-        }
+// --------------------------------------------------------------------------------------------------------------------
 
-        // --------------------------------------------------------------------------------------------------------------------
+void TransLocalopt3::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
+                               const eckit::Configuration& ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
 
-        void TransLocalopt3::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
-                                       double divergence_spectra[], const eckit::Configuration& ) const {
-            NOTIMP;
-            // Not implemented and not planned.
-            // Use the TransIFS implementation instead.
-        }
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt3::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
+                               double divergence_spectra[], const eckit::Configuration& ) const {
+    NOTIMP;
+    // Not implemented and not planned.
+    // Use the TransIFS implementation instead.
+}
 
-        // --------------------------------------------------------------------------------------------------------------------
+// --------------------------------------------------------------------------------------------------------------------
 
-    }  // namespace trans
+}  // namespace trans
 }  // namespace atlas
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h
index 286ebc9d5..54798549a 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.h
+++ b/src/atlas/trans/localopt3/TransLocalopt3.h
@@ -111,6 +111,8 @@ class TransLocalopt3 : public trans::TransImpl {
 
 private:
     Grid grid_;
+    bool useFFT_;
+    bool dgemmMethod1_;
     int truncation_;
     int nlatsNH_;
     int nlatsSH_;

From ec1547eef9259836e04cd601b60d37238e9cafdc Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Tue, 3 Apr 2018 18:10:27 +0100
Subject: [PATCH 036/123] FFT is now used for cropped grids with large regions.
 The variable fft_threshold determines how many longitudes are needed to
 switch to FFT

---
 src/atlas/trans/localopt3/TransLocalopt3.cc | 44 +++++++++++++++------
 src/atlas/trans/localopt3/TransLocalopt3.h  |  2 +
 2 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index 796f627ac..556dc438f 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -127,9 +127,26 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         }
         Grid g_global( grid.name() );
         grid::StructuredGrid gs_global( g_global );
-        if ( nlons < 1.0 * gs_global.nxmax() ) { useFFT_ = false; }
+        nlonsGlobal_  = gs_global.nxmax();
+        jlonMin_      = 0;
+        double lonmin = fmod( g.x( 0, 0 ), 360 );
+        if ( lonmin < 0. ) { lonmin += 360.; }
+        if ( nlons < nlonsGlobal_ ) {
+            double fft_threshold = 0.05;  // fraction of latitudes of the full grid up to which FFT is used.
+            // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine
+            // on which this code is running!
+            if ( nlons < fft_threshold * nlonsGlobal_ ) { useFFT_ = false; }
+            else {
+                // need to use FFT with cropped grid
+                for ( size_t j = 0; j < nlonsGlobal_; ++j ) {
+                    if ( gs_global.x( j, 0 ) == lonmin ) { jlonMin_ = j; }
+                }
+            }
+        }
+        //Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << std::endl;
     }
     else {
+        // unstructured grid
         useFFT_   = false;
         nlats     = grid_.size();
         nlons     = grid_.size();
@@ -187,11 +204,11 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
 #if ATLAS_HAVE_FFTW
         {
             ATLAS_TRACE( "opt3 precomp FFTW" );
-            int num_complex = ( nlons / 2 ) + 1;
+            int num_complex = ( nlonsGlobal_ / 2 ) + 1;
             fft_in_         = fftw_alloc_complex( nlats * num_complex );
-            fft_out_        = fftw_alloc_real( nlats * nlons );
-            plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons,
-                                            FFTW_ESTIMATE );
+            fft_out_        = fftw_alloc_real( nlats * nlonsGlobal_ );
+            plan_ = fftw_plan_many_dft_c2r( 1, &nlonsGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1,
+                                            nlonsGlobal_, FFTW_ESTIMATE );
         }
             // other FFT implementations should be added with #elif statements
 #else
@@ -302,10 +319,8 @@ void gp_transposeopt3( const int nb_size, const int nb_fields, const double gp_t
 }
 
 //-----------------------------------------------------------------------------
-// Routine to compute the spectral transform by using a localopt3 Fourier
-// transformation
-// for a grid (same latitude for all longitudes, allows to compute Legendre
-// functions
+// Routine to compute the spectral transform by using a localopt3 Fourier transformation
+// for a grid (same latitude for all longitudes, allows to compute Legendre functions
 // once for all longitudes). U and v components are divided by cos(latitude) for
 // nb_vordiv_fields > 0.
 //
@@ -430,7 +445,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
             if ( useFFT_ ) {
 #if ATLAS_HAVE_FFTW
                 {
-                    int num_complex = ( nlons / 2 ) + 1;
+                    int num_complex = ( nlonsGlobal_ / 2 ) + 1;
                     {
                         ATLAS_TRACE( "opt3 FFTW" );
                         for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
@@ -449,8 +464,13 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                                 }
                             }
                             fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ );
-                            for ( int j = 0; j < nlats * nlons; j++ ) {
-                                gp_fields[j + jfld * nlats * nlons] = fft_out_[j];
+                            for ( int jlat = 0; jlat < nlats; jlat++ ) {
+                                for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                                    int j = jlon + jlonMin_;
+                                    if ( j >= nlonsGlobal_ ) { j -= nlonsGlobal_; }
+                                    gp_fields[jlon + nlons * ( jlat + nlats * jfld )] =
+                                        fft_out_[j + nlonsGlobal_ * jlat];
+                                }
                             }
                         }
                     }
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h
index 54798549a..ac33f6395 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.h
+++ b/src/atlas/trans/localopt3/TransLocalopt3.h
@@ -117,6 +117,8 @@ class TransLocalopt3 : public trans::TransImpl {
     int nlatsNH_;
     int nlatsSH_;
     int nlatsLeg_;
+    int jlonMin_;
+    int nlonsGlobal_;
     bool precompute_;
     double* legendre_sym_;
     double* legendre_asym_;

From ab6214ad0534900744cfda9976be455a5cf3d9d7 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Tue, 3 Apr 2018 19:12:59 +0100
Subject: [PATCH 037/123] fixed bug with dgemmMethod1 == true

---
 src/atlas/trans/localopt3/TransLocalopt3.cc | 64 ++++++++++-----------
 1 file changed, 29 insertions(+), 35 deletions(-)

diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index 556dc438f..d98bb522f 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -96,12 +96,14 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
 #else
     eckit::linalg::LinearAlgebra::backend( "generic" );  // might want to choose backend with this command
 #endif
+    double fft_threshold = 2.;  // 0.05;  // fraction of latitudes of the full grid up to which FFT is used.
+    // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine
+    // on which this code is running!
     int nlats     = 0;
     int nlons     = 0;
     int neqtr     = 0;
-    int nlatsNH   = nlats_northernHemisphere( nlats );
     useFFT_       = true;
-    dgemmMethod1_ = false;
+    dgemmMethod1_ = true;
     nlatsNH_      = 0;
     nlatsSH_      = 0;
     nlatsLeg_     = 0;
@@ -131,12 +133,9 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         jlonMin_      = 0;
         double lonmin = fmod( g.x( 0, 0 ), 360 );
         if ( lonmin < 0. ) { lonmin += 360.; }
-        if ( nlons < nlonsGlobal_ ) {
-            double fft_threshold = 0.05;  // fraction of latitudes of the full grid up to which FFT is used.
-            // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine
-            // on which this code is running!
-            if ( nlons < fft_threshold * nlonsGlobal_ ) { useFFT_ = false; }
-            else {
+        if ( nlons < fft_threshold * nlonsGlobal_ ) { useFFT_ = false; }
+        else {
+            if ( nlons < nlonsGlobal_ ) {
                 // need to use FFT with cropped grid
                 for ( size_t j = 0; j < nlonsGlobal_; ++j ) {
                     if ( gs_global.x( j, 0 ) == lonmin ) { jlonMin_ = j; }
@@ -336,22 +335,16 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
         // Transform
         if ( grid::StructuredGrid g = grid_ ) {
             ATLAS_TRACE( "invtrans_uv structured opt3" );
-            int nlats    = g.ny();
-            int nlons    = g.nxmax();
-            auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) {
-                return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
-            };
-            /*auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) {
-                return jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) + jm * nb_fields * 2 * nlats );
-            };*/
-            auto posGemm1 = [&]( int jfld, int imag, int jlat, int jm ) {
-                return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) );
-            };
-            auto posGemm2 = [&]( int jfld, int imag, int jlat, int jm ) {
-                return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
+            int nlats      = g.ny();
+            int nlons      = g.nxmax();
+            auto posMethod = [&]( int jfld, int imag, int jlat, int jm ) {
+                if ( useFFT_ || !dgemmMethod1_ ) {
+                    return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
+                }
+                else {
+                    return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) );
+                };
             };
-            auto posMethod = posGemm2;
-            if ( useFFT_ ) { auto posMethod = posFFTW; }
             int size_fourier_max = nb_fields * 2 * nlats;
             double* scl_fourier;
             alloc_aligned( scl_fourier, size_fourier_max * ( truncation + 1 ) );
@@ -451,11 +444,11 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                         for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                             int idx = 0;
                             for ( int jlat = 0; jlat < nlats; jlat++ ) {
-                                fft_in_[idx++][0] = scl_fourier[posFFTW( jfld, 0, jlat, 0 )];
+                                fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0 )];
                                 for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
                                     for ( int imag = 0; imag < 2; imag++ ) {
                                         if ( jm <= truncation_ ) {
-                                            fft_in_[idx][imag] = scl_fourier[posFFTW( jfld, imag, jlat, jm )];
+                                            fft_in_[idx][imag] = scl_fourier[posMethod( jfld, imag, jlat, jm )];
                                         }
                                         else {
                                             fft_in_[idx][imag] = 0.;
@@ -480,13 +473,14 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
             else {
                 if ( dgemmMethod1_ ) {
                     // dgemm-method 1
+                    // should be faster for small domains or large truncation
                     double* gp_opt3;
                     alloc_aligned( gp_opt3, nb_fields * grid_.size() );
                     {
                         ATLAS_TRACE( "opt3 Fourier dgemm" );
-                        eckit::linalg::Matrix A( scl_fourier, nb_fields * g.ny(), ( truncation_ + 1 ) * 2 );
-                        eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, g.nxmax() );
-                        eckit::linalg::Matrix C( gp_opt3, nb_fields * g.ny(), g.nxmax() );
+                        eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 );
+                        eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlons );
+                        eckit::linalg::Matrix C( gp_opt3, nb_fields * nlats, nlons );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
 
@@ -494,11 +488,11 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                     {
                         ATLAS_TRACE( "opt3 transposition in gp-space" );
                         int idx = 0;
-                        for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
-                            for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
+                        for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                            for ( int jlat = 0; jlat < nlats; jlat++ ) {
                                 for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                    int pos_tp = jlon + g.nxmax() * ( jlat + g.ny() * ( jfld ) );
-                                    //int pos  = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) );
+                                    int pos_tp = jlon + nlons * ( jlat + nlats * ( jfld ) );
+                                    //int pos  = jfld + nb_fields * ( jlat + nlats * ( jlon ) );
                                     gp_fields[pos_tp] = gp_opt3[idx++];  // = gp_opt3[pos]
                                 }
                             }
@@ -510,9 +504,9 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                     // dgemm-method 2
                     {
                         ATLAS_TRACE( "opt3 Fourier dgemm" );
-                        eckit::linalg::Matrix A( fourier_, g.nxmax(), ( truncation_ + 1 ) * 2 );
-                        eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * g.ny() );
-                        eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() );
+                        eckit::linalg::Matrix A( fourier_, nlons, ( truncation_ + 1 ) * 2 );
+                        eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats );
+                        eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
                 }

From 23777612fb04f363244b930f256501d5f3125dcf Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Wed, 4 Apr 2018 12:23:19 +0100
Subject: [PATCH 038/123] fixed the truncation parameter of invtrans_uv => no
 extension necessary. Comparing performance between FFT and 2 dgemm methods in
 opt, opt2, opt3

---
 .../trans/localopt/LegendrePolynomialsopt.cc  |  10 +-
 src/atlas/trans/localopt/TransLocalopt.cc     | 450 +++++++++--------
 src/atlas/trans/localopt/TransLocalopt.h      |  15 +
 .../localopt2/LegendrePolynomialsopt2.cc      |  10 +-
 src/atlas/trans/localopt2/TransLocalopt2.cc   | 470 +++++++++---------
 src/atlas/trans/localopt2/TransLocalopt2.h    |   7 +
 src/atlas/trans/localopt3/TransLocalopt3.cc   |  35 +-
 src/tests/trans/test_transgeneral.cc          |  41 +-
 8 files changed, 544 insertions(+), 494 deletions(-)

diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
index 413620301..9c5562fa2 100644
--- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
+++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
@@ -161,12 +161,6 @@ void compute_legendre_polynomialsopt(
         {
             //ATLAS_TRACE( "add to global arrays" );
 
-            // take factor 2 for m > 0 into account:
-            for ( int jm = 1; jm <= trc; ++jm ) {
-                for ( int jn = jm; jn <= trc; ++jn ) {
-                    //legpol[idxmn( jm, jn )] *= 2.;
-                }
-            }
             for ( int jm = 0; jm <= trc; jm++ ) {
                 int is1 = 0, ia1 = 0;
                 for ( int jn = jm; jn <= trc; jn++ ) {
@@ -185,11 +179,11 @@ void compute_legendre_polynomialsopt(
                 //for ( int jn = jm; jn <= trc; jn++ ) {
                 for ( int jn = trc; jn >= jm; jn-- ) {
                     if ( ( jn - jm ) % 2 == 0 ) {
-                        int is      = leg_start_sym[jm] + jlat + nlats * is2++;
+                        int is      = leg_start_sym[jm] + is1 * jlat + is2++;
                         leg_sym[is] = legpol[idxmn( jm, jn )];
                     }
                     else {
-                        int ia       = leg_start_asym[jm] + jlat + nlats * ia2++;
+                        int ia       = leg_start_asym[jm] + ia1 * jlat + ia2++;
                         leg_asym[ia] = legpol[idxmn( jm, jn )];
                     }
                 }
diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index 112b70fb9..48f90d386 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -26,9 +26,6 @@
 #if ATLAS_HAVE_MKL
 #include "mkl.h"
 #endif
-#if ATLAS_HAVE_FFTW
-#include <fftw3.h>
-#endif
 
 namespace atlas {
 namespace trans {
@@ -99,27 +96,75 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
 #else
     eckit::linalg::LinearAlgebra::backend( "generic" );  // might want to choose backend with this command
 #endif
-    int nlats   = 0;
-    int nlons   = 0;
-    int nlatsNH = nlats_northernHemisphere( nlats );
+    double fft_threshold = 0.;  // 0.05;  // fraction of latitudes of the full grid up to which FFT is used.
+    // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine
+    // on which this code is running!
+    int nlats     = 0;
+    int nlons     = 0;
+    int neqtr     = 0;
+    useFFT_       = true;
+    dgemmMethod1_ = true;
+    nlatsNH_      = 0;
+    nlatsSH_      = 0;
+    nlatsLeg_     = 0;
     if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
         grid::StructuredGrid g( grid_ );
-        nlats   = g.ny();
-        nlons   = g.nxmax();
-        nlatsNH = nlats_northernHemisphere( nlats );
+        nlats = g.ny();
+        nlons = g.nxmax();
+        for ( size_t j = 0; j < nlats; ++j ) {
+            // assumptions: latitudes in g.y(j) are monotone and decreasing
+            // no assumption on whether we have 0, 1 or 2 latitudes at the equator
+            double lat = g.y( j );
+            if ( lat > 0. ) { nlatsNH_++; }
+            if ( lat == 0. ) { neqtr++; }
+            if ( lat < 0. ) { nlatsSH_++; }
+        }
+        if ( neqtr > 0 ) {
+            nlatsNH_++;
+            nlatsSH_++;
+        }
+        if ( nlatsNH_ >= nlatsSH_ ) { nlatsLeg_ = nlatsNH_; }
+        else {
+            nlatsLeg_ = nlatsSH_;
+        }
+        Grid g_global( grid.name() );
+        grid::StructuredGrid gs_global( g_global );
+        nlonsGlobal_  = gs_global.nxmax();
+        jlonMin_      = 0;
+        double lonmin = fmod( g.x( 0, 0 ), 360 );
+        if ( lonmin < 0. ) { lonmin += 360.; }
+        if ( nlons < fft_threshold * nlonsGlobal_ ) { useFFT_ = false; }
+        else {
+            if ( nlons < nlonsGlobal_ ) {
+                // need to use FFT with cropped grid
+                for ( size_t j = 0; j < nlonsGlobal_; ++j ) {
+                    if ( gs_global.x( j, 0 ) == lonmin ) { jlonMin_ = j; }
+                }
+            }
+        }
+        //Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << std::endl;
     }
     else {
-        nlats   = grid_.size();
-        nlons   = grid_.size();
-        nlatsNH = nlats;
+        // unstructured grid
+        useFFT_   = false;
+        nlats     = grid_.size();
+        nlons     = grid_.size();
+        nlatsNH_  = nlats;
+        nlatsLeg_ = nlats;
     }
-    std::vector<double> lats( nlatsNH );
+    std::vector<double> lats( nlatsLeg_ );
     std::vector<double> lons( nlons );
     if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
         grid::StructuredGrid g( grid_ );
-        // TODO: remove legendre_begin and legendre_data (only legendre_ should be needed)
-        for ( size_t j = 0; j < nlatsNH; ++j ) {
-            lats[j] = g.y( j ) * util::Constants::degreesToRadians();
+        if ( nlatsNH_ >= nlatsSH_ ) {
+            for ( size_t j = 0; j < nlatsLeg_; ++j ) {
+                lats[j] = g.y( j ) * util::Constants::degreesToRadians();
+            }
+        }
+        else {
+            for ( size_t j = nlats - 1, idx = 0; idx < nlatsLeg_; --j, ++idx ) {
+                lats[idx] = -g.y( j ) * util::Constants::degreesToRadians();
+            }
         }
         for ( size_t j = 0; j < nlons; ++j ) {
             lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians();
@@ -142,45 +187,67 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
         legendre_sym_begin_[0]  = 0;
         legendre_asym_begin_[0] = 0;
         for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-            size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsNH );
-            size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsNH );
+            size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ );
+            size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ );
             legendre_sym_begin_[jm + 1]  = size_sym;
             legendre_asym_begin_[jm + 1] = size_asym;
         }
         alloc_aligned( legendre_sym_, size_sym );
         alloc_aligned( legendre_asym_, size_asym );
-        compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_,
+        compute_legendre_polynomialsopt( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_,
                                          legendre_sym_begin_.data(), legendre_asym_begin_.data() );
     }
 
-        // precomputations for Fourier transformations:
-#if !ATLAS_HAVE_FFTW
-    {
-        ATLAS_TRACE( "opt precomp Fourier" );
-        alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons );
-        int idx = 0;
-        for ( int jlon = 0; jlon < nlons; jlon++ ) {
-            for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                fourier_[idx++] = +std::cos( jm * lons[jlon] );  // real part
-                fourier_[idx++] = -std::sin( jm * lons[jlon] );  // imaginary part
-            }
+    // precomputations for Fourier transformations:
+    if ( useFFT_ ) {
+#if ATLAS_HAVE_FFTW
+        {
+            ATLAS_TRACE( "opt precomp FFTW" );
+            int num_complex = ( nlonsGlobal_ / 2 ) + 1;
+            fft_in_         = fftw_alloc_complex( nlats * num_complex );
+            fft_out_        = fftw_alloc_real( nlats * nlonsGlobal_ );
+            plan_ = fftw_plan_many_dft_c2r( 1, &nlonsGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1,
+                                            nlonsGlobal_, FFTW_ESTIMATE );
         }
+            // other FFT implementations should be added with #elif statements
+#else
+        useFFT_ = false;                                 // no FFT implemented => default to dgemm
+#endif
     }
-    {
-        ATLAS_TRACE( "opt precomp Fourier tp" );
-        alloc_aligned( fouriertp_, 2 * ( truncation_ + 1 ) * nlons );
-        int idx = 0;
-        for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-            for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                fouriertp_[idx++] = +std::cos( jm * lons[jlon] );  // real part
+    if ( !useFFT_ ) {
+        alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons );
+        if ( dgemmMethod1_ ) {
+            {
+                ATLAS_TRACE( "opt precomp Fourier" );
+                int idx = 0;
+                for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                    double factor = 1.;
+                    for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                        if ( jm > 0 ) { factor = 2.; }
+                        fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
+                        fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
+                    }
+                }
             }
-            for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                fouriertp_[idx++] = -std::sin( jm * lons[jlon] );  // imaginary part
+        }
+        else {
+            {
+                ATLAS_TRACE( "opt precomp Fourier tp" );
+                int idx = 0;
+                for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                    double factor = 1.;
+                    if ( jm > 0 ) { factor = 2.; }
+                    for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                        fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
+                    }
+                    for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                        fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
+                    }
+                }
             }
         }
     }
-#endif
-}  // namespace atlas
+}  // namespace trans
 
 // --------------------------------------------------------------------------------------------------------------------
 
@@ -192,10 +259,16 @@ TransLocalopt::TransLocalopt( const Grid& grid, const long truncation, const eck
 TransLocalopt::~TransLocalopt() {
     free_aligned( legendre_sym_ );
     free_aligned( legendre_asym_ );
-#if !ATLAS_HAVE_FFTW
-    free_aligned( fourier_ );
-    free_aligned( fouriertp_ );
+    if ( useFFT_ ) {
+#if ATLAS_HAVE_FFTW
+        fftw_destroy_plan( plan_ );
+        fftw_free( fft_in_ );
+        fftw_free( fft_out_ );
 #endif
+    }
+    else {
+        free_aligned( fourier_ );
+    }
 }
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -244,13 +317,17 @@ void gp_transposeopt( const int nb_size, const int nb_fields, const double gp_tm
 }
 
 //-----------------------------------------------------------------------------
-// Routine to compute the spectral transform by using a localopt Fourier
-// transformation
-// for a grid (same latitude for all longitudes, allows to compute Legendre
-// functions
+// Routine to compute the spectral transform by using a localopt Fourier transformation
+// for a grid (same latitude for all longitudes, allows to compute Legendre functions
 // once for all longitudes). U and v components are divided by cos(latitude) for
 // nb_vordiv_fields > 0.
 //
+// Legendre polynomials are computed up to truncation_+1 to be accurate for vorticity and
+// divergence computation. The parameter truncation is the truncation used in storing the
+// spectral data scalar_spectra and can be different from truncation_. If truncation is
+// larger than truncation_+1 the transform will behave as if the spectral data was truncated
+// to truncation_+1.
+//
 // Author:
 // Andreas Mueller *ECMWF*
 //
@@ -263,39 +340,20 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
         // Transform
         if ( grid::StructuredGrid g = grid_ ) {
             ATLAS_TRACE( "invtrans_uv structured opt" );
-            int nlats    = g.ny();
-            int nlons    = g.nxmax();
-            int nlatsNH  = nlats_northernHemisphere( nlats );
-            auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) {
-                return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
-            };
-            /*auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) {
-                return jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) + jm * nb_fields * 2 * nlats );
-            };*/
-            auto posGemm1 = [&]( int jfld, int imag, int jlat, int jm ) {
-                return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) );
-            };
-            auto posGemm2 = [&]( int jfld, int imag, int jlat, int jm ) {
-                return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
+            int nlats      = g.ny();
+            int nlons      = g.nxmax();
+            auto posMethod = [&]( int jfld, int imag, int jlat, int jm ) {
+                if ( useFFT_ || !dgemmMethod1_ ) {
+                    return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
+                }
+                else {
+                    return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) );
+                };
             };
             int size_fourier_max = nb_fields * 2 * nlats;
             double* scl_fourier;
-            alloc_aligned( scl_fourier, size_fourier_max * ( truncation + 1 ) );
+            alloc_aligned( scl_fourier, size_fourier_max * ( truncation_ + 1 ) );
 
-#if ATLAS_HAVE_FFTW
-            int num_complex      = ( nlons / 2 ) + 1;
-            fftw_complex* fft_in = fftw_alloc_complex( nlats * num_complex * nb_fields );
-            fftw_plan plan       = fftw_plan_many_dft_c2r( 1, &nlons, nlats * nb_fields, fft_in, NULL, 1, num_complex,
-                                                     gp_fields, NULL, 1, nlons, FFTW_ESTIMATE );
-            for ( int j = 0; j < nlats * num_complex * nb_fields; j++ ) {
-                for ( int imag = 0; imag < 2; imag++ ) {
-                    fft_in[j][imag] = 0.;
-                }
-            }
-            auto posFFTWin = [&]( int jfld, int jlat, int jm ) {
-                return jm + num_complex * ( jlat + nlats * ( jfld ) );
-            };
-#endif
             // Legendre transform:
             {
                 ATLAS_TRACE( "opt Legendre dgemm" );
@@ -304,7 +362,10 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                     int size_asym = num_n( truncation_ + 1, jm, false );
                     int n_imag    = 2;
                     if ( jm == 0 ) { n_imag = 1; }
-                    int size_fourier = nb_fields * n_imag * nlatsNH;
+                    int size_fourier = nb_fields * n_imag * nlatsLeg_;
+                    auto posFourier  = [&]( int jfld, int imag, int jlat, int jm, int nlatsH ) {
+                        return jfld + nb_fields * ( imag + n_imag * ( nlatsLeg_ - nlatsH + jlat ) );
+                    };
                     double* scalar_sym;
                     double* scalar_asym;
                     double* scl_fourier_sym;
@@ -316,14 +377,28 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                     {
                         //ATLAS_TRACE( "opt Legendre split" );
                         int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2;
-                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        // the choice between the following two code lines determines whether
+                        // total wavenumbers are summed in an ascending or descending order.
+                        // The trans library in IFS uses descending order because it should
+                        // be more accurate (higher wavenumbers have smaller contributions).
+                        // This also needs to be changed when splitting the spectral data in
+                        // compute_legendre_polynomialsopt!
+                        //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) {
+                        for ( int jn = truncation_ + 1; jn >= jm; jn-- ) {
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { //ascending
-                                for ( int jn = truncation_ + 1; jn >= jm; jn-- ) {  // descending
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                                     idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) );
-                                    if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
+                                    if ( jn <= truncation && jm < truncation ) {
+                                        if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
+                                        else {
+                                            scalar_asym[ia++] = scalar_spectra[idx + ioff];
+                                        }
+                                    }
                                     else {
-                                        scalar_asym[ia++] = scalar_spectra[idx + ioff];
+                                        if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = 0.; }
+                                        else {
+                                            scalar_asym[ia++] = 0.;
+                                        }
                                     }
                                 }
                             }
@@ -331,37 +406,36 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                         ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym );
                     }
                     {
-                        eckit::linalg::Matrix A( legendre_sym_ + legendre_sym_begin_[jm], nlatsNH, size_sym );
-                        eckit::linalg::Matrix B( scalar_sym, size_sym, nb_fields * n_imag );
-                        eckit::linalg::Matrix C( scl_fourier_sym, nlatsNH, nb_fields * n_imag );
+                        eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym );
+                        eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsLeg_ );
+                        eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLeg_ );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
                     if ( size_asym > 0 ) {
-                        eckit::linalg::Matrix A( legendre_asym_ + legendre_asym_begin_[jm], nlatsNH, size_asym );
-                        eckit::linalg::Matrix B( scalar_asym, size_asym, nb_fields * n_imag );
-                        eckit::linalg::Matrix C( scl_fourier_asym, nlatsNH, nb_fields * n_imag );
+                        eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym );
+                        eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsLeg_ );
+                        eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
-#if 1  //ATLAS_HAVE_FFTW
                     {
                         //ATLAS_TRACE( "opt merge spheres" );
                         // northern hemisphere:
-                        int idx = 0;
-                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) {
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jlat = 0; jlat < nlatsNH; jlat++, idx++ ) {
-                                    fft_in[posFFTWin( jfld, jlat, jm )][imag] =
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                    int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ );
+                                    scl_fourier[posMethod( jfld, imag, jlat, jm )] =
                                         scl_fourier_sym[idx] + scl_fourier_asym[idx];
                                 }
                             }
                         }
                         // southern hemisphere:
-                        idx = 0;
-                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) {
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jlat = 0; jlat < nlatsNH; jlat++, idx++ ) {
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                    int idx   = posFourier( jfld, imag, jlat, jm, nlatsSH_ );
                                     int jslat = nlats - jlat - 1;
-                                    fft_in[posFFTWin( jfld, jslat, jm )][imag] =
+                                    scl_fourier[posMethod( jfld, imag, jslat, jm )] =
                                         scl_fourier_sym[idx] - scl_fourier_asym[idx];
                                 }
                             }
@@ -373,130 +447,83 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                     free_aligned( scl_fourier_asym );
                 }
             }
-            {
+            // Fourier transformation:
+            if ( useFFT_ ) {
+#if ATLAS_HAVE_FFTW
                 {
-                    ATLAS_TRACE( "opt FFTW" );
-                    { fftw_execute( plan ); }
-                }
-            }
-            fftw_destroy_plan( plan );
-            fftw_free( fft_in );
-#else
-#if 0  // 1: better for small number of columns, large truncation; 0: better for large number of columns
+                    int num_complex = ( nlonsGlobal_ / 2 ) + 1;
                     {
-                        //ATLAS_TRACE( "opt merge spheres" );
-                        // northern hemisphere:
-                        int ioff = jm * size_fourier_max;
-                        int pos0 = ioff;
-                        int idx  = 0;
-                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                            int poslat = pos0 + 2 * jlat;
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
-                                int posimag = nb_fields * ( imag + poslat );
-                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
-                                    scl_fourier[posGemm1( jfld, imag, jlat, jm )] =
-                                        scl_fourier_sym[idx] + scl_fourier_asym[idx];
+                        ATLAS_TRACE( "opt FFTW" );
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                            int idx = 0;
+                            for ( int jlat = 0; jlat < nlats; jlat++ ) {
+                                fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0 )];
+                                for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
+                                    for ( int imag = 0; imag < 2; imag++ ) {
+                                        if ( jm <= truncation_ ) {
+                                            fft_in_[idx][imag] = scl_fourier[posMethod( jfld, imag, jlat, jm )];
+                                        }
+                                        else {
+                                            fft_in_[idx][imag] = 0.;
+                                        }
+                                    }
                                 }
                             }
-                        }
-                        // southern hemisphere:
-                        idx  = 0;
-                        pos0 = 2 * ( nlats - 1 ) + ioff;
-                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                            int poslat = pos0 - 2 * jlat;
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
-                                int posimag = nb_fields * ( imag + poslat );
-                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
-                                    int jslat = nlats - jlat - 1;
-                                    scl_fourier[posGemm1( jfld, imag, jslat, jm )] =
-                                        scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                            fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ );
+                            for ( int jlat = 0; jlat < nlats; jlat++ ) {
+                                for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                                    int j = jlon + jlonMin_;
+                                    if ( j >= nlonsGlobal_ ) { j -= nlonsGlobal_; }
+                                    gp_fields[jlon + nlons * ( jlat + nlats * jfld )] =
+                                        fft_out_[j + nlonsGlobal_ * jlat];
                                 }
                             }
                         }
                     }
-                    free_aligned( scalar_sym );
-                    free_aligned( scalar_asym );
-                    free_aligned( scl_fourier_sym );
-                    free_aligned( scl_fourier_asym );
                 }
+#endif
             }
-
-            // Fourier transformation:
-            double* gp_opt;
-            alloc_aligned( gp_opt, nb_fields * grid_.size() );
-            {
-                ATLAS_TRACE( "opt Fourier dgemm" );
-                eckit::linalg::Matrix A( scl_fourier, nb_fields * g.ny(), ( truncation_ + 1 ) * 2 );
-                eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, g.nxmax() );
-                eckit::linalg::Matrix C( gp_opt, nb_fields * g.ny(), g.nxmax() );
-                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-            }
-
-            // Transposition in grid point space:
-            {
-                ATLAS_TRACE( "opt transposition in gp-space" );
-                int idx = 0;
-                for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
-                    for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                            int pos_tp = jlon + g.nxmax() * ( jlat + g.ny() * ( jfld ) );
-                            //int pos  = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) );
-                            gp_fields[pos_tp] = gp_opt[idx++];  // = gp_opt[pos]
-                        }
+            else {
+                if ( dgemmMethod1_ ) {
+                    // dgemm-method 1
+                    // should be faster for small domains or large truncation
+                    double* gp_opt;
+                    alloc_aligned( gp_opt, nb_fields * grid_.size() );
+                    {
+                        ATLAS_TRACE( "opt Fourier dgemm method 1" );
+                        eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 );
+                        eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlons );
+                        eckit::linalg::Matrix C( gp_opt, nb_fields * nlats, nlons );
+                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
-                }
-            }
-            free_aligned( gp_opt );
-#else
+
+                    // Transposition in grid point space:
                     {
-                        //ATLAS_TRACE( "opt merge spheres" );
-                        // northern hemisphere:
-                        int ioff = jm * size_fourier_max;
-                        int pos0 = ioff;
-                        int idx  = 0;
-                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                            int poslat = pos0 + 2 * jlat;
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
-                                int posimag = nb_fields * ( imag + poslat );
-                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
-                                    scl_fourier[posGemm2( jfld, imag, jlat, jm )] =
-                                        scl_fourier_sym[idx] + scl_fourier_asym[idx];
-                                }
-                            }
-                        }
-                        // southern hemisphere:
-                        idx  = 0;
-                        pos0 = 2 * ( nlats - 1 ) + ioff;
-                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                            int poslat = pos0 - 2 * jlat;
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
-                                int posimag = nb_fields * ( imag + poslat );
-                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
-                                    int jslat = nlats - jlat - 1;
-                                    scl_fourier[posGemm2( jfld, imag, jslat, jm )] =
-                                        scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                        ATLAS_TRACE( "opt transposition in gp-space" );
+                        int idx = 0;
+                        for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                            for ( int jlat = 0; jlat < nlats; jlat++ ) {
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                    int pos_tp = jlon + nlons * ( jlat + nlats * ( jfld ) );
+                                    //int pos  = jfld + nb_fields * ( jlat + nlats * ( jlon ) );
+                                    gp_fields[pos_tp] = gp_opt[idx++];  // = gp_opt[pos]
                                 }
                             }
                         }
                     }
-                    free_aligned( scalar_sym );
-                    free_aligned( scalar_asym );
-                    free_aligned( scl_fourier_sym );
-                    free_aligned( scl_fourier_asym );
+                    free_aligned( gp_opt );
                 }
-            }
-
-            // Fourier transformation:
-            {
-                ATLAS_TRACE( "opt Fourier dgemm" );
-                eckit::linalg::Matrix A( fouriertp_, g.nxmax(), ( truncation_ + 1 ) * 2 );
-                eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * g.ny() );
-                eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() );
-                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-            }
-
-#endif
-#endif
+                else {
+                    // dgemm-method 2
+                    {
+                        ATLAS_TRACE( "opt Fourier dgemm method 2" );
+                        eckit::linalg::Matrix A( fourier_, nlons, ( truncation_ + 1 ) * 2 );
+                        eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats );
+                        eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats );
+                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                    }
+                }
+            }  // namespace trans
             // Computing u,v from U,V:
             {
                 if ( nb_vordiv_fields > 0 ) {
@@ -517,7 +544,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                 }
             }
             free_aligned( scl_fourier );
-        }
+        }  // namespace atlas
         else {
             ATLAS_TRACE( "invtrans_uv unstructured opt" );
             int idx = 0;
@@ -539,8 +566,8 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field
                 ++idx;
             }
         }
-    }
-}  // namespace trans
+    }  // namespace trans
+}  // namespace atlas
 
 // --------------------------------------------------------------------------------------------------------------------
 
@@ -603,11 +630,8 @@ void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_sp
                      gp_fields + nb_gp * nb_vordiv_fields, config );
     }
     if ( nb_scalar_fields > 0 ) {
-        int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
-        std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
-        extend_truncationopt( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
-        invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
-                     gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
+        invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields,
+                     config );
     }
 }
 
diff --git a/src/atlas/trans/localopt/TransLocalopt.h b/src/atlas/trans/localopt/TransLocalopt.h
index 55b7a074e..d81445f5b 100644
--- a/src/atlas/trans/localopt/TransLocalopt.h
+++ b/src/atlas/trans/localopt/TransLocalopt.h
@@ -15,6 +15,9 @@
 #include "atlas/array.h"
 #include "atlas/grid/Grid.h"
 #include "atlas/trans/Trans.h"
+#if ATLAS_HAVE_FFTW
+#include <fftw3.h>
+#endif
 
 //-----------------------------------------------------------------------------
 // Forward declarations
@@ -107,7 +110,14 @@ class TransLocalopt : public trans::TransImpl {
 
 private:
     Grid grid_;
+    bool useFFT_;
+    bool dgemmMethod1_;
     int truncation_;
+    int nlatsNH_;
+    int nlatsSH_;
+    int nlatsLeg_;
+    int jlonMin_;
+    int nlonsGlobal_;
     bool precompute_;
     double* legendre_sym_;
     double* legendre_asym_;
@@ -116,6 +126,11 @@ class TransLocalopt : public trans::TransImpl {
     std::vector<size_t> legendre_begin_;
     std::vector<size_t> legendre_sym_begin_;
     std::vector<size_t> legendre_asym_begin_;
+#if ATLAS_HAVE_FFTW
+    fftw_complex* fft_in_;
+    double* fft_out_;
+    fftw_plan plan_;
+#endif
 };
 
 //-----------------------------------------------------------------------------
diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
index 1d9f86daa..9d11ea850 100644
--- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
+++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
@@ -161,12 +161,6 @@ void compute_legendre_polynomialsopt2(
         {
             //ATLAS_TRACE( "add to global arrays" );
 
-            // take factor 2 for m > 0 into account:
-            for ( int jm = 1; jm <= trc; ++jm ) {
-                for ( int jn = jm; jn <= trc; ++jn ) {
-                    legpol[idxmn( jm, jn )] *= 2.;
-                }
-            }
             for ( int jm = 0; jm <= trc; jm++ ) {
                 int is1 = 0, ia1 = 0;
                 for ( int jn = jm; jn <= trc; jn++ ) {
@@ -185,11 +179,11 @@ void compute_legendre_polynomialsopt2(
                 //for ( int jn = jm; jn <= trc; jn++ ) {
                 for ( int jn = trc; jn >= jm; jn-- ) {
                     if ( ( jn - jm ) % 2 == 0 ) {
-                        int is      = leg_start_sym[jm] + jlat + nlats * is2++;
+                        int is      = leg_start_sym[jm] + is1 * jlat + is2++;
                         leg_sym[is] = legpol[idxmn( jm, jn )];
                     }
                     else {
-                        int ia       = leg_start_asym[jm] + jlat + nlats * ia2++;
+                        int ia       = leg_start_asym[jm] + ia1 * jlat + ia2++;
                         leg_asym[ia] = legpol[idxmn( jm, jn )];
                     }
                 }
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index ac1da815b..2091f57b9 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -96,27 +96,75 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
 #else
     eckit::linalg::LinearAlgebra::backend( "generic" );  // might want to choose backend with this command
 #endif
-    int nlats   = 0;
-    int nlons   = 0;
-    int nlatsNH = nlats_northernHemisphere( nlats );
+    double fft_threshold = 0.05;  // fraction of latitudes of the full grid up to which FFT is used.
+    // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine
+    // on which this code is running!
+    int nlats     = 0;
+    int nlons     = 0;
+    int neqtr     = 0;
+    useFFT_       = true;
+    dgemmMethod1_ = true;
+    nlatsNH_      = 0;
+    nlatsSH_      = 0;
+    nlatsLeg_     = 0;
     if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
         grid::StructuredGrid g( grid_ );
-        nlats   = g.ny();
-        nlons   = g.nxmax();
-        nlatsNH = nlats_northernHemisphere( nlats );
+        nlats = g.ny();
+        nlons = g.nxmax();
+        for ( size_t j = 0; j < nlats; ++j ) {
+            // assumptions: latitudes in g.y(j) are monotone and decreasing
+            // no assumption on whether we have 0, 1 or 2 latitudes at the equator
+            double lat = g.y( j );
+            if ( lat > 0. ) { nlatsNH_++; }
+            if ( lat == 0. ) { neqtr++; }
+            if ( lat < 0. ) { nlatsSH_++; }
+        }
+        if ( neqtr > 0 ) {
+            nlatsNH_++;
+            nlatsSH_++;
+        }
+        if ( nlatsNH_ >= nlatsSH_ ) { nlatsLeg_ = nlatsNH_; }
+        else {
+            nlatsLeg_ = nlatsSH_;
+        }
+        Grid g_global( grid.name() );
+        grid::StructuredGrid gs_global( g_global );
+        nlonsGlobal_  = gs_global.nxmax();
+        jlonMin_      = 0;
+        double lonmin = fmod( g.x( 0, 0 ), 360 );
+        if ( lonmin < 0. ) { lonmin += 360.; }
+        if ( nlons < fft_threshold * nlonsGlobal_ ) { useFFT_ = false; }
+        else {
+            if ( nlons < nlonsGlobal_ ) {
+                // need to use FFT with cropped grid
+                for ( size_t j = 0; j < nlonsGlobal_; ++j ) {
+                    if ( gs_global.x( j, 0 ) == lonmin ) { jlonMin_ = j; }
+                }
+            }
+        }
+        //Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << std::endl;
     }
     else {
-        nlats   = grid_.size();
-        nlons   = grid_.size();
-        nlatsNH = nlats;
+        // unstructured grid
+        useFFT_   = false;
+        nlats     = grid_.size();
+        nlons     = grid_.size();
+        nlatsNH_  = nlats;
+        nlatsLeg_ = nlats;
     }
-    std::vector<double> lats( nlatsNH );
+    std::vector<double> lats( nlatsLeg_ );
     std::vector<double> lons( nlons );
     if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
         grid::StructuredGrid g( grid_ );
-        // TODO: remove legendre_begin and legendre_data (only legendre_ should be needed)
-        for ( size_t j = 0; j < nlatsNH; ++j ) {
-            lats[j] = g.y( j ) * util::Constants::degreesToRadians();
+        if ( nlatsNH_ >= nlatsSH_ ) {
+            for ( size_t j = 0; j < nlatsLeg_; ++j ) {
+                lats[j] = g.y( j ) * util::Constants::degreesToRadians();
+            }
+        }
+        else {
+            for ( size_t j = nlats - 1, idx = 0; idx < nlatsLeg_; --j, ++idx ) {
+                lats[idx] = -g.y( j ) * util::Constants::degreesToRadians();
+            }
         }
         for ( size_t j = 0; j < nlons; ++j ) {
             lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians();
@@ -139,54 +187,67 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
         legendre_sym_begin_[0]  = 0;
         legendre_asym_begin_[0] = 0;
         for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-            size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsNH );
-            size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsNH );
+            size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ );
+            size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ );
             legendre_sym_begin_[jm + 1]  = size_sym;
             legendre_asym_begin_[jm + 1] = size_asym;
         }
         alloc_aligned( legendre_sym_, size_sym );
         alloc_aligned( legendre_asym_, size_asym );
-        compute_legendre_polynomialsopt2( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_,
+        compute_legendre_polynomialsopt2( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_,
                                           legendre_sym_begin_.data(), legendre_asym_begin_.data() );
     }
 
-        // precomputations for Fourier transformations:
+    // precomputations for Fourier transformations:
+    if ( useFFT_ ) {
 #if ATLAS_HAVE_FFTW
-    {
-        ATLAS_TRACE( "opt2 precomp FFTW" );
-        int num_complex = ( nlons / 2 ) + 1;
-        fft_in_         = fftw_alloc_complex( nlats * num_complex );
-        fft_out_        = fftw_alloc_real( nlats * nlons );
-        plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons,
-                                        FFTW_ESTIMATE );
-    }
+        {
+            ATLAS_TRACE( "opt2 precomp FFTW" );
+            int num_complex = ( nlonsGlobal_ / 2 ) + 1;
+            fft_in_         = fftw_alloc_complex( nlats * num_complex );
+            fft_out_        = fftw_alloc_real( nlats * nlonsGlobal_ );
+            plan_ = fftw_plan_many_dft_c2r( 1, &nlonsGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1,
+                                            nlonsGlobal_, FFTW_ESTIMATE );
+        }
+            // other FFT implementations should be added with #elif statements
 #else
-    {
-        ATLAS_TRACE( "opt2 precomp Fourier" );
+        useFFT_ = false;                                 // no FFT implemented => default to dgemm
+#endif
+    }
+    if ( !useFFT_ ) {
         alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons );
-        int idx = 0;
-        for ( int jlon = 0; jlon < nlons; jlon++ ) {
-            for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                fourier_[idx++] = +std::cos( jm * lons[jlon] );  // real part
-                fourier_[idx++] = -std::sin( jm * lons[jlon] );  // imaginary part
+        if ( dgemmMethod1_ ) {
+            {
+                ATLAS_TRACE( "opt2 precomp Fourier" );
+                int idx = 0;
+                for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                    double factor = 1.;
+                    for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                        if ( jm > 0 ) { factor = 2.; }
+                        fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
+                        fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
+                    }
+                }
             }
         }
-    }
-    {
-        ATLAS_TRACE( "opt2 precomp Fourier tp" );
-        alloc_aligned( fouriertp_, 2 * ( truncation_ + 1 ) * nlons );
-        int idx = 0;
-        for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-            for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                fouriertp_[idx++] = +std::cos( jm * lons[jlon] );  // real part
-            }
-            for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                fouriertp_[idx++] = -std::sin( jm * lons[jlon] );  // imaginary part
+        else {
+            {
+                ATLAS_TRACE( "opt2 precomp Fourier tp" );
+                int idx = 0;
+                for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                    double factor = 1.;
+                    if ( jm > 0 ) { factor = 2.; }
+                    for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                        fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
+                    }
+                    for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                        fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
+                    }
+                }
             }
         }
     }
-#endif
-}  // namespace atlas
+}  // namespace trans
 
 // --------------------------------------------------------------------------------------------------------------------
 
@@ -198,14 +259,16 @@ TransLocalopt2::TransLocalopt2( const Grid& grid, const long truncation, const e
 TransLocalopt2::~TransLocalopt2() {
     free_aligned( legendre_sym_ );
     free_aligned( legendre_asym_ );
+    if ( useFFT_ ) {
 #if ATLAS_HAVE_FFTW
-    fftw_destroy_plan( plan_ );
-    fftw_free( fft_in_ );
-    fftw_free( fft_out_ );
-#else
-    free_aligned( fourier_ );
-    free_aligned( fouriertp_ );
+        fftw_destroy_plan( plan_ );
+        fftw_free( fft_in_ );
+        fftw_free( fft_out_ );
 #endif
+    }
+    else {
+        free_aligned( fourier_ );
+    }
 }
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -255,13 +318,17 @@ void gp_transposeopt2( const int nb_size, const int nb_fields, const double gp_t
 }
 
 //-----------------------------------------------------------------------------
-// Routine to compute the spectral transform by using a localopt2 Fourier
-// transformation
-// for a grid (same latitude for all longitudes, allows to compute Legendre
-// functions
+// Routine to compute the spectral transform by using a localopt2 Fourier transformation
+// for a grid (same latitude for all longitudes, allows to compute Legendre functions
 // once for all longitudes). U and v components are divided by cos(latitude) for
 // nb_vordiv_fields > 0.
 //
+// Legendre polynomials are computed up to truncation_+1 to be accurate for vorticity and
+// divergence computation. The parameter truncation is the truncation used in storing the
+// spectral data scalar_spectra and can be different from truncation_. If truncation is
+// larger than truncation_+1 the transform will behave as if the spectral data was truncated
+// to truncation_+1.
+//
 // Author:
 // Andreas Mueller *ECMWF*
 //
@@ -274,24 +341,19 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
         // Transform
         if ( grid::StructuredGrid g = grid_ ) {
             ATLAS_TRACE( "invtrans_uv structured opt2" );
-            int nlats    = g.ny();
-            int nlons    = g.nxmax();
-            int nlatsNH  = nlats_northernHemisphere( nlats );
-            auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) {
-                return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
-            };
-            /*auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) {
-                return jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) + jm * nb_fields * 2 * nlats );
-            };*/
-            auto posGemm1 = [&]( int jfld, int imag, int jlat, int jm ) {
-                return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) );
-            };
-            auto posGemm2 = [&]( int jfld, int imag, int jlat, int jm ) {
-                return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
+            int nlats      = g.ny();
+            int nlons      = g.nxmax();
+            auto posMethod = [&]( int jfld, int imag, int jlat, int jm ) {
+                if ( useFFT_ || !dgemmMethod1_ ) {
+                    return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
+                }
+                else {
+                    return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) );
+                };
             };
             int size_fourier_max = nb_fields * 2 * nlats;
             double* scl_fourier;
-            alloc_aligned( scl_fourier, size_fourier_max * ( truncation + 1 ) );
+            alloc_aligned( scl_fourier, size_fourier_max * ( truncation_ + 1 ) );
 
             // Legendre transform:
             {
@@ -301,7 +363,10 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                     int size_asym = num_n( truncation_ + 1, jm, false );
                     int n_imag    = 2;
                     if ( jm == 0 ) { n_imag = 1; }
-                    int size_fourier = nb_fields * n_imag * nlatsNH;
+                    int size_fourier = nb_fields * n_imag * nlatsLeg_;
+                    auto posFourier  = [&]( int jfld, int imag, int jlat, int jm, int nlatsH ) {
+                        return jfld + nb_fields * ( imag + n_imag * ( nlatsLeg_ - nlatsH + jlat ) );
+                    };
                     double* scalar_sym;
                     double* scalar_asym;
                     double* scl_fourier_sym;
@@ -313,64 +378,65 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                     {
                         //ATLAS_TRACE( "opt2 Legendre split" );
                         int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2;
-                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        // the choice between the following two code lines determines whether
+                        // total wavenumbers are summed in an ascending or descending order.
+                        // The trans library in IFS uses descending order because it should
+                        // be more accurate (higher wavenumbers have smaller contributions).
+                        // This also needs to be changed when splitting the spectral data in
+                        // compute_legendre_polynomialsopt2!
+                        //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) {
+                        for ( int jn = truncation_ + 1; jn >= jm; jn-- ) {
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { //ascending
-                                for ( int jn = truncation_ + 1; jn >= jm; jn-- ) {  // descending
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                                     idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) );
-                                    if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
+                                    if ( jn <= truncation && jm < truncation ) {
+                                        if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
+                                        else {
+                                            scalar_asym[ia++] = scalar_spectra[idx + ioff];
+                                        }
+                                    }
                                     else {
-                                        scalar_asym[ia++] = scalar_spectra[idx + ioff];
+                                        if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = 0.; }
+                                        else {
+                                            scalar_asym[ia++] = 0.;
+                                        }
                                     }
                                 }
                             }
                         }
                         ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym );
                     }
-                    {  // transposed
-                        eckit::linalg::Matrix A( legendre_sym_ + legendre_sym_begin_[jm], nlatsNH, size_sym );
-                        eckit::linalg::Matrix B( scalar_sym, size_sym, nb_fields * n_imag );
-                        eckit::linalg::Matrix C( scl_fourier_sym, nlatsNH, nb_fields * n_imag );
-                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                    }
-                    if ( size_asym > 0 ) {
-                        eckit::linalg::Matrix A( legendre_asym_ + legendre_asym_begin_[jm], nlatsNH, size_asym );
-                        eckit::linalg::Matrix B( scalar_asym, size_asym, nb_fields * n_imag );
-                        eckit::linalg::Matrix C( scl_fourier_asym, nlatsNH, nb_fields * n_imag );
-                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                    }
-                        /*{ // non-transposed
+                    {
                         eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym );
-                        eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsNH );
-                        eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsNH );
+                        eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsLeg_ );
+                        eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLeg_ );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
                     if ( size_asym > 0 ) {
                         eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym );
-                        eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsNH );
-                        eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH );
+                        eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsLeg_ );
+                        eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ );
                         eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                    }*/
-#if 1  //ATLAS_HAVE_FFTW
+                    }
                     {
                         //ATLAS_TRACE( "opt2 merge spheres" );
                         // northern hemisphere:
-                        int idx = 0;
-                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) {
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jlat = 0; jlat < nlatsNH; jlat++, idx++ ) {
-                                    scl_fourier[posFFTW( jfld, imag, jlat, jm )] =
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                    int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ );
+                                    scl_fourier[posMethod( jfld, imag, jlat, jm )] =
                                         scl_fourier_sym[idx] + scl_fourier_asym[idx];
                                 }
                             }
                         }
                         // southern hemisphere:
-                        idx = 0;
-                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) {
                             for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jlat = 0; jlat < nlatsNH; jlat++, idx++ ) {
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                    int idx   = posFourier( jfld, imag, jlat, jm, nlatsSH_ );
                                     int jslat = nlats - jlat - 1;
-                                    scl_fourier[posFFTW( jfld, imag, jslat, jm )] =
+                                    scl_fourier[posMethod( jfld, imag, jslat, jm )] =
                                         scl_fourier_sym[idx] - scl_fourier_asym[idx];
                                 }
                             }
@@ -382,148 +448,83 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                     free_aligned( scl_fourier_asym );
                 }
             }
-            {
-                int num_complex = ( nlons / 2 ) + 1;
+            // Fourier transformation:
+            if ( useFFT_ ) {
+#if ATLAS_HAVE_FFTW
                 {
-                    ATLAS_TRACE( "opt2 FFTW" );
-                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                        int idx = 0;
-                        for ( int jlat = 0; jlat < nlats; jlat++ ) {
-                            fft_in_[idx++][0] = scl_fourier[posFFTW( jfld, 0, jlat, 0 )];
-                            for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
-                                for ( int imag = 0; imag < 2; imag++ ) {
-                                    if ( jm <= truncation_ ) {
-                                        fft_in_[idx][imag] = scl_fourier[posFFTW( jfld, imag, jlat, jm )] / 2.;
-                                    }
-                                    else {
-                                        fft_in_[idx][imag] = 0.;
-                                    }
-                                }
-                            }
-                        }
-                        fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ );
-                        for ( int j = 0; j < nlats * nlons; j++ ) {
-                            gp_fields[j + jfld * nlats * nlons] = fft_out_[j];
-                        }
-                    }
-                }
-            }
-#else
-#if 1  // 1: better for small number of columns, large truncation; 0: better for large number of columns
+                    int num_complex = ( nlonsGlobal_ / 2 ) + 1;
                     {
-                        //ATLAS_TRACE( "opt2 merge spheres" );
-                        // northern hemisphere:
-                        int ioff = jm * size_fourier_max;
-                        int pos0 = ioff;
-                        int idx  = 0;
-                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                            int poslat = pos0 + 2 * jlat;
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
-                                int posimag = nb_fields * ( imag + poslat );
-                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
-                                    scl_fourier[posGemm1( jfld, imag, jlat, jm )] =
-                                        scl_fourier_sym[idx] + scl_fourier_asym[idx];
+                        ATLAS_TRACE( "opt2 FFTW" );
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                            int idx = 0;
+                            for ( int jlat = 0; jlat < nlats; jlat++ ) {
+                                fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0 )];
+                                for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
+                                    for ( int imag = 0; imag < 2; imag++ ) {
+                                        if ( jm <= truncation_ ) {
+                                            fft_in_[idx][imag] = scl_fourier[posMethod( jfld, imag, jlat, jm )];
+                                        }
+                                        else {
+                                            fft_in_[idx][imag] = 0.;
+                                        }
+                                    }
                                 }
                             }
-                        }
-                        // southern hemisphere:
-                        idx  = 0;
-                        pos0 = 2 * ( nlats - 1 ) + ioff;
-                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                            int poslat = pos0 - 2 * jlat;
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
-                                int posimag = nb_fields * ( imag + poslat );
-                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
-                                    int jslat = nlats - jlat - 1;
-                                    scl_fourier[posGemm1( jfld, imag, jslat, jm )] =
-                                        scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                            fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ );
+                            for ( int jlat = 0; jlat < nlats; jlat++ ) {
+                                for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                                    int j = jlon + jlonMin_;
+                                    if ( j >= nlonsGlobal_ ) { j -= nlonsGlobal_; }
+                                    gp_fields[jlon + nlons * ( jlat + nlats * jfld )] =
+                                        fft_out_[j + nlonsGlobal_ * jlat];
                                 }
                             }
                         }
                     }
-                    free_aligned( scalar_sym );
-                    free_aligned( scalar_asym );
-                    free_aligned( scl_fourier_sym );
-                    free_aligned( scl_fourier_asym );
                 }
+#endif
             }
-
-            // Fourier transformation:
-            double* gp_opt2;
-            alloc_aligned( gp_opt2, nb_fields * grid_.size() );
-            {
-                ATLAS_TRACE( "opt2 Fourier dgemm" );
-                eckit::linalg::Matrix A( scl_fourier, nb_fields * g.ny(), ( truncation_ + 1 ) * 2 );
-                eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, g.nxmax() );
-                eckit::linalg::Matrix C( gp_opt2, nb_fields * g.ny(), g.nxmax() );
-                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-            }
-
-            // Transposition in grid point space:
-            {
-                ATLAS_TRACE( "opt2 transposition in gp-space" );
-                int idx = 0;
-                for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
-                    for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                            int pos_tp = jlon + g.nxmax() * ( jlat + g.ny() * ( jfld ) );
-                            //int pos  = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) );
-                            gp_fields[pos_tp] = gp_opt2[idx++];  // = gp_opt2[pos]
-                        }
+            else {
+                if ( dgemmMethod1_ ) {
+                    // dgemm-method 1
+                    // should be faster for small domains or large truncation
+                    double* gp_opt2;
+                    alloc_aligned( gp_opt2, nb_fields * grid_.size() );
+                    {
+                        ATLAS_TRACE( "opt2 Fourier dgemm method 1" );
+                        eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 );
+                        eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlons );
+                        eckit::linalg::Matrix C( gp_opt2, nb_fields * nlats, nlons );
+                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
                     }
-                }
-            }
-            free_aligned( gp_opt2 );
-#else
+
+                    // Transposition in grid point space:
                     {
-                        //ATLAS_TRACE( "opt2 merge spheres" );
-                        // northern hemisphere:
-                        int ioff = jm * size_fourier_max;
-                        int pos0 = ioff;
-                        int idx  = 0;
-                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                            int poslat = pos0 + 2 * jlat;
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
-                                int posimag = nb_fields * ( imag + poslat );
-                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
-                                    scl_fourier[posGemm2( jfld, imag, jlat, jm )] =
-                                        scl_fourier_sym[idx] + scl_fourier_asym[idx];
-                                }
-                            }
-                        }
-                        // southern hemisphere:
-                        idx  = 0;
-                        pos0 = 2 * ( nlats - 1 ) + ioff;
-                        for ( int jlat = 0; jlat < nlatsNH; jlat++ ) {
-                            int poslat = pos0 - 2 * jlat;
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
-                                int posimag = nb_fields * ( imag + poslat );
-                                for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) {
-                                    int jslat = nlats - jlat - 1;
-                                    scl_fourier[posGemm2( jfld, imag, jslat, jm )] =
-                                        scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                        ATLAS_TRACE( "opt2 transposition in gp-space" );
+                        int idx = 0;
+                        for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                            for ( int jlat = 0; jlat < nlats; jlat++ ) {
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                    int pos_tp = jlon + nlons * ( jlat + nlats * ( jfld ) );
+                                    //int pos  = jfld + nb_fields * ( jlat + nlats * ( jlon ) );
+                                    gp_fields[pos_tp] = gp_opt2[idx++];  // = gp_opt2[pos]
                                 }
                             }
                         }
                     }
-                    free_aligned( scalar_sym );
-                    free_aligned( scalar_asym );
-                    free_aligned( scl_fourier_sym );
-                    free_aligned( scl_fourier_asym );
+                    free_aligned( gp_opt2 );
                 }
-            }
-
-            // Fourier transformation:
-            {
-                ATLAS_TRACE( "opt2 Fourier dgemm" );
-                eckit::linalg::Matrix A( fouriertp_, g.nxmax(), ( truncation_ + 1 ) * 2 );
-                eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * g.ny() );
-                eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() );
-                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-            }
-
-#endif
-#endif
+                else {
+                    // dgemm-method 2
+                    {
+                        ATLAS_TRACE( "opt2 Fourier dgemm method 2" );
+                        eckit::linalg::Matrix A( fourier_, nlons, ( truncation_ + 1 ) * 2 );
+                        eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats );
+                        eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats );
+                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                    }
+                }
+            }  // namespace trans
             // Computing u,v from U,V:
             {
                 if ( nb_vordiv_fields > 0 ) {
@@ -544,7 +545,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                 }
             }
             free_aligned( scl_fourier );
-        }
+        }  // namespace atlas
         else {
             ATLAS_TRACE( "invtrans_uv unstructured opt2" );
             int idx = 0;
@@ -566,8 +567,8 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
                 ++idx;
             }
         }
-    }
-}  // namespace trans
+    }  // namespace trans
+}  // namespace atlas
 
 // --------------------------------------------------------------------------------------------------------------------
 
@@ -631,11 +632,8 @@ void TransLocalopt2::invtrans( const int nb_scalar_fields, const double scalar_s
                      gp_fields + nb_gp * nb_vordiv_fields, config );
     }
     if ( nb_scalar_fields > 0 ) {
-        int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
-        std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
-        extend_truncationopt2( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
-        invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
-                     gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
+        invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields,
+                     config );
     }
 }
 
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.h b/src/atlas/trans/localopt2/TransLocalopt2.h
index c5f5f2aa4..2bc500ea6 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.h
+++ b/src/atlas/trans/localopt2/TransLocalopt2.h
@@ -111,7 +111,14 @@ class TransLocalopt2 : public trans::TransImpl {
 
 private:
     Grid grid_;
+    bool useFFT_;
+    bool dgemmMethod1_;
     int truncation_;
+    int nlatsNH_;
+    int nlatsSH_;
+    int nlatsLeg_;
+    int jlonMin_;
+    int nlonsGlobal_;
     bool precompute_;
     double* legendre_sym_;
     double* legendre_asym_;
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index d98bb522f..8f5f9c2d3 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -96,14 +96,14 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
 #else
     eckit::linalg::LinearAlgebra::backend( "generic" );  // might want to choose backend with this command
 #endif
-    double fft_threshold = 2.;  // 0.05;  // fraction of latitudes of the full grid up to which FFT is used.
+    double fft_threshold = 0.05;  // fraction of latitudes of the full grid up to which FFT is used.
     // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine
     // on which this code is running!
     int nlats     = 0;
     int nlons     = 0;
     int neqtr     = 0;
     useFFT_       = true;
-    dgemmMethod1_ = true;
+    dgemmMethod1_ = false;
     nlatsNH_      = 0;
     nlatsSH_      = 0;
     nlatsLeg_     = 0;
@@ -323,6 +323,12 @@ void gp_transposeopt3( const int nb_size, const int nb_fields, const double gp_t
 // once for all longitudes). U and v components are divided by cos(latitude) for
 // nb_vordiv_fields > 0.
 //
+// Legendre polynomials are computed up to truncation_+1 to be accurate for vorticity and
+// divergence computation. The parameter truncation is the truncation used in storing the
+// spectral data scalar_spectra and can be different from truncation_. If truncation is
+// larger than truncation_+1 the transform will behave as if the spectral data was truncated
+// to truncation_+1.
+//
 // Author:
 // Andreas Mueller *ECMWF*
 //
@@ -347,7 +353,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
             };
             int size_fourier_max = nb_fields * 2 * nlats;
             double* scl_fourier;
-            alloc_aligned( scl_fourier, size_fourier_max * ( truncation + 1 ) );
+            alloc_aligned( scl_fourier, size_fourier_max * ( truncation_ + 1 ) );
 
             // Legendre transform:
             {
@@ -383,9 +389,17 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                             for ( int imag = 0; imag < n_imag; imag++ ) {
                                 for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                                     idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) );
-                                    if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
+                                    if ( jn <= truncation && jm < truncation ) {
+                                        if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
+                                        else {
+                                            scalar_asym[ia++] = scalar_spectra[idx + ioff];
+                                        }
+                                    }
                                     else {
-                                        scalar_asym[ia++] = scalar_spectra[idx + ioff];
+                                        if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = 0.; }
+                                        else {
+                                            scalar_asym[ia++] = 0.;
+                                        }
                                     }
                                 }
                             }
@@ -477,7 +491,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                     double* gp_opt3;
                     alloc_aligned( gp_opt3, nb_fields * grid_.size() );
                     {
-                        ATLAS_TRACE( "opt3 Fourier dgemm" );
+                        ATLAS_TRACE( "opt3 Fourier dgemm method 1" );
                         eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 );
                         eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlons );
                         eckit::linalg::Matrix C( gp_opt3, nb_fields * nlats, nlons );
@@ -503,7 +517,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                 else {
                     // dgemm-method 2
                     {
-                        ATLAS_TRACE( "opt3 Fourier dgemm" );
+                        ATLAS_TRACE( "opt3 Fourier dgemm method 2" );
                         eckit::linalg::Matrix A( fourier_, nlons, ( truncation_ + 1 ) * 2 );
                         eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats );
                         eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats );
@@ -618,11 +632,8 @@ void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_s
                      gp_fields + nb_gp * nb_vordiv_fields, config );
     }
     if ( nb_scalar_fields > 0 ) {
-        int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
-        std::vector<double> scalar_spectra_extended( nb_scalar_spec_ext, 0. );
-        extend_truncationopt3( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() );
-        invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(),
-                     gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
+        invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields,
+                     config );
     }
 }
 
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 6b556f663..6774d9a29 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -853,7 +853,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
 }
 #endif
 //-----------------------------------------------------------------------------
-#if 0
+#if 1
 CASE( "test_trans_hires" ) {
     Log::info() << "test_trans_hires" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -862,28 +862,34 @@ CASE( "test_trans_hires" ) {
     std::ostream& out = Log::info();
     double tolerance  = 1.e-13;
 
-    // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F1280" );
 #if ATLAS_HAVE_TRANS
-    std::string transTypes[4] = {"localopt", "localopt2", "localopt3", "ifs"};
-    //std::string transTypes[2] = {"localopt", "localopt2"};
-    //std::string transTypes[3] = {"localopt", "localopt2", "ifs"};
+    //std::string transTypes[4] = {"localopt", "localopt2", "localopt3", "ifs"};
+    //std::string transTypes[2] = {"localopt2", "localopt3"};
+    std::string transTypes[3] = {"localopt", "localopt2", "localopt3"};
     //std::string transTypes[1] = {"localopt3"};
 #else
     std::string transTypes[1] = {"localopt2"};
 #endif
+
+    //Domain testdomain = ZonalBandDomain( {-90., 90.} );
+    //Domain testdomain = ZonalBandDomain( {-.5, .5} );
+    //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} );
+    //Domain testdomain = ZonalBandDomain( {-85., -86.} );
+    Domain testdomain = RectangularDomain( {-1., 1.}, {5., 5.5} );
+    // Grid: (Adjust the following line if the test takes too long!)
+    Grid g( "F1280", testdomain );
+    Grid g_global( g.name() );
+
     grid::StructuredGrid gs( g );
-    int ndgl = gs.ny();
+    grid::StructuredGrid gs_global( g_global );
+    Log::info() << "nlats: " << gs.ny() << " nlons:" << gs.nxmax() << std::endl;
+    int ndgl = gs_global.ny();
     //int trc  = ndgl - 1;  // linear
     int trc = ndgl / 2. - 1;  // cubic
 
-    int nb_scalar = 1, nb_vordiv = 0;
+    int nb_scalar = 1000, nb_vordiv = 0;
 
     for ( auto transType : transTypes ) {
-        if ( transType == "ifs" ) { trc = ndgl / 2. - 1; }
-        else {
-            trc = ndgl / 2. - 2;
-        }
         int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
         int icase = 0;
         trans::Trans trans( g, trc, util::Config( "type", transType ) );
@@ -931,7 +937,7 @@ CASE( "test_trans_hires" ) {
 }
 #endif
 //-----------------------------------------------------------------------------
-#if 1
+#if 0
 CASE( "test_trans_domain" ) {
     Log::info() << "test_trans_domain" << std::endl;
     // test transgeneral by comparing with analytic solution on a cropped domain
@@ -943,24 +949,25 @@ CASE( "test_trans_domain" ) {
     //Domain testdomain = ZonalBandDomain( {-.5, .5} );
     //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} );
     //Domain testdomain = ZonalBandDomain( {-85., -86.} );
-    Domain testdomain = RectangularDomain( {-5., 10.}, {5., 6.} );
+    Domain testdomain = RectangularDomain( {-1., 1.}, {5., 5.5} );
     // Grid: (Adjust the following line if the test takes too long!)
     Grid g( "F1280", testdomain );
     Grid g_global( g.name() );
 
     grid::StructuredGrid gs( g );
     grid::StructuredGrid gs_global( g_global );
+    Log::info() << "nlats: " << gs.ny() << " nlons:" << gs.nxmax() << std::endl;
     int ndgl = gs_global.ny();
     //int trc  = ndgl - 1;  // linear
     int trc = ndgl / 2. - 1;  // cubic
-    trans::Trans transLocal1( g, trc, util::Config( "type", "localopt3" ) );
+    trans::Trans transLocal1( g, trc, util::Config( "type", "localopt2" ) );
     trans::Trans transLocal2( g, trc, util::Config( "type", "localopt3" ) );
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
 
     functionspace::Spectral spectral( trc );
     functionspace::StructuredColumns gridpoints( g );
 
-    int nb_scalar = 1, nb_vordiv = 0;
+    int nb_scalar = 1000, nb_vordiv = 0;
     int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
     std::vector<double> sp( 2 * N * nb_scalar );
     std::vector<double> vor( 2 * N * nb_vordiv );
@@ -990,7 +997,7 @@ CASE( "test_trans_domain" ) {
                         for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
 
                             if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. &&
-                                 icase < 1000 ) {
+                                 icase < 1 ) {
                                 auto start = std::chrono::system_clock::now();
                                 for ( int j = 0; j < 2 * N * nb_scalar; j++ ) {
                                     sp[j] = 0.;

From 993347b990a30fbd7b4d30fe5d1ac1404e2b290e Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Wed, 4 Apr 2018 15:44:17 +0100
Subject: [PATCH 039/123] writing and reading Legendre polynomials from file to
 speed up testing

---
 src/atlas/trans/localopt/TransLocalopt.cc   | 17 +++++++++++++++--
 src/atlas/trans/localopt2/TransLocalopt2.cc | 17 +++++++++++++++--
 src/atlas/trans/localopt3/TransLocalopt3.cc | 17 +++++++++++++++--
 src/tests/trans/test_transgeneral.cc        |  8 ++++----
 4 files changed, 49 insertions(+), 10 deletions(-)

diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index 48f90d386..89434ff6b 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -194,8 +194,21 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
         }
         alloc_aligned( legendre_sym_, size_sym );
         alloc_aligned( legendre_asym_, size_asym );
-        compute_legendre_polynomialsopt( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_,
-                                         legendre_sym_begin_.data(), legendre_asym_begin_.data() );
+        FILE* file_leg;
+        file_leg = fopen( "legendre.bin", "r" );
+        if ( file_leg ) {
+            fread( legendre_sym_, sizeof( double ), size_sym, file_leg );
+            fread( legendre_asym_, sizeof( double ), size_asym, file_leg );
+            fclose( file_leg );
+        }
+        else {
+            compute_legendre_polynomialsopt( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_,
+                                             legendre_sym_begin_.data(), legendre_asym_begin_.data() );
+            file_leg = fopen( "legendre.bin", "wb" );
+            fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg );
+            fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg );
+            fclose( file_leg );
+        }
     }
 
     // precomputations for Fourier transformations:
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index 2091f57b9..7eea4f9d3 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -194,8 +194,21 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
         }
         alloc_aligned( legendre_sym_, size_sym );
         alloc_aligned( legendre_asym_, size_asym );
-        compute_legendre_polynomialsopt2( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_,
-                                          legendre_sym_begin_.data(), legendre_asym_begin_.data() );
+        FILE* file_leg;
+        file_leg = fopen( "legendre.bin", "r" );
+        if ( file_leg ) {
+            fread( legendre_sym_, sizeof( double ), size_sym, file_leg );
+            fread( legendre_asym_, sizeof( double ), size_asym, file_leg );
+            fclose( file_leg );
+        }
+        else {
+            compute_legendre_polynomialsopt2( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_,
+                                              legendre_sym_begin_.data(), legendre_asym_begin_.data() );
+            file_leg = fopen( "legendre.bin", "wb" );
+            fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg );
+            fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg );
+            fclose( file_leg );
+        }
     }
 
     // precomputations for Fourier transformations:
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index 8f5f9c2d3..36104005c 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -194,8 +194,21 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         }
         alloc_aligned( legendre_sym_, size_sym );
         alloc_aligned( legendre_asym_, size_asym );
-        compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_,
-                                          legendre_sym_begin_.data(), legendre_asym_begin_.data() );
+        FILE* file_leg;
+        file_leg = fopen( "legendre.bin", "r" );
+        if ( file_leg ) {
+            fread( legendre_sym_, sizeof( double ), size_sym, file_leg );
+            fread( legendre_asym_, sizeof( double ), size_asym, file_leg );
+            fclose( file_leg );
+        }
+        else {
+            compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_,
+                                              legendre_sym_begin_.data(), legendre_asym_begin_.data() );
+            file_leg = fopen( "legendre.bin", "wb" );
+            fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg );
+            fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg );
+            fclose( file_leg );
+        }
     }
 
     // precomputations for Fourier transformations:
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 6774d9a29..421dbdeaf 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -877,7 +877,7 @@ CASE( "test_trans_hires" ) {
     //Domain testdomain = ZonalBandDomain( {-85., -86.} );
     Domain testdomain = RectangularDomain( {-1., 1.}, {5., 5.5} );
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F1280", testdomain );
+    Grid g( "F5000", testdomain );
     Grid g_global( g.name() );
 
     grid::StructuredGrid gs( g );
@@ -887,7 +887,7 @@ CASE( "test_trans_hires" ) {
     //int trc  = ndgl - 1;  // linear
     int trc = ndgl / 2. - 1;  // cubic
 
-    int nb_scalar = 1000, nb_vordiv = 0;
+    int nb_scalar = 1, nb_vordiv = 0;
 
     for ( auto transType : transTypes ) {
         int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
@@ -967,7 +967,7 @@ CASE( "test_trans_domain" ) {
     functionspace::Spectral spectral( trc );
     functionspace::StructuredColumns gridpoints( g );
 
-    int nb_scalar = 1000, nb_vordiv = 0;
+    int nb_scalar = 1, nb_vordiv = 0;
     int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
     std::vector<double> sp( 2 * N * nb_scalar );
     std::vector<double> vor( 2 * N * nb_vordiv );
@@ -997,7 +997,7 @@ CASE( "test_trans_domain" ) {
                         for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
 
                             if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. &&
-                                 icase < 1 ) {
+                                 icase < 1000 ) {
                                 auto start = std::chrono::system_clock::now();
                                 for ( int j = 0; j < 2 * N * nb_scalar; j++ ) {
                                     sp[j] = 0.;

From 7ab54d8fd53f70c50971e78519490242dc33f538 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Thu, 5 Apr 2018 14:40:13 +0100
Subject: [PATCH 040/123] unstructured meshes are working in opt3 for scalar
 fields. computing Legendre-polynomials for every point.

---
 .../localopt3/LegendrePolynomialsopt3.cc      | 236 +++++++--------
 .../trans/localopt3/LegendrePolynomialsopt3.h |   7 +
 src/atlas/trans/localopt3/TransLocalopt3.cc   | 269 ++++++++++--------
 src/tests/trans/test_transgeneral.cc          | 147 +++++++++-
 4 files changed, 427 insertions(+), 232 deletions(-)

diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
index 3ea2b41ef..c285d32b2 100644
--- a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
+++ b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
@@ -21,143 +21,155 @@ namespace trans {
 
 //-----------------------------------------------------------------------------
 
-void compute_legendre_polynomialsopt3(
-    const size_t trc,          // truncation (in)
-    const int nlats,           // number of latitudes
-    const double lats[],       // latitudes in radians (in)
-    double leg_sym[],          // values of associated Legendre functions, symmetric part
-    double leg_asym[],         // values of associated Legendre functions, asymmetric part
-    size_t leg_start_sym[],    // start indices for different zonal wave numbers, symmetric part
-    size_t leg_start_asym[] )  // start indices for different zonal wave numbers, asymmetric part
-{
-    auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; };
-    array::ArrayT<double> zfn_( trc + 1, trc + 1 );
-    array::ArrayView<double, 2> zfn = array::make_view<double, 2>( zfn_ );
-    std::vector<double> legpol( legendre_size( trc ) );
-    auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; };
-    int iodd;
-
+void compute_zfnopt3( const size_t trc, double zfn[] ) {
+    auto idxzfn = [&]( int jn, int jk ) { return jk + ( trc + 1 ) * jn; };
+    int iodd    = 0;
     // Compute coefficients for Taylor series in Belousov (19) and (21)
-    // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.)
+    // Belousov, Swarztrauber use zfn[0]=std::sqrt(2.)
     // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1
-    zfn( 0, 0 ) = 2.;
+    zfn[idxzfn( 0, 0 )] = 2.;
     for ( int jn = 1; jn <= trc; ++jn ) {
-        double zfnn = zfn( 0, 0 );
+        double zfnn = zfn[idxzfn( 0, 0 )];
         for ( int jgl = 1; jgl <= jn; ++jgl ) {
             zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) );
         }
-        iodd          = jn % 2;
-        zfn( jn, jn ) = zfnn;
+        iodd                  = jn % 2;
+        zfn[idxzfn( jn, jn )] = zfnn;
         for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) {
             double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) );  // new factor numerator
             double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) );           // new factor denominator
 
-            zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd;
+            zfn[idxzfn( jn, jn - jgl )] = zfn[idxzfn( jn, jn - jgl + 2 )] * zfjn / zfjd;
         }
     }
+}
 
-    // Loop over latitudes:
-    for ( int jlat = 0; jlat < nlats; ++jlat ) {
-        {
-            //ATLAS_TRACE( "compute Legendre polynomials" );
-            // --------------------
-            // 1. First two columns
-            // --------------------
-            double lat     = lats[jlat];
-            double zdlx1   = ( M_PI_2 - lat );               // theta
-            double zdlx    = std::cos( zdlx1 );              // cos(theta)
-            double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
-
-            legpol[idxmn( 0, 0 )] = 1.;
-            double vsin[trc + 1], vcos[trc + 1];
-            for ( int j = 1; j <= trc; j++ ) {
-                vsin[j] = std::sin( j * zdlx1 );
-            }
-            for ( int j = 1; j <= trc; j++ ) {
-                vcos[j] = std::cos( j * zdlx1 );
-            }
 
-            double zdl1sita = 0.;
-            // if we are less than 1 meter from the pole,
-            if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits<double>::epsilon() ) ) {
-                zdlx    = 1.;
-                zdlsita = 0.;
-            }
-            else {
-                zdl1sita = 1. / zdlsita;
-            }
+void compute_legendre_polynomials_latopt3( const size_t trc,  // truncation (in)
+                                           const double lat,  // latitude in radians (in)
+                                           double legpol[],   // legendre polynomials
+                                           double zfn[] ) {
+    auto idxmn  = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; };
+    auto idxzfn = [&]( int jn, int jk ) { return jk + ( trc + 1 ) * jn; };
+    {  //ATLAS_TRACE( "compute Legendre polynomials" );
+        // --------------------
+        // 1. First two columns
+        // --------------------
+        double zdlx1   = ( M_PI_2 - lat );               // theta
+        double zdlx    = std::cos( zdlx1 );              // cos(theta)
+        double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
+
+        legpol[idxmn( 0, 0 )] = 1.;
+        double vsin[trc + 1], vcos[trc + 1];
+        for ( int j = 1; j <= trc; j++ ) {
+            vsin[j] = std::sin( j * zdlx1 );
+        }
+        for ( int j = 1; j <= trc; j++ ) {
+            vcos[j] = std::cos( j * zdlx1 );
+        }
 
-            // ordinary Legendre polynomials from series expansion
-            // ---------------------------------------------------
-
-            // even N
-            for ( int jn = 2; jn <= trc; jn += 2 ) {
-                double zdlk   = 0.5 * zfn( jn, 0 );
-                double zdlldn = 0.0;
-                double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
-                // represented by only even k
-                for ( int jk = 2; jk <= jn; jk += 2 ) {
-                    // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-                    zdlk = zdlk + zfn( jn, jk ) * vcos[jk];
-                    // normalised associated Legendre polynomial == \overbar{P_n}^1
-                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk];
-                }
-                legpol[idxmn( 0, jn )] = zdlk;
-                legpol[idxmn( 1, jn )] = zdlldn;
+        double zdl1sita = 0.;
+        // if we are less than 1 meter from the pole,
+        if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits<double>::epsilon() ) ) {
+            zdlx    = 1.;
+            zdlsita = 0.;
+        }
+        else {
+            zdl1sita = 1. / zdlsita;
+        }
+
+        // ordinary Legendre polynomials from series expansion
+        // ---------------------------------------------------
+
+        // even N
+        for ( int jn = 2; jn <= trc; jn += 2 ) {
+            double zdlk   = 0.5 * zfn[idxzfn( jn, 0 )];
+            double zdlldn = 0.0;
+            double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
+            // represented by only even k
+            for ( int jk = 2; jk <= jn; jk += 2 ) {
+                // normalised ordinary Legendre polynomial == \overbar{P_n}^0
+                zdlk = zdlk + zfn[idxzfn( jn, jk )] * vcos[jk];
+                // normalised associated Legendre polynomial == \overbar{P_n}^1
+                zdlldn = zdlldn + zdsq * zfn[idxzfn( jn, jk )] * jk * vsin[jk];
             }
+            legpol[idxmn( 0, jn )] = zdlk;
+            legpol[idxmn( 1, jn )] = zdlldn;
+        }
 
-            // odd N
-            for ( int jn = 1; jn <= trc; jn += 2 ) {
-                zfn( jn, 0 )  = 0.;
-                double zdlk   = 0.;
-                double zdlldn = 0.0;
-                double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
-                // represented by only even k
-                for ( int jk = 1; jk <= jn; jk += 2 ) {
-                    // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-                    zdlk = zdlk + zfn( jn, jk ) * vcos[jk];
-                    // normalised associated Legendre polynomial == \overbar{P_n}^1
-                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk];
-                }
-                legpol[idxmn( 0, jn )] = zdlk;
-                legpol[idxmn( 1, jn )] = zdlldn;
+        // odd N
+        for ( int jn = 1; jn <= trc; jn += 2 ) {
+            zfn[idxzfn( jn, 0 )] = 0.;
+            double zdlk          = 0.;
+            double zdlldn        = 0.0;
+            double zdsq          = 1. / std::sqrt( jn * ( jn + 1. ) );
+            // represented by only even k
+            for ( int jk = 1; jk <= jn; jk += 2 ) {
+                // normalised ordinary Legendre polynomial == \overbar{P_n}^0
+                zdlk = zdlk + zfn[idxzfn( jn, jk )] * vcos[jk];
+                // normalised associated Legendre polynomial == \overbar{P_n}^1
+                zdlldn = zdlldn + zdsq * zfn[idxzfn( jn, jk )] * jk * vsin[jk];
             }
+            legpol[idxmn( 0, jn )] = zdlk;
+            legpol[idxmn( 1, jn )] = zdlldn;
+        }
 
-            // --------------------------------------------------------------
-            // 2. Diagonal (the terms 0,0 and 1,1 have already been computed)
-            //    Belousov, equation (23)
-            // --------------------------------------------------------------
+        // --------------------------------------------------------------
+        // 2. Diagonal (the terms 0,0 and 1,1 have already been computed)
+        //    Belousov, equation (23)
+        // --------------------------------------------------------------
 
-            double zdls = zdl1sita * std::numeric_limits<double>::min();
-            for ( int jn = 2; jn <= trc; ++jn ) {
-                double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) );
+        double zdls = zdl1sita * std::numeric_limits<double>::min();
+        for ( int jn = 2; jn <= trc; ++jn ) {
+            double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) );
 
-                legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq;
-                if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0;
-            }
+            legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq;
+            if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0;
+        }
 
-            // ---------------------------------------------
-            // 3. General recurrence (Belousov, equation 17)
-            // ---------------------------------------------
-
-            for ( int jn = 3; jn <= trc; ++jn ) {
-                for ( int jm = 2; jm < jn; ++jm ) {
-                    double cn =
-                        ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) );     // numerator of c in Belousov
-                    double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) );  // denominator of c in Belousov
-                    double dn =
-                        ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) );     // numerator of d in Belousov
-                    double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) );  // denominator of d in Belousov
-                    double en = ( ( 2. * jn + 1. ) * ( jn - jm ) );                     // numerator of e in Belousov
-                    double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) );                     // denominator of e in Belousov
-
-                    legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] -
-                                              std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx +
-                                              std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx;
-                }
+        // ---------------------------------------------
+        // 3. General recurrence (Belousov, equation 17)
+        // ---------------------------------------------
+
+        for ( int jn = 3; jn <= trc; ++jn ) {
+            for ( int jm = 2; jm < jn; ++jm ) {
+                double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) );  // numerator of c in Belousov
+                double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of c in Belousov
+                double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) );  // numerator of d in Belousov
+                double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of d in Belousov
+                double en = ( ( 2. * jn + 1. ) * ( jn - jm ) );                          // numerator of e in Belousov
+                double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) );                          // denominator of e in Belousov
+
+                legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] -
+                                          std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx +
+                                          std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx;
             }
         }
+    }
+}
+
+
+void compute_legendre_polynomialsopt3(
+    const size_t trc,          // truncation (in)
+    const int nlats,           // number of latitudes
+    const double lats[],       // latitudes in radians (in)
+    double leg_sym[],          // values of associated Legendre functions, symmetric part
+    double leg_asym[],         // values of associated Legendre functions, asymmetric part
+    size_t leg_start_sym[],    // start indices for different zonal wave numbers, symmetric part
+    size_t leg_start_asym[] )  // start indices for different zonal wave numbers, asymmetric part
+{
+    auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; };
+    std::vector<double> legpol( legendre_size( trc ) );
+    std::vector<double> zfn( ( trc + 1 ) * ( trc + 1 ) );
+    auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; };
+    compute_zfnopt3( trc, zfn.data() );
+
+    // Loop over latitudes:
+    for ( int jlat = 0; jlat < nlats; ++jlat ) {
+        // compute legendre polynomials for current latitude:
+        compute_legendre_polynomials_latopt3( trc, lats[jlat], legpol.data(), zfn.data() );
 
+        // split polynomials into symmetric and antisymmetric parts:
         {
             //ATLAS_TRACE( "add to global arrays" );
 
diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h
index 1698fb80c..f5dbd7aa3 100644
--- a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h
+++ b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h
@@ -32,6 +32,13 @@ namespace trans {
 // Ported to C++ by:
 // Andreas Mueller *ECMWF*
 //
+void compute_zfnopt3( const size_t trc, double zfn[] );
+
+void compute_legendre_polynomials_latopt3( const size_t trc,  // truncation (in)
+                                           const double lat,  // latitude in radians (in)
+                                           double legpol[],   // legendre polynomials
+                                           double zfn[] );
+
 void compute_legendre_polynomialsopt3(
     const size_t trc,           // truncation (in)
     const int nlats,            // number of latitudes
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index 36104005c..f214c6309 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -143,19 +143,8 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
             }
         }
         //Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << std::endl;
-    }
-    else {
-        // unstructured grid
-        useFFT_   = false;
-        nlats     = grid_.size();
-        nlons     = grid_.size();
-        nlatsNH_  = nlats;
-        nlatsLeg_ = nlats;
-    }
-    std::vector<double> lats( nlatsLeg_ );
-    std::vector<double> lons( nlons );
-    if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
-        grid::StructuredGrid g( grid_ );
+        std::vector<double> lats( nlatsLeg_ );
+        std::vector<double> lons( nlons );
         if ( nlatsNH_ >= nlatsSH_ ) {
             for ( size_t j = 0; j < nlatsLeg_; ++j ) {
                 lats[j] = g.y( j ) * util::Constants::degreesToRadians();
@@ -169,92 +158,86 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         for ( size_t j = 0; j < nlons; ++j ) {
             lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians();
         }
-    }
-    else {
-        int j( 0 );
-        for ( PointXY p : grid_.xy() ) {
-            lats[j++] = p.y() * util::Constants::degreesToRadians();
-            lons[j++] = p.x() * util::Constants::degreesToRadians();
-        }
-    }
-    // precomputations for Legendre polynomials:
-    {
-        ATLAS_TRACE( "opt3 precomp Legendre" );
-        int size_sym  = 0;
-        int size_asym = 0;
-        legendre_sym_begin_.resize( truncation_ + 3 );
-        legendre_asym_begin_.resize( truncation_ + 3 );
-        legendre_sym_begin_[0]  = 0;
-        legendre_asym_begin_[0] = 0;
-        for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-            size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ );
-            size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ );
-            legendre_sym_begin_[jm + 1]  = size_sym;
-            legendre_asym_begin_[jm + 1] = size_asym;
-        }
-        alloc_aligned( legendre_sym_, size_sym );
-        alloc_aligned( legendre_asym_, size_asym );
-        FILE* file_leg;
-        file_leg = fopen( "legendre.bin", "r" );
-        if ( file_leg ) {
-            fread( legendre_sym_, sizeof( double ), size_sym, file_leg );
-            fread( legendre_asym_, sizeof( double ), size_asym, file_leg );
-            fclose( file_leg );
-        }
-        else {
-            compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_,
-                                              legendre_sym_begin_.data(), legendre_asym_begin_.data() );
-            file_leg = fopen( "legendre.bin", "wb" );
-            fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg );
-            fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg );
-            fclose( file_leg );
+        // precomputations for Legendre polynomials:
+        {
+            ATLAS_TRACE( "opt3 precomp Legendre" );
+            int size_sym  = 0;
+            int size_asym = 0;
+            legendre_sym_begin_.resize( truncation_ + 3 );
+            legendre_asym_begin_.resize( truncation_ + 3 );
+            legendre_sym_begin_[0]  = 0;
+            legendre_asym_begin_[0] = 0;
+            for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
+                size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ );
+                size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ );
+                legendre_sym_begin_[jm + 1]  = size_sym;
+                legendre_asym_begin_[jm + 1] = size_asym;
+            }
+            alloc_aligned( legendre_sym_, size_sym );
+            alloc_aligned( legendre_asym_, size_asym );
+            FILE* file_leg;
+            file_leg = fopen( "legendre.bin", "r" );
+            if ( false ) {  //if ( file_leg ) {
+                fread( legendre_sym_, sizeof( double ), size_sym, file_leg );
+                fread( legendre_asym_, sizeof( double ), size_asym, file_leg );
+                fclose( file_leg );
+            }
+            else {
+                compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_,
+                                                  legendre_asym_, legendre_sym_begin_.data(),
+                                                  legendre_asym_begin_.data() );
+                /*file_leg = fopen( "legendre.bin", "wb" );
+                fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg );
+                fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg );
+                fclose( file_leg );*/
+            }
         }
-    }
 
-    // precomputations for Fourier transformations:
-    if ( useFFT_ ) {
+        // precomputations for Fourier transformations:
+        if ( useFFT_ ) {
 #if ATLAS_HAVE_FFTW
-        {
-            ATLAS_TRACE( "opt3 precomp FFTW" );
-            int num_complex = ( nlonsGlobal_ / 2 ) + 1;
-            fft_in_         = fftw_alloc_complex( nlats * num_complex );
-            fft_out_        = fftw_alloc_real( nlats * nlonsGlobal_ );
-            plan_ = fftw_plan_many_dft_c2r( 1, &nlonsGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1,
-                                            nlonsGlobal_, FFTW_ESTIMATE );
-        }
-            // other FFT implementations should be added with #elif statements
-#else
-        useFFT_ = false;                                 // no FFT implemented => default to dgemm
-#endif
-    }
-    if ( !useFFT_ ) {
-        alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons );
-        if ( dgemmMethod1_ ) {
             {
-                ATLAS_TRACE( "opt3 precomp Fourier" );
-                int idx = 0;
-                for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                    double factor = 1.;
-                    for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                        if ( jm > 0 ) { factor = 2.; }
-                        fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
-                        fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
-                    }
-                }
+                ATLAS_TRACE( "opt3 precomp FFTW" );
+                int num_complex = ( nlonsGlobal_ / 2 ) + 1;
+                fft_in_         = fftw_alloc_complex( nlats * num_complex );
+                fft_out_        = fftw_alloc_real( nlats * nlonsGlobal_ );
+                plan_ = fftw_plan_many_dft_c2r( 1, &nlonsGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL,
+                                                1, nlonsGlobal_, FFTW_ESTIMATE );
             }
+                // other FFT implementations should be added with #elif statements
+#else
+            useFFT_ = false;                             // no FFT implemented => default to dgemm
+#endif
         }
-        else {
-            {
-                ATLAS_TRACE( "opt3 precomp Fourier tp" );
-                int idx = 0;
-                for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                    double factor = 1.;
-                    if ( jm > 0 ) { factor = 2.; }
+        if ( !useFFT_ ) {
+            alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons );
+            if ( dgemmMethod1_ ) {
+                {
+                    ATLAS_TRACE( "opt3 precomp Fourier" );
+                    int idx = 0;
                     for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                        fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
+                        double factor = 1.;
+                        for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                            if ( jm > 0 ) { factor = 2.; }
+                            fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
+                            fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
+                        }
                     }
-                    for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                        fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
+                }
+            }
+            else {
+                {
+                    ATLAS_TRACE( "opt3 precomp Fourier tp" );
+                    int idx = 0;
+                    for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                        double factor = 1.;
+                        if ( jm > 0 ) { factor = 2.; }
+                        for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                            fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
+                        }
+                        for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                            fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
+                        }
                     }
                 }
             }
@@ -270,17 +253,19 @@ TransLocalopt3::TransLocalopt3( const Grid& grid, const long truncation, const e
 // --------------------------------------------------------------------------------------------------------------------
 
 TransLocalopt3::~TransLocalopt3() {
-    free_aligned( legendre_sym_ );
-    free_aligned( legendre_asym_ );
-    if ( useFFT_ ) {
+    if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
+        free_aligned( legendre_sym_ );
+        free_aligned( legendre_asym_ );
+        if ( useFFT_ ) {
 #if ATLAS_HAVE_FFTW
-        fftw_destroy_plan( plan_ );
-        fftw_free( fft_in_ );
-        fftw_free( fft_out_ );
+            fftw_destroy_plan( plan_ );
+            fftw_free( fft_in_ );
+            fftw_free( fft_out_ );
 #endif
-    }
-    else {
-        free_aligned( fourier_ );
+        }
+        else {
+            free_aligned( fourier_ );
+        }
     }
 }
 
@@ -561,24 +546,78 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
         }  // namespace atlas
         else {
             ATLAS_TRACE( "invtrans_uv unstructured opt3" );
-            int idx = 0;
-            for ( PointXY p : grid_.xy() ) {
-                double lon   = p.x() * util::Constants::degreesToRadians();
-                double lat   = p.y() * util::Constants::degreesToRadians();
-                double trcFT = truncation;
-
+            grid::UnstructuredGrid gu = grid_;
+            double* zfn;
+            alloc_aligned( zfn, ( truncation + 1 ) * ( truncation + 1 ) );
+            compute_zfnopt3( truncation, zfn );
+            int size_fourier = nb_fields * 2;
+            double* legendre;
+            double* scl_fourier;
+            double* scl_fourier_tp;
+            double* fouriertp;
+            double* gp_opt;
+            alloc_aligned( legendre, legendre_size( truncation + 1 ) );
+            alloc_aligned( scl_fourier, size_fourier * ( truncation + 1 ) );
+            alloc_aligned( scl_fourier_tp, size_fourier * ( truncation + 1 ) );
+            alloc_aligned( fouriertp, 2 * ( truncation + 1 ) );
+            alloc_aligned( gp_opt, nb_fields );
+
+            // loop over all points:
+            for ( int ip = 0; ip < grid_.size(); ip++ ) {
+                PointXY p  = gu.xy( ip );
+                double lon = p.x() * util::Constants::degreesToRadians();
+                double lat = p.y() * util::Constants::degreesToRadians();
+                compute_legendre_polynomials_latopt3( truncation, lat, legendre, zfn );
                 // Legendre transform:
-                //invtrans_legendreopt3( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
-                //                      legReal.data(), legImag.data() );
-
-                // Fourier transform:
-                //invtrans_fourieropt3( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
-                //                     gp_tmp.data() + ( nb_fields * idx ) );
-                for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-                    //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                {
+                    //ATLAS_TRACE( "opt Legendre dgemm" );
+                    for ( int jm = 0; jm <= truncation; jm++ ) {
+                        int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
+                        eckit::linalg::Matrix A( eckit::linalg::Matrix(
+                            const_cast<double*>( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) );
+                        eckit::linalg::Matrix B( legendre + noff, ns, 1 );
+                        eckit::linalg::Matrix C( scl_fourier + jm * size_fourier, nb_fields * 2, 1 );
+                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                    }
+                }
+                {
+                    //ATLAS_TRACE( "opt transposition in Fourier" );
+                    int idx = 0;
+                    for ( int jm = 0; jm < truncation + 1; jm++ ) {
+                        for ( int imag = 0; imag < 2; imag++ ) {
+                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                int pos_tp = imag + 2 * ( jm + ( truncation + 1 ) * ( jfld ) );
+                                //int pos  = jfld + nb_fields * ( imag + 2 * ( jm ) );
+                                scl_fourier_tp[pos_tp] = scl_fourier[idx++];  // = scl_fourier[pos]
+                            }
+                        }
+                    }
+                }
+
+                // Fourier transformation:
+                int idx          = 0;
+                fouriertp[idx++] = 1.;  // real part
+                fouriertp[idx++] = 0.;  // imaginary part
+                for ( int jm = 1; jm < truncation + 1; jm++ ) {
+                    fouriertp[idx++] = +2. * std::cos( jm * lon );  // real part
+                    fouriertp[idx++] = -2. * std::sin( jm * lon );  // imaginary part
+                }
+                {
+                    //ATLAS_TRACE( "opt Fourier dgemm" );
+                    eckit::linalg::Matrix A( fouriertp, 1, ( truncation + 1 ) * 2 );
+                    eckit::linalg::Matrix B( scl_fourier_tp, ( truncation + 1 ) * 2, nb_fields );
+                    eckit::linalg::Matrix C( gp_opt, 1, nb_fields );
+                    eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                    for ( int j = 0; j < nb_fields; j++ ) {
+                        gp_fields[ip + j * grid_.size()] = gp_opt[j];
+                    }
                 }
-                ++idx;
             }
+            free_aligned( legendre );
+            free_aligned( scl_fourier );
+            free_aligned( scl_fourier_tp );
+            free_aligned( fouriertp );
+            free_aligned( gp_opt );
         }
     }  // namespace trans
 }  // namespace atlas
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 421dbdeaf..95a49eeb3 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -724,7 +724,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
     trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) );
     double rav = 0.;  // compute average rms error of trans library in rav
 #endif
-    trans::Trans transLocal1( g, trc, util::Config( "type", "localopt3" ) );
+    trans::Trans transLocal1( g, trc, util::Config( "type", "localopt2" ) );
     trans::Trans transLocal2( g, trc, util::Config( "type", "localopt3" ) );
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
 
@@ -853,7 +853,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
 }
 #endif
 //-----------------------------------------------------------------------------
-#if 1
+#if 0
 CASE( "test_trans_hires" ) {
     Log::info() << "test_trans_hires" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -865,7 +865,7 @@ CASE( "test_trans_hires" ) {
 #if ATLAS_HAVE_TRANS
     //std::string transTypes[4] = {"localopt", "localopt2", "localopt3", "ifs"};
     //std::string transTypes[2] = {"localopt2", "localopt3"};
-    std::string transTypes[3] = {"localopt", "localopt2", "localopt3"};
+    std::string transTypes[3] = {"localopt3", "localopt2", "localopt"};
     //std::string transTypes[1] = {"localopt3"};
 #else
     std::string transTypes[1] = {"localopt2"};
@@ -875,9 +875,10 @@ CASE( "test_trans_hires" ) {
     //Domain testdomain = ZonalBandDomain( {-.5, .5} );
     //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} );
     //Domain testdomain = ZonalBandDomain( {-85., -86.} );
-    Domain testdomain = RectangularDomain( {-1., 1.}, {5., 5.5} );
+    ///Domain testdomain = RectangularDomain( {-.01, .01}, {-.01, .01} );
+    Domain testdomain = RectangularDomain( {-1, 1}, {-1, 1} );
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F5000", testdomain );
+    Grid g( "F22000", testdomain );
     Grid g_global( g.name() );
 
     grid::StructuredGrid gs( g );
@@ -1071,6 +1072,142 @@ CASE( "test_trans_domain" ) {
 }
 #endif
 //-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+#if 1
+CASE( "test_trans_unstructured" ) {
+    Log::info() << "test_trans_unstructured" << std::endl;
+    // test transgeneral by comparing with analytic solution on an unstructured grid
+
+    std::ostream& out = Log::info();
+    double tolerance  = 1.e-13;
+
+    //Domain testdomain = RectangularDomain( {20., 25.}, {40., 60.} );
+    Domain testdomain = RectangularDomain( {20., 25.}, {45., 50.} );
+    // Grid: (Adjust the following line if the test takes too long!)
+    Grid g( "F12", testdomain );
+    grid::StructuredGrid gs( g );
+    std::vector<PointXY> pts( g.size() );
+    int idx( 0 );
+    for ( size_t j = 0; j < gs.ny(); ++j ) {
+        double lat = gs.y( j );
+        for ( size_t i = 0; i < gs.nx( j ); ++i ) {
+            double lon = gs.x( i, j );
+            Log::info() << "idx=" << idx << " lon=" << lon << " lat=" << lat << std::endl;
+            pts[idx++].assign( lon, lat );
+        }
+    }
+    Grid gu = grid::UnstructuredGrid( new std::vector<PointXY>( pts ) );
+
+    int trc     = 120;
+    double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
+
+    int nb_scalar = 100, nb_vordiv = 0;
+    int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
+    std::vector<double> sp( 2 * N * nb_scalar );
+    std::vector<double> vor( 2 * N * nb_vordiv );
+    std::vector<double> div( 2 * N * nb_vordiv );
+    std::vector<double> rspecg( 2 * N );
+    std::vector<double> gp( nb_all * g.size() );
+    std::vector<double> rgp1( nb_all * g.size() );
+    std::vector<double> rgp2( nb_all * g.size() );
+    std::vector<double> rgp_analytic( g.size() );
+
+    int icase = 0;
+    for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) {         // vorticity, divergence, scalar
+        for ( int ivar_out = 2; ivar_out < 3; ivar_out++ ) {  // u, v, scalar
+            int nb_fld = 1;
+            if ( ivar_out == 2 ) {
+                tolerance = 1.e-13;
+                nb_fld    = nb_scalar;
+            }
+            else {
+                tolerance = 2.e-6;
+                nb_fld    = nb_vordiv;
+            }
+            for ( int jfld = 0; jfld < nb_fld; jfld++ ) {  // multiple fields
+                int k = 0;
+                for ( int m = 0; m <= trc; m++ ) {                 // zonal wavenumber
+                    for ( int n = m; n <= trc; n++ ) {             // total wavenumber
+                        for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
+
+                            if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. &&
+                                 icase < 1000 ) {
+                                auto start = std::chrono::system_clock::now();
+                                trans::Trans transLocal1( g, trc, util::Config( "type", "localopt3" ) );
+                                trans::Trans transLocal2( gu, trc, util::Config( "type", "localopt3" ) );
+                                for ( int j = 0; j < 2 * N * nb_scalar; j++ ) {
+                                    sp[j] = 0.;
+                                }
+                                for ( int j = 0; j < 2 * N * nb_vordiv; j++ ) {
+                                    vor[j] = 0.;
+                                    div[j] = 0.;
+                                }
+                                if ( ivar_in == 0 ) vor[k * nb_vordiv + jfld] = 1.;
+                                if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.;
+                                if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.;
+
+                                for ( int j = 0; j < nb_all * g.size(); j++ ) {
+                                    gp[j]   = 0.;
+                                    rgp1[j] = 0.;
+                                    rgp2[j] = 0.;
+                                }
+                                for ( int j = 0; j < g.size(); j++ ) {
+                                    rgp_analytic[j] = 0.;
+                                }
+
+                                spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(),
+                                                                  rgp_analytic.data(), ivar_in, ivar_out );
+
+                                //Log::info() << icase << " m=" << m << " n=" << n << " imag=" << imag << " structured: ";
+                                EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
+                                                                       div.data(), rgp1.data() ) );
+
+                                //Log::info() << icase << " m=" << m << " n=" << n << " imag=" << imag << " unstructured: ";
+                                EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
+                                                                       div.data(), rgp2.data() ) );
+
+                                int pos = ( ivar_out * nb_vordiv + jfld );
+
+                                double rms_gen1 =
+                                    compute_rms( g.size(), rgp1.data() + pos * g.size(), rgp_analytic.data() );
+
+                                double rms_gen2 =
+                                    compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() );
+
+                                rav1 += rms_gen1;
+                                rav2 += rms_gen2;
+                                if ( !( rms_gen1 < tolerance ) || !( rms_gen2 < tolerance ) ) {
+                                    Log::info()
+                                        << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
+                                        << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl;
+                                    ATLAS_DEBUG_VAR( rms_gen1 );
+                                    ATLAS_DEBUG_VAR( rms_gen2 );
+                                    ATLAS_DEBUG_VAR( tolerance );
+                                }
+                                EXPECT( rms_gen1 < tolerance );
+                                EXPECT( rms_gen2 < tolerance );
+                                icase++;
+                                auto end                                      = std::chrono::system_clock::now();  //
+                                std::chrono::duration<double> elapsed_seconds = end - start;
+                                std::time_t end_time = std::chrono::system_clock::to_time_t( end );
+                                std::string time_str = std::ctime( &end_time );
+                                //Log::info() << "case " << icase << ", elapsed time: " << elapsed_seconds.count()
+                                //            << "s. Now: " << time_str.substr( 0, time_str.length() - 1 ) << std::endl;
+                            }
+                            k++;
+                        }
+                    }
+                }
+            }
+        }
+    }
+    Log::info() << "Vordiv+scalar comparison with trans: all " << icase << " cases successfully passed!" << std::endl;
+    rav1 /= icase;
+    Log::info() << "average RMS error of transLocal1: " << rav1 << std::endl;
+    rav2 /= icase;
+    Log::info() << "average RMS error of transLocal2: " << rav2 << std::endl;
+}
+#endif
 #if 0
 CASE( "test_trans_invtrans" ) {
     trans::Trans trans( Grid( "O64" ), 63, util::Config( "type", "local" ) );

From be0ad59f365b12dc4427df03c022439e9ef42842 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Thu, 5 Apr 2018 16:23:47 +0100
Subject: [PATCH 041/123] not working

---
 .../localopt2/LegendrePolynomialsopt2.cc      | 263 +++++++++--------
 .../trans/localopt2/LegendrePolynomialsopt2.h |  12 +
 src/atlas/trans/localopt2/TransLocalopt2.cc   | 271 ++++++++++--------
 .../localopt3/LegendrePolynomialsopt3.cc      |  28 ++
 .../trans/localopt3/LegendrePolynomialsopt3.h |   5 +
 src/atlas/trans/localopt3/TransLocalopt3.cc   |  49 +++-
 src/atlas/trans/localopt3/TransLocalopt3.h    |   1 +
 src/tests/trans/test_transgeneral.cc          |  11 +-
 8 files changed, 392 insertions(+), 248 deletions(-)

diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
index 9d11ea850..c3e3fe2f6 100644
--- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
+++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
@@ -21,143 +21,155 @@ namespace trans {
 
 //-----------------------------------------------------------------------------
 
-void compute_legendre_polynomialsopt2(
-    const size_t trc,          // truncation (in)
-    const int nlats,           // number of latitudes
-    const double lats[],       // latitudes in radians (in)
-    double leg_sym[],          // values of associated Legendre functions, symmetric part
-    double leg_asym[],         // values of associated Legendre functions, asymmetric part
-    size_t leg_start_sym[],    // start indices for different zonal wave numbers, symmetric part
-    size_t leg_start_asym[] )  // start indices for different zonal wave numbers, asymmetric part
-{
-    auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; };
-    array::ArrayT<double> zfn_( trc + 1, trc + 1 );
-    array::ArrayView<double, 2> zfn = array::make_view<double, 2>( zfn_ );
-    std::vector<double> legpol( legendre_size( trc ) );
-    auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; };
-    int iodd;
-
+void compute_zfnopt2( const size_t trc, double zfn[] ) {
+    auto idxzfn = [&]( int jn, int jk ) { return jk + ( trc + 1 ) * jn; };
+    int iodd    = 0;
     // Compute coefficients for Taylor series in Belousov (19) and (21)
-    // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.)
+    // Belousov, Swarztrauber use zfn[0]=std::sqrt(2.)
     // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1
-    zfn( 0, 0 ) = 2.;
+    zfn[idxzfn( 0, 0 )] = 2.;
     for ( int jn = 1; jn <= trc; ++jn ) {
-        double zfnn = zfn( 0, 0 );
+        double zfnn = zfn[idxzfn( 0, 0 )];
         for ( int jgl = 1; jgl <= jn; ++jgl ) {
             zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) );
         }
-        iodd          = jn % 2;
-        zfn( jn, jn ) = zfnn;
+        iodd                  = jn % 2;
+        zfn[idxzfn( jn, jn )] = zfnn;
         for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) {
             double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) );  // new factor numerator
             double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) );           // new factor denominator
 
-            zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd;
+            zfn[idxzfn( jn, jn - jgl )] = zfn[idxzfn( jn, jn - jgl + 2 )] * zfjn / zfjd;
         }
     }
+}
 
-    // Loop over latitudes:
-    for ( int jlat = 0; jlat < nlats; ++jlat ) {
-        {
-            //ATLAS_TRACE( "compute Legendre polynomials" );
-            // --------------------
-            // 1. First two columns
-            // --------------------
-            double lat     = lats[jlat];
-            double zdlx1   = ( M_PI_2 - lat );               // theta
-            double zdlx    = std::cos( zdlx1 );              // cos(theta)
-            double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
-
-            legpol[idxmn( 0, 0 )] = 1.;
-            double vsin[trc + 1], vcos[trc + 1];
-            for ( int j = 1; j <= trc; j++ ) {
-                vsin[j] = std::sin( j * zdlx1 );
-            }
-            for ( int j = 1; j <= trc; j++ ) {
-                vcos[j] = std::cos( j * zdlx1 );
-            }
 
-            double zdl1sita = 0.;
-            // if we are less than 1 meter from the pole,
-            if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits<double>::epsilon() ) ) {
-                zdlx    = 1.;
-                zdlsita = 0.;
-            }
-            else {
-                zdl1sita = 1. / zdlsita;
-            }
+void compute_legendre_polynomials_latopt2( const size_t trc,  // truncation (in)
+                                           const double lat,  // latitude in radians (in)
+                                           double legpol[],   // legendre polynomials
+                                           double zfn[] ) {
+    auto idxmn  = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; };
+    auto idxzfn = [&]( int jn, int jk ) { return jk + ( trc + 1 ) * jn; };
+    {  //ATLAS_TRACE( "compute Legendre polynomials" );
+        // --------------------
+        // 1. First two columns
+        // --------------------
+        double zdlx1   = ( M_PI_2 - lat );               // theta
+        double zdlx    = std::cos( zdlx1 );              // cos(theta)
+        double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
+
+        legpol[idxmn( 0, 0 )] = 1.;
+        double vsin[trc + 1], vcos[trc + 1];
+        for ( int j = 1; j <= trc; j++ ) {
+            vsin[j] = std::sin( j * zdlx1 );
+        }
+        for ( int j = 1; j <= trc; j++ ) {
+            vcos[j] = std::cos( j * zdlx1 );
+        }
 
-            // ordinary Legendre polynomials from series expansion
-            // ---------------------------------------------------
-
-            // even N
-            for ( int jn = 2; jn <= trc; jn += 2 ) {
-                double zdlk   = 0.5 * zfn( jn, 0 );
-                double zdlldn = 0.0;
-                double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
-                // represented by only even k
-                for ( int jk = 2; jk <= jn; jk += 2 ) {
-                    // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-                    zdlk = zdlk + zfn( jn, jk ) * vcos[jk];
-                    // normalised associated Legendre polynomial == \overbar{P_n}^1
-                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk];
-                }
-                legpol[idxmn( 0, jn )] = zdlk;
-                legpol[idxmn( 1, jn )] = zdlldn;
+        double zdl1sita = 0.;
+        // if we are less than 1 meter from the pole,
+        if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits<double>::epsilon() ) ) {
+            zdlx    = 1.;
+            zdlsita = 0.;
+        }
+        else {
+            zdl1sita = 1. / zdlsita;
+        }
+
+        // ordinary Legendre polynomials from series expansion
+        // ---------------------------------------------------
+
+        // even N
+        for ( int jn = 2; jn <= trc; jn += 2 ) {
+            double zdlk   = 0.5 * zfn[idxzfn( jn, 0 )];
+            double zdlldn = 0.0;
+            double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
+            // represented by only even k
+            for ( int jk = 2; jk <= jn; jk += 2 ) {
+                // normalised ordinary Legendre polynomial == \overbar{P_n}^0
+                zdlk = zdlk + zfn[idxzfn( jn, jk )] * vcos[jk];
+                // normalised associated Legendre polynomial == \overbar{P_n}^1
+                zdlldn = zdlldn + zdsq * zfn[idxzfn( jn, jk )] * jk * vsin[jk];
             }
+            legpol[idxmn( 0, jn )] = zdlk;
+            legpol[idxmn( 1, jn )] = zdlldn;
+        }
 
-            // odd N
-            for ( int jn = 1; jn <= trc; jn += 2 ) {
-                zfn( jn, 0 )  = 0.;
-                double zdlk   = 0.;
-                double zdlldn = 0.0;
-                double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
-                // represented by only even k
-                for ( int jk = 1; jk <= jn; jk += 2 ) {
-                    // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-                    zdlk = zdlk + zfn( jn, jk ) * vcos[jk];
-                    // normalised associated Legendre polynomial == \overbar{P_n}^1
-                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk];
-                }
-                legpol[idxmn( 0, jn )] = zdlk;
-                legpol[idxmn( 1, jn )] = zdlldn;
+        // odd N
+        for ( int jn = 1; jn <= trc; jn += 2 ) {
+            zfn[idxzfn( jn, 0 )] = 0.;
+            double zdlk          = 0.;
+            double zdlldn        = 0.0;
+            double zdsq          = 1. / std::sqrt( jn * ( jn + 1. ) );
+            // represented by only even k
+            for ( int jk = 1; jk <= jn; jk += 2 ) {
+                // normalised ordinary Legendre polynomial == \overbar{P_n}^0
+                zdlk = zdlk + zfn[idxzfn( jn, jk )] * vcos[jk];
+                // normalised associated Legendre polynomial == \overbar{P_n}^1
+                zdlldn = zdlldn + zdsq * zfn[idxzfn( jn, jk )] * jk * vsin[jk];
             }
+            legpol[idxmn( 0, jn )] = zdlk;
+            legpol[idxmn( 1, jn )] = zdlldn;
+        }
 
-            // --------------------------------------------------------------
-            // 2. Diagonal (the terms 0,0 and 1,1 have already been computed)
-            //    Belousov, equation (23)
-            // --------------------------------------------------------------
+        // --------------------------------------------------------------
+        // 2. Diagonal (the terms 0,0 and 1,1 have already been computed)
+        //    Belousov, equation (23)
+        // --------------------------------------------------------------
 
-            double zdls = zdl1sita * std::numeric_limits<double>::min();
-            for ( int jn = 2; jn <= trc; ++jn ) {
-                double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) );
+        double zdls = zdl1sita * std::numeric_limits<double>::min();
+        for ( int jn = 2; jn <= trc; ++jn ) {
+            double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) );
 
-                legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq;
-                if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0;
-            }
+            legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq;
+            if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0;
+        }
 
-            // ---------------------------------------------
-            // 3. General recurrence (Belousov, equation 17)
-            // ---------------------------------------------
-
-            for ( int jn = 3; jn <= trc; ++jn ) {
-                for ( int jm = 2; jm < jn; ++jm ) {
-                    double cn =
-                        ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) );     // numerator of c in Belousov
-                    double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) );  // denominator of c in Belousov
-                    double dn =
-                        ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) );     // numerator of d in Belousov
-                    double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) );  // denominator of d in Belousov
-                    double en = ( ( 2. * jn + 1. ) * ( jn - jm ) );                     // numerator of e in Belousov
-                    double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) );                     // denominator of e in Belousov
-
-                    legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] -
-                                              std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx +
-                                              std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx;
-                }
+        // ---------------------------------------------
+        // 3. General recurrence (Belousov, equation 17)
+        // ---------------------------------------------
+
+        for ( int jn = 3; jn <= trc; ++jn ) {
+            for ( int jm = 2; jm < jn; ++jm ) {
+                double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) );  // numerator of c in Belousov
+                double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of c in Belousov
+                double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) );  // numerator of d in Belousov
+                double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of d in Belousov
+                double en = ( ( 2. * jn + 1. ) * ( jn - jm ) );                          // numerator of e in Belousov
+                double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) );                          // denominator of e in Belousov
+
+                legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] -
+                                          std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx +
+                                          std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx;
             }
         }
+    }
+}
+
+
+void compute_legendre_polynomialsopt2(
+    const size_t trc,          // truncation (in)
+    const int nlats,           // number of latitudes
+    const double lats[],       // latitudes in radians (in)
+    double leg_sym[],          // values of associated Legendre functions, symmetric part
+    double leg_asym[],         // values of associated Legendre functions, asymmetric part
+    size_t leg_start_sym[],    // start indices for different zonal wave numbers, symmetric part
+    size_t leg_start_asym[] )  // start indices for different zonal wave numbers, asymmetric part
+{
+    auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; };
+    std::vector<double> legpol( legendre_size( trc ) );
+    std::vector<double> zfn( ( trc + 1 ) * ( trc + 1 ) );
+    auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; };
+    compute_zfnopt2( trc, zfn.data() );
+
+    // Loop over latitudes:
+    for ( int jlat = 0; jlat < nlats; ++jlat ) {
+        // compute legendre polynomials for current latitude:
+        compute_legendre_polynomials_latopt2( trc, lats[jlat], legpol.data(), zfn.data() );
 
+        // split polynomials into symmetric and antisymmetric parts:
         {
             //ATLAS_TRACE( "add to global arrays" );
 
@@ -192,6 +204,33 @@ void compute_legendre_polynomialsopt2(
     }
 }
 
+void compute_legendre_polynomials_allopt2( const size_t trc,     // truncation (in)
+                                           const int nlats,      // number of latitudes
+                                           const double lats[],  // latitudes in radians (in)
+                                           double legendre[] )   // legendre polynomials for all latitudes
+{
+    auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; };
+    std::vector<double> legpol( legendre_size( trc ) );
+    std::vector<double> zfn( ( trc + 1 ) * ( trc + 1 ) );
+    auto idxmn  = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; };
+    auto idxmnl = [&]( int jm, int jn, int jlat ) {
+        return ( 2 * trc + 3 - jm ) * jm / 2 * nlats + jlat * ( trc - jm + 1 ) + jn - jm;
+    };
+    compute_zfnopt2( trc, zfn.data() );
+
+    // Loop over latitudes:
+    for ( int jlat = 0; jlat < nlats; ++jlat ) {
+        // compute legendre polynomials for current latitude:
+        compute_legendre_polynomials_latopt2( trc, lats[jlat], legpol.data(), zfn.data() );
+
+        for ( int jm = 1; jm <= trc; ++jm ) {
+            for ( int jn = jm; jn <= trc; ++jn ) {
+                legendre[idxmnl( jm, jn, jlat )] = legpol[idxmn( jm, jn )];
+            }
+        }
+    }
+}  // namespace trans
+
 // --------------------------------------------------------------------------------------------------------------------
 
 }  // namespace trans
diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h
index 7e97dec7e..f79438aea 100644
--- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h
+++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h
@@ -32,6 +32,13 @@ namespace trans {
 // Ported to C++ by:
 // Andreas Mueller *ECMWF*
 //
+void compute_zfnopt2( const size_t trc, double zfn[] );
+
+void compute_legendre_polynomials_latopt2( const size_t trc,  // truncation (in)
+                                           const double lat,  // latitude in radians (in)
+                                           double legpol[],   // legendre polynomials
+                                           double zfn[] );
+
 void compute_legendre_polynomialsopt2(
     const size_t trc,           // truncation (in)
     const int nlats,            // number of latitudes
@@ -41,6 +48,11 @@ void compute_legendre_polynomialsopt2(
     size_t leg_start_sym[],     // start indices for different zonal wave numbers, symmetric part
     size_t leg_start_asym[] );  // start indices for different zonal wave numbers, asymmetric part
 
+void compute_legendre_polynomials_allopt2( const size_t trc,     // truncation (in)
+                                           const int nlats,      // number of latitudes
+                                           const double lats[],  // latitudes in radians (in)
+                                           double legendre[] );  // legendre polynomials for all latitudes
+
 // --------------------------------------------------------------------------------------------------------------------
 
 }  // namespace trans
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index 7eea4f9d3..16062989d 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -103,7 +103,7 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
     int nlons     = 0;
     int neqtr     = 0;
     useFFT_       = true;
-    dgemmMethod1_ = true;
+    dgemmMethod1_ = false;
     nlatsNH_      = 0;
     nlatsSH_      = 0;
     nlatsLeg_     = 0;
@@ -143,19 +143,8 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
             }
         }
         //Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << std::endl;
-    }
-    else {
-        // unstructured grid
-        useFFT_   = false;
-        nlats     = grid_.size();
-        nlons     = grid_.size();
-        nlatsNH_  = nlats;
-        nlatsLeg_ = nlats;
-    }
-    std::vector<double> lats( nlatsLeg_ );
-    std::vector<double> lons( nlons );
-    if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
-        grid::StructuredGrid g( grid_ );
+        std::vector<double> lats( nlatsLeg_ );
+        std::vector<double> lons( nlons );
         if ( nlatsNH_ >= nlatsSH_ ) {
             for ( size_t j = 0; j < nlatsLeg_; ++j ) {
                 lats[j] = g.y( j ) * util::Constants::degreesToRadians();
@@ -169,92 +158,86 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
         for ( size_t j = 0; j < nlons; ++j ) {
             lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians();
         }
-    }
-    else {
-        int j( 0 );
-        for ( PointXY p : grid_.xy() ) {
-            lats[j++] = p.y() * util::Constants::degreesToRadians();
-            lons[j++] = p.x() * util::Constants::degreesToRadians();
-        }
-    }
-    // precomputations for Legendre polynomials:
-    {
-        ATLAS_TRACE( "opt2 precomp Legendre" );
-        int size_sym  = 0;
-        int size_asym = 0;
-        legendre_sym_begin_.resize( truncation_ + 3 );
-        legendre_asym_begin_.resize( truncation_ + 3 );
-        legendre_sym_begin_[0]  = 0;
-        legendre_asym_begin_[0] = 0;
-        for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-            size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ );
-            size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ );
-            legendre_sym_begin_[jm + 1]  = size_sym;
-            legendre_asym_begin_[jm + 1] = size_asym;
-        }
-        alloc_aligned( legendre_sym_, size_sym );
-        alloc_aligned( legendre_asym_, size_asym );
-        FILE* file_leg;
-        file_leg = fopen( "legendre.bin", "r" );
-        if ( file_leg ) {
-            fread( legendre_sym_, sizeof( double ), size_sym, file_leg );
-            fread( legendre_asym_, sizeof( double ), size_asym, file_leg );
-            fclose( file_leg );
-        }
-        else {
-            compute_legendre_polynomialsopt2( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_,
-                                              legendre_sym_begin_.data(), legendre_asym_begin_.data() );
-            file_leg = fopen( "legendre.bin", "wb" );
-            fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg );
-            fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg );
-            fclose( file_leg );
+        // precomputations for Legendre polynomials:
+        {
+            ATLAS_TRACE( "opt2 precomp Legendre" );
+            int size_sym  = 0;
+            int size_asym = 0;
+            legendre_sym_begin_.resize( truncation_ + 3 );
+            legendre_asym_begin_.resize( truncation_ + 3 );
+            legendre_sym_begin_[0]  = 0;
+            legendre_asym_begin_[0] = 0;
+            for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
+                size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ );
+                size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ );
+                legendre_sym_begin_[jm + 1]  = size_sym;
+                legendre_asym_begin_[jm + 1] = size_asym;
+            }
+            alloc_aligned( legendre_sym_, size_sym );
+            alloc_aligned( legendre_asym_, size_asym );
+            FILE* file_leg;
+            file_leg = fopen( "legendre.bin", "r" );
+            if ( false ) {  //if ( file_leg ) {
+                fread( legendre_sym_, sizeof( double ), size_sym, file_leg );
+                fread( legendre_asym_, sizeof( double ), size_asym, file_leg );
+                fclose( file_leg );
+            }
+            else {
+                compute_legendre_polynomialsopt2( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_,
+                                                  legendre_asym_, legendre_sym_begin_.data(),
+                                                  legendre_asym_begin_.data() );
+                /*file_leg = fopen( "legendre.bin", "wb" );
+                fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg );
+                fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg );
+                fclose( file_leg );*/
+            }
         }
-    }
 
-    // precomputations for Fourier transformations:
-    if ( useFFT_ ) {
+        // precomputations for Fourier transformations:
+        if ( useFFT_ ) {
 #if ATLAS_HAVE_FFTW
-        {
-            ATLAS_TRACE( "opt2 precomp FFTW" );
-            int num_complex = ( nlonsGlobal_ / 2 ) + 1;
-            fft_in_         = fftw_alloc_complex( nlats * num_complex );
-            fft_out_        = fftw_alloc_real( nlats * nlonsGlobal_ );
-            plan_ = fftw_plan_many_dft_c2r( 1, &nlonsGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1,
-                                            nlonsGlobal_, FFTW_ESTIMATE );
-        }
-            // other FFT implementations should be added with #elif statements
-#else
-        useFFT_ = false;                                 // no FFT implemented => default to dgemm
-#endif
-    }
-    if ( !useFFT_ ) {
-        alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons );
-        if ( dgemmMethod1_ ) {
             {
-                ATLAS_TRACE( "opt2 precomp Fourier" );
-                int idx = 0;
-                for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                    double factor = 1.;
-                    for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                        if ( jm > 0 ) { factor = 2.; }
-                        fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
-                        fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
-                    }
-                }
+                ATLAS_TRACE( "opt2 precomp FFTW" );
+                int num_complex = ( nlonsGlobal_ / 2 ) + 1;
+                fft_in_         = fftw_alloc_complex( nlats * num_complex );
+                fft_out_        = fftw_alloc_real( nlats * nlonsGlobal_ );
+                plan_ = fftw_plan_many_dft_c2r( 1, &nlonsGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL,
+                                                1, nlonsGlobal_, FFTW_ESTIMATE );
             }
+                // other FFT implementations should be added with #elif statements
+#else
+            useFFT_ = false;                             // no FFT implemented => default to dgemm
+#endif
         }
-        else {
-            {
-                ATLAS_TRACE( "opt2 precomp Fourier tp" );
-                int idx = 0;
-                for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                    double factor = 1.;
-                    if ( jm > 0 ) { factor = 2.; }
+        if ( !useFFT_ ) {
+            alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons );
+            if ( dgemmMethod1_ ) {
+                {
+                    ATLAS_TRACE( "opt2 precomp Fourier" );
+                    int idx = 0;
                     for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                        fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
+                        double factor = 1.;
+                        for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                            if ( jm > 0 ) { factor = 2.; }
+                            fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
+                            fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
+                        }
                     }
-                    for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                        fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
+                }
+            }
+            else {
+                {
+                    ATLAS_TRACE( "opt2 precomp Fourier tp" );
+                    int idx = 0;
+                    for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                        double factor = 1.;
+                        if ( jm > 0 ) { factor = 2.; }
+                        for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                            fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
+                        }
+                        for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                            fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
+                        }
                     }
                 }
             }
@@ -270,17 +253,19 @@ TransLocalopt2::TransLocalopt2( const Grid& grid, const long truncation, const e
 // --------------------------------------------------------------------------------------------------------------------
 
 TransLocalopt2::~TransLocalopt2() {
-    free_aligned( legendre_sym_ );
-    free_aligned( legendre_asym_ );
-    if ( useFFT_ ) {
+    if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
+        free_aligned( legendre_sym_ );
+        free_aligned( legendre_asym_ );
+        if ( useFFT_ ) {
 #if ATLAS_HAVE_FFTW
-        fftw_destroy_plan( plan_ );
-        fftw_free( fft_in_ );
-        fftw_free( fft_out_ );
+            fftw_destroy_plan( plan_ );
+            fftw_free( fft_in_ );
+            fftw_free( fft_out_ );
 #endif
-    }
-    else {
-        free_aligned( fourier_ );
+        }
+        else {
+            free_aligned( fourier_ );
+        }
     }
 }
 
@@ -561,24 +546,78 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel
         }  // namespace atlas
         else {
             ATLAS_TRACE( "invtrans_uv unstructured opt2" );
-            int idx = 0;
-            for ( PointXY p : grid_.xy() ) {
-                double lon   = p.x() * util::Constants::degreesToRadians();
-                double lat   = p.y() * util::Constants::degreesToRadians();
-                double trcFT = truncation;
-
+            grid::UnstructuredGrid gu = grid_;
+            double* zfn;
+            alloc_aligned( zfn, ( truncation + 1 ) * ( truncation + 1 ) );
+            compute_zfnopt2( truncation, zfn );
+            int size_fourier = nb_fields * 2;
+            double* legendre;
+            double* scl_fourier;
+            double* scl_fourier_tp;
+            double* fouriertp;
+            double* gp_opt;
+            alloc_aligned( legendre, legendre_size( truncation + 1 ) );
+            alloc_aligned( scl_fourier, size_fourier * ( truncation + 1 ) );
+            alloc_aligned( scl_fourier_tp, size_fourier * ( truncation + 1 ) );
+            alloc_aligned( fouriertp, 2 * ( truncation + 1 ) );
+            alloc_aligned( gp_opt, nb_fields );
+
+            // loop over all points:
+            for ( int ip = 0; ip < grid_.size(); ip++ ) {
+                PointXY p  = gu.xy( ip );
+                double lon = p.x() * util::Constants::degreesToRadians();
+                double lat = p.y() * util::Constants::degreesToRadians();
+                compute_legendre_polynomials_latopt2( truncation, lat, legendre, zfn );
                 // Legendre transform:
-                //invtrans_legendreopt2( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
-                //                      legReal.data(), legImag.data() );
-
-                // Fourier transform:
-                //invtrans_fourieropt2( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
-                //                     gp_tmp.data() + ( nb_fields * idx ) );
-                for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-                    //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
+                {
+                    //ATLAS_TRACE( "opt Legendre dgemm" );
+                    for ( int jm = 0; jm <= truncation; jm++ ) {
+                        int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
+                        eckit::linalg::Matrix A( eckit::linalg::Matrix(
+                            const_cast<double*>( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) );
+                        eckit::linalg::Matrix B( legendre + noff, ns, 1 );
+                        eckit::linalg::Matrix C( scl_fourier + jm * size_fourier, nb_fields * 2, 1 );
+                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                    }
+                }
+                {
+                    //ATLAS_TRACE( "opt transposition in Fourier" );
+                    int idx = 0;
+                    for ( int jm = 0; jm < truncation + 1; jm++ ) {
+                        for ( int imag = 0; imag < 2; imag++ ) {
+                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                int pos_tp = imag + 2 * ( jm + ( truncation + 1 ) * ( jfld ) );
+                                //int pos  = jfld + nb_fields * ( imag + 2 * ( jm ) );
+                                scl_fourier_tp[pos_tp] = scl_fourier[idx++];  // = scl_fourier[pos]
+                            }
+                        }
+                    }
+                }
+
+                // Fourier transformation:
+                int idx          = 0;
+                fouriertp[idx++] = 1.;  // real part
+                fouriertp[idx++] = 0.;  // imaginary part
+                for ( int jm = 1; jm < truncation + 1; jm++ ) {
+                    fouriertp[idx++] = +2. * std::cos( jm * lon );  // real part
+                    fouriertp[idx++] = -2. * std::sin( jm * lon );  // imaginary part
+                }
+                {
+                    //ATLAS_TRACE( "opt Fourier dgemm" );
+                    eckit::linalg::Matrix A( fouriertp, 1, ( truncation + 1 ) * 2 );
+                    eckit::linalg::Matrix B( scl_fourier_tp, ( truncation + 1 ) * 2, nb_fields );
+                    eckit::linalg::Matrix C( gp_opt, 1, nb_fields );
+                    eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                    for ( int j = 0; j < nb_fields; j++ ) {
+                        gp_fields[ip + j * grid_.size()] = gp_opt[j];
+                    }
                 }
-                ++idx;
             }
+            free_aligned( legendre );
+            free_aligned( scl_fourier );
+            free_aligned( scl_fourier_tp );
+            free_aligned( fouriertp );
+            free_aligned( gp_opt );
         }
     }  // namespace trans
 }  // namespace atlas
diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
index c285d32b2..0d440bffd 100644
--- a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
+++ b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
@@ -204,6 +204,34 @@ void compute_legendre_polynomialsopt3(
     }
 }
 
+void compute_legendre_polynomials_allopt3(
+    const size_t trc,     // truncation (in)
+    const int nlats,      // number of latitudes
+    const double lats[],  // latitudes in radians (in)
+    double legendre[] )   // start indices for different zonal wave numbers, asymmetric part
+{
+    auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; };
+    std::vector<double> legpol( legendre_size( trc ) );
+    std::vector<double> zfn( ( trc + 1 ) * ( trc + 1 ) );
+    auto idxmn  = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; };
+    auto idxmnl = [&]( int jm, int jn, int jlat ) {
+        return ( 2 * trc + 3 - jm ) * jm / 2 * nlats + jlat * ( trc - jm + 1 ) + jn - jm;
+    };
+    compute_zfnopt3( trc, zfn.data() );
+
+    // Loop over latitudes:
+    for ( int jlat = 0; jlat < nlats; ++jlat ) {
+        // compute legendre polynomials for current latitude:
+        compute_legendre_polynomials_latopt3( trc, lats[jlat], legpol.data(), zfn.data() );
+
+        for ( int jm = 1; jm <= trc; ++jm ) {
+            for ( int jn = jm; jn <= trc; ++jn ) {
+                legendre[idxmnl( jm, jn, jlat )] = legpol[idxmn( jm, jn )];
+            }
+        }
+    }
+}  // namespace trans
+
 // --------------------------------------------------------------------------------------------------------------------
 
 }  // namespace trans
diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h
index f5dbd7aa3..93ebb49f7 100644
--- a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h
+++ b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h
@@ -48,6 +48,11 @@ void compute_legendre_polynomialsopt3(
     size_t leg_start_sym[],     // start indices for different zonal wave numbers, symmetric part
     size_t leg_start_asym[] );  // start indices for different zonal wave numbers, asymmetric part
 
+void compute_legendre_polynomials_allopt3( const size_t trc,     // truncation (in)
+                                           const int nlats,      // number of latitudes
+                                           const double lats[],  // latitudes in radians (in)
+                                           double legendre[] );  // legendre polynomials for all latitudes
+
 // --------------------------------------------------------------------------------------------------------------------
 
 }  // namespace trans
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index f214c6309..45f0245c4 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -243,6 +243,17 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
             }
         }
     }
+    else {
+        // unstructured grid
+        ATLAS_TRACE( "opt2 precomp unstructured" );
+        std::vector<double> lats( grid_.size() );
+        alloc_aligned( legendre_, legendre_size( truncation_ ) * grid_.size() );
+        int j( 0 );
+        for ( PointXY p : grid_.xy() ) {
+            lats[j++] = p.y() * util::Constants::degreesToRadians();
+        }
+        compute_legendre_polynomials_allopt3( truncation_, grid_.size(), lats.data(), legendre_ );
+    }
 }  // namespace trans
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -267,6 +278,9 @@ TransLocalopt3::~TransLocalopt3() {
             free_aligned( fourier_ );
         }
     }
+    else {
+        free_aligned( legendre_ );
+    }
 }
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -550,6 +564,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
             double* zfn;
             alloc_aligned( zfn, ( truncation + 1 ) * ( truncation + 1 ) );
             compute_zfnopt3( truncation, zfn );
+            int nlats        = grid_.size();
             int size_fourier = nb_fields * 2;
             double* legendre;
             double* scl_fourier;
@@ -557,33 +572,32 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
             double* fouriertp;
             double* gp_opt;
             alloc_aligned( legendre, legendre_size( truncation + 1 ) );
-            alloc_aligned( scl_fourier, size_fourier * ( truncation + 1 ) );
+            alloc_aligned( scl_fourier, size_fourier * ( truncation + 1 ) * nlats );
             alloc_aligned( scl_fourier_tp, size_fourier * ( truncation + 1 ) );
             alloc_aligned( fouriertp, 2 * ( truncation + 1 ) );
             alloc_aligned( gp_opt, nb_fields );
 
+            {
+                ATLAS_TRACE( "opt Legendre dgemm" );
+                for ( int jm = 0; jm <= truncation; jm++ ) {
+                    int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
+                    eckit::linalg::Matrix A( eckit::linalg::Matrix(
+                        const_cast<double*>( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) );
+                    eckit::linalg::Matrix B( legendre_ + noff * nlats, ns, nlats );
+                    eckit::linalg::Matrix C( scl_fourier + jm * size_fourier, nb_fields * 2, nlats );
+                    eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                }
+            }
+
             // loop over all points:
             for ( int ip = 0; ip < grid_.size(); ip++ ) {
                 PointXY p  = gu.xy( ip );
                 double lon = p.x() * util::Constants::degreesToRadians();
                 double lat = p.y() * util::Constants::degreesToRadians();
-                compute_legendre_polynomials_latopt3( truncation, lat, legendre, zfn );
-                // Legendre transform:
-                {
-                    //ATLAS_TRACE( "opt Legendre dgemm" );
-                    for ( int jm = 0; jm <= truncation; jm++ ) {
-                        int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
-                        eckit::linalg::Matrix A( eckit::linalg::Matrix(
-                            const_cast<double*>( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) );
-                        eckit::linalg::Matrix B( legendre + noff, ns, 1 );
-                        eckit::linalg::Matrix C( scl_fourier + jm * size_fourier, nb_fields * 2, 1 );
-                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                    }
-                }
                 {
                     //ATLAS_TRACE( "opt transposition in Fourier" );
-                    int idx = 0;
                     for ( int jm = 0; jm < truncation + 1; jm++ ) {
+                        int idx = nb_fields * 2 * ( ip + nlats * jm );
                         for ( int imag = 0; imag < 2; imag++ ) {
                             for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                                 int pos_tp = imag + 2 * ( jm + ( truncation + 1 ) * ( jfld ) );
@@ -619,6 +633,11 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
             free_aligned( fouriertp );
             free_aligned( gp_opt );
         }
+        for ( int j = 0; j < nb_fields * grid_.size(); j++ ) {
+            Log::info() << gp_fields[j] << " ";
+        }
+        Log::info() << std::endl;
+
     }  // namespace trans
 }  // namespace atlas
 
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h
index ac33f6395..c97bb1b7f 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.h
+++ b/src/atlas/trans/localopt3/TransLocalopt3.h
@@ -120,6 +120,7 @@ class TransLocalopt3 : public trans::TransImpl {
     int jlonMin_;
     int nlonsGlobal_;
     bool precompute_;
+    double* legendre_;
     double* legendre_sym_;
     double* legendre_asym_;
     double* fourier_;
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 95a49eeb3..a41a954c4 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -1098,10 +1098,10 @@ CASE( "test_trans_unstructured" ) {
     }
     Grid gu = grid::UnstructuredGrid( new std::vector<PointXY>( pts ) );
 
-    int trc     = 120;
+    int trc     = 12;
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
 
-    int nb_scalar = 100, nb_vordiv = 0;
+    int nb_scalar = 1, nb_vordiv = 0;
     int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
     std::vector<double> sp( 2 * N * nb_scalar );
     std::vector<double> vor( 2 * N * nb_vordiv );
@@ -1133,7 +1133,7 @@ CASE( "test_trans_unstructured" ) {
                             if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. &&
                                  icase < 1000 ) {
                                 auto start = std::chrono::system_clock::now();
-                                trans::Trans transLocal1( g, trc, util::Config( "type", "localopt3" ) );
+                                trans::Trans transLocal1( gu, trc, util::Config( "type", "localopt3" ) );
                                 trans::Trans transLocal2( gu, trc, util::Config( "type", "localopt3" ) );
                                 for ( int j = 0; j < 2 * N * nb_scalar; j++ ) {
                                     sp[j] = 0.;
@@ -1158,11 +1158,12 @@ CASE( "test_trans_unstructured" ) {
                                 spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(),
                                                                   rgp_analytic.data(), ivar_in, ivar_out );
 
-                                //Log::info() << icase << " m=" << m << " n=" << n << " imag=" << imag << " structured: ";
+                                Log::info()
+                                    << icase << " m=" << m << " n=" << n << " imag=" << imag << " unstructured: ";
                                 EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
                                                                        div.data(), rgp1.data() ) );
 
-                                //Log::info() << icase << " m=" << m << " n=" << n << " imag=" << imag << " unstructured: ";
+                                Log::info() << icase << " m=" << m << " n=" << n << " imag=" << imag << " structured: ";
                                 EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
                                                                        div.data(), rgp2.data() ) );
 

From 71be7170a91df73fd44d606302cb9fade2018620 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Thu, 5 Apr 2018 19:13:31 +0100
Subject: [PATCH 042/123] two options for unstructured grids are working now in
 opt3. One option precomputes Legendre-polynomials to create bigger dgemm
 calls (unstruct_precomp_==true), the other computes them on the fly. Also
 started some cleanup.

---
 src/atlas/CMakeLists.txt                      |   4 -
 .../trans/localopt3/FourierTransformsopt3.cc  |  78 ---
 .../trans/localopt3/FourierTransformsopt3.h   |  38 --
 .../localopt3/LegendrePolynomialsopt3.cc      |  11 +-
 .../trans/localopt3/LegendreTransformsopt3.cc |  62 ---
 .../trans/localopt3/LegendreTransformsopt3.h  |  37 --
 src/atlas/trans/localopt3/TransLocalopt3.cc   | 486 +++++++++++-------
 src/atlas/trans/localopt3/TransLocalopt3.h    |  16 +
 src/tests/trans/test_transgeneral.cc          |  17 +-
 9 files changed, 317 insertions(+), 432 deletions(-)
 delete mode 100644 src/atlas/trans/localopt3/FourierTransformsopt3.cc
 delete mode 100644 src/atlas/trans/localopt3/FourierTransformsopt3.h
 delete mode 100644 src/atlas/trans/localopt3/LegendreTransformsopt3.cc
 delete mode 100644 src/atlas/trans/localopt3/LegendreTransformsopt3.h

diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt
index bf6f686c3..b018107ec 100644
--- a/src/atlas/CMakeLists.txt
+++ b/src/atlas/CMakeLists.txt
@@ -355,10 +355,6 @@ trans/localopt3/TransLocalopt3.h
 trans/localopt3/TransLocalopt3.cc
 trans/localopt3/LegendrePolynomialsopt3.h
 trans/localopt3/LegendrePolynomialsopt3.cc
-trans/localopt3/LegendreTransformsopt3.h
-trans/localopt3/LegendreTransformsopt3.cc
-trans/localopt3/FourierTransformsopt3.h
-trans/localopt3/FourierTransformsopt3.cc
 trans/localopt3/VorDivToUVLocalopt3.h
 trans/localopt3/VorDivToUVLocalopt3.cc
 
diff --git a/src/atlas/trans/localopt3/FourierTransformsopt3.cc b/src/atlas/trans/localopt3/FourierTransformsopt3.cc
deleted file mode 100644
index c02b57ada..000000000
--- a/src/atlas/trans/localopt3/FourierTransformsopt3.cc
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor
- * does it submit to any jurisdiction.
- */
-
-#include <algorithm>
-#include <cmath>
-#include <iostream>
-
-#include "atlas/trans/localopt3/FourierTransformsopt3.h"
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-
-void invtrans_fourieropt3( const size_t trcFT,
-                           const double lon,         // longitude in radians (in)
-                           const int nb_fields,      // Number of fields
-                           const double rlegReal[],  // associated Legendre functions, size (trc+1)*trc/2 (in)
-                           const double rlegImag[],  // associated Legendre functions, size (trc+1)*trc/2 (in)
-                           double rgp[] )            // gridpoint
-{
-    for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
-        rgp[jfld] = 0.;
-    }
-    // local Fourier transformation:
-    for ( int jm = 0; jm <= trcFT; ++jm ) {
-        const double cos = std::cos( jm * lon );
-        const double sin = std::sin( jm * lon );
-        for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
-            double real = cos * rlegReal[jm * nb_fields + jfld];
-            double imag = sin * rlegImag[jm * nb_fields + jfld];
-            rgp[jfld] += real - imag;
-        }
-    }
-}
-
-int fourier_truncationopt3( const int truncation,    // truncation
-                            const int nx,            // number of longitudes
-                            const int nxmax,         // maximum nx
-                            const int ndgl,          // number of latitudes
-                            const double lat,        // latitude in radian
-                            const bool fullgrid ) {  // regular grid
-    int trc     = truncation;
-    int trclin  = ndgl - 1;
-    int trcquad = ndgl * 2 / 3 - 1;
-    if ( truncation >= trclin || fullgrid ) {
-        // linear
-        trc = ( nx - 1 ) / 2;
-    }
-    else if ( truncation >= trcquad ) {
-        // quadratic
-        double weight = 3 * ( trclin - truncation ) / ndgl;
-        double sqcos  = std::pow( std::cos( lat ), 2 );
-
-        trc = ( nx - 1 ) / ( 2 + weight * sqcos );
-    }
-    else {
-        // cubic
-        double sqcos = std::pow( std::cos( lat ), 2 );
-
-        trc = ( nx - 1 ) / ( 2 + sqcos ) - 1;
-    }
-    trc = std::min( truncation, trc );
-    return trc;
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt3/FourierTransformsopt3.h b/src/atlas/trans/localopt3/FourierTransformsopt3.h
deleted file mode 100644
index b735ed145..000000000
--- a/src/atlas/trans/localopt3/FourierTransformsopt3.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#pragma once
-
-#include <cstddef>
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-// Routine to compute the local Fourier transformation
-//
-// Author:
-// Andreas Mueller *ECMWF*
-//
-
-void invtrans_fourieropt3( const size_t trcFT,
-                           const double lon,         // longitude in radians (in)
-                           const int nb_fields,      // Number of fields
-                           const double rlegReal[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
-                           const double rlegImag[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
-                           double rgp[] );           // gridpoint
-
-int fourier_truncationopt3( const int truncation, const int nx, const int nxmax, const int ndgl, const double lat,
-                            const bool fullgrid );
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
index 0d440bffd..1cddbc18b 100644
--- a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
+++ b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
@@ -204,11 +204,10 @@ void compute_legendre_polynomialsopt3(
     }
 }
 
-void compute_legendre_polynomials_allopt3(
-    const size_t trc,     // truncation (in)
-    const int nlats,      // number of latitudes
-    const double lats[],  // latitudes in radians (in)
-    double legendre[] )   // start indices for different zonal wave numbers, asymmetric part
+void compute_legendre_polynomials_allopt3( const size_t trc,     // truncation (in)
+                                           const int nlats,      // number of latitudes
+                                           const double lats[],  // latitudes in radians (in)
+                                           double legendre[] )   // legendre polynomials for all latitudes
 {
     auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; };
     std::vector<double> legpol( legendre_size( trc ) );
@@ -224,7 +223,7 @@ void compute_legendre_polynomials_allopt3(
         // compute legendre polynomials for current latitude:
         compute_legendre_polynomials_latopt3( trc, lats[jlat], legpol.data(), zfn.data() );
 
-        for ( int jm = 1; jm <= trc; ++jm ) {
+        for ( int jm = 0; jm <= trc; ++jm ) {
             for ( int jn = jm; jn <= trc; ++jn ) {
                 legendre[idxmnl( jm, jn, jlat )] = legpol[idxmn( jm, jn )];
             }
diff --git a/src/atlas/trans/localopt3/LegendreTransformsopt3.cc b/src/atlas/trans/localopt3/LegendreTransformsopt3.cc
deleted file mode 100644
index 919e9246c..000000000
--- a/src/atlas/trans/localopt3/LegendreTransformsopt3.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#include <cstddef>
-
-#include "atlas/trans/localopt3/LegendreTransformsopt3.h"
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-
-void invtrans_legendreopt3(
-    const size_t trc,       // truncation (in)
-    const size_t trcFT,     // truncation for Fourier transformation (in)
-    const size_t trcLP,     // truncation of Legendre polynomials data legpol. Needs to be >= trc (in)
-    const double legpol[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
-    const int nb_fields,    // number of fields
-    const double spec[],    // spectral data, size (trc+1)*trc (in)
-    double leg_real[],      // values of associated Legendre functions, size (trc+1)*trc/2 (out)
-    double leg_imag[] )     // values of associated Legendre functions, size (trc+1)*trc/2 (out)
-{
-    // Legendre transformation:
-    int k = 0, klp = 0;
-    for ( int jm = 0; jm <= trcFT; ++jm ) {
-        for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
-            leg_real[jm * nb_fields + jfld] = 0.;
-            leg_imag[jm * nb_fields + jfld] = 0.;
-        }
-        for ( int jn = jm; jn <= trcLP; ++jn, ++klp ) {
-            if ( jn <= trc ) {
-                for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
-                    // not completely sure where this factor 2 comes from. One possible
-                    // explanation:
-                    // normalization of trigonometric functions in the spherical harmonics
-                    // integral over square of trig function is 1 for m=0 and 0.5 (?) for
-                    // m>0
-                    leg_real[jm * nb_fields + jfld] += 2. * spec[( 2 * k ) * nb_fields + jfld] * legpol[klp];
-                    leg_imag[jm * nb_fields + jfld] += 2. * spec[( 2 * k + 1 ) * nb_fields + jfld] * legpol[klp];
-                }
-                ++k;
-            }
-        }
-    }
-    // Undo factor 2 for (jm == 0)
-    for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
-        leg_real[jfld] /= 2.;
-        leg_imag[jfld] /= 2.;
-    }
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt3/LegendreTransformsopt3.h b/src/atlas/trans/localopt3/LegendreTransformsopt3.h
deleted file mode 100644
index 7205b3bf0..000000000
--- a/src/atlas/trans/localopt3/LegendreTransformsopt3.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#pragma once
-
-#include <cstddef>
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-// Routine to compute the Legendre transformation
-//
-// Author:
-// Andreas Mueller *ECMWF*
-//
-void invtrans_legendreopt3(
-    const size_t trc,       // truncation (in)
-    const size_t trcFT,     // truncation for Fourier transformation (in)
-    const size_t trcLP,     // truncation of Legendre polynomials data legpol. Needs to be >= trc (in)
-    const double legpol[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
-    const int nb_fields,    // number of fields
-    const double spec[],    // spectral data, size (trc+1)*trc (in)
-    double leg_real[],      // values of associated Legendre functions, size (trc+1)*trc/2 (out)
-    double leg_imag[] );    // values of associated Legendre functions, size (trc+1)*trc/2 (out)
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index 45f0245c4..d7184cb78 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -17,9 +17,7 @@
 #include "atlas/runtime/Log.h"
 #include "atlas/trans/VorDivToUV.h"
 #include "atlas/trans/local/LegendrePolynomials.h"
-#include "atlas/trans/localopt3/FourierTransformsopt3.h"
 #include "atlas/trans/localopt3/LegendrePolynomialsopt3.h"
-#include "atlas/trans/localopt3/LegendreTransformsopt3.h"
 #include "atlas/util/Constants.h"
 #include "eckit/linalg/LinearAlgebra.h"
 #include "eckit/linalg/Matrix.h"
@@ -99,14 +97,15 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
     double fft_threshold = 0.05;  // fraction of latitudes of the full grid up to which FFT is used.
     // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine
     // on which this code is running!
-    int nlats     = 0;
-    int nlons     = 0;
-    int neqtr     = 0;
-    useFFT_       = true;
-    dgemmMethod1_ = false;
-    nlatsNH_      = 0;
-    nlatsSH_      = 0;
-    nlatsLeg_     = 0;
+    int nlats         = 0;
+    int nlons         = 0;
+    int neqtr         = 0;
+    useFFT_           = true;
+    dgemmMethod1_     = false;
+    unstruct_precomp_ = true;
+    nlatsNH_          = 0;
+    nlatsSH_          = 0;
+    nlatsLeg_         = 0;
     if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
         grid::StructuredGrid g( grid_ );
         nlats = g.ny();
@@ -245,14 +244,16 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
     }
     else {
         // unstructured grid
-        ATLAS_TRACE( "opt2 precomp unstructured" );
-        std::vector<double> lats( grid_.size() );
-        alloc_aligned( legendre_, legendre_size( truncation_ ) * grid_.size() );
-        int j( 0 );
-        for ( PointXY p : grid_.xy() ) {
-            lats[j++] = p.y() * util::Constants::degreesToRadians();
+        if ( unstruct_precomp_ ) {
+            ATLAS_TRACE( "opt3 precomp unstructured" );
+            std::vector<double> lats( grid_.size() );
+            alloc_aligned( legendre_, legendre_size( truncation_ ) * grid_.size() );
+            int j( 0 );
+            for ( PointXY p : grid_.xy() ) {
+                lats[j++] = p.y() * util::Constants::degreesToRadians();
+            }
+            compute_legendre_polynomials_allopt3( truncation_, grid_.size(), lats.data(), legendre_ );
         }
-        compute_legendre_polynomials_allopt3( truncation_, grid_.size(), lats.data(), legendre_ );
     }
 }  // namespace trans
 
@@ -279,7 +280,7 @@ TransLocalopt3::~TransLocalopt3() {
         }
     }
     else {
-        free_aligned( legendre_ );
+        if ( unstruct_precomp_ ) { free_aligned( legendre_ ); }
     }
 }
 
@@ -329,6 +330,263 @@ void gp_transposeopt3( const int nb_size, const int nb_fields, const double gp_t
     }
 }
 
+int TransLocalopt3::posMethod( const int jfld, const int imag, const int jlat, const int jm, const int nb_fields,
+                               const int nlats ) const {
+    if ( useFFT_ || !dgemmMethod1_ ) { return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); }
+    else {
+        return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) );
+    };
+};
+
+void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlats, const int nb_fields,
+                                            const double scalar_spectra[], double scl_fourier[],
+                                            const eckit::Configuration& config ) const {
+    // Legendre transform:
+    {
+        ATLAS_TRACE( "opt3 Legendre dgemm" );
+        for ( int jm = 0; jm <= truncation_; jm++ ) {
+            int size_sym  = num_n( truncation_ + 1, jm, true );
+            int size_asym = num_n( truncation_ + 1, jm, false );
+            int n_imag    = 2;
+            if ( jm == 0 ) { n_imag = 1; }
+            int size_fourier = nb_fields * n_imag * nlatsLeg_;
+            auto posFourier  = [&]( int jfld, int imag, int jlat, int jm, int nlatsH ) {
+                return jfld + nb_fields * ( imag + n_imag * ( nlatsLeg_ - nlatsH + jlat ) );
+            };
+            double* scalar_sym;
+            double* scalar_asym;
+            double* scl_fourier_sym;
+            double* scl_fourier_asym;
+            alloc_aligned( scalar_sym, n_imag * nb_fields * size_sym );
+            alloc_aligned( scalar_asym, n_imag * nb_fields * size_asym );
+            alloc_aligned( scl_fourier_sym, size_fourier );
+            alloc_aligned( scl_fourier_asym, size_fourier );
+            {
+                //ATLAS_TRACE( "opt3 Legendre split" );
+                int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2;
+                // the choice between the following two code lines determines whether
+                // total wavenumbers are summed in an ascending or descending order.
+                // The trans library in IFS uses descending order because it should
+                // be more accurate (higher wavenumbers have smaller contributions).
+                // This also needs to be changed when splitting the spectral data in
+                // compute_legendre_polynomialsopt3!
+                //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) {
+                for ( int jn = truncation_ + 1; jn >= jm; jn-- ) {
+                    for ( int imag = 0; imag < n_imag; imag++ ) {
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                            idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) );
+                            if ( jn <= truncation && jm < truncation ) {
+                                if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
+                                else {
+                                    scalar_asym[ia++] = scalar_spectra[idx + ioff];
+                                }
+                            }
+                            else {
+                                if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = 0.; }
+                                else {
+                                    scalar_asym[ia++] = 0.;
+                                }
+                            }
+                        }
+                    }
+                }
+                ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym );
+            }
+            {
+                eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym );
+                eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsLeg_ );
+                eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLeg_ );
+                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+            }
+            if ( size_asym > 0 ) {
+                eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym );
+                eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsLeg_ );
+                eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ );
+                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+            }
+            {
+                //ATLAS_TRACE( "opt3 merge spheres" );
+                // northern hemisphere:
+                for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) {
+                    for ( int imag = 0; imag < n_imag; imag++ ) {
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                            int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ );
+                            scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] =
+                                scl_fourier_sym[idx] + scl_fourier_asym[idx];
+                        }
+                    }
+                }
+                // southern hemisphere:
+                for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) {
+                    for ( int imag = 0; imag < n_imag; imag++ ) {
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                            int idx   = posFourier( jfld, imag, jlat, jm, nlatsSH_ );
+                            int jslat = nlats - jlat - 1;
+                            scl_fourier[posMethod( jfld, imag, jslat, jm, nb_fields, nlats )] =
+                                scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                        }
+                    }
+                }
+            }
+            free_aligned( scalar_sym );
+            free_aligned( scalar_asym );
+            free_aligned( scl_fourier_sym );
+            free_aligned( scl_fourier_asym );
+        }
+    }
+}
+
+void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const int nb_fields,
+                                                    const int nb_vordiv_fields, const double scalar_spectra[],
+                                                    double gp_fields[], const eckit::Configuration& config ) const {
+    ATLAS_TRACE( "invtrans_uv unstructured opt3" );
+    grid::UnstructuredGrid gu = grid_;
+    int nlats                 = grid_.size();
+    int size_fourier          = nb_fields * 2;
+    double* legendre;
+    double* scl_fourier;
+    double* scl_fourier_tp;
+    double* fouriertp;
+    double* gp_opt;
+    alloc_aligned( scl_fourier, size_fourier * (truncation)*nlats );
+    alloc_aligned( scl_fourier_tp, size_fourier * ( truncation ) );
+    alloc_aligned( fouriertp, 2 * ( truncation ) );
+    alloc_aligned( gp_opt, nb_fields );
+
+    {
+        ATLAS_TRACE( "opt Legendre dgemm" );
+        for ( int jm = 0; jm < truncation; jm++ ) {
+            int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
+            eckit::linalg::Matrix A( eckit::linalg::Matrix(
+                const_cast<double*>( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) );
+            eckit::linalg::Matrix B( legendre_ + noff * nlats, ns, nlats );
+            eckit::linalg::Matrix C( scl_fourier + jm * size_fourier * nlats, nb_fields * 2, nlats );
+            eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+        }
+    }
+
+    // loop over all points:
+    for ( int ip = 0; ip < grid_.size(); ip++ ) {
+        PointXY p  = gu.xy( ip );
+        double lon = p.x() * util::Constants::degreesToRadians();
+        double lat = p.y() * util::Constants::degreesToRadians();
+        {
+            //ATLAS_TRACE( "opt transposition in Fourier" );
+            for ( int jm = 0; jm < truncation; jm++ ) {
+                int idx = nb_fields * 2 * ( ip + nlats * jm );
+                for ( int imag = 0; imag < 2; imag++ ) {
+                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        int pos_tp = imag + 2 * ( jm + ( truncation ) * ( jfld ) );
+                        //int pos  = jfld + nb_fields * ( imag + 2 * ( jm ) );
+                        scl_fourier_tp[pos_tp] = scl_fourier[idx++];  // = scl_fourier[pos]
+                    }
+                }
+            }
+        }
+
+        // Fourier transformation:
+        int idx          = 0;
+        fouriertp[idx++] = 1.;  // real part
+        fouriertp[idx++] = 0.;  // imaginary part
+        for ( int jm = 1; jm < truncation; jm++ ) {
+            fouriertp[idx++] = +2. * std::cos( jm * lon );  // real part
+            fouriertp[idx++] = -2. * std::sin( jm * lon );  // imaginary part
+        }
+        {
+            //ATLAS_TRACE( "opt Fourier dgemm" );
+            eckit::linalg::Matrix A( fouriertp, 1, (truncation)*2 );
+            eckit::linalg::Matrix B( scl_fourier_tp, (truncation)*2, nb_fields );
+            eckit::linalg::Matrix C( gp_opt, 1, nb_fields );
+            eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+            for ( int j = 0; j < nb_fields; j++ ) {
+                gp_fields[ip + j * grid_.size()] = gp_opt[j];
+            }
+        }
+    }
+    free_aligned( scl_fourier );
+    free_aligned( scl_fourier_tp );
+    free_aligned( fouriertp );
+    free_aligned( gp_opt );
+}
+
+void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_fields, const int nb_vordiv_fields,
+                                            const double scalar_spectra[], double gp_fields[],
+                                            const eckit::Configuration& config ) const {
+    ATLAS_TRACE( "invtrans_uv unstructured opt3" );
+    grid::UnstructuredGrid gu = grid_;
+    double* zfn;
+    alloc_aligned( zfn, ( truncation + 1 ) * ( truncation + 1 ) );
+    compute_zfnopt3( truncation, zfn );
+    int size_fourier = nb_fields * 2;
+    double* legendre;
+    double* scl_fourier;
+    double* scl_fourier_tp;
+    double* fouriertp;
+    double* gp_opt;
+    alloc_aligned( legendre, legendre_size( truncation + 1 ) );
+    alloc_aligned( scl_fourier, size_fourier * ( truncation + 1 ) );
+    alloc_aligned( scl_fourier_tp, size_fourier * ( truncation + 1 ) );
+    alloc_aligned( fouriertp, 2 * ( truncation + 1 ) );
+    alloc_aligned( gp_opt, nb_fields );
+
+    // loop over all points:
+    for ( int ip = 0; ip < grid_.size(); ip++ ) {
+        PointXY p  = gu.xy( ip );
+        double lon = p.x() * util::Constants::degreesToRadians();
+        double lat = p.y() * util::Constants::degreesToRadians();
+        compute_legendre_polynomials_latopt3( truncation, lat, legendre, zfn );
+        // Legendre transform:
+        {
+            //ATLAS_TRACE( "opt Legendre dgemm" );
+            for ( int jm = 0; jm <= truncation; jm++ ) {
+                int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
+                eckit::linalg::Matrix A( eckit::linalg::Matrix(
+                    const_cast<double*>( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) );
+                eckit::linalg::Matrix B( legendre + noff, ns, 1 );
+                eckit::linalg::Matrix C( scl_fourier + jm * size_fourier, nb_fields * 2, 1 );
+                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+            }
+        }
+        {
+            //ATLAS_TRACE( "opt transposition in Fourier" );
+            int idx = 0;
+            for ( int jm = 0; jm < truncation + 1; jm++ ) {
+                for ( int imag = 0; imag < 2; imag++ ) {
+                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        int pos_tp = imag + 2 * ( jm + ( truncation + 1 ) * ( jfld ) );
+                        //int pos  = jfld + nb_fields * ( imag + 2 * ( jm ) );
+                        scl_fourier_tp[pos_tp] = scl_fourier[idx++];  // = scl_fourier[pos]
+                    }
+                }
+            }
+        }
+
+        // Fourier transformation:
+        int idx          = 0;
+        fouriertp[idx++] = 1.;  // real part
+        fouriertp[idx++] = 0.;  // imaginary part
+        for ( int jm = 1; jm < truncation + 1; jm++ ) {
+            fouriertp[idx++] = +2. * std::cos( jm * lon );  // real part
+            fouriertp[idx++] = -2. * std::sin( jm * lon );  // imaginary part
+        }
+        {
+            //ATLAS_TRACE( "opt Fourier dgemm" );
+            eckit::linalg::Matrix A( fouriertp, 1, ( truncation + 1 ) * 2 );
+            eckit::linalg::Matrix B( scl_fourier_tp, ( truncation + 1 ) * 2, nb_fields );
+            eckit::linalg::Matrix C( gp_opt, 1, nb_fields );
+            eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+            for ( int j = 0; j < nb_fields; j++ ) {
+                gp_fields[ip + j * grid_.size()] = gp_opt[j];
+            }
+        }
+    }
+    free_aligned( legendre );
+    free_aligned( scl_fourier );
+    free_aligned( scl_fourier_tp );
+    free_aligned( fouriertp );
+    free_aligned( gp_opt );
+}
+
 //-----------------------------------------------------------------------------
 // Routine to compute the spectral transform by using a localopt3 Fourier transformation
 // for a grid (same latitude for all longitudes, allows to compute Legendre functions
@@ -353,113 +611,13 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
         // Transform
         if ( grid::StructuredGrid g = grid_ ) {
             ATLAS_TRACE( "invtrans_uv structured opt3" );
-            int nlats      = g.ny();
-            int nlons      = g.nxmax();
-            auto posMethod = [&]( int jfld, int imag, int jlat, int jm ) {
-                if ( useFFT_ || !dgemmMethod1_ ) {
-                    return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
-                }
-                else {
-                    return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) );
-                };
-            };
+            int nlats            = g.ny();
+            int nlons            = g.nxmax();
             int size_fourier_max = nb_fields * 2 * nlats;
             double* scl_fourier;
             alloc_aligned( scl_fourier, size_fourier_max * ( truncation_ + 1 ) );
+            invtrans_legendreopt3( truncation, nlats, nb_scalar_fields, scalar_spectra, scl_fourier, config );
 
-            // Legendre transform:
-            {
-                ATLAS_TRACE( "opt3 Legendre dgemm" );
-                for ( int jm = 0; jm <= truncation_; jm++ ) {
-                    int size_sym  = num_n( truncation_ + 1, jm, true );
-                    int size_asym = num_n( truncation_ + 1, jm, false );
-                    int n_imag    = 2;
-                    if ( jm == 0 ) { n_imag = 1; }
-                    int size_fourier = nb_fields * n_imag * nlatsLeg_;
-                    auto posFourier  = [&]( int jfld, int imag, int jlat, int jm, int nlatsH ) {
-                        return jfld + nb_fields * ( imag + n_imag * ( nlatsLeg_ - nlatsH + jlat ) );
-                    };
-                    double* scalar_sym;
-                    double* scalar_asym;
-                    double* scl_fourier_sym;
-                    double* scl_fourier_asym;
-                    alloc_aligned( scalar_sym, n_imag * nb_fields * size_sym );
-                    alloc_aligned( scalar_asym, n_imag * nb_fields * size_asym );
-                    alloc_aligned( scl_fourier_sym, size_fourier );
-                    alloc_aligned( scl_fourier_asym, size_fourier );
-                    {
-                        //ATLAS_TRACE( "opt3 Legendre split" );
-                        int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2;
-                        // the choice between the following two code lines determines whether
-                        // total wavenumbers are summed in an ascending or descending order.
-                        // The trans library in IFS uses descending order because it should
-                        // be more accurate (higher wavenumbers have smaller contributions).
-                        // This also needs to be changed when splitting the spectral data in
-                        // compute_legendre_polynomialsopt3!
-                        //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) {
-                        for ( int jn = truncation_ + 1; jn >= jm; jn-- ) {
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                    idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) );
-                                    if ( jn <= truncation && jm < truncation ) {
-                                        if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
-                                        else {
-                                            scalar_asym[ia++] = scalar_spectra[idx + ioff];
-                                        }
-                                    }
-                                    else {
-                                        if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = 0.; }
-                                        else {
-                                            scalar_asym[ia++] = 0.;
-                                        }
-                                    }
-                                }
-                            }
-                        }
-                        ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym );
-                    }
-                    {
-                        eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym );
-                        eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsLeg_ );
-                        eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLeg_ );
-                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                    }
-                    if ( size_asym > 0 ) {
-                        eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym );
-                        eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsLeg_ );
-                        eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ );
-                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                    }
-                    {
-                        //ATLAS_TRACE( "opt3 merge spheres" );
-                        // northern hemisphere:
-                        for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) {
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                    int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ );
-                                    scl_fourier[posMethod( jfld, imag, jlat, jm )] =
-                                        scl_fourier_sym[idx] + scl_fourier_asym[idx];
-                                }
-                            }
-                        }
-                        // southern hemisphere:
-                        for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) {
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                    int idx   = posFourier( jfld, imag, jlat, jm, nlatsSH_ );
-                                    int jslat = nlats - jlat - 1;
-                                    scl_fourier[posMethod( jfld, imag, jslat, jm )] =
-                                        scl_fourier_sym[idx] - scl_fourier_asym[idx];
-                                }
-                            }
-                        }
-                    }
-                    free_aligned( scalar_sym );
-                    free_aligned( scalar_asym );
-                    free_aligned( scl_fourier_sym );
-                    free_aligned( scl_fourier_asym );
-                }
-            }
             // Fourier transformation:
             if ( useFFT_ ) {
 #if ATLAS_HAVE_FFTW
@@ -470,11 +628,12 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                         for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                             int idx = 0;
                             for ( int jlat = 0; jlat < nlats; jlat++ ) {
-                                fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0 )];
+                                fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0, nb_fields, nlats )];
                                 for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
                                     for ( int imag = 0; imag < 2; imag++ ) {
                                         if ( jm <= truncation_ ) {
-                                            fft_in_[idx][imag] = scl_fourier[posMethod( jfld, imag, jlat, jm )];
+                                            fft_in_[idx][imag] =
+                                                scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )];
                                         }
                                         else {
                                             fft_in_[idx][imag] = 0.;
@@ -559,84 +718,15 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
             free_aligned( scl_fourier );
         }  // namespace atlas
         else {
-            ATLAS_TRACE( "invtrans_uv unstructured opt3" );
-            grid::UnstructuredGrid gu = grid_;
-            double* zfn;
-            alloc_aligned( zfn, ( truncation + 1 ) * ( truncation + 1 ) );
-            compute_zfnopt3( truncation, zfn );
-            int nlats        = grid_.size();
-            int size_fourier = nb_fields * 2;
-            double* legendre;
-            double* scl_fourier;
-            double* scl_fourier_tp;
-            double* fouriertp;
-            double* gp_opt;
-            alloc_aligned( legendre, legendre_size( truncation + 1 ) );
-            alloc_aligned( scl_fourier, size_fourier * ( truncation + 1 ) * nlats );
-            alloc_aligned( scl_fourier_tp, size_fourier * ( truncation + 1 ) );
-            alloc_aligned( fouriertp, 2 * ( truncation + 1 ) );
-            alloc_aligned( gp_opt, nb_fields );
-
-            {
-                ATLAS_TRACE( "opt Legendre dgemm" );
-                for ( int jm = 0; jm <= truncation; jm++ ) {
-                    int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
-                    eckit::linalg::Matrix A( eckit::linalg::Matrix(
-                        const_cast<double*>( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) );
-                    eckit::linalg::Matrix B( legendre_ + noff * nlats, ns, nlats );
-                    eckit::linalg::Matrix C( scl_fourier + jm * size_fourier, nb_fields * 2, nlats );
-                    eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                }
+            if ( unstruct_precomp_ ) {
+                invtrans_unstructured_precomp( truncation, nb_scalar_fields, nb_vordiv_fields, scalar_spectra,
+                                               gp_fields, config );
             }
-
-            // loop over all points:
-            for ( int ip = 0; ip < grid_.size(); ip++ ) {
-                PointXY p  = gu.xy( ip );
-                double lon = p.x() * util::Constants::degreesToRadians();
-                double lat = p.y() * util::Constants::degreesToRadians();
-                {
-                    //ATLAS_TRACE( "opt transposition in Fourier" );
-                    for ( int jm = 0; jm < truncation + 1; jm++ ) {
-                        int idx = nb_fields * 2 * ( ip + nlats * jm );
-                        for ( int imag = 0; imag < 2; imag++ ) {
-                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                int pos_tp = imag + 2 * ( jm + ( truncation + 1 ) * ( jfld ) );
-                                //int pos  = jfld + nb_fields * ( imag + 2 * ( jm ) );
-                                scl_fourier_tp[pos_tp] = scl_fourier[idx++];  // = scl_fourier[pos]
-                            }
-                        }
-                    }
-                }
-
-                // Fourier transformation:
-                int idx          = 0;
-                fouriertp[idx++] = 1.;  // real part
-                fouriertp[idx++] = 0.;  // imaginary part
-                for ( int jm = 1; jm < truncation + 1; jm++ ) {
-                    fouriertp[idx++] = +2. * std::cos( jm * lon );  // real part
-                    fouriertp[idx++] = -2. * std::sin( jm * lon );  // imaginary part
-                }
-                {
-                    //ATLAS_TRACE( "opt Fourier dgemm" );
-                    eckit::linalg::Matrix A( fouriertp, 1, ( truncation + 1 ) * 2 );
-                    eckit::linalg::Matrix B( scl_fourier_tp, ( truncation + 1 ) * 2, nb_fields );
-                    eckit::linalg::Matrix C( gp_opt, 1, nb_fields );
-                    eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                    for ( int j = 0; j < nb_fields; j++ ) {
-                        gp_fields[ip + j * grid_.size()] = gp_opt[j];
-                    }
-                }
+            else {
+                invtrans_unstructured( truncation, nb_scalar_fields, nb_vordiv_fields, scalar_spectra, gp_fields,
+                                       config );
             }
-            free_aligned( legendre );
-            free_aligned( scl_fourier );
-            free_aligned( scl_fourier_tp );
-            free_aligned( fouriertp );
-            free_aligned( gp_opt );
-        }
-        for ( int j = 0; j < nb_fields * grid_.size(); j++ ) {
-            Log::info() << gp_fields[j] << " ";
         }
-        Log::info() << std::endl;
 
     }  // namespace trans
 }  // namespace atlas
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h
index c97bb1b7f..44be5a848 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.h
+++ b/src/atlas/trans/localopt3/TransLocalopt3.h
@@ -105,6 +105,21 @@ class TransLocalopt3 : public trans::TransImpl {
                            double divergence_spectra[], const eckit::Configuration& = util::NoConfig() ) const override;
 
 private:
+    int posMethod( const int jfld, const int imag, const int jlat, const int jm, const int nb_fields,
+                   const int nlats ) const;
+
+    void invtrans_legendreopt3( const int truncation, const int nlats, const int nb_fields,
+                                const double scalar_spectra[], double scl_fourier[],
+                                const eckit::Configuration& config ) const;
+
+    void invtrans_unstructured_precomp( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
+                                        const double scalar_spectra[], double gp_fields[],
+                                        const eckit::Configuration& = util::NoConfig() ) const;
+
+    void invtrans_unstructured( const int truncation, const int nb_fields, const int nb_vordiv_fields,
+                                const double scalar_spectra[], double gp_fields[],
+                                const eckit::Configuration& config ) const;
+
     void invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
                       const double scalar_spectra[], double gp_fields[],
                       const eckit::Configuration& = util::NoConfig() ) const;
@@ -113,6 +128,7 @@ class TransLocalopt3 : public trans::TransImpl {
     Grid grid_;
     bool useFFT_;
     bool dgemmMethod1_;
+    bool unstruct_precomp_;
     int truncation_;
     int nlatsNH_;
     int nlatsSH_;
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index a41a954c4..8b8a4af26 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -1082,9 +1082,9 @@ CASE( "test_trans_unstructured" ) {
     double tolerance  = 1.e-13;
 
     //Domain testdomain = RectangularDomain( {20., 25.}, {40., 60.} );
-    Domain testdomain = RectangularDomain( {20., 25.}, {45., 50.} );
+    //Domain testdomain = RectangularDomain( {10., 25.}, {35., 50.} );
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F12", testdomain );
+    Grid g( "F120" );
     grid::StructuredGrid gs( g );
     std::vector<PointXY> pts( g.size() );
     int idx( 0 );
@@ -1092,13 +1092,13 @@ CASE( "test_trans_unstructured" ) {
         double lat = gs.y( j );
         for ( size_t i = 0; i < gs.nx( j ); ++i ) {
             double lon = gs.x( i, j );
-            Log::info() << "idx=" << idx << " lon=" << lon << " lat=" << lat << std::endl;
+            //Log::info() << "idx=" << idx << " lon=" << lon << " lat=" << lat << std::endl;
             pts[idx++].assign( lon, lat );
         }
     }
     Grid gu = grid::UnstructuredGrid( new std::vector<PointXY>( pts ) );
 
-    int trc     = 12;
+    int trc     = 120;
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
 
     int nb_scalar = 1, nb_vordiv = 0;
@@ -1131,9 +1131,9 @@ CASE( "test_trans_unstructured" ) {
                         for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
 
                             if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. &&
-                                 icase < 1000 ) {
+                                 icase < 1 ) {
                                 auto start = std::chrono::system_clock::now();
-                                trans::Trans transLocal1( gu, trc, util::Config( "type", "localopt3" ) );
+                                trans::Trans transLocal1( g, trc, util::Config( "type", "localopt3" ) );
                                 trans::Trans transLocal2( gu, trc, util::Config( "type", "localopt3" ) );
                                 for ( int j = 0; j < 2 * N * nb_scalar; j++ ) {
                                     sp[j] = 0.;
@@ -1158,12 +1158,11 @@ CASE( "test_trans_unstructured" ) {
                                 spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(),
                                                                   rgp_analytic.data(), ivar_in, ivar_out );
 
-                                Log::info()
-                                    << icase << " m=" << m << " n=" << n << " imag=" << imag << " unstructured: ";
+                                //Log::info() << icase << " m=" << m << " n=" << n << " imag=" << imag << " structured: ";
                                 EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
                                                                        div.data(), rgp1.data() ) );
 
-                                Log::info() << icase << " m=" << m << " n=" << n << " imag=" << imag << " structured: ";
+                                //Log::info() << icase << " m=" << m << " n=" << n << " imag=" << imag << " unstructured: ";
                                 EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
                                                                        div.data(), rgp2.data() ) );
 

From 01bc8dca392412a5b8fa3bc6624f8781ad1a8266 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Fri, 6 Apr 2018 12:12:08 +0100
Subject: [PATCH 043/123] more cleanup. Switched names of the two
 dgemm-methods. dgemm-method is now chosen via preprocessor variable
 TRANSLOCAL_DGEMM2. Current active test compares regular and unstructured grid
 with same number of different latitudes and longitudes.

---
 src/atlas/trans/localopt3/TransLocalopt3.cc | 298 ++++++++++----------
 src/atlas/trans/localopt3/TransLocalopt3.h  |  13 +-
 src/tests/trans/test_transgeneral.cc        |  41 ++-
 3 files changed, 194 insertions(+), 158 deletions(-)

diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index d7184cb78..983a2ac0d 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -9,7 +9,7 @@
  */
 
 #include "atlas/trans/localopt3/TransLocalopt3.h"
-#include <math.h>
+#include <cmath>
 #include "atlas/array.h"
 #include "atlas/option.h"
 #include "atlas/parallel/mpi/mpi.h"
@@ -101,7 +101,6 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
     int nlons         = 0;
     int neqtr         = 0;
     useFFT_           = true;
-    dgemmMethod1_     = false;
     unstruct_precomp_ = true;
     nlatsNH_          = 0;
     nlatsSH_          = 0;
@@ -194,7 +193,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
 
         // precomputations for Fourier transformations:
         if ( useFFT_ ) {
-#if ATLAS_HAVE_FFTW
+#if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2
             {
                 ATLAS_TRACE( "opt3 precomp FFTW" );
                 int num_complex = ( nlonsGlobal_ / 2 ) + 1;
@@ -210,36 +209,35 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         }
         if ( !useFFT_ ) {
             alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons );
-            if ( dgemmMethod1_ ) {
-                {
-                    ATLAS_TRACE( "opt3 precomp Fourier" );
-                    int idx = 0;
+#if !TRANSLOCAL_DGEMM2
+            {
+                ATLAS_TRACE( "opt3 precomp Fourier tp" );
+                int idx = 0;
+                for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
+                    double factor = 1.;
+                    if ( jm > 0 ) { factor = 2.; }
                     for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                        double factor = 1.;
-                        for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                            if ( jm > 0 ) { factor = 2.; }
-                            fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
-                            fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
-                        }
+                        fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
+                    }
+                    for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                        fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
                     }
                 }
             }
-            else {
-                {
-                    ATLAS_TRACE( "opt3 precomp Fourier tp" );
-                    int idx = 0;
+#else
+            {
+                ATLAS_TRACE( "opt3 precomp Fourier" );
+                int idx = 0;
+                for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                    double factor = 1.;
                     for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                        double factor = 1.;
                         if ( jm > 0 ) { factor = 2.; }
-                        for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                            fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
-                        }
-                        for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                            fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
-                        }
+                        fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
+                        fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
                     }
                 }
             }
+#endif
         }
     }
     else {
@@ -269,7 +267,7 @@ TransLocalopt3::~TransLocalopt3() {
         free_aligned( legendre_sym_ );
         free_aligned( legendre_asym_ );
         if ( useFFT_ ) {
-#if ATLAS_HAVE_FFTW
+#if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2
             fftw_destroy_plan( plan_ );
             fftw_free( fft_in_ );
             fftw_free( fft_out_ );
@@ -317,11 +315,15 @@ void TransLocalopt3::invtrans_vordiv2wind( const Field& spvor, const Field& spdi
     NOTIMP;
 }
 
+// --------------------------------------------------------------------------------------------------------------------
+
 void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[],
                                const eckit::Configuration& config ) const {
     invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields, config );
 }
 
+// --------------------------------------------------------------------------------------------------------------------
+
 void gp_transposeopt3( const int nb_size, const int nb_fields, const double gp_tmp[], double gp_fields[] ) {
     for ( int jgp = 0; jgp < nb_size; jgp++ ) {
         for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
@@ -330,13 +332,7 @@ void gp_transposeopt3( const int nb_size, const int nb_fields, const double gp_t
     }
 }
 
-int TransLocalopt3::posMethod( const int jfld, const int imag, const int jlat, const int jm, const int nb_fields,
-                               const int nlats ) const {
-    if ( useFFT_ || !dgemmMethod1_ ) { return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); }
-    else {
-        return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) );
-    };
-};
+// --------------------------------------------------------------------------------------------------------------------
 
 void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlats, const int nb_fields,
                                             const double scalar_spectra[], double scl_fourier[],
@@ -436,6 +432,91 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat
     }
 }
 
+// --------------------------------------------------------------------------------------------------------------------
+
+void TransLocalopt3::invtrans_fourieropt3( const int nlats, const int nlons, const int nb_fields, double scl_fourier[],
+                                           double gp_fields[], const eckit::Configuration& config ) const {
+    // Fourier transformation:
+    if ( useFFT_ ) {
+#if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2
+        {
+            int num_complex = ( nlonsGlobal_ / 2 ) + 1;
+            {
+                ATLAS_TRACE( "opt3 FFTW" );
+                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                    int idx = 0;
+                    for ( int jlat = 0; jlat < nlats; jlat++ ) {
+                        fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0, nb_fields, nlats )];
+                        for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
+                            for ( int imag = 0; imag < 2; imag++ ) {
+                                if ( jm <= truncation_ ) {
+                                    fft_in_[idx][imag] =
+                                        scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )];
+                                }
+                                else {
+                                    fft_in_[idx][imag] = 0.;
+                                }
+                            }
+                        }
+                    }
+                    fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ );
+                    for ( int jlat = 0; jlat < nlats; jlat++ ) {
+                        for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                            int j = jlon + jlonMin_;
+                            if ( j >= nlonsGlobal_ ) { j -= nlonsGlobal_; }
+                            gp_fields[jlon + nlons * ( jlat + nlats * jfld )] = fft_out_[j + nlonsGlobal_ * jlat];
+                        }
+                    }
+                }
+            }
+        }
+#endif
+    }
+    else {
+#if !TRANSLOCAL_DGEMM2
+        // dgemm-method 1
+        {
+            ATLAS_TRACE( "opt3 Fourier dgemm method 1" );
+            eckit::linalg::Matrix A( fourier_, nlons, ( truncation_ + 1 ) * 2 );
+            eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats );
+            eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats );
+            eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+        }
+#else
+        // dgemm-method 2
+        // should be faster for small domains or large truncation
+        // but have not found any significant speedup so far
+        double* gp_opt3;
+        alloc_aligned( gp_opt3, nb_fields * grid_.size() );
+        {
+            ATLAS_TRACE( "opt3 Fourier dgemm method 2" );
+            eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 );
+            eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlons );
+            eckit::linalg::Matrix C( gp_opt3, nb_fields * nlats, nlons );
+            eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+        }
+
+        // Transposition in grid point space:
+        {
+            ATLAS_TRACE( "opt3 transposition in gp-space" );
+            int idx = 0;
+            for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                for ( int jlat = 0; jlat < nlats; jlat++ ) {
+                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        int pos_tp = jlon + nlons * ( jlat + nlats * ( jfld ) );
+                        //int pos  = jfld + nb_fields * ( jlat + nlats * ( jlon ) );
+                        gp_fields[pos_tp] = gp_opt3[idx++];  // = gp_opt3[pos]
+                    }
+                }
+            }
+        }
+        free_aligned( gp_opt3 );
+#endif
+    }
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
 void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const int nb_fields,
                                                     const int nb_vordiv_fields, const double scalar_spectra[],
                                                     double gp_fields[], const eckit::Configuration& config ) const {
@@ -466,40 +547,43 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const
     }
 
     // loop over all points:
-    for ( int ip = 0; ip < grid_.size(); ip++ ) {
-        PointXY p  = gu.xy( ip );
-        double lon = p.x() * util::Constants::degreesToRadians();
-        double lat = p.y() * util::Constants::degreesToRadians();
-        {
-            //ATLAS_TRACE( "opt transposition in Fourier" );
-            for ( int jm = 0; jm < truncation; jm++ ) {
-                int idx = nb_fields * 2 * ( ip + nlats * jm );
-                for ( int imag = 0; imag < 2; imag++ ) {
-                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                        int pos_tp = imag + 2 * ( jm + ( truncation ) * ( jfld ) );
-                        //int pos  = jfld + nb_fields * ( imag + 2 * ( jm ) );
-                        scl_fourier_tp[pos_tp] = scl_fourier[idx++];  // = scl_fourier[pos]
+    {
+        ATLAS_TRACE( "opt Fourier dgemm" );
+
+        for ( int ip = 0; ip < grid_.size(); ip++ ) {
+            PointXY p  = gu.xy( ip );
+            double lon = p.x() * util::Constants::degreesToRadians();
+            double lat = p.y() * util::Constants::degreesToRadians();
+            {
+                //ATLAS_TRACE( "opt transposition in Fourier" );
+                for ( int jm = 0; jm < truncation; jm++ ) {
+                    int idx = nb_fields * 2 * ( ip + nlats * jm );
+                    for ( int imag = 0; imag < 2; imag++ ) {
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                            int pos_tp = imag + 2 * ( jm + ( truncation ) * ( jfld ) );
+                            //int pos  = jfld + nb_fields * ( imag + 2 * ( jm ) );
+                            scl_fourier_tp[pos_tp] = scl_fourier[idx++];  // = scl_fourier[pos]
+                        }
                     }
                 }
             }
-        }
 
-        // Fourier transformation:
-        int idx          = 0;
-        fouriertp[idx++] = 1.;  // real part
-        fouriertp[idx++] = 0.;  // imaginary part
-        for ( int jm = 1; jm < truncation; jm++ ) {
-            fouriertp[idx++] = +2. * std::cos( jm * lon );  // real part
-            fouriertp[idx++] = -2. * std::sin( jm * lon );  // imaginary part
-        }
-        {
-            //ATLAS_TRACE( "opt Fourier dgemm" );
-            eckit::linalg::Matrix A( fouriertp, 1, (truncation)*2 );
-            eckit::linalg::Matrix B( scl_fourier_tp, (truncation)*2, nb_fields );
-            eckit::linalg::Matrix C( gp_opt, 1, nb_fields );
-            eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-            for ( int j = 0; j < nb_fields; j++ ) {
-                gp_fields[ip + j * grid_.size()] = gp_opt[j];
+            // Fourier transformation:
+            int idx          = 0;
+            fouriertp[idx++] = 1.;  // real part
+            fouriertp[idx++] = 0.;  // imaginary part
+            for ( int jm = 1; jm < truncation; jm++ ) {
+                fouriertp[idx++] = +2. * std::cos( jm * lon );  // real part
+                fouriertp[idx++] = -2. * std::sin( jm * lon );  // imaginary part
+            }
+            {
+                eckit::linalg::Matrix A( fouriertp, 1, (truncation)*2 );
+                eckit::linalg::Matrix B( scl_fourier_tp, (truncation)*2, nb_fields );
+                eckit::linalg::Matrix C( gp_opt, 1, nb_fields );
+                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                for ( int j = 0; j < nb_fields; j++ ) {
+                    gp_fields[ip + j * grid_.size()] = gp_opt[j];
+                }
             }
         }
     }
@@ -509,6 +593,8 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const
     free_aligned( gp_opt );
 }
 
+// --------------------------------------------------------------------------------------------------------------------
+
 void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_fields, const int nb_vordiv_fields,
                                             const double scalar_spectra[], double gp_fields[],
                                             const eckit::Configuration& config ) const {
@@ -616,86 +702,13 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
             int size_fourier_max = nb_fields * 2 * nlats;
             double* scl_fourier;
             alloc_aligned( scl_fourier, size_fourier_max * ( truncation_ + 1 ) );
+
+            // Legendre transformation:
             invtrans_legendreopt3( truncation, nlats, nb_scalar_fields, scalar_spectra, scl_fourier, config );
 
             // Fourier transformation:
-            if ( useFFT_ ) {
-#if ATLAS_HAVE_FFTW
-                {
-                    int num_complex = ( nlonsGlobal_ / 2 ) + 1;
-                    {
-                        ATLAS_TRACE( "opt3 FFTW" );
-                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                            int idx = 0;
-                            for ( int jlat = 0; jlat < nlats; jlat++ ) {
-                                fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0, nb_fields, nlats )];
-                                for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
-                                    for ( int imag = 0; imag < 2; imag++ ) {
-                                        if ( jm <= truncation_ ) {
-                                            fft_in_[idx][imag] =
-                                                scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )];
-                                        }
-                                        else {
-                                            fft_in_[idx][imag] = 0.;
-                                        }
-                                    }
-                                }
-                            }
-                            fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ );
-                            for ( int jlat = 0; jlat < nlats; jlat++ ) {
-                                for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                                    int j = jlon + jlonMin_;
-                                    if ( j >= nlonsGlobal_ ) { j -= nlonsGlobal_; }
-                                    gp_fields[jlon + nlons * ( jlat + nlats * jfld )] =
-                                        fft_out_[j + nlonsGlobal_ * jlat];
-                                }
-                            }
-                        }
-                    }
-                }
-#endif
-            }
-            else {
-                if ( dgemmMethod1_ ) {
-                    // dgemm-method 1
-                    // should be faster for small domains or large truncation
-                    double* gp_opt3;
-                    alloc_aligned( gp_opt3, nb_fields * grid_.size() );
-                    {
-                        ATLAS_TRACE( "opt3 Fourier dgemm method 1" );
-                        eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 );
-                        eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlons );
-                        eckit::linalg::Matrix C( gp_opt3, nb_fields * nlats, nlons );
-                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                    }
+            invtrans_fourieropt3( nlats, nlons, nb_fields, scl_fourier, gp_fields, config );
 
-                    // Transposition in grid point space:
-                    {
-                        ATLAS_TRACE( "opt3 transposition in gp-space" );
-                        int idx = 0;
-                        for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                            for ( int jlat = 0; jlat < nlats; jlat++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                    int pos_tp = jlon + nlons * ( jlat + nlats * ( jfld ) );
-                                    //int pos  = jfld + nb_fields * ( jlat + nlats * ( jlon ) );
-                                    gp_fields[pos_tp] = gp_opt3[idx++];  // = gp_opt3[pos]
-                                }
-                            }
-                        }
-                    }
-                    free_aligned( gp_opt3 );
-                }
-                else {
-                    // dgemm-method 2
-                    {
-                        ATLAS_TRACE( "opt3 Fourier dgemm method 2" );
-                        eckit::linalg::Matrix A( fourier_, nlons, ( truncation_ + 1 ) * 2 );
-                        eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats );
-                        eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats );
-                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                    }
-                }
-            }  // namespace trans
             // Computing u,v from U,V:
             {
                 if ( nb_vordiv_fields > 0 ) {
@@ -716,7 +729,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                 }
             }
             free_aligned( scl_fourier );
-        }  // namespace atlas
+        }
         else {
             if ( unstruct_precomp_ ) {
                 invtrans_unstructured_precomp( truncation, nb_scalar_fields, nb_vordiv_fields, scalar_spectra,
@@ -727,9 +740,8 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
                                        config );
             }
         }
-
-    }  // namespace trans
-}  // namespace atlas
+    }
+}
 
 // --------------------------------------------------------------------------------------------------------------------
 
@@ -739,6 +751,8 @@ void TransLocalopt3::invtrans( const int nb_vordiv_fields, const double vorticit
     invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
 }
 
+// --------------------------------------------------------------------------------------------------------------------
+
 void extend_truncationopt3( const int old_truncation, const int nb_fields, const double old_spectra[],
                             double new_spectra[] ) {
     int k = 0, k_old = 0;
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h
index 44be5a848..af8360801 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.h
+++ b/src/atlas/trans/localopt3/TransLocalopt3.h
@@ -19,6 +19,8 @@
 #include <fftw3.h>
 #endif
 
+#define TRANSLOCAL_DGEMM2 0
+
 //-----------------------------------------------------------------------------
 // Forward declarations
 
@@ -106,12 +108,21 @@ class TransLocalopt3 : public trans::TransImpl {
 
 private:
     int posMethod( const int jfld, const int imag, const int jlat, const int jm, const int nb_fields,
-                   const int nlats ) const;
+                   const int nlats ) const {
+#if !TRANSLOCAL_DGEMM2
+        return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
+#else
+        return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) );
+#endif
+    };
 
     void invtrans_legendreopt3( const int truncation, const int nlats, const int nb_fields,
                                 const double scalar_spectra[], double scl_fourier[],
                                 const eckit::Configuration& config ) const;
 
+    void invtrans_fourieropt3( const int nlats, const int nlons, const int nb_fields, double scl_fourier[],
+                               double gp_fields[], const eckit::Configuration& config ) const;
+
     void invtrans_unstructured_precomp( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
                                         const double scalar_spectra[], double gp_fields[],
                                         const eckit::Configuration& = util::NoConfig() ) const;
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 8b8a4af26..3d023ebfd 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -1082,9 +1082,9 @@ CASE( "test_trans_unstructured" ) {
     double tolerance  = 1.e-13;
 
     //Domain testdomain = RectangularDomain( {20., 25.}, {40., 60.} );
-    //Domain testdomain = RectangularDomain( {10., 25.}, {35., 50.} );
+    Domain testdomain = RectangularDomain( {0., 90.}, {0., 90.} );
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F120" );
+    Grid g( "F120", testdomain );
     grid::StructuredGrid gs( g );
     std::vector<PointXY> pts( g.size() );
     int idx( 0 );
@@ -1092,13 +1092,15 @@ CASE( "test_trans_unstructured" ) {
         double lat = gs.y( j );
         for ( size_t i = 0; i < gs.nx( j ); ++i ) {
             double lon = gs.x( i, j );
-            //Log::info() << "idx=" << idx << " lon=" << lon << " lat=" << lat << std::endl;
-            pts[idx++].assign( lon, lat );
+            if ( i == j && lat > 0 ) {
+                //Log::info() << "idx=" << idx << " lon=" << lon << " lat=" << lat << std::endl;
+                pts[idx++].assign( lon, lat );
+            }
         }
     }
-    Grid gu = grid::UnstructuredGrid( new std::vector<PointXY>( pts ) );
-
-    int trc     = 120;
+    Grid gu = grid::UnstructuredGrid( new std::vector<PointXY>( &pts[0], &pts[idx] ) );
+    Log::info() << "gu: size=" << gu.size() << std::endl;
+    int trc     = 1280;
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
 
     int nb_scalar = 1, nb_vordiv = 0;
@@ -1110,7 +1112,11 @@ CASE( "test_trans_unstructured" ) {
     std::vector<double> gp( nb_all * g.size() );
     std::vector<double> rgp1( nb_all * g.size() );
     std::vector<double> rgp2( nb_all * g.size() );
-    std::vector<double> rgp_analytic( g.size() );
+    std::vector<double> rgp_analytic1( g.size() );
+    std::vector<double> rgp_analytic2( gu.size() );
+
+    trans::Trans transLocal1( g, trc, util::Config( "type", "localopt3" ) );
+    trans::Trans transLocal2( gu, trc, util::Config( "type", "localopt3" ) );
 
     int icase = 0;
     for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) {         // vorticity, divergence, scalar
@@ -1131,10 +1137,8 @@ CASE( "test_trans_unstructured" ) {
                         for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
 
                             if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. &&
-                                 icase < 1 ) {
+                                 icase < 100 ) {
                                 auto start = std::chrono::system_clock::now();
-                                trans::Trans transLocal1( g, trc, util::Config( "type", "localopt3" ) );
-                                trans::Trans transLocal2( gu, trc, util::Config( "type", "localopt3" ) );
                                 for ( int j = 0; j < 2 * N * nb_scalar; j++ ) {
                                     sp[j] = 0.;
                                 }
@@ -1152,16 +1156,23 @@ CASE( "test_trans_unstructured" ) {
                                     rgp2[j] = 0.;
                                 }
                                 for ( int j = 0; j < g.size(); j++ ) {
-                                    rgp_analytic[j] = 0.;
+                                    rgp_analytic1[j] = 0.;
+                                }
+
+                                for ( int j = 0; j < gu.size(); j++ ) {
+                                    rgp_analytic2[j] = 0.;
                                 }
 
                                 spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(),
-                                                                  rgp_analytic.data(), ivar_in, ivar_out );
+                                                                  rgp_analytic1.data(), ivar_in, ivar_out );
 
                                 //Log::info() << icase << " m=" << m << " n=" << n << " imag=" << imag << " structured: ";
                                 EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
                                                                        div.data(), rgp1.data() ) );
 
+                                spectral_transform_grid_analytic( trc, trc, n, m, imag, gu, rspecg.data(),
+                                                                  rgp_analytic2.data(), ivar_in, ivar_out );
+
                                 //Log::info() << icase << " m=" << m << " n=" << n << " imag=" << imag << " unstructured: ";
                                 EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
                                                                        div.data(), rgp2.data() ) );
@@ -1169,10 +1180,10 @@ CASE( "test_trans_unstructured" ) {
                                 int pos = ( ivar_out * nb_vordiv + jfld );
 
                                 double rms_gen1 =
-                                    compute_rms( g.size(), rgp1.data() + pos * g.size(), rgp_analytic.data() );
+                                    compute_rms( g.size(), rgp1.data() + pos * g.size(), rgp_analytic1.data() );
 
                                 double rms_gen2 =
-                                    compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() );
+                                    compute_rms( gu.size(), rgp2.data() + pos * gu.size(), rgp_analytic2.data() );
 
                                 rav1 += rms_gen1;
                                 rav2 += rms_gen2;

From 42c2adc95851cf399ba38bdd83aa4269534004e6 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Tue, 10 Apr 2018 15:59:20 +0100
Subject: [PATCH 044/123] reduced meshes are working now with FFTW and added
 storing wisdom; still need to add reduced truncation towards the poles

---
 src/atlas/trans/localopt3/TransLocalopt3.cc | 236 ++++++++++++++++----
 src/atlas/trans/localopt3/TransLocalopt3.h  |  19 +-
 src/tests/trans/test_transgeneral.cc        |  26 ++-
 3 files changed, 228 insertions(+), 53 deletions(-)

diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index 983a2ac0d..6ea7093f6 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -94,11 +94,11 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
 #else
     eckit::linalg::LinearAlgebra::backend( "generic" );  // might want to choose backend with this command
 #endif
-    double fft_threshold = 0.05;  // fraction of latitudes of the full grid up to which FFT is used.
+    double fft_threshold = 0.0;  // fraction of latitudes of the full grid up to which FFT is used.
     // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine
     // on which this code is running!
     int nlats         = 0;
-    int nlons         = 0;
+    int nlonsMax      = 0;
     int neqtr         = 0;
     useFFT_           = true;
     unstruct_precomp_ = true;
@@ -107,8 +107,8 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
     nlatsLeg_         = 0;
     if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
         grid::StructuredGrid g( grid_ );
-        nlats = g.ny();
-        nlons = g.nxmax();
+        nlats    = g.ny();
+        nlonsMax = g.nxmax();
         for ( size_t j = 0; j < nlats; ++j ) {
             // assumptions: latitudes in g.y(j) are monotone and decreasing
             // no assumption on whether we have 0, 1 or 2 latitudes at the equator
@@ -125,24 +125,48 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         else {
             nlatsLeg_ = nlatsSH_;
         }
-        Grid g_global( grid.name() );
-        grid::StructuredGrid gs_global( g_global );
-        nlonsGlobal_  = gs_global.nxmax();
-        jlonMin_      = 0;
-        double lonmin = fmod( g.x( 0, 0 ), 360 );
-        if ( lonmin < 0. ) { lonmin += 360.; }
-        if ( nlons < fft_threshold * nlonsGlobal_ ) { useFFT_ = false; }
+        gridGlobal_ = Grid( grid.name() );
+        grid::StructuredGrid gs_global( gridGlobal_ );
+        nlonsMaxGlobal_ = gs_global.nxmax();
+        jlonMin_.resize( 1 );
+        jlonMin_[0]  = 0;
+        jlatMin_     = 0;
+        nlatsGlobal_ = gs_global.ny();
+        for ( int jlat = 0; jlat < nlatsGlobal_; jlat++ ) {
+            if ( gs_global.y( jlat ) > g.y( 0 ) ) { jlatMin_++; };
+        }
+        int jlatMinLeg_ = jlatMin_;
+        if ( nlatsNH_ < nlatsSH_ ) { jlatMinLeg_ += nlatsNH_ - nlatsSH_; };
+        auto wrapAngle = [&]( double angle ) {
+            double result = fmod( angle, 360 );
+            if ( result < 0. ) { result += 360.; }
+            return result;
+        };
+        double lonmin = wrapAngle( g.x( 0, 0 ) );
+        if ( nlonsMax < fft_threshold * nlonsMaxGlobal_ ) { useFFT_ = false; }
         else {
-            if ( nlons < nlonsGlobal_ ) {
-                // need to use FFT with cropped grid
-                for ( size_t j = 0; j < nlonsGlobal_; ++j ) {
-                    if ( gs_global.x( j, 0 ) == lonmin ) { jlonMin_ = j; }
+            // need to use FFT with cropped grid
+            if ( grid::RegularGrid( gridGlobal_ ) ) {
+                for ( size_t jlon = 0; jlon < nlonsMaxGlobal_; ++jlon ) {
+                    if ( gs_global.x( jlon, 0 ) < lonmin ) { jlonMin_[0]++; }
+                }
+            }
+            else {
+                nlonsGlobal_.resize( nlats );
+                jlonMin_.resize( nlats );
+                for ( size_t jlat = 0; jlat < nlats; jlat++ ) {
+                    double lonmin      = wrapAngle( g.x( 0, jlat ) );
+                    nlonsGlobal_[jlat] = gs_global.nx( jlat + jlatMin_ );
+                    jlonMin_[jlat]     = 0;
+                    for ( size_t jlon = 0; jlon < nlonsGlobal_[jlat]; ++jlon ) {
+                        if ( gs_global.x( jlon, jlat + jlatMin_ ) < lonmin ) { jlonMin_[jlat]++; }
+                    }
                 }
             }
         }
         //Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << std::endl;
         std::vector<double> lats( nlatsLeg_ );
-        std::vector<double> lons( nlons );
+        std::vector<double> lons( nlonsMax );
         if ( nlatsNH_ >= nlatsSH_ ) {
             for ( size_t j = 0; j < nlatsLeg_; ++j ) {
                 lats[j] = g.y( j ) * util::Constants::degreesToRadians();
@@ -153,7 +177,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
                 lats[idx] = -g.y( j ) * util::Constants::degreesToRadians();
             }
         }
-        for ( size_t j = 0; j < nlons; ++j ) {
+        for ( size_t j = 0; j < nlonsMax; ++j ) {
             lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians();
         }
         // precomputations for Legendre polynomials:
@@ -196,11 +220,37 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
 #if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2
             {
                 ATLAS_TRACE( "opt3 precomp FFTW" );
-                int num_complex = ( nlonsGlobal_ / 2 ) + 1;
+                int num_complex = ( nlonsMaxGlobal_ / 2 ) + 1;
                 fft_in_         = fftw_alloc_complex( nlats * num_complex );
-                fft_out_        = fftw_alloc_real( nlats * nlonsGlobal_ );
-                plan_ = fftw_plan_many_dft_c2r( 1, &nlonsGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL,
-                                                1, nlonsGlobal_, FFTW_ESTIMATE );
+                fft_out_        = fftw_alloc_real( nlats * nlonsMaxGlobal_ );
+                if ( grid::RegularGrid( gridGlobal_ ) ) {
+                    plans_.resize( 1 );
+                    FILE* file_fftw;
+                    file_fftw = fopen( "wisdom.bin", "r" );
+                    if ( file_fftw ) {
+                        fftw_import_wisdom_from_file( file_fftw );
+                        fclose( file_fftw );
+                    }
+                    plans_[0] = fftw_plan_many_dft_c2r( 1, &nlonsMaxGlobal_, nlats, fft_in_, NULL, 1, num_complex,
+                                                        fft_out_, NULL, 1, nlonsMaxGlobal_, FFTW_ESTIMATE );
+                }
+                else {
+                    plans_.resize( nlatsLeg_ );
+                    FILE* file_fftw;
+                    file_fftw = fopen( "wisdom.bin", "r" );
+                    if ( file_fftw ) {
+                        fftw_import_wisdom_from_file( file_fftw );
+                        fclose( file_fftw );
+                    }
+                    for ( int j = 0; j < nlatsLeg_; j++ ) {
+                        int nlonsGlobalj = gs_global.nx( jlatMinLeg_ + j );
+                        //ASSERT( nlonsGlobalj > 0 && nlonsGlobalj <= nlonsMaxGlobal_ );
+                        plans_[j] = fftw_plan_dft_c2r_1d( nlonsGlobalj, fft_in_, fft_out_, FFTW_ESTIMATE );
+                    }
+                    file_fftw = fopen( "wisdom.bin", "wb" );
+                    fftw_export_wisdom_to_file( file_fftw );
+                    fclose( file_fftw );
+                }
             }
                 // other FFT implementations should be added with #elif statements
 #else
@@ -208,7 +258,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
 #endif
         }
         if ( !useFFT_ ) {
-            alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons );
+            alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlonsMax );
 #if !TRANSLOCAL_DGEMM2
             {
                 ATLAS_TRACE( "opt3 precomp Fourier tp" );
@@ -216,10 +266,10 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
                 for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
                     double factor = 1.;
                     if ( jm > 0 ) { factor = 2.; }
-                    for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                    for ( int jlon = 0; jlon < nlonsMax; jlon++ ) {
                         fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
                     }
-                    for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                    for ( int jlon = 0; jlon < nlonsMax; jlon++ ) {
                         fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
                     }
                 }
@@ -228,7 +278,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
             {
                 ATLAS_TRACE( "opt3 precomp Fourier" );
                 int idx = 0;
-                for ( int jlon = 0; jlon < nlons; jlon++ ) {
+                for ( int jlon = 0; jlon < nlonsMax; jlon++ ) {
                     double factor = 1.;
                     for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
                         if ( jm > 0 ) { factor = 2.; }
@@ -268,7 +318,9 @@ TransLocalopt3::~TransLocalopt3() {
         free_aligned( legendre_asym_ );
         if ( useFFT_ ) {
 #if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2
-            fftw_destroy_plan( plan_ );
+            for ( int j = 0; j < plans_.size(); j++ ) {
+                fftw_destroy_plan( plans_[j] );
+            }
             fftw_free( fft_in_ );
             fftw_free( fft_out_ );
 #endif
@@ -434,15 +486,16 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::invtrans_fourieropt3( const int nlats, const int nlons, const int nb_fields, double scl_fourier[],
-                                           double gp_fields[], const eckit::Configuration& config ) const {
+void TransLocalopt3::invtrans_fourier_regularopt3( const int nlats, const int nlons, const int nb_fields,
+                                                   double scl_fourier[], double gp_fields[],
+                                                   const eckit::Configuration& config ) const {
     // Fourier transformation:
     if ( useFFT_ ) {
 #if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2
         {
-            int num_complex = ( nlonsGlobal_ / 2 ) + 1;
+            int num_complex = ( nlonsMaxGlobal_ / 2 ) + 1;
             {
-                ATLAS_TRACE( "opt3 FFTW" );
+                ATLAS_TRACE( "opt3 FFTW regular" );
                 for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                     int idx = 0;
                     for ( int jlat = 0; jlat < nlats; jlat++ ) {
@@ -459,12 +512,12 @@ void TransLocalopt3::invtrans_fourieropt3( const int nlats, const int nlons, con
                             }
                         }
                     }
-                    fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ );
+                    fftw_execute_dft_c2r( plans_[0], fft_in_, fft_out_ );
                     for ( int jlat = 0; jlat < nlats; jlat++ ) {
                         for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                            int j = jlon + jlonMin_;
-                            if ( j >= nlonsGlobal_ ) { j -= nlonsGlobal_; }
-                            gp_fields[jlon + nlons * ( jlat + nlats * jfld )] = fft_out_[j + nlonsGlobal_ * jlat];
+                            int j = jlon + jlonMin_[0];
+                            if ( j >= nlonsMaxGlobal_ ) { j -= nlonsMaxGlobal_; }
+                            gp_fields[jlon + nlons * ( jlat + nlats * jfld )] = fft_out_[j + nlonsMaxGlobal_ * jlat];
                         }
                     }
                 }
@@ -517,6 +570,98 @@ void TransLocalopt3::invtrans_fourieropt3( const int nlats, const int nlons, con
 
 // --------------------------------------------------------------------------------------------------------------------
 
+void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid::StructuredGrid g, const int nb_fields,
+                                                   double scl_fourier[], double gp_fields[],
+                                                   const eckit::Configuration& config ) const {
+    // Fourier transformation:
+    int nlonsMax = g.nxmax();
+    if ( useFFT_ ) {
+#if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2
+        {
+            {
+                ATLAS_TRACE( "opt3 FFTW reduced" );
+                int jgp = 0;
+                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                    for ( int jlat = 0; jlat < nlats; jlat++ ) {
+                        int idx           = 0;
+                        int num_complex   = ( nlonsGlobal_[jlat] / 2 ) + 1;
+                        fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0, nb_fields, nlats )];
+                        for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
+                            for ( int imag = 0; imag < 2; imag++ ) {
+                                if ( jm <= truncation_ ) {
+                                    fft_in_[idx][imag] =
+                                        scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )];
+                                }
+                                else {
+                                    fft_in_[idx][imag] = 0.;
+                                }
+                            }
+                        }
+                        //Log::info() << std::endl;
+                        //Log::info() << jlat << "out:" << std::endl;
+                        int jplan = nlatsLeg_ - nlatsNH_ + jlat;
+                        if ( jplan >= nlatsLeg_ ) { jplan = nlats - 1 + nlatsLeg_ - nlatsSH_ - jlat; };
+                        //ASSERT( jplan < nlatsLeg_ && jplan >= 0 );
+                        fftw_execute_dft_c2r( plans_[jplan], fft_in_, fft_out_ );
+                        for ( int jlon = 0; jlon < g.nx( jlat ); jlon++ ) {
+                            int j = jlon + jlonMin_[jlat];
+                            if ( j >= nlonsGlobal_[jlat] ) { j -= nlonsGlobal_[jlat]; }
+                            //Log::info() << fft_out_[j] << " ";
+                            ASSERT( j < nlonsMaxGlobal_ );
+                            gp_fields[jgp++] = fft_out_[j];
+                        }
+                        //Log::info() << std::endl;
+                    }
+                }
+            }
+        }
+#endif
+    }
+    else {
+#if !TRANSLOCAL_DGEMM2
+        // dgemm-method 1
+        {
+            ATLAS_TRACE( "opt3 Fourier dgemm method 1" );
+            eckit::linalg::Matrix A( fourier_, nlonsMax, ( truncation_ + 1 ) * 2 );
+            eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats );
+            eckit::linalg::Matrix C( gp_fields, nlonsMax, nb_fields * nlats );
+            eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+        }
+#else
+        // dgemm-method 2
+        // should be faster for small domains or large truncation
+        // but have not found any significant speedup so far
+        double* gp_opt3;
+        alloc_aligned( gp_opt3, nb_fields * grid_.size() );
+        {
+            ATLAS_TRACE( "opt3 Fourier dgemm method 2" );
+            eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 );
+            eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlonsMax );
+            eckit::linalg::Matrix C( gp_opt3, nb_fields * nlats, nlonsMax );
+            eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+        }
+
+        // Transposition in grid point space:
+        {
+            ATLAS_TRACE( "opt3 transposition in gp-space" );
+            int idx = 0;
+            for ( int jlon = 0; jlon < nlonsMax; jlon++ ) {
+                for ( int jlat = 0; jlat < nlats; jlat++ ) {
+                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                        int pos_tp = jlon + nlonsMax * ( jlat + nlats * ( jfld ) );
+                        //int pos  = jfld + nb_fields * ( jlat + nlats * ( jlon ) );
+                        gp_fields[pos_tp] = gp_opt3[idx++];  // = gp_opt3[pos]
+                    }
+                }
+            }
+        }
+        free_aligned( gp_opt3 );
+#endif
+    }
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
 void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const int nb_fields,
                                                     const int nb_vordiv_fields, const double scalar_spectra[],
                                                     double gp_fields[], const eckit::Configuration& config ) const {
@@ -569,14 +714,18 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const
             }
 
             // Fourier transformation:
-            int idx          = 0;
-            fouriertp[idx++] = 1.;  // real part
-            fouriertp[idx++] = 0.;  // imaginary part
-            for ( int jm = 1; jm < truncation; jm++ ) {
-                fouriertp[idx++] = +2. * std::cos( jm * lon );  // real part
-                fouriertp[idx++] = -2. * std::sin( jm * lon );  // imaginary part
+            {
+                //ATLAS_TRACE( "opt compute fouriertp" );
+                int idx          = 0;
+                fouriertp[idx++] = 1.;  // real part
+                fouriertp[idx++] = 0.;  // imaginary part
+                for ( int jm = 1; jm < truncation; jm++ ) {
+                    fouriertp[idx++] = +2. * std::cos( jm * lon );  // real part
+                    fouriertp[idx++] = -2. * std::sin( jm * lon );  // imaginary part
+                }
             }
             {
+                //ATLAS_TRACE( "opt Fourier dgemm" );
                 eckit::linalg::Matrix A( fouriertp, 1, (truncation)*2 );
                 eckit::linalg::Matrix B( scl_fourier_tp, (truncation)*2, nb_fields );
                 eckit::linalg::Matrix C( gp_opt, 1, nb_fields );
@@ -707,7 +856,12 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
             invtrans_legendreopt3( truncation, nlats, nb_scalar_fields, scalar_spectra, scl_fourier, config );
 
             // Fourier transformation:
-            invtrans_fourieropt3( nlats, nlons, nb_fields, scl_fourier, gp_fields, config );
+            if ( grid::RegularGrid( gridGlobal_ ) ) {
+                invtrans_fourier_regularopt3( nlats, nlons, nb_fields, scl_fourier, gp_fields, config );
+            }
+            else {
+                invtrans_fourier_reducedopt3( nlats, g, nb_fields, scl_fourier, gp_fields, config );
+            }
 
             // Computing u,v from U,V:
             {
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h
index af8360801..099f4b0e1 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.h
+++ b/src/atlas/trans/localopt3/TransLocalopt3.h
@@ -120,8 +120,12 @@ class TransLocalopt3 : public trans::TransImpl {
                                 const double scalar_spectra[], double scl_fourier[],
                                 const eckit::Configuration& config ) const;
 
-    void invtrans_fourieropt3( const int nlats, const int nlons, const int nb_fields, double scl_fourier[],
-                               double gp_fields[], const eckit::Configuration& config ) const;
+    void invtrans_fourier_regularopt3( const int nlats, const int nlons, const int nb_fields, double scl_fourier[],
+                                       double gp_fields[], const eckit::Configuration& config ) const;
+
+    void invtrans_fourier_reducedopt3( const int nlats, const grid::StructuredGrid g, const int nb_fields,
+                                       double scl_fourier[], double gp_fields[],
+                                       const eckit::Configuration& config ) const;
 
     void invtrans_unstructured_precomp( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
                                         const double scalar_spectra[], double gp_fields[],
@@ -137,6 +141,7 @@ class TransLocalopt3 : public trans::TransImpl {
 
 private:
     Grid grid_;
+    Grid gridGlobal_;
     bool useFFT_;
     bool dgemmMethod1_;
     bool unstruct_precomp_;
@@ -144,8 +149,12 @@ class TransLocalopt3 : public trans::TransImpl {
     int nlatsNH_;
     int nlatsSH_;
     int nlatsLeg_;
-    int jlonMin_;
-    int nlonsGlobal_;
+    std::vector<size_t> jlonMin_;
+    int jlatMin_;
+    int jlatMinLeg_;
+    int nlonsMaxGlobal_;
+    std::vector<size_t> nlonsGlobal_;
+    int nlatsGlobal_;
     bool precompute_;
     double* legendre_;
     double* legendre_sym_;
@@ -158,7 +167,7 @@ class TransLocalopt3 : public trans::TransImpl {
 #if ATLAS_HAVE_FFTW
     fftw_complex* fft_in_;
     double* fft_out_;
-    fftw_plan plan_;
+    std::vector<fftw_plan> plans_;
 #endif
 };
 
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 3d023ebfd..f67c8704c 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -938,7 +938,7 @@ CASE( "test_trans_hires" ) {
 }
 #endif
 //-----------------------------------------------------------------------------
-#if 0
+#if 1
 CASE( "test_trans_domain" ) {
     Log::info() << "test_trans_domain" << std::endl;
     // test transgeneral by comparing with analytic solution on a cropped domain
@@ -950,9 +950,9 @@ CASE( "test_trans_domain" ) {
     //Domain testdomain = ZonalBandDomain( {-.5, .5} );
     //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} );
     //Domain testdomain = ZonalBandDomain( {-85., -86.} );
-    Domain testdomain = RectangularDomain( {-1., 1.}, {5., 5.5} );
+    Domain testdomain = RectangularDomain( {-2., 2.}, {20., 30.} );
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F1280", testdomain );
+    Grid g( "O1280" );
     Grid g_global( g.name() );
 
     grid::StructuredGrid gs( g );
@@ -960,8 +960,9 @@ CASE( "test_trans_domain" ) {
     Log::info() << "nlats: " << gs.ny() << " nlons:" << gs.nxmax() << std::endl;
     int ndgl = gs_global.ny();
     //int trc  = ndgl - 1;  // linear
-    int trc = ndgl / 2. - 1;  // cubic
-    trans::Trans transLocal1( g, trc, util::Config( "type", "localopt2" ) );
+    //int trc = ndgl / 2. - 1;  // cubic
+    int trc = 120;
+    trans::Trans transLocal1( g, trc, util::Config( "type", "ifs" ) );
     trans::Trans transLocal2( g, trc, util::Config( "type", "localopt3" ) );
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
 
@@ -998,7 +999,7 @@ CASE( "test_trans_domain" ) {
                         for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
 
                             if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. &&
-                                 icase < 1000 ) {
+                                 icase < 1 ) {
                                 auto start = std::chrono::system_clock::now();
                                 for ( int j = 0; j < 2 * N * nb_scalar; j++ ) {
                                     sp[j] = 0.;
@@ -1037,6 +1038,17 @@ CASE( "test_trans_domain" ) {
                                 double rms_gen2 =
                                     compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() );
 
+                                //Log::info() << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
+                                //            << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl
+                                //            << "rgp2:";
+                                //for ( int j = 0; j < g.size(); j++ ) {
+                                //    Log::info() << rgp2[pos * g.size() + j] << " ";
+                                //};
+                                //Log::info() << std::endl << "analytic:";
+                                //for ( int j = 0; j < g.size(); j++ ) {
+                                //    Log::info() << rgp_analytic[j] << " ";
+                                //};
+                                //Log::info() << std::endl;
                                 rav1 += rms_gen1;
                                 rav2 += rms_gen2;
                                 if ( !( rms_gen1 < tolerance ) || !( rms_gen2 < tolerance ) ) {
@@ -1073,7 +1085,7 @@ CASE( "test_trans_domain" ) {
 #endif
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
-#if 1
+#if 0
 CASE( "test_trans_unstructured" ) {
     Log::info() << "test_trans_unstructured" << std::endl;
     // test transgeneral by comparing with analytic solution on an unstructured grid

From 4a3e7a60873d1f255cc3a67ab7536b17190825ae Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Wed, 11 Apr 2018 18:11:31 +0100
Subject: [PATCH 045/123] reduced truncation towards the poles is now working

---
 src/atlas/trans/localopt3/TransLocalopt3.cc | 146 ++++++++++++++++----
 src/atlas/trans/localopt3/TransLocalopt3.h  |   1 +
 src/tests/trans/test_transgeneral.cc        |  22 ++-
 3 files changed, 140 insertions(+), 29 deletions(-)

diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index 6ea7093f6..e38defb8d 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -77,6 +77,37 @@ void free_aligned( double*& ptr ) {
 int add_padding( int n ) {
     return std::ceil( n / 8. ) * 8;
 }
+
+int fourier_truncation( const int truncation,    // truncation
+                        const int nx,            // number of longitudes
+                        const int nxmax,         // maximum nx
+                        const int ndgl,          // number of latitudes
+                        const double lat,        // latitude in radian
+                        const bool fullgrid ) {  // regular grid
+    int trc     = truncation;
+    int trclin  = ndgl - 1;
+    int trcquad = ndgl * 2 / 3 - 1;
+    if ( truncation >= trclin || fullgrid ) {
+        // linear
+        trc = ( nx - 1 ) / 2;
+    }
+    else if ( truncation >= trcquad ) {
+        // quadratic
+        double weight = 3 * ( trclin - truncation ) / ndgl;
+        double sqcos  = std::pow( std::cos( lat ), 2 );
+
+        trc = ( nx - 1 ) / ( 2 + weight * sqcos );
+    }
+    else {
+        // cubic
+        double sqcos = std::pow( std::cos( lat ), 2 );
+
+        trc = ( nx - 1 ) / ( 2 + sqcos ) - 1;
+    }
+    trc = std::min( truncation, trc );
+    return trc;
+}
+
 }  // namespace
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -109,6 +140,8 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         grid::StructuredGrid g( grid_ );
         nlats    = g.ny();
         nlonsMax = g.nxmax();
+
+        // check location of domain relative to the equator:
         for ( size_t j = 0; j < nlats; ++j ) {
             // assumptions: latitudes in g.y(j) are monotone and decreasing
             // no assumption on whether we have 0, 1 or 2 latitudes at the equator
@@ -125,6 +158,8 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         else {
             nlatsLeg_ = nlatsSH_;
         }
+
+        // compute latitudinal location of domain relative to global grid:
         gridGlobal_ = Grid( grid.name() );
         grid::StructuredGrid gs_global( gridGlobal_ );
         nlonsMaxGlobal_ = gs_global.nxmax();
@@ -137,6 +172,35 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         }
         int jlatMinLeg_ = jlatMin_;
         if ( nlatsNH_ < nlatsSH_ ) { jlatMinLeg_ += nlatsNH_ - nlatsSH_; };
+        if ( jlatMin_ > nlatsGlobal_ / 2 ) { jlatMinLeg_ -= jlatMin_ - nlatsGlobal_ / 2 + 1; };
+        //Log::info() << "jlatMinLeg:" << jlatMinLeg_ << std::endl;
+        // reduce truncation towards the pole for reduced meshes:
+        nlat0_.resize( truncation_ + 1 );
+        int nmen0 = -1;
+        for ( int jlat = 0; jlat < nlatsGlobal_ / 2; jlat++ ) {
+            double lat = gs_global.y( jlat ) * util::Constants::degreesToRadians();
+            int nmen   = fourier_truncation( truncation_, gs_global.nx( jlat ), gs_global.nxmax(), nlatsGlobal_, lat,
+                                           grid::RegularGrid( gs_global ) );
+            /*Log::info() << "jlat=" << jlat << " nmen=" << nmen << " trc=" << truncation_
+                        << " nx=" << gs_global.nx( jlat ) << " nxmax=" << gs_global.nxmax() << " nlats=" << nlatsGlobal_
+                        << " lat=" << lat << std::endl;*/
+            nmen       = std::max( nmen0, nmen );
+            int ndgluj = nlatsLeg_ - std::min( nlatsLeg_, nlatsLeg_ + jlatMinLeg_ - jlat );
+            for ( int j = nmen0 + 1; j <= nmen; j++ ) {
+                nlat0_[j] = ndgluj;
+            }
+            nmen0 = nmen;
+        }
+        for ( int j = nmen0 + 1; j <= truncation_; j++ ) {
+            nlat0_[j] = nlatsLeg_;
+        }
+        /*Log::info() << "localopt:" << std::endl;
+        for ( int j = 0; j <= truncation_; j++ ) {
+            Log::info() << nlatsLeg_ - nlat0_[j] << " ";
+        }
+        Log::info() << std::endl;*/
+
+        // compute longitudinal location of domain within global grid for using FFT:
         auto wrapAngle = [&]( double angle ) {
             double result = fmod( angle, 360 );
             if ( result < 0. ) { result += 360.; }
@@ -180,6 +244,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         for ( size_t j = 0; j < nlonsMax; ++j ) {
             lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians();
         }
+
         // precomputations for Legendre polynomials:
         {
             ATLAS_TRACE( "opt3 precomp Legendre" );
@@ -399,7 +464,7 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat
             if ( jm == 0 ) { n_imag = 1; }
             int size_fourier = nb_fields * n_imag * nlatsLeg_;
             auto posFourier  = [&]( int jfld, int imag, int jlat, int jm, int nlatsH ) {
-                return jfld + nb_fields * ( imag + n_imag * ( nlatsLeg_ - nlatsH + jlat ) );
+                return jfld + nb_fields * ( imag + n_imag * ( nlatsLeg_ - nlat0_[jm] - nlatsH + jlat ) );
             };
             double* scalar_sym;
             double* scalar_asym;
@@ -440,38 +505,68 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat
                 }
                 ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym );
             }
-            {
-                eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym );
-                eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsLeg_ );
-                eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLeg_ );
-                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-            }
-            if ( size_asym > 0 ) {
-                eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym );
-                eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsLeg_ );
-                eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ );
-                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+            if ( nlatsLeg_ - nlat0_[jm] > 0 ) {
+                {
+                    eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym );
+                    eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm] + nlat0_[jm] * size_sym, size_sym,
+                                             nlatsLeg_ - nlat0_[jm] );
+                    eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLeg_ - nlat0_[jm] );
+                    eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                }
+                if ( size_asym > 0 ) {
+                    eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym );
+                    eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm] + nlat0_[jm] * size_sym,
+                                             size_asym, nlatsLeg_ - nlat0_[jm] );
+                    eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ - nlat0_[jm] );
+                    eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                }
             }
             {
                 //ATLAS_TRACE( "opt3 merge spheres" );
                 // northern hemisphere:
                 for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) {
-                    for ( int imag = 0; imag < n_imag; imag++ ) {
-                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                            int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ );
-                            scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] =
-                                scl_fourier_sym[idx] + scl_fourier_asym[idx];
+                    if ( nlatsLeg_ - nlat0_[jm] - nlatsNH_ + jlat >= 0 ) {
+                        for ( int imag = 0; imag < n_imag; imag++ ) {
+                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ );
+                                scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] =
+                                    scl_fourier_sym[idx] + scl_fourier_asym[idx];
+                            }
+                        }
+                    }
+                    else {
+                        for ( int imag = 0; imag < n_imag; imag++ ) {
+                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] = 0.;
+                            }
                         }
                     }
+                    /*for ( int imag = 0; imag < n_imag; imag++ ) {
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                            if ( scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] > 0. ) {
+                                Log::info() << "jm=" << jm << " jlat=" << jlat << " nlatsLeg_=" << nlatsLeg_
+                                            << " nlat0=" << nlat0_[jm] << " nlatsNH=" << nlatsNH_ << std::endl;
+                            }
+                        }
+                    }*/
                 }
                 // southern hemisphere:
                 for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) {
-                    for ( int imag = 0; imag < n_imag; imag++ ) {
-                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                            int idx   = posFourier( jfld, imag, jlat, jm, nlatsSH_ );
-                            int jslat = nlats - jlat - 1;
-                            scl_fourier[posMethod( jfld, imag, jslat, jm, nb_fields, nlats )] =
-                                scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                    int jslat = nlats - jlat - 1;
+                    if ( nlatsLeg_ - nlat0_[jm] - nlatsSH_ + jlat >= 0 ) {
+                        for ( int imag = 0; imag < n_imag; imag++ ) {
+                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                int idx = posFourier( jfld, imag, jlat, jm, nlatsSH_ );
+                                scl_fourier[posMethod( jfld, imag, jslat, jm, nb_fields, nlats )] =
+                                    scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                            }
+                        }
+                    }
+                    else {
+                        for ( int imag = 0; imag < n_imag; imag++ ) {
+                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                scl_fourier[posMethod( jfld, imag, jslat, jm, nb_fields, nlats )] = 0.;
+                            }
                         }
                     }
                 }
@@ -583,9 +678,11 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid::
                 int jgp = 0;
                 for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                     for ( int jlat = 0; jlat < nlats; jlat++ ) {
-                        int idx           = 0;
+                        int idx = 0;
+                        //Log::info() << jlat << "in:" << std::endl;
                         int num_complex   = ( nlonsGlobal_[jlat] / 2 ) + 1;
                         fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0, nb_fields, nlats )];
+                        //Log::info() << fft_in_[0][0] << " ";
                         for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
                             for ( int imag = 0; imag < 2; imag++ ) {
                                 if ( jm <= truncation_ ) {
@@ -595,6 +692,7 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid::
                                 else {
                                     fft_in_[idx][imag] = 0.;
                                 }
+                                //Log::info() << fft_in_[idx][imag] << " ";
                             }
                         }
                         //Log::info() << std::endl;
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h
index 099f4b0e1..1ca002436 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.h
+++ b/src/atlas/trans/localopt3/TransLocalopt3.h
@@ -154,6 +154,7 @@ class TransLocalopt3 : public trans::TransImpl {
     int jlatMinLeg_;
     int nlonsMaxGlobal_;
     std::vector<size_t> nlonsGlobal_;
+    std::vector<int> nlat0_;
     int nlatsGlobal_;
     bool precompute_;
     double* legendre_;
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index f67c8704c..8b1a78a54 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -457,16 +457,28 @@ void spectral_transform_grid_analytic(
 
     if ( grid::StructuredGrid( grid ) ) {
         grid::StructuredGrid g( grid );
+        Grid gridGlobal( grid.name() );
+        grid::StructuredGrid gs_global( gridGlobal );
+        int nlatsGlobal = gs_global.ny();
+        int jlatMin     = 0;
+        for ( int jlat = 0; jlat < nlatsGlobal; jlat++ ) {
+            if ( gs_global.y( jlat ) > g.y( 0 ) ) { jlatMin++; };
+        }
+
         int idx = 0;
         for ( size_t j = 0; j < g.ny(); ++j ) {
             double lat = g.y( j ) * util::Constants::degreesToRadians();
 
+            int ftrc = trans::fourier_truncation( trc, gs_global.nx( jlatMin + j ), gs_global.nxmax(), gs_global.ny(),
+                                                  lat, grid::RegularGrid( gs_global ) );
+            /*Log::info() << "j=" << j << " ftrc=" << ftrc << " trc=" << trc << " nx=" << gs_global.nx( jlatMin + j )
+                        << " nxmax=" << gs_global.nxmax() << " nlats=" << gs_global.ny() << " lat=" << g.y( j )
+                        << " jlatMin=" << jlatMin << std::endl;*/
             for ( size_t i = 0; i < g.nx( j ); ++i ) {
                 double lon = g.x( i, j ) * util::Constants::degreesToRadians();
 
                 // compute spherical harmonics:
-                if ( trans::fourier_truncation( trc, g.nx( j ), g.nxmax(), g.ny(), lat, grid::RegularGrid( g ) ) >=
-                     m ) {
+                if ( ftrc >= m ) {
                     rgp[idx++] = sphericalharmonics_analytic_point( n, m, imag, lon, lat, ivar_in, ivar_out );
                 }
                 else {
@@ -950,9 +962,9 @@ CASE( "test_trans_domain" ) {
     //Domain testdomain = ZonalBandDomain( {-.5, .5} );
     //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} );
     //Domain testdomain = ZonalBandDomain( {-85., -86.} );
-    Domain testdomain = RectangularDomain( {-2., 2.}, {20., 30.} );
+    Domain testdomain = RectangularDomain( {15., 20.}, {10., 20.} );
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "O1280" );
+    Grid g( "O120" );
     Grid g_global( g.name() );
 
     grid::StructuredGrid gs( g );
@@ -999,7 +1011,7 @@ CASE( "test_trans_domain" ) {
                         for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
 
                             if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. &&
-                                 icase < 1 ) {
+                                 icase < 1000 ) {
                                 auto start = std::chrono::system_clock::now();
                                 for ( int j = 0; j < 2 * N * nb_scalar; j++ ) {
                                     sp[j] = 0.;

From fbb51a60f54176aacfe576851f4bde1c98ad34eb Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Wed, 11 Apr 2018 18:34:28 +0100
Subject: [PATCH 046/123] fixed vordiv in localopt3 (VorDivToUV is not yet
 optimised)

---
 src/atlas/trans/localopt3/TransLocalopt3.cc | 22 ++++++++++++++++++++-
 src/tests/trans/test_transgeneral.cc        |  6 +++---
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index e38defb8d..09b84fc34 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -125,7 +125,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
 #else
     eckit::linalg::LinearAlgebra::backend( "generic" );  // might want to choose backend with this command
 #endif
-    double fft_threshold = 0.0;  // fraction of latitudes of the full grid up to which FFT is used.
+    double fft_threshold = 0.0;  // fraction of latitudes of the full grid down to which FFT is used.
     // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine
     // on which this code is running!
     int nlats         = 0;
@@ -832,6 +832,16 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const
                     gp_fields[ip + j * grid_.size()] = gp_opt[j];
                 }
             }
+            // Computing u,v from U,V:
+            {
+                if ( nb_vordiv_fields > 0 ) {
+                    //ATLAS_TRACE( "opt3 u,v from U,V" );
+                    double coslat = std::cos( lat );
+                    for ( int j = 0; j < nb_fields; j++ ) {
+                        gp_fields[ip + j * grid_.size()] /= coslat;
+                    }
+                }
+            }
         }
     }
     free_aligned( scl_fourier );
@@ -912,6 +922,16 @@ void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_f
                 gp_fields[ip + j * grid_.size()] = gp_opt[j];
             }
         }
+        // Computing u,v from U,V:
+        {
+            if ( nb_vordiv_fields > 0 ) {
+                //ATLAS_TRACE( "opt3 u,v from U,V" );
+                double coslat = std::cos( lat );
+                for ( int j = 0; j < nb_fields; j++ ) {
+                    gp_fields[ip + j * grid_.size()] /= coslat;
+                }
+            }
+        }
     }
     free_aligned( legendre );
     free_aligned( scl_fourier );
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 8b1a78a54..2f2c2705e 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -981,7 +981,7 @@ CASE( "test_trans_domain" ) {
     functionspace::Spectral spectral( trc );
     functionspace::StructuredColumns gridpoints( g );
 
-    int nb_scalar = 1, nb_vordiv = 0;
+    int nb_scalar = 2, nb_vordiv = 2;
     int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
     std::vector<double> sp( 2 * N * nb_scalar );
     std::vector<double> vor( 2 * N * nb_vordiv );
@@ -1124,10 +1124,10 @@ CASE( "test_trans_unstructured" ) {
     }
     Grid gu = grid::UnstructuredGrid( new std::vector<PointXY>( &pts[0], &pts[idx] ) );
     Log::info() << "gu: size=" << gu.size() << std::endl;
-    int trc     = 1280;
+    int trc     = 120;
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
 
-    int nb_scalar = 1, nb_vordiv = 0;
+    int nb_scalar = 1, nb_vordiv = 1;
     int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
     std::vector<double> sp( 2 * N * nb_scalar );
     std::vector<double> vor( 2 * N * nb_vordiv );

From 9f3cc6334f5721a32ef0ebe246196ca8e37ab958 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 19 Apr 2018 10:57:05 +0100
Subject: [PATCH 047/123] No need to look for MKL when eckit already does

---
 CMakeLists.txt                              | 6 ------
 src/CMakeLists.txt                          | 6 ------
 src/atlas/CMakeLists.txt                    | 2 --
 src/atlas/library/defines.h.in              | 1 -
 src/atlas/trans/localopt/TransLocalopt.cc   | 9 +++++----
 src/atlas/trans/localopt2/TransLocalopt2.cc | 9 +++++----
 src/atlas/trans/localopt3/TransLocalopt3.cc | 9 +++++----
 7 files changed, 15 insertions(+), 27 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e71fcc03f..787c96898 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -105,12 +105,6 @@ ecbuild_add_option( FEATURE FFTW
                     DESCRIPTION "Support for fftw"
                     REQUIRED_PACKAGES "FFTW COMPONENTS double" )
 
-### MKL ...
-
-ecbuild_add_option( FEATURE MKL
-                    DESCRIPTION "MKL linear algebra library"
-                    REQUIRED_PACKAGES MKL )
-
 ### trans ...
 
 ecbuild_add_option( FEATURE TRANS
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 48175e157..98a5e1dd0 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -42,12 +42,6 @@ else()
   set( ATLAS_HAVE_FFTW 0 )
 endif()
 
-if( ATLAS_HAVE_MKL )
-  set( ATLAS_HAVE_MKL 1 )
-else()
-  set( ATLAS_HAVE_MKL 0 )
-endif()
-
 if( ATLAS_HAVE_BOUNDSCHECKING )
   set( ATLAS_HAVE_BOUNDSCHECKING 1 )
 else()
diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt
index b018107ec..a8ddfe4b4 100644
--- a/src/atlas/CMakeLists.txt
+++ b/src/atlas/CMakeLists.txt
@@ -583,7 +583,6 @@ ecbuild_add_library( TARGET atlas
     "${TRANSI_INCLUDE_DIRS}"
     "${MPI_CXX_INCLUDE_DIRS}"
     "${FFTW_INCLUDES}"
-    "${MKL_INCLUDE_DIRS}"
   LIBS
     eckit_geometry
     eckit_linalg
@@ -593,7 +592,6 @@ ecbuild_add_library( TARGET atlas
     "${TRANSI_LIBRARIES}"
     "${FCKIT_LIBRARIES}"
     "${FFTW_LIBRARIES}"
-    "${MKL_LIBRARIES}"
   DEFINITIONS
     ${ATLAS_DEFINITIONS}
 )
diff --git a/src/atlas/library/defines.h.in b/src/atlas/library/defines.h.in
index 75f91f2b5..e644fc73e 100644
--- a/src/atlas/library/defines.h.in
+++ b/src/atlas/library/defines.h.in
@@ -11,7 +11,6 @@
 #define ATLAS_HAVE_FORTRAN                   @ATLAS_HAVE_FORTRAN@
 #define ATLAS_HAVE_EIGEN                     @ATLAS_HAVE_EIGEN@
 #define ATLAS_HAVE_FFTW                      @ATLAS_HAVE_FFTW@
-#define ATLAS_HAVE_MKL                       @ATLAS_HAVE_MKL@
 #define ATLAS_BITS_GLOBAL                    @ATLAS_BITS_GLOBAL@
 #define ATLAS_ARRAYVIEW_BOUNDS_CHECKING      @ATLAS_HAVE_BOUNDSCHECKING@
 #define ATLAS_INDEXVIEW_BOUNDS_CHECKING      @ATLAS_HAVE_BOUNDSCHECKING@
diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index 89434ff6b..d0dfbfe4a 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -23,7 +23,8 @@
 #include "atlas/util/Constants.h"
 #include "eckit/linalg/LinearAlgebra.h"
 #include "eckit/linalg/Matrix.h"
-#if ATLAS_HAVE_MKL
+#include "eckit/eckit_config.h"
+#ifdef ECKIT_HAVE_MKL
 #include "mkl.h"
 #endif
 
@@ -58,7 +59,7 @@ int num_n( const int truncation, const int m, const bool symmetric ) {
 }
 
 void alloc_aligned( double*& ptr, size_t n ) {
-#if ATLAS_HAVE_MKL
+#ifdef ECKIT_HAVE_MKL
     int al = 64;
     ptr    = (double*)mkl_malloc( sizeof( double ) * n, al );
 #else
@@ -69,7 +70,7 @@ void alloc_aligned( double*& ptr, size_t n ) {
 }
 
 void free_aligned( double*& ptr ) {
-#if ATLAS_HAVE_MKL
+#ifdef ECKIT_HAVE_MKL
     mkl_free( ptr );
 #else
     free( ptr );
@@ -91,7 +92,7 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
     truncation_( truncation ),
     precompute_( config.getBool( "precompute", true ) ) {
     ATLAS_TRACE( "Precompute legendre opt" );
-#if ATLAS_HAVE_MKL
+#ifdef ECKIT_HAVE_MKL
     eckit::linalg::LinearAlgebra::backend( "mkl" );  // might want to choose backend with this command
 #else
     eckit::linalg::LinearAlgebra::backend( "generic" );  // might want to choose backend with this command
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index 16062989d..6b51c2dc5 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -23,7 +23,8 @@
 #include "atlas/util/Constants.h"
 #include "eckit/linalg/LinearAlgebra.h"
 #include "eckit/linalg/Matrix.h"
-#if ATLAS_HAVE_MKL
+#include "eckit/eckit_config.h"
+#ifdef ECKIT_HAVE_MKL
 #include "mkl.h"
 #endif
 
@@ -58,7 +59,7 @@ int num_n( const int truncation, const int m, const bool symmetric ) {
 }
 
 void alloc_aligned( double*& ptr, size_t n ) {
-#if ATLAS_HAVE_MKL
+#ifdef ECKIT_HAVE_MKL
     int al = 64;
     ptr    = (double*)mkl_malloc( sizeof( double ) * n, al );
 #else
@@ -69,7 +70,7 @@ void alloc_aligned( double*& ptr, size_t n ) {
 }
 
 void free_aligned( double*& ptr ) {
-#if ATLAS_HAVE_MKL
+#ifdef ECKIT_HAVE_MKL
     mkl_free( ptr );
 #else
     free( ptr );
@@ -91,7 +92,7 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
     truncation_( truncation ),
     precompute_( config.getBool( "precompute", true ) ) {
     ATLAS_TRACE( "Precompute legendre opt2" );
-#if ATLAS_HAVE_MKL
+#ifdef ECKIT_HAVE_MKL
     eckit::linalg::LinearAlgebra::backend( "mkl" );  // might want to choose backend with this command
 #else
     eckit::linalg::LinearAlgebra::backend( "generic" );  // might want to choose backend with this command
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index 09b84fc34..abf36baf0 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -21,7 +21,8 @@
 #include "atlas/util/Constants.h"
 #include "eckit/linalg/LinearAlgebra.h"
 #include "eckit/linalg/Matrix.h"
-#if ATLAS_HAVE_MKL
+#include "eckit/eckit_config.h"
+#ifdef ECKIT_HAVE_MKL
 #include "mkl.h"
 #endif
 
@@ -56,7 +57,7 @@ int num_n( const int truncation, const int m, const bool symmetric ) {
 }
 
 void alloc_aligned( double*& ptr, size_t n ) {
-#if ATLAS_HAVE_MKL
+#ifdef ECKIT_HAVE_MKL
     int al = 64;
     ptr    = (double*)mkl_malloc( sizeof( double ) * n, al );
 #else
@@ -67,7 +68,7 @@ void alloc_aligned( double*& ptr, size_t n ) {
 }
 
 void free_aligned( double*& ptr ) {
-#if ATLAS_HAVE_MKL
+#ifdef ECKIT_HAVE_MKL
     mkl_free( ptr );
 #else
     free( ptr );
@@ -120,7 +121,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
     truncation_( truncation ),
     precompute_( config.getBool( "precompute", true ) ) {
     ATLAS_TRACE( "Precompute legendre opt3" );
-#if ATLAS_HAVE_MKL
+#ifdef ECKIT_HAVE_MKL
     eckit::linalg::LinearAlgebra::backend( "mkl" );  // might want to choose backend with this command
 #else
     eckit::linalg::LinearAlgebra::backend( "generic" );  // might want to choose backend with this command

From 821bf1302b07b76290e806a76dc6e2f4c622315b Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Sun, 22 Apr 2018 00:43:02 +0100
Subject: [PATCH 048/123] using global Legendre matrices independent of the
 domain works

---
 src/atlas/trans/localopt3/TransLocalopt3.cc | 271 +++++++++++---------
 src/atlas/trans/localopt3/TransLocalopt3.h  |   2 +
 src/tests/trans/test_transgeneral.cc        |  80 +++---
 3 files changed, 202 insertions(+), 151 deletions(-)

diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index abf36baf0..1b9fbc39b 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -19,9 +19,9 @@
 #include "atlas/trans/local/LegendrePolynomials.h"
 #include "atlas/trans/localopt3/LegendrePolynomialsopt3.h"
 #include "atlas/util/Constants.h"
+#include "eckit/eckit_config.h"
 #include "eckit/linalg/LinearAlgebra.h"
 #include "eckit/linalg/Matrix.h"
-#include "eckit/eckit_config.h"
 #ifdef ECKIT_HAVE_MKL
 #include "mkl.h"
 #endif
@@ -137,6 +137,9 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
     nlatsNH_          = 0;
     nlatsSH_          = 0;
     nlatsLeg_         = 0;
+    nlatsLegDomain_   = 0;
+    nlatsLegReduced_  = 0;
+    bool useGlobalLeg = true;
     if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
         grid::StructuredGrid g( grid_ );
         nlats    = g.ny();
@@ -155,26 +158,38 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
             nlatsNH_++;
             nlatsSH_++;
         }
-        if ( nlatsNH_ >= nlatsSH_ ) { nlatsLeg_ = nlatsNH_; }
+        if ( nlatsNH_ >= nlatsSH_ ) { nlatsLegDomain_ = nlatsNH_; }
         else {
-            nlatsLeg_ = nlatsSH_;
+            nlatsLegDomain_ = nlatsSH_;
         }
 
         // compute latitudinal location of domain relative to global grid:
         gridGlobal_ = Grid( grid.name() );
         grid::StructuredGrid gs_global( gridGlobal_ );
+        grid::StructuredGrid* gsLeg = &g;
+        if ( useGlobalLeg ) { gsLeg = &gs_global; };
         nlonsMaxGlobal_ = gs_global.nxmax();
         jlonMin_.resize( 1 );
         jlonMin_[0]  = 0;
         jlatMin_     = 0;
         nlatsGlobal_ = gs_global.ny();
+        if ( useGlobalLeg ) { nlatsLeg_ = nlatsGlobal_ / 2; }
+        else {
+            nlatsLeg_        = nlatsLegDomain_;
+            nlatsLegReduced_ = nlatsLeg_;
+        }
         for ( int jlat = 0; jlat < nlatsGlobal_; jlat++ ) {
-            if ( gs_global.y( jlat ) > g.y( 0 ) ) { jlatMin_++; };
+            if ( gs_global.y( jlat ) > g.y( 0 ) ) {
+                //Log::info() << gs_global.y( jlat ) << ">" << g.y( 0 ) << " ";
+                jlatMin_++;
+            };
         }
+        //Log::info() << std::endl;
         int jlatMinLeg_ = jlatMin_;
         if ( nlatsNH_ < nlatsSH_ ) { jlatMinLeg_ += nlatsNH_ - nlatsSH_; };
-        if ( jlatMin_ > nlatsGlobal_ / 2 ) { jlatMinLeg_ -= jlatMin_ - nlatsGlobal_ / 2 + 1; };
-        //Log::info() << "jlatMinLeg:" << jlatMinLeg_ << std::endl;
+        if ( jlatMin_ > nlatsGlobal_ / 2 ) { jlatMinLeg_ -= 2 * ( jlatMin_ - nlatsGlobal_ / 2 ); };
+        if ( useGlobalLeg ) { nlatsLegReduced_ = jlatMinLeg_ + nlatsLegDomain_; }
+
         // reduce truncation towards the pole for reduced meshes:
         nlat0_.resize( truncation_ + 1 );
         int nmen0 = -1;
@@ -182,11 +197,9 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
             double lat = gs_global.y( jlat ) * util::Constants::degreesToRadians();
             int nmen   = fourier_truncation( truncation_, gs_global.nx( jlat ), gs_global.nxmax(), nlatsGlobal_, lat,
                                            grid::RegularGrid( gs_global ) );
-            /*Log::info() << "jlat=" << jlat << " nmen=" << nmen << " trc=" << truncation_
-                        << " nx=" << gs_global.nx( jlat ) << " nxmax=" << gs_global.nxmax() << " nlats=" << nlatsGlobal_
-                        << " lat=" << lat << std::endl;*/
             nmen       = std::max( nmen0, nmen );
             int ndgluj = nlatsLeg_ - std::min( nlatsLeg_, nlatsLeg_ + jlatMinLeg_ - jlat );
+            if ( useGlobalLeg ) { ndgluj = std::max( jlatMinLeg_, jlat ); }
             for ( int j = nmen0 + 1; j <= nmen; j++ ) {
                 nlat0_[j] = ndgluj;
             }
@@ -195,11 +208,9 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         for ( int j = nmen0 + 1; j <= truncation_; j++ ) {
             nlat0_[j] = nlatsLeg_;
         }
-        /*Log::info() << "localopt:" << std::endl;
-        for ( int j = 0; j <= truncation_; j++ ) {
-            Log::info() << nlatsLeg_ - nlat0_[j] << " ";
-        }
-        Log::info() << std::endl;*/
+        /*Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << " jlatMin=" << jlatMin_
+                    << " jlatMinLeg=" << jlatMinLeg_ << " nlatsGlobal/2-nlatsLeg=" << nlatsGlobal_ / 2 - nlatsLeg_
+                    << " nlatsLeg_=" << nlatsLeg_ << " nlatsLegDomain_=" << nlatsLegDomain_ << std::endl;*/
 
         // compute longitudinal location of domain within global grid for using FFT:
         auto wrapAngle = [&]( double angle ) {
@@ -232,19 +243,24 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         //Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << std::endl;
         std::vector<double> lats( nlatsLeg_ );
         std::vector<double> lons( nlonsMax );
-        if ( nlatsNH_ >= nlatsSH_ ) {
+        if ( nlatsNH_ >= nlatsSH_ || useGlobalLeg ) {
             for ( size_t j = 0; j < nlatsLeg_; ++j ) {
-                lats[j] = g.y( j ) * util::Constants::degreesToRadians();
+                lats[j] = gsLeg->y( j ) * util::Constants::degreesToRadians();
             }
         }
         else {
             for ( size_t j = nlats - 1, idx = 0; idx < nlatsLeg_; --j, ++idx ) {
-                lats[idx] = -g.y( j ) * util::Constants::degreesToRadians();
+                lats[idx] = -gsLeg->y( j ) * util::Constants::degreesToRadians();
             }
         }
         for ( size_t j = 0; j < nlonsMax; ++j ) {
             lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians();
         }
+        /*Log::info() << "lats: ";
+        for ( int j = 0; j < nlatsLeg_; j++ ) {
+            Log::info() << lats[j] << " ";
+        }
+        Log::info() << std::endl;*/
 
         // precomputations for Legendre polynomials:
         {
@@ -265,7 +281,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
             alloc_aligned( legendre_asym_, size_asym );
             FILE* file_leg;
             file_leg = fopen( "legendre.bin", "r" );
-            if ( false ) {  //if ( file_leg ) {
+            if ( file_leg ) {
                 fread( legendre_sym_, sizeof( double ), size_sym, file_leg );
                 fread( legendre_asym_, sizeof( double ), size_asym, file_leg );
                 fclose( file_leg );
@@ -274,10 +290,10 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
                 compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_,
                                                   legendre_asym_, legendre_sym_begin_.data(),
                                                   legendre_asym_begin_.data() );
-                /*file_leg = fopen( "legendre.bin", "wb" );
+                file_leg = fopen( "legendre.bin", "wb" );
                 fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg );
                 fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg );
-                fclose( file_leg );*/
+                fclose( file_leg );
             }
         }
 
@@ -301,21 +317,23 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
                                                         fft_out_, NULL, 1, nlonsMaxGlobal_, FFTW_ESTIMATE );
                 }
                 else {
-                    plans_.resize( nlatsLeg_ );
+                    plans_.resize( nlatsLegDomain_ );
                     FILE* file_fftw;
                     file_fftw = fopen( "wisdom.bin", "r" );
                     if ( file_fftw ) {
                         fftw_import_wisdom_from_file( file_fftw );
                         fclose( file_fftw );
                     }
-                    for ( int j = 0; j < nlatsLeg_; j++ ) {
+                    for ( int j = 0; j < nlatsLegDomain_; j++ ) {
                         int nlonsGlobalj = gs_global.nx( jlatMinLeg_ + j );
                         //ASSERT( nlonsGlobalj > 0 && nlonsGlobalj <= nlonsMaxGlobal_ );
                         plans_[j] = fftw_plan_dft_c2r_1d( nlonsGlobalj, fft_in_, fft_out_, FFTW_ESTIMATE );
                     }
-                    file_fftw = fopen( "wisdom.bin", "wb" );
-                    fftw_export_wisdom_to_file( file_fftw );
-                    fclose( file_fftw );
+                    if ( !file_fftw ) {
+                        file_fftw = fopen( "wisdom.bin", "wb" );
+                        fftw_export_wisdom_to_file( file_fftw );
+                        fclose( file_fftw );
+                    }
                 }
             }
                 // other FFT implementations should be added with #elif statements
@@ -463,86 +481,97 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat
             int size_asym = num_n( truncation_ + 1, jm, false );
             int n_imag    = 2;
             if ( jm == 0 ) { n_imag = 1; }
-            int size_fourier = nb_fields * n_imag * nlatsLeg_;
-            auto posFourier  = [&]( int jfld, int imag, int jlat, int jm, int nlatsH ) {
-                return jfld + nb_fields * ( imag + n_imag * ( nlatsLeg_ - nlat0_[jm] - nlatsH + jlat ) );
-            };
-            double* scalar_sym;
-            double* scalar_asym;
-            double* scl_fourier_sym;
-            double* scl_fourier_asym;
-            alloc_aligned( scalar_sym, n_imag * nb_fields * size_sym );
-            alloc_aligned( scalar_asym, n_imag * nb_fields * size_asym );
-            alloc_aligned( scl_fourier_sym, size_fourier );
-            alloc_aligned( scl_fourier_asym, size_fourier );
-            {
-                //ATLAS_TRACE( "opt3 Legendre split" );
-                int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2;
-                // the choice between the following two code lines determines whether
-                // total wavenumbers are summed in an ascending or descending order.
-                // The trans library in IFS uses descending order because it should
-                // be more accurate (higher wavenumbers have smaller contributions).
-                // This also needs to be changed when splitting the spectral data in
-                // compute_legendre_polynomialsopt3!
-                //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) {
-                for ( int jn = truncation_ + 1; jn >= jm; jn-- ) {
-                    for ( int imag = 0; imag < n_imag; imag++ ) {
-                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                            idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) );
-                            if ( jn <= truncation && jm < truncation ) {
-                                if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
-                                else {
-                                    scalar_asym[ia++] = scalar_spectra[idx + ioff];
+            int size_fourier = nb_fields * n_imag * ( nlatsLegReduced_ - nlat0_[jm] );
+            if ( size_fourier > 0 ) {
+                auto posFourier = [&]( int jfld, int imag, int jlat, int jm, int nlatsH ) {
+                    return jfld + nb_fields * ( imag + n_imag * ( nlatsLegReduced_ - nlat0_[jm] - nlatsH + jlat ) );
+                };
+                double* scalar_sym;
+                double* scalar_asym;
+                double* scl_fourier_sym;
+                double* scl_fourier_asym;
+                alloc_aligned( scalar_sym, n_imag * nb_fields * size_sym );
+                alloc_aligned( scalar_asym, n_imag * nb_fields * size_asym );
+                alloc_aligned( scl_fourier_sym, size_fourier );
+                alloc_aligned( scl_fourier_asym, size_fourier );
+                {
+                    //ATLAS_TRACE( "opt3 Legendre split" );
+                    int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2;
+                    // the choice between the following two code lines determines whether
+                    // total wavenumbers are summed in an ascending or descending order.
+                    // The trans library in IFS uses descending order because it should
+                    // be more accurate (higher wavenumbers have smaller contributions).
+                    // This also needs to be changed when splitting the spectral data in
+                    // compute_legendre_polynomialsopt3!
+                    //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) {
+                    for ( int jn = truncation_ + 1; jn >= jm; jn-- ) {
+                        for ( int imag = 0; imag < n_imag; imag++ ) {
+                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) );
+                                if ( jn <= truncation && jm < truncation ) {
+                                    if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
+                                    else {
+                                        scalar_asym[ia++] = scalar_spectra[idx + ioff];
+                                    }
                                 }
-                            }
-                            else {
-                                if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = 0.; }
                                 else {
-                                    scalar_asym[ia++] = 0.;
+                                    if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = 0.; }
+                                    else {
+                                        scalar_asym[ia++] = 0.;
+                                    }
                                 }
                             }
                         }
                     }
+                    ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym );
                 }
-                ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym );
-            }
-            if ( nlatsLeg_ - nlat0_[jm] > 0 ) {
-                {
-                    eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym );
-                    eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm] + nlat0_[jm] * size_sym, size_sym,
-                                             nlatsLeg_ - nlat0_[jm] );
-                    eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLeg_ - nlat0_[jm] );
-                    eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                }
-                if ( size_asym > 0 ) {
-                    eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym );
-                    eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm] + nlat0_[jm] * size_sym,
-                                             size_asym, nlatsLeg_ - nlat0_[jm] );
-                    eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ - nlat0_[jm] );
-                    eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                if ( nlatsLegReduced_ - nlat0_[jm] > 0 ) {
+                    {
+                        eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym );
+                        eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm] + nlat0_[jm] * size_sym,
+                                                 size_sym, nlatsLegReduced_ - nlat0_[jm] );
+                        eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLegReduced_ - nlat0_[jm] );
+                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                        /*Log::info() << "sym: ";
+                        for ( int j = 0; j < size_sym * ( nlatsLegReduced_ - nlat0_[jm] ); j++ ) {
+                            Log::info() << legendre_sym_[j + legendre_sym_begin_[jm] + nlat0_[jm] * size_sym] << " ";
+                        }
+                        Log::info() << std::endl;*/
+                    }
+                    if ( size_asym > 0 ) {
+                        eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym );
+                        eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm] + nlat0_[jm] * size_asym,
+                                                 size_asym, nlatsLegReduced_ - nlat0_[jm] );
+                        eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLegReduced_ - nlat0_[jm] );
+                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                        /*Log::info() << "asym: ";
+                        for ( int j = 0; j < size_asym * ( nlatsLegReduced_ - nlat0_[jm] ); j++ ) {
+                            Log::info() << legendre_asym_[j + legendre_asym_begin_[jm] + nlat0_[jm] * size_asym] << " ";
+                        }
+                        Log::info() << std::endl;*/
+                    }
                 }
-            }
-            {
-                //ATLAS_TRACE( "opt3 merge spheres" );
-                // northern hemisphere:
-                for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) {
-                    if ( nlatsLeg_ - nlat0_[jm] - nlatsNH_ + jlat >= 0 ) {
-                        for ( int imag = 0; imag < n_imag; imag++ ) {
-                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ );
-                                scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] =
-                                    scl_fourier_sym[idx] + scl_fourier_asym[idx];
+                {
+                    //ATLAS_TRACE( "opt3 merge spheres" );
+                    // northern hemisphere:
+                    for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) {
+                        if ( nlatsLegReduced_ - nlat0_[jm] - nlatsNH_ + jlat >= 0 ) {
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                    int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ );
+                                    scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] =
+                                        scl_fourier_sym[idx] + scl_fourier_asym[idx];
+                                }
                             }
                         }
-                    }
-                    else {
-                        for ( int imag = 0; imag < n_imag; imag++ ) {
-                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] = 0.;
+                        else {
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                    scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] = 0.;
+                                }
                             }
                         }
-                    }
-                    /*for ( int imag = 0; imag < n_imag; imag++ ) {
+                        /*for ( int imag = 0; imag < n_imag; imag++ ) {
                         for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                             if ( scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] > 0. ) {
                                 Log::info() << "jm=" << jm << " jlat=" << jlat << " nlatsLeg_=" << nlatsLeg_
@@ -550,32 +579,42 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat
                             }
                         }
                     }*/
-                }
-                // southern hemisphere:
-                for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) {
-                    int jslat = nlats - jlat - 1;
-                    if ( nlatsLeg_ - nlat0_[jm] - nlatsSH_ + jlat >= 0 ) {
-                        for ( int imag = 0; imag < n_imag; imag++ ) {
-                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                int idx = posFourier( jfld, imag, jlat, jm, nlatsSH_ );
-                                scl_fourier[posMethod( jfld, imag, jslat, jm, nb_fields, nlats )] =
-                                    scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                    }
+                    // southern hemisphere:
+                    for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) {
+                        int jslat = nlats - jlat - 1;
+                        if ( nlatsLegReduced_ - nlat0_[jm] - nlatsSH_ + jlat >= 0 ) {
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                    int idx = posFourier( jfld, imag, jlat, jm, nlatsSH_ );
+                                    scl_fourier[posMethod( jfld, imag, jslat, jm, nb_fields, nlats )] =
+                                        scl_fourier_sym[idx] - scl_fourier_asym[idx];
+                                }
                             }
                         }
-                    }
-                    else {
-                        for ( int imag = 0; imag < n_imag; imag++ ) {
-                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                scl_fourier[posMethod( jfld, imag, jslat, jm, nb_fields, nlats )] = 0.;
+                        else {
+                            for ( int imag = 0; imag < n_imag; imag++ ) {
+                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                                    scl_fourier[posMethod( jfld, imag, jslat, jm, nb_fields, nlats )] = 0.;
+                                }
                             }
                         }
                     }
                 }
+                free_aligned( scalar_sym );
+                free_aligned( scalar_asym );
+                free_aligned( scl_fourier_sym );
+                free_aligned( scl_fourier_asym );
+            }
+            else {
+                for ( int jlat = 0; jlat < nlats; jlat++ ) {
+                    for ( int imag = 0; imag < n_imag; imag++ ) {
+                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                            scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] = 0.;
+                        }
+                    }
+                }
             }
-            free_aligned( scalar_sym );
-            free_aligned( scalar_asym );
-            free_aligned( scl_fourier_sym );
-            free_aligned( scl_fourier_asym );
         }
     }
 }
@@ -698,8 +737,8 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid::
                         }
                         //Log::info() << std::endl;
                         //Log::info() << jlat << "out:" << std::endl;
-                        int jplan = nlatsLeg_ - nlatsNH_ + jlat;
-                        if ( jplan >= nlatsLeg_ ) { jplan = nlats - 1 + nlatsLeg_ - nlatsSH_ - jlat; };
+                        int jplan = nlatsLegDomain_ - nlatsNH_ + jlat;
+                        if ( jplan >= nlatsLegDomain_ ) { jplan = nlats - 1 + nlatsLegDomain_ - nlatsSH_ - jlat; };
                         //ASSERT( jplan < nlatsLeg_ && jplan >= 0 );
                         fftw_execute_dft_c2r( plans_[jplan], fft_in_, fft_out_ );
                         for ( int jlon = 0; jlon < g.nx( jlat ); jlon++ ) {
@@ -779,7 +818,7 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const
     alloc_aligned( gp_opt, nb_fields );
 
     {
-        ATLAS_TRACE( "opt Legendre dgemm" );
+        ATLAS_TRACE( "opt3 Legendre dgemm" );
         for ( int jm = 0; jm < truncation; jm++ ) {
             int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
             eckit::linalg::Matrix A( eckit::linalg::Matrix(
@@ -792,7 +831,7 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const
 
     // loop over all points:
     {
-        ATLAS_TRACE( "opt Fourier dgemm" );
+        ATLAS_TRACE( "opt3 Fourier dgemm" );
 
         for ( int ip = 0; ip < grid_.size(); ip++ ) {
             PointXY p  = gu.xy( ip );
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h
index 1ca002436..056d4e304 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.h
+++ b/src/atlas/trans/localopt3/TransLocalopt3.h
@@ -149,6 +149,8 @@ class TransLocalopt3 : public trans::TransImpl {
     int nlatsNH_;
     int nlatsSH_;
     int nlatsLeg_;
+    int nlatsLegReduced_;
+    int nlatsLegDomain_;
     std::vector<size_t> jlonMin_;
     int jlatMin_;
     int jlatMinLeg_;
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 2f2c2705e..1a411227a 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -478,7 +478,7 @@ void spectral_transform_grid_analytic(
                 double lon = g.x( i, j ) * util::Constants::degreesToRadians();
 
                 // compute spherical harmonics:
-                if ( ftrc >= m ) {
+                if ( ftrc > m ) {
                     rgp[idx++] = sphericalharmonics_analytic_point( n, m, imag, lon, lat, ivar_in, ivar_out );
                 }
                 else {
@@ -961,36 +961,33 @@ CASE( "test_trans_domain" ) {
     //Domain testdomain = ZonalBandDomain( {-90., 90.} );
     //Domain testdomain = ZonalBandDomain( {-.5, .5} );
     //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} );
-    //Domain testdomain = ZonalBandDomain( {-85., -86.} );
-    Domain testdomain = RectangularDomain( {15., 20.}, {10., 20.} );
+    Domain testdomain1 = ZonalBandDomain( {-10., 5.} );
+    //Domain testdomain1 = RectangularDomain( {-1., 1.}, {50., 55.} );
+    Domain testdomain2 = RectangularDomain( {-1., 1.}, {-5., 40.} );
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "O120" );
-    Grid g_global( g.name() );
-
-    grid::StructuredGrid gs( g );
-    grid::StructuredGrid gs_global( g_global );
-    Log::info() << "nlats: " << gs.ny() << " nlons:" << gs.nxmax() << std::endl;
-    int ndgl = gs_global.ny();
-    //int trc  = ndgl - 1;  // linear
-    //int trc = ndgl / 2. - 1;  // cubic
-    int trc = 120;
-    trans::Trans transLocal1( g, trc, util::Config( "type", "ifs" ) );
-    trans::Trans transLocal2( g, trc, util::Config( "type", "localopt3" ) );
+    std::string gridString = "O640";
+    Grid g1( gridString, testdomain1 );
+    Grid g2( gridString, testdomain2 );
+
+    int trc = 640;
+    //Log::info() << "rgp1:" << std::endl;
+    trans::Trans transLocal1( g1, trc, util::Config( "type", "localopt3" ) );
+    //Log::info() << "rgp2:" << std::endl;
+    trans::Trans transLocal2( g2, trc, util::Config( "type", "localopt3" ) );
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
 
     functionspace::Spectral spectral( trc );
-    functionspace::StructuredColumns gridpoints( g );
 
-    int nb_scalar = 2, nb_vordiv = 2;
+    int nb_scalar = 1, nb_vordiv = 0;
     int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
     std::vector<double> sp( 2 * N * nb_scalar );
     std::vector<double> vor( 2 * N * nb_vordiv );
     std::vector<double> div( 2 * N * nb_vordiv );
     std::vector<double> rspecg( 2 * N );
-    std::vector<double> gp( nb_all * g.size() );
-    std::vector<double> rgp1( nb_all * g.size() );
-    std::vector<double> rgp2( nb_all * g.size() );
-    std::vector<double> rgp_analytic( g.size() );
+    std::vector<double> rgp1( nb_all * g1.size() );
+    std::vector<double> rgp2( nb_all * g2.size() );
+    std::vector<double> rgp1_analytic( g1.size() );
+    std::vector<double> rgp2_analytic( g2.size() );
 
     int icase = 0;
     for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) {         // vorticity, divergence, scalar
@@ -1024,41 +1021,54 @@ CASE( "test_trans_domain" ) {
                                 if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.;
                                 if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.;
 
-                                for ( int j = 0; j < nb_all * g.size(); j++ ) {
-                                    gp[j]   = 0.;
+                                for ( int j = 0; j < nb_all * g1.size(); j++ ) {
                                     rgp1[j] = 0.;
+                                }
+                                for ( int j = 0; j < nb_all * g2.size(); j++ ) {
                                     rgp2[j] = 0.;
                                 }
-                                for ( int j = 0; j < g.size(); j++ ) {
-                                    rgp_analytic[j] = 0.;
+                                for ( int j = 0; j < g1.size(); j++ ) {
+                                    rgp1_analytic[j] = 0.;
+                                }
+                                for ( int j = 0; j < g2.size(); j++ ) {
+                                    rgp2_analytic[j] = 0.;
                                 }
 
-                                spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(),
-                                                                  rgp_analytic.data(), ivar_in, ivar_out );
+                                spectral_transform_grid_analytic( trc, trc, n, m, imag, g1, rspecg.data(),
+                                                                  rgp1_analytic.data(), ivar_in, ivar_out );
+
+                                spectral_transform_grid_analytic( trc, trc, n, m, imag, g2, rspecg.data(),
+                                                                  rgp2_analytic.data(), ivar_in, ivar_out );
 
+                                //Log::info() << std::endl << "rgp1:";
                                 EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
                                                                        div.data(), rgp1.data() ) );
 
+                                //Log::info() << std::endl << "rgp2:";
                                 EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
                                                                        div.data(), rgp2.data() ) );
 
                                 int pos = ( ivar_out * nb_vordiv + jfld );
 
                                 double rms_gen1 =
-                                    compute_rms( g.size(), rgp1.data() + pos * g.size(), rgp_analytic.data() );
+                                    compute_rms( g1.size(), rgp1.data() + pos * g1.size(), rgp1_analytic.data() );
 
                                 double rms_gen2 =
-                                    compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() );
+                                    compute_rms( g2.size(), rgp2.data() + pos * g2.size(), rgp2_analytic.data() );
 
                                 //Log::info() << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out
                                 //            << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl
-                                //            << "rgp2:";
-                                //for ( int j = 0; j < g.size(); j++ ) {
-                                //    Log::info() << rgp2[pos * g.size() + j] << " ";
+                                //            << "rgp1:";
+                                //for ( int j = 0; j < g1.size(); j++ ) {
+                                //    Log::info() << rgp1[pos * g1.size() + j] << " ";
+                                //};
+                                //Log::info() << std::endl << "rgp2:";
+                                //for ( int j = 0; j < g2.size(); j++ ) {
+                                //    Log::info() << rgp2[pos * g2.size() + j] << " ";
                                 //};
-                                //Log::info() << std::endl << "analytic:";
-                                //for ( int j = 0; j < g.size(); j++ ) {
-                                //    Log::info() << rgp_analytic[j] << " ";
+                                //Log::info() << std::endl << "analytic1:";
+                                //for ( int j = 0; j < g1.size(); j++ ) {
+                                //    Log::info() << rgp1_analytic[j] << " ";
                                 //};
                                 //Log::info() << std::endl;
                                 rav1 += rms_gen1;

From 69ab84d5ac040bb30e77cfd3c9702de9afda21c9 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Sun, 22 Apr 2018 02:02:04 +0100
Subject: [PATCH 049/123] reading and writing fftw wisdom now via strings and
 streams

---
 src/atlas/trans/localopt3/TransLocalopt3.cc | 35 +++++++++++----------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index 1b9fbc39b..f47c3a68b 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -305,35 +305,36 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
                 int num_complex = ( nlonsMaxGlobal_ / 2 ) + 1;
                 fft_in_         = fftw_alloc_complex( nlats * num_complex );
                 fft_out_        = fftw_alloc_real( nlats * nlonsMaxGlobal_ );
+                std::string wisdomString( "" );
+                std::ifstream read( "wisdom.bin" );
+                if ( read.is_open() ) {
+                    std::getline( read, wisdomString );
+                    while ( read ) {
+                        std::string line;
+                        std::getline( read, line );
+                        wisdomString += line;
+                    }
+                }
+                read.close();
+                if ( wisdomString.length() > 0 ) { fftw_import_wisdom_from_string( &wisdomString[0u] ); }
                 if ( grid::RegularGrid( gridGlobal_ ) ) {
                     plans_.resize( 1 );
-                    FILE* file_fftw;
-                    file_fftw = fopen( "wisdom.bin", "r" );
-                    if ( file_fftw ) {
-                        fftw_import_wisdom_from_file( file_fftw );
-                        fclose( file_fftw );
-                    }
                     plans_[0] = fftw_plan_many_dft_c2r( 1, &nlonsMaxGlobal_, nlats, fft_in_, NULL, 1, num_complex,
                                                         fft_out_, NULL, 1, nlonsMaxGlobal_, FFTW_ESTIMATE );
                 }
                 else {
                     plans_.resize( nlatsLegDomain_ );
-                    FILE* file_fftw;
-                    file_fftw = fopen( "wisdom.bin", "r" );
-                    if ( file_fftw ) {
-                        fftw_import_wisdom_from_file( file_fftw );
-                        fclose( file_fftw );
-                    }
                     for ( int j = 0; j < nlatsLegDomain_; j++ ) {
                         int nlonsGlobalj = gs_global.nx( jlatMinLeg_ + j );
                         //ASSERT( nlonsGlobalj > 0 && nlonsGlobalj <= nlonsMaxGlobal_ );
                         plans_[j] = fftw_plan_dft_c2r_1d( nlonsGlobalj, fft_in_, fft_out_, FFTW_ESTIMATE );
                     }
-                    if ( !file_fftw ) {
-                        file_fftw = fopen( "wisdom.bin", "wb" );
-                        fftw_export_wisdom_to_file( file_fftw );
-                        fclose( file_fftw );
-                    }
+                }
+                std::string newWisdom( fftw_export_wisdom_to_string() );
+                if ( 1.1 * wisdomString.length() < newWisdom.length() ) {
+                    std::ofstream write( "wisdom.bin" );
+                    write << newWisdom;
+                    write.close();
                 }
             }
                 // other FFT implementations should be added with #elif statements

From 0cb6bfb98bb672a21c2eb4f38b79163e76e42140 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Wed, 25 Apr 2018 12:04:07 +0100
Subject: [PATCH 050/123] Trans type=local preliminary LegendreCache support

---
 VERSION.cmake                                 |   2 +-
 cmake/CompileFlags.cmake                      |   4 +-
 src/atlas/CMakeLists.txt                      |  20 +-
 src/atlas/option/TransOptions.cc              |   8 +
 src/atlas/option/TransOptions.h               |  14 ++
 src/atlas/trans/Trans.cc                      |   4 +-
 src/atlas/trans/Trans.h                       |  22 +-
 src/atlas/trans/VorDivToUV.cc                 |   4 +-
 .../FourierTransforms.cc                      |   2 +-
 .../FourierTransforms.h                       |   0
 .../LegendrePolynomials.cc                    |   2 +-
 .../LegendrePolynomials.h                     |   0
 .../LegendreTransforms.cc                     |   2 +-
 .../LegendreTransforms.h                      |   0
 .../{local => local_noopt}/TransLocal.cc      |  10 +-
 .../trans/{local => local_noopt}/TransLocal.h |   0
 .../{local => local_noopt}/VorDivToUVLocal.cc |   4 +-
 .../{local => local_noopt}/VorDivToUVLocal.h  |   0
 src/atlas/trans/localopt/TransLocalopt.cc     |   2 +-
 src/atlas/trans/localopt2/TransLocalopt2.cc   |   2 +-
 .../localopt3/LegendrePolynomialsopt3.cc      |   1 +
 src/atlas/trans/localopt3/TransLocalopt3.cc   | 203 ++++++++++++++----
 src/atlas/trans/localopt3/TransLocalopt3.h    |   6 +
 .../trans/localopt3/VorDivToUVLocalopt3.cc    |   3 +-
 src/tests/trans/test_transgeneral.cc          |  18 +-
 25 files changed, 260 insertions(+), 73 deletions(-)
 rename src/atlas/trans/{local => local_noopt}/FourierTransforms.cc (98%)
 rename src/atlas/trans/{local => local_noopt}/FourierTransforms.h (100%)
 rename src/atlas/trans/{local => local_noopt}/LegendrePolynomials.cc (99%)
 rename src/atlas/trans/{local => local_noopt}/LegendrePolynomials.h (100%)
 rename src/atlas/trans/{local => local_noopt}/LegendreTransforms.cc (97%)
 rename src/atlas/trans/{local => local_noopt}/LegendreTransforms.h (100%)
 rename src/atlas/trans/{local => local_noopt}/TransLocal.cc (98%)
 rename src/atlas/trans/{local => local_noopt}/TransLocal.h (100%)
 rename src/atlas/trans/{local => local_noopt}/VorDivToUVLocal.cc (98%)
 rename src/atlas/trans/{local => local_noopt}/VorDivToUVLocal.h (100%)

diff --git a/VERSION.cmake b/VERSION.cmake
index 756656b6a..62025fc5d 100644
--- a/VERSION.cmake
+++ b/VERSION.cmake
@@ -6,5 +6,5 @@
 # granted to it by virtue of its status as an intergovernmental organisation nor
 # does it submit to any jurisdiction.
 
-set  ( ${PROJECT_NAME}_VERSION_STR  "0.14.0" )
+set  ( ${PROJECT_NAME}_VERSION_STR  "0.14.0-opt-translocal" )
 
diff --git a/cmake/CompileFlags.cmake b/cmake/CompileFlags.cmake
index c92b7d581..ef238a21e 100644
--- a/cmake/CompileFlags.cmake
+++ b/cmake/CompileFlags.cmake
@@ -9,8 +9,8 @@ if( CMAKE_CXX_COMPILER_ID MATCHES Cray )
 
 endif()
 
-ecbuild_add_cxx_flags("-Wl,-ydgemm_")
-ecbuild_add_fortran_flags("-Wl,-ydgemm_")
+#ecbuild_add_cxx_flags("-Wl,-ydgemm_")
+#ecbuild_add_fortran_flags("-Wl,-ydgemm_")
 #ecbuild_add_cxx_flags("-fsanitize=address")
 #ecbuild_add_cxx_flags("-fsanitize=thread")
 #ecbuild_add_cxx_flags("-fsanitize=memory")
diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt
index a8ddfe4b4..6a10f2ff1 100644
--- a/src/atlas/CMakeLists.txt
+++ b/src/atlas/CMakeLists.txt
@@ -321,16 +321,16 @@ trans/Trans.h
 trans/Trans.cc
 trans/VorDivToUV.h
 trans/VorDivToUV.cc
-trans/local/TransLocal.h
-trans/local/TransLocal.cc
-trans/local/LegendrePolynomials.h
-trans/local/LegendrePolynomials.cc
-trans/local/LegendreTransforms.h
-trans/local/LegendreTransforms.cc
-trans/local/FourierTransforms.h
-trans/local/FourierTransforms.cc
-trans/local/VorDivToUVLocal.h
-trans/local/VorDivToUVLocal.cc
+trans/local_noopt/TransLocal.h
+trans/local_noopt/TransLocal.cc
+trans/local_noopt/LegendrePolynomials.h
+trans/local_noopt/LegendrePolynomials.cc
+trans/local_noopt/LegendreTransforms.h
+trans/local_noopt/LegendreTransforms.cc
+trans/local_noopt/FourierTransforms.h
+trans/local_noopt/FourierTransforms.cc
+trans/local_noopt/VorDivToUVLocal.h
+trans/local_noopt/VorDivToUVLocal.cc
 trans/localopt/TransLocalopt.h
 trans/localopt/TransLocalopt.cc
 trans/localopt/LegendrePolynomialsopt.h
diff --git a/src/atlas/option/TransOptions.cc b/src/atlas/option/TransOptions.cc
index 0f00dcd0f..b321838fd 100644
--- a/src/atlas/option/TransOptions.cc
+++ b/src/atlas/option/TransOptions.cc
@@ -52,6 +52,14 @@ read_legendre::read_legendre( const eckit::PathName& filepath ) {
     set( "read_legendre", filepath );
 }
 
+write_fft::write_fft( const eckit::PathName& filepath ) {
+    set( "write_fft", filepath );
+}
+
+read_fft::read_fft( const eckit::PathName& filepath ) {
+    set( "read_fft", filepath );
+}
+
 nproma::nproma( int nproma ) {
     set( "nproma", nproma );
 }
diff --git a/src/atlas/option/TransOptions.h b/src/atlas/option/TransOptions.h
index 3e548eb2c..952ea7465 100644
--- a/src/atlas/option/TransOptions.h
+++ b/src/atlas/option/TransOptions.h
@@ -84,6 +84,20 @@ class read_legendre : public util::Config {
 
 // ----------------------------------------------------------------------------
 
+class write_fft : public util::Config {
+public:
+    write_fft( const eckit::PathName& );
+};
+
+// ----------------------------------------------------------------------------
+
+class read_fft : public util::Config {
+public:
+    read_fft( const eckit::PathName& );
+};
+
+// ----------------------------------------------------------------------------
+
 class nproma : public util::Config {
     nproma( int );
 };
diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc
index b264cc7ee..af6b325eb 100644
--- a/src/atlas/trans/Trans.cc
+++ b/src/atlas/trans/Trans.cc
@@ -27,10 +27,10 @@
 #else
 #define TRANS_DEFAULT "local"
 #endif
-#include "atlas/trans/local/TransLocal.h"
+#include "atlas/trans/local_noopt/TransLocal.h"
 #include "atlas/trans/localopt/TransLocalopt.h"
 #include "atlas/trans/localopt2/TransLocalopt2.h"
-#include "atlas/trans/localopt3/TransLocalopt3.h"
+#include "atlas/trans/localopt3/TransLocalopt3.h" // --> recommended "local"
 
 namespace atlas {
 namespace trans {
diff --git a/src/atlas/trans/Trans.h b/src/atlas/trans/Trans.h
index c5c417201..a78cc1e49 100644
--- a/src/atlas/trans/Trans.h
+++ b/src/atlas/trans/Trans.h
@@ -19,6 +19,8 @@
 #include "eckit/memory/SharedPtr.h"
 
 #include "atlas/util/Config.h"
+#include "atlas/runtime/Trace.h"
+#include "atlas/runtime/Log.h"
 
 //-----------------------------------------------------------------------------
 // Forward declarations
@@ -55,6 +57,8 @@ class TransCacheFileEntry final : public TransCacheEntry {
 
 public:
     TransCacheFileEntry( const eckit::PathName& path ) : buffer_( path.size() ) {
+        ATLAS_TRACE();
+        Log::debug() << "Loading cache from file " << path << std::endl;
         std::unique_ptr<eckit::DataHandle> dh( path.fileHandle() );
         dh->openForRead();
         dh->read( buffer_.data(), buffer_.size() );
@@ -102,10 +106,24 @@ class Cache {
 class LegendreCache : public Cache {
 public:
     LegendreCache(const void* address, size_t size) :
-        Cache(std::make_shared<atlas::trans::TransCacheMemoryEntry>(address, size)) {
+        Cache( std::make_shared<TransCacheMemoryEntry>( address, size ) ) {
     }
     LegendreCache( const eckit::PathName& path ) :
-        Cache( std::shared_ptr<TransCacheEntry>( new TransCacheFileEntry( path ) ) ) {}
+        Cache( std::shared_ptr<TransCacheEntry>( new TransCacheFileEntry( path ) ) ) {
+    }
+};
+
+class LegendreFFTCache : public Cache {
+public:
+    LegendreFFTCache( const void* legendre_address, size_t legendre_size,
+                      const void* fft_address, size_t fft_size ) :
+        Cache( std::make_shared<TransCacheMemoryEntry>( legendre_address, legendre_size ),
+               std::make_shared<TransCacheMemoryEntry>( fft_address, fft_size ) ) {
+    }
+    LegendreFFTCache( const eckit::PathName& legendre_path, const eckit::PathName& fft_path ) :
+        Cache( std::shared_ptr<TransCacheEntry>( new TransCacheFileEntry( legendre_path ) ),
+               std::shared_ptr<TransCacheEntry>( new TransCacheFileEntry( fft_path ) ) ) {
+    }
 };
 
 class TransImpl : public eckit::Owned {
diff --git a/src/atlas/trans/VorDivToUV.cc b/src/atlas/trans/VorDivToUV.cc
index 727ead312..70feef4d8 100644
--- a/src/atlas/trans/VorDivToUV.cc
+++ b/src/atlas/trans/VorDivToUV.cc
@@ -26,10 +26,10 @@
 #else
 #define TRANS_DEFAULT "local"
 #endif
-#include "atlas/trans/local/VorDivToUVLocal.h"
+#include "atlas/trans/local_noopt/VorDivToUVLocal.h"
 #include "atlas/trans/localopt/VorDivToUVLocalopt.h"
 #include "atlas/trans/localopt2/VorDivToUVLocalopt2.h"
-#include "atlas/trans/localopt3/VorDivToUVLocalopt3.h"
+#include "atlas/trans/localopt3/VorDivToUVLocalopt3.h"  // --> recommended "local"
 
 namespace atlas {
 namespace trans {
diff --git a/src/atlas/trans/local/FourierTransforms.cc b/src/atlas/trans/local_noopt/FourierTransforms.cc
similarity index 98%
rename from src/atlas/trans/local/FourierTransforms.cc
rename to src/atlas/trans/local_noopt/FourierTransforms.cc
index 886cc2ee7..c9f6f2974 100644
--- a/src/atlas/trans/local/FourierTransforms.cc
+++ b/src/atlas/trans/local_noopt/FourierTransforms.cc
@@ -13,7 +13,7 @@
 #include <cmath>
 #include <iostream>
 
-#include "atlas/trans/local/FourierTransforms.h"
+#include "atlas/trans/local_noopt/FourierTransforms.h"
 
 namespace atlas {
 namespace trans {
diff --git a/src/atlas/trans/local/FourierTransforms.h b/src/atlas/trans/local_noopt/FourierTransforms.h
similarity index 100%
rename from src/atlas/trans/local/FourierTransforms.h
rename to src/atlas/trans/local_noopt/FourierTransforms.h
diff --git a/src/atlas/trans/local/LegendrePolynomials.cc b/src/atlas/trans/local_noopt/LegendrePolynomials.cc
similarity index 99%
rename from src/atlas/trans/local/LegendrePolynomials.cc
rename to src/atlas/trans/local_noopt/LegendrePolynomials.cc
index 639f76a82..26854fd69 100644
--- a/src/atlas/trans/local/LegendrePolynomials.cc
+++ b/src/atlas/trans/local_noopt/LegendrePolynomials.cc
@@ -13,7 +13,7 @@
 #include <limits>
 
 #include "atlas/array.h"
-#include "atlas/trans/local/LegendrePolynomials.h"
+#include "atlas/trans/local_noopt/LegendrePolynomials.h"
 
 namespace atlas {
 namespace trans {
diff --git a/src/atlas/trans/local/LegendrePolynomials.h b/src/atlas/trans/local_noopt/LegendrePolynomials.h
similarity index 100%
rename from src/atlas/trans/local/LegendrePolynomials.h
rename to src/atlas/trans/local_noopt/LegendrePolynomials.h
diff --git a/src/atlas/trans/local/LegendreTransforms.cc b/src/atlas/trans/local_noopt/LegendreTransforms.cc
similarity index 97%
rename from src/atlas/trans/local/LegendreTransforms.cc
rename to src/atlas/trans/local_noopt/LegendreTransforms.cc
index b18d28ca8..f82d9f401 100644
--- a/src/atlas/trans/local/LegendreTransforms.cc
+++ b/src/atlas/trans/local_noopt/LegendreTransforms.cc
@@ -10,7 +10,7 @@
 
 #include <cstddef>
 
-#include "atlas/trans/local/LegendreTransforms.h"
+#include "atlas/trans/local_noopt/LegendreTransforms.h"
 
 namespace atlas {
 namespace trans {
diff --git a/src/atlas/trans/local/LegendreTransforms.h b/src/atlas/trans/local_noopt/LegendreTransforms.h
similarity index 100%
rename from src/atlas/trans/local/LegendreTransforms.h
rename to src/atlas/trans/local_noopt/LegendreTransforms.h
diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local_noopt/TransLocal.cc
similarity index 98%
rename from src/atlas/trans/local/TransLocal.cc
rename to src/atlas/trans/local_noopt/TransLocal.cc
index f0f5973ef..33947d15f 100644
--- a/src/atlas/trans/local/TransLocal.cc
+++ b/src/atlas/trans/local_noopt/TransLocal.cc
@@ -8,23 +8,23 @@
  * nor does it submit to any jurisdiction.
  */
 
-#include "atlas/trans/local/TransLocal.h"
+#include "atlas/trans/local_noopt/TransLocal.h"
 #include "atlas/array.h"
 #include "atlas/option.h"
 #include "atlas/parallel/mpi/mpi.h"
 #include "atlas/runtime/ErrorHandling.h"
 #include "atlas/runtime/Log.h"
 #include "atlas/trans/VorDivToUV.h"
-#include "atlas/trans/local/FourierTransforms.h"
-#include "atlas/trans/local/LegendrePolynomials.h"
-#include "atlas/trans/local/LegendreTransforms.h"
+#include "atlas/trans/local_noopt/FourierTransforms.h"
+#include "atlas/trans/local_noopt/LegendrePolynomials.h"
+#include "atlas/trans/local_noopt/LegendreTransforms.h"
 #include "atlas/util/Constants.h"
 
 namespace atlas {
 namespace trans {
 
 namespace {
-static TransBuilderGrid<TransLocal> builder( "local" );
+static TransBuilderGrid<TransLocal> builder( "local_noopt" );
 }
 
 // --------------------------------------------------------------------------------------------------------------------
diff --git a/src/atlas/trans/local/TransLocal.h b/src/atlas/trans/local_noopt/TransLocal.h
similarity index 100%
rename from src/atlas/trans/local/TransLocal.h
rename to src/atlas/trans/local_noopt/TransLocal.h
diff --git a/src/atlas/trans/local/VorDivToUVLocal.cc b/src/atlas/trans/local_noopt/VorDivToUVLocal.cc
similarity index 98%
rename from src/atlas/trans/local/VorDivToUVLocal.cc
rename to src/atlas/trans/local_noopt/VorDivToUVLocal.cc
index 15065d4e1..0db63e792 100644
--- a/src/atlas/trans/local/VorDivToUVLocal.cc
+++ b/src/atlas/trans/local_noopt/VorDivToUVLocal.cc
@@ -9,7 +9,7 @@
  */
 
 #include <cmath>  // for std::sqrt
-#include "atlas/trans/local/VorDivToUVLocal.h"
+#include "atlas/trans/local_noopt/VorDivToUVLocal.h"
 #include "atlas/functionspace/Spectral.h"
 #include "atlas/runtime/Log.h"
 #include "atlas/util/Earth.h"
@@ -21,7 +21,7 @@ namespace atlas {
 namespace trans {
 
 namespace {
-static VorDivToUVBuilder<VorDivToUVLocal> builder( "local" );
+static VorDivToUVBuilder<VorDivToUVLocal> builder( "local_noopt" );
 }
 
 // --------------------------------------------------------------------------------------------------------------------
diff --git a/src/atlas/trans/local/VorDivToUVLocal.h b/src/atlas/trans/local_noopt/VorDivToUVLocal.h
similarity index 100%
rename from src/atlas/trans/local/VorDivToUVLocal.h
rename to src/atlas/trans/local_noopt/VorDivToUVLocal.h
diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index d0dfbfe4a..29ac535c1 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -16,7 +16,7 @@
 #include "atlas/runtime/ErrorHandling.h"
 #include "atlas/runtime/Log.h"
 #include "atlas/trans/VorDivToUV.h"
-#include "atlas/trans/local/LegendrePolynomials.h"
+#include "atlas/trans/local_noopt/LegendrePolynomials.h"
 #include "atlas/trans/localopt/FourierTransformsopt.h"
 #include "atlas/trans/localopt/LegendrePolynomialsopt.h"
 #include "atlas/trans/localopt/LegendreTransformsopt.h"
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index 6b51c2dc5..7a46c7245 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -16,7 +16,7 @@
 #include "atlas/runtime/ErrorHandling.h"
 #include "atlas/runtime/Log.h"
 #include "atlas/trans/VorDivToUV.h"
-#include "atlas/trans/local/LegendrePolynomials.h"
+#include "atlas/trans/local_noopt/LegendrePolynomials.h"
 #include "atlas/trans/localopt2/FourierTransformsopt2.h"
 #include "atlas/trans/localopt2/LegendrePolynomialsopt2.h"
 #include "atlas/trans/localopt2/LegendreTransformsopt2.h"
diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
index 1cddbc18b..2f16cc43b 100644
--- a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
+++ b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
@@ -158,6 +158,7 @@ void compute_legendre_polynomialsopt3(
     size_t leg_start_sym[],    // start indices for different zonal wave numbers, symmetric part
     size_t leg_start_asym[] )  // start indices for different zonal wave numbers, asymmetric part
 {
+    ATLAS_TRACE();
     auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; };
     std::vector<double> legpol( legendre_size( trc ) );
     std::vector<double> zfn( ( trc + 1 ) * ( trc + 1 ) );
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index f47c3a68b..eefe99c22 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -16,12 +16,13 @@
 #include "atlas/runtime/ErrorHandling.h"
 #include "atlas/runtime/Log.h"
 #include "atlas/trans/VorDivToUV.h"
-#include "atlas/trans/local/LegendrePolynomials.h"
+#include "atlas/trans/local_noopt/LegendrePolynomials.h"
 #include "atlas/trans/localopt3/LegendrePolynomialsopt3.h"
 #include "atlas/util/Constants.h"
 #include "eckit/eckit_config.h"
 #include "eckit/linalg/LinearAlgebra.h"
 #include "eckit/linalg/Matrix.h"
+#include "eckit/log/Bytes.h"
 #ifdef ECKIT_HAVE_MKL
 #include "mkl.h"
 #endif
@@ -30,7 +31,84 @@ namespace atlas {
 namespace trans {
 
 namespace {
-static TransBuilderGrid<TransLocalopt3> builder( "localopt3" );
+static TransBuilderGrid<TransLocalopt3> builder_deprecated( "localopt3" );
+static TransBuilderGrid<TransLocalopt3> builder( "local" );
+}
+
+namespace {
+class TransParameters {
+public:
+    TransParameters( const eckit::Configuration& config ) : config_( config ) {}
+    ~TransParameters() {}
+
+    bool scalar_derivatives() const { return config_.getBool( "scalar_derivatives", false ); }
+
+    bool wind_EW_derivatives() const { return config_.getBool( "wind_EW_derivatives", false ); }
+
+    bool vorticity_divergence_fields() const { return config_.getBool( "vorticity_divergence_fields", false ); }
+
+    std::string read_legendre() const { return config_.getString( "read_legendre", "" ); }
+
+    std::string write_legendre() const { return config_.getString( "write_legendre", "" ); }
+
+    std::string read_fft() const { return config_.getString( "read_fft", "" ); }
+
+    std::string write_fft() const { return config_.getString( "write_fft", "" ); }
+
+    bool global() const { return config_.getBool( "global", false ); }
+
+private:
+    const eckit::Configuration& config_;
+};
+
+struct ReadCache {
+ReadCache( const void* cache ) {
+    begin = (char*) cache;
+    pos = 0;
+}
+template <typename T>  T* read(size_t size) {
+    T* v = (T*) (begin + pos);
+    pos += size * sizeof(T);
+    return v;
+}
+char*  begin;
+size_t pos;
+};
+
+struct WriteCache {
+WriteCache( const eckit::PathName& file_path, long estimated_length = 0 ) : 
+    dh_( file_path.fileHandle( /*overwrite = */ true ) )
+{
+    dh_->openForWrite( estimated_length );
+    pos = 0;
+}
+~WriteCache() {
+    dh_->close();
+}
+template <typename T> void write( const T* v, long size) {
+    dh_->write( v , size * sizeof(T) );
+    pos += size * sizeof(T);
+}
+std::unique_ptr<eckit::DataHandle> dh_;
+size_t pos;
+};
+
+#if ATLAS_HAVE_FFTW
+struct FFTW_Wisdom {
+    char* wisdom;
+    FFTW_Wisdom() {
+        wisdom = fftw_export_wisdom_to_string();
+    }
+    ~FFTW_Wisdom() {
+        free( wisdom );
+    }
+};
+std::ostream& operator<< (std::ostream& out, const FFTW_Wisdom& w) {
+    out << w.wisdom;
+    return out;
+}
+#endif
+
 }
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -119,7 +197,13 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
                                 const eckit::Configuration& config ) :
     grid_( grid ),
     truncation_( truncation ),
-    precompute_( config.getBool( "precompute", true ) ) {
+    precompute_( config.getBool( "precompute", true ) ),
+    cache_( cache ),
+    legendre_cache_( cache.legendre().data() ),
+    legendre_cachesize_( cache.legendre().size() ),
+    fft_cache_( cache.fft().data() ),
+    fft_cachesize_( cache.fft().size() )
+{
     ATLAS_TRACE( "Precompute legendre opt3" );
 #ifdef ECKIT_HAVE_MKL
     eckit::linalg::LinearAlgebra::backend( "mkl" );  // might want to choose backend with this command
@@ -272,28 +356,38 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
             legendre_sym_begin_[0]  = 0;
             legendre_asym_begin_[0] = 0;
             for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-                size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ );
+                size_sym  += add_padding( num_n( truncation_ + 1, jm, true  ) * nlatsLeg_ );
                 size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ );
                 legendre_sym_begin_[jm + 1]  = size_sym;
                 legendre_asym_begin_[jm + 1] = size_asym;
             }
-            alloc_aligned( legendre_sym_, size_sym );
-            alloc_aligned( legendre_asym_, size_asym );
-            FILE* file_leg;
-            file_leg = fopen( "legendre.bin", "r" );
-            if ( file_leg ) {
-                fread( legendre_sym_, sizeof( double ), size_sym, file_leg );
-                fread( legendre_asym_, sizeof( double ), size_asym, file_leg );
-                fclose( file_leg );
-            }
-            else {
+
+            if( legendre_cache_ ) {
+                ReadCache legendre( legendre_cache_ );
+                legendre_sym_  = legendre.read<double>( size_sym  );
+                legendre_asym_ = legendre.read<double>( size_asym );
+                ASSERT( legendre.pos == legendre_cachesize_ );
+                // TODO: check this is all aligned...
+            } else {
+
+                alloc_aligned( legendre_sym_, size_sym );
+                alloc_aligned( legendre_asym_, size_asym );
+
                 compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_,
                                                   legendre_asym_, legendre_sym_begin_.data(),
                                                   legendre_asym_begin_.data() );
-                file_leg = fopen( "legendre.bin", "wb" );
-                fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg );
-                fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg );
-                fclose( file_leg );
+                std::string file_path = TransParameters(config).write_legendre();
+                if( file_path.size() ) {
+                    ATLAS_TRACE( "write_legendre" );
+                    size_t estimated_length = sizeof(double) * ( size_sym + size_asym );
+                    Log::debug() << "Writing Legendre cache file ..." << std::endl;
+                    Log::debug() << "    path      = " << file_path << std::endl;
+                    Log::debug() << "    estimated = " << eckit::Bytes(estimated_length) << std::endl;
+                    WriteCache legendre( file_path, estimated_length );
+                    legendre.write( legendre_sym_,  size_sym  );
+                    legendre.write( legendre_asym_, size_asym );
+                    Log::debug() << "Cache file size: " << eckit::Bytes(legendre.pos) << std::endl;
+                }
             }
         }
 
@@ -305,18 +399,23 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
                 int num_complex = ( nlonsMaxGlobal_ / 2 ) + 1;
                 fft_in_         = fftw_alloc_complex( nlats * num_complex );
                 fft_out_        = fftw_alloc_real( nlats * nlonsMaxGlobal_ );
-                std::string wisdomString( "" );
-                std::ifstream read( "wisdom.bin" );
-                if ( read.is_open() ) {
-                    std::getline( read, wisdomString );
-                    while ( read ) {
-                        std::string line;
-                        std::getline( read, line );
-                        wisdomString += line;
-                    }
+
+                if( fft_cache_ ) {
+                    Log::debug() << "Import FFTW wisdom from cache" << std::endl;
+                    fftw_import_wisdom_from_string( (const char*)fft_cache_ );
                 }
-                read.close();
-                if ( wisdomString.length() > 0 ) { fftw_import_wisdom_from_string( &wisdomString[0u] ); }
+//                std::string wisdomString( "" );
+//                std::ifstream read( "wisdom.bin" );
+//                if ( read.is_open() ) {
+//                    std::getline( read, wisdomString );
+//                    while ( read ) {
+//                        std::string line;
+//                        std::getline( read, line );
+//                        wisdomString += line;
+//                    }
+//                }
+//                read.close();
+//                if ( wisdomString.length() > 0 ) { fftw_import_wisdom_from_string( &wisdomString[0u] ); }
                 if ( grid::RegularGrid( gridGlobal_ ) ) {
                     plans_.resize( 1 );
                     plans_[0] = fftw_plan_many_dft_c2r( 1, &nlonsMaxGlobal_, nlats, fft_in_, NULL, 1, num_complex,
@@ -330,16 +429,36 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
                         plans_[j] = fftw_plan_dft_c2r_1d( nlonsGlobalj, fft_in_, fft_out_, FFTW_ESTIMATE );
                     }
                 }
-                std::string newWisdom( fftw_export_wisdom_to_string() );
-                if ( 1.1 * wisdomString.length() < newWisdom.length() ) {
-                    std::ofstream write( "wisdom.bin" );
-                    write << newWisdom;
-                    write.close();
+                std::string file_path = TransParameters(config).write_fft();
+                if( file_path.size() ) {
+                    Log::debug() << "Write FFTW wisdom to file " << file_path << std::endl;
+                    //bool success = fftw_export_wisdom_to_filename( "wisdom.bin" );
+                    //ASSERT( success );
+                    //std::ofstream write( file_path );
+                    //write << FFTW_Wisdom();
+
+                    FILE* file_fftw = fopen( file_path.c_str(), "wb" );
+                    fftw_export_wisdom_to_file( file_fftw );
+                    fclose( file_fftw );
+
                 }
+//                std::string newWisdom( fftw_export_wisdom_to_string() );
+//                if ( 1.1 * wisdomString.length() < newWisdom.length() ) {
+//                    std::ofstream write( "wisdom.bin" );
+//                    write << newWisdom;
+//                    write.close();
+//                }
             }
                 // other FFT implementations should be added with #elif statements
 #else
             useFFT_ = false;                             // no FFT implemented => default to dgemm
+            std::string file_path = TransParameters(config).write_fft();
+            if( file_path.size() ) {
+                std::ofstream write( file_path );
+                write << "No cache available, as FFTW is not enabled" << std::endl;
+                write.close();
+            }
+
 #endif
         }
         if ( !useFFT_ ) {
@@ -399,8 +518,10 @@ TransLocalopt3::TransLocalopt3( const Grid& grid, const long truncation, const e
 
 TransLocalopt3::~TransLocalopt3() {
     if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
-        free_aligned( legendre_sym_ );
-        free_aligned( legendre_asym_ );
+        if( not legendre_cache_ ) {
+            free_aligned( legendre_sym_ );
+            free_aligned( legendre_asym_ );
+        }
         if ( useFFT_ ) {
 #if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2
             for ( int j = 0; j < plans_.size(); j++ ) {
@@ -662,6 +783,9 @@ void TransLocalopt3::invtrans_fourier_regularopt3( const int nlats, const int nl
 #endif
     }
     else {
+
+        throw eckit::SeriousBug("dgemm for Fourier transforms currently broken. Make sure atlas is compiled with FFTW.",Here());
+
 #if !TRANSLOCAL_DGEMM2
         // dgemm-method 1
         {
@@ -757,9 +881,15 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid::
 #endif
     }
     else {
+
+        throw eckit::SeriousBug("dgemm for Fourier transforms currently broken. Make sure atlas is compiled with FFTW.",Here());
+
 #if !TRANSLOCAL_DGEMM2
         // dgemm-method 1
         {
+#warning dgemm currently broken for Fourier transforms. FFTW required!
+// Noticed that Matrix C is trying to access more than is actually allocated
+// Memory error!!! BEWARE!!!
             ATLAS_TRACE( "opt3 Fourier dgemm method 1" );
             eckit::linalg::Matrix A( fourier_, nlonsMax, ( truncation_ + 1 ) * 2 );
             eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats );
@@ -797,6 +927,7 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid::
         free_aligned( gp_opt3 );
 #endif
     }
+
 }
 
 // --------------------------------------------------------------------------------------------------------------------
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h
index 056d4e304..0651abeab 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.h
+++ b/src/atlas/trans/localopt3/TransLocalopt3.h
@@ -172,6 +172,12 @@ class TransLocalopt3 : public trans::TransImpl {
     double* fft_out_;
     std::vector<fftw_plan> plans_;
 #endif
+
+    Cache cache_;
+    const void* legendre_cache_{nullptr};
+    size_t legendre_cachesize_{0};
+    const void* fft_cache_{nullptr};
+    size_t fft_cachesize_{0};
 };
 
 //-----------------------------------------------------------------------------
diff --git a/src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc b/src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc
index 4e7267748..b7b9474af 100644
--- a/src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc
+++ b/src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc
@@ -21,7 +21,8 @@ namespace atlas {
 namespace trans {
 
 namespace {
-static VorDivToUVBuilder<VorDivToUVLocalopt3> builder( "localopt3" );
+static VorDivToUVBuilder<VorDivToUVLocalopt3> builder_deprecated( "localopt3" );
+static VorDivToUVBuilder<VorDivToUVLocalopt3> builder( "local" );
 }
 
 // --------------------------------------------------------------------------------------------------------------------
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 1a411227a..1d34fb7fb 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -29,9 +29,9 @@
 #include "atlas/parallel/mpi/mpi.h"
 #include "atlas/runtime/Trace.h"
 #include "atlas/trans/Trans.h"
-#include "atlas/trans/local/FourierTransforms.h"
-#include "atlas/trans/local/LegendrePolynomials.h"
-#include "atlas/trans/local/LegendreTransforms.h"
+#include "atlas/trans/local_noopt/FourierTransforms.h"
+#include "atlas/trans/local_noopt/LegendrePolynomials.h"
+#include "atlas/trans/local_noopt/LegendreTransforms.h"
 #include "atlas/util/Constants.h"
 #include "atlas/util/Earth.h"
 
@@ -965,15 +965,23 @@ CASE( "test_trans_domain" ) {
     //Domain testdomain1 = RectangularDomain( {-1., 1.}, {50., 55.} );
     Domain testdomain2 = RectangularDomain( {-1., 1.}, {-5., 40.} );
     // Grid: (Adjust the following line if the test takes too long!)
+
     std::string gridString = "O640";
     Grid g1( gridString, testdomain1 );
     Grid g2( gridString, testdomain2 );
 
     int trc = 640;
     //Log::info() << "rgp1:" << std::endl;
-    trans::Trans transLocal1( g1, trc, util::Config( "type", "localopt3" ) );
+    Trace t1(Here(),"translocal1 construction");
+    trans::Trans transLocal1( g1, trc, option::type("local") | option::write_legendre("legcache.bin") );
+    t1.stop();
     //Log::info() << "rgp2:" << std::endl;
-    trans::Trans transLocal2( g2, trc, util::Config( "type", "localopt3" ) );
+    trans::Cache cache;
+    ATLAS_TRACE_SCOPE("Read cache") cache = trans::LegendreCache("legcache.bin");
+    Trace t2(Here(),"translocal2 construction");
+    trans::Trans transLocal2( cache, g2, trc, option::type("local") );
+    t2.stop();
+
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
 
     functionspace::Spectral spectral( trc );

From 948a74e2b76f617faee2fb8233e7aa1ebf5738f0 Mon Sep 17 00:00:00 2001
From: Pedro Maciel <Pedro.Maciel@ecmwf.int>
Date: Fri, 13 Apr 2018 18:34:46 +0100
Subject: [PATCH 051/123] MIR-178, MIR-191, MIR-192, MIR-193: finite element
 'linear'/'bilinear' using point k-d tree

---
 .../meshgenerator/StructuredMeshGenerator.cc  | 37 +++++++++++--------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/src/atlas/meshgenerator/StructuredMeshGenerator.cc b/src/atlas/meshgenerator/StructuredMeshGenerator.cc
index 342cf608c..85c4f0a28 100644
--- a/src/atlas/meshgenerator/StructuredMeshGenerator.cc
+++ b/src/atlas/meshgenerator/StructuredMeshGenerator.cc
@@ -77,6 +77,10 @@ StructuredMeshGenerator::StructuredMeshGenerator( const eckit::Parametrisation&
     bool unique_pole;
     if ( p.get( "unique_pole", unique_pole ) ) options.set( "unique_pole", unique_pole );
 
+    bool force_include_pole;
+    if ( p.get( "force_include_north_pole", force_include_pole ) ) options.set( "force_include_north_pole", force_include_pole );
+    if ( p.get( "force_include_south_pole", force_include_pole ) ) options.set( "force_include_south_pole", force_include_pole );
+
     bool three_dimensional;
     if ( p.get( "three_dimensional", three_dimensional ) || p.get( "3d", three_dimensional ) )
         options.set( "3d", three_dimensional );
@@ -689,24 +693,23 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con
     int nparts = options.get<size_t>( "nb_parts" );
     int n, l;
 
-    bool has_point_at_north_pole       = rg.y().front() == 90 && rg.nx().front() > 0;
-    bool has_point_at_south_pole       = rg.y().back() == -90 && rg.nx().back() > 0;
     bool three_dimensional             = options.get<bool>( "3d" );
     bool periodic_east_west            = rg.periodic();
     bool include_periodic_ghost_points = periodic_east_west && !three_dimensional;
     bool remove_periodic_ghost_points  = periodic_east_west && three_dimensional;
 
-    bool include_north_pole = ( mypart == 0 ) && options.get<bool>( "include_pole" ) && !has_point_at_north_pole &&
-                              rg.domain().containsNorthPole();
-
-    bool include_south_pole = ( mypart == nparts - 1 ) && options.get<bool>( "include_pole" ) &&
-                              !has_point_at_south_pole && rg.domain().containsSouthPole();
+    bool has_point_at_north_pole = rg.y().front() == 90 && rg.nx().front() > 0;
+    bool has_point_at_south_pole = rg.y().back() == -90 && rg.nx().back() > 0;
+    bool possible_north_pole = !has_point_at_north_pole && rg.domain().containsNorthPole() && ( mypart == 0 );
+    bool possible_south_pole = !has_point_at_south_pole && rg.domain().containsSouthPole() && ( mypart == nparts - 1 );
 
-    bool patch_north_pole = ( mypart == 0 ) && options.get<bool>( "patch_pole" ) && !has_point_at_north_pole &&
-                            rg.domain().containsNorthPole() && rg.nx( 1 ) > 0;
+    bool include_north_pole = (possible_north_pole && options.get<bool>( "include_pole" )) || options.get<bool>( "force_include_north_pole" );
+    bool include_south_pole = (possible_south_pole && options.get<bool>( "include_pole" )) || options.get<bool>( "force_include_south_pole" );
+    bool patch_north_pole   =  possible_north_pole && options.get<bool>( "patch_pole" ) && rg.nx( 1 ) > 0;
+    bool patch_south_pole   =  possible_south_pole && options.get<bool>( "patch_pole" ) && rg.nx( rg.ny() - 2 ) > 0;
 
-    bool patch_south_pole = ( mypart == nparts - 1 ) && options.get<bool>( "patch_pole" ) && !has_point_at_south_pole &&
-                            rg.domain().containsSouthPole() && rg.nx( rg.ny() - 2 ) > 0;
+    int nnewnodes = (!has_point_at_north_pole && include_north_pole ? 1 : 0)
+                  + (!has_point_at_south_pole && include_south_pole ? 1 : 0);
 
     if ( three_dimensional && nparts != 1 )
         throw BadParameter( "Cannot generate three_dimensional mesh in parallel", Here() );
@@ -735,6 +738,7 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con
             if ( region.lat_end[jlat] >= rg.nx( jlat ) ) --nnodes;
         }
     }
+    ASSERT( nnodes >= nnewnodes );
 
 #if DEBUG_OUTPUT
     ATLAS_DEBUG_VAR( include_periodic_ghost_points );
@@ -976,6 +980,9 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con
         ++jnode;
     }
 
+    nodes.metadata().set<size_t>( "NbRealPts", size_t(nnodes - nnewnodes) );
+    nodes.metadata().set<size_t>( "NbVirtualPts", size_t(nnewnodes) );
+
     nodes.global_index().metadata().set( "human_readable", true );
     nodes.global_index().metadata().set( "min", 1 );
     nodes.global_index().metadata().set( "max", max_glb_idx );
@@ -992,13 +999,13 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con
     array::ArrayView<int, 1> cells_patch                  = array::make_view<int, 1>( mesh.cells().field( "patch" ) );
 
     /*
- * label all patch cells a non-patch
- */
+     * label all patch cells a non-patch
+     */
     cells_patch.assign( 0 );
 
     /*
-Fill in connectivity tables with global node indices first
-*/
+     * Fill in connectivity tables with global node indices first
+     */
     int jcell;
     int jquad       = 0;
     int jtriag      = 0;

From 271616cd86bef4982b94b921a89bfdba4811a7ea Mon Sep 17 00:00:00 2001
From: Pedro Maciel <Pedro.Maciel@ecmwf.int>
Date: Wed, 25 Apr 2018 16:25:52 +0100
Subject: [PATCH 052/123] MIR-178, MIR-191, MIR-192, MIR-193: finite element
 'linear'/'bilinear' using point k-d tree

---
 src/atlas/meshgenerator/StructuredMeshGenerator.cc | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/atlas/meshgenerator/StructuredMeshGenerator.cc b/src/atlas/meshgenerator/StructuredMeshGenerator.cc
index 85c4f0a28..6f5497e69 100644
--- a/src/atlas/meshgenerator/StructuredMeshGenerator.cc
+++ b/src/atlas/meshgenerator/StructuredMeshGenerator.cc
@@ -703,8 +703,11 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con
     bool possible_north_pole = !has_point_at_north_pole && rg.domain().containsNorthPole() && ( mypart == 0 );
     bool possible_south_pole = !has_point_at_south_pole && rg.domain().containsSouthPole() && ( mypart == nparts - 1 );
 
-    bool include_north_pole = (possible_north_pole && options.get<bool>( "include_pole" )) || options.get<bool>( "force_include_north_pole" );
-    bool include_south_pole = (possible_south_pole && options.get<bool>( "include_pole" )) || options.get<bool>( "force_include_south_pole" );
+    bool force_include_north_pole(options.has("force_include_north_pole") && options.get<bool>( "force_include_north_pole" ));
+    bool force_include_south_pole(options.has("force_include_south_pole") && options.get<bool>( "force_include_south_pole" ));
+
+    bool include_north_pole = (possible_north_pole && options.get<bool>( "include_pole" )) || force_include_north_pole;
+    bool include_south_pole = (possible_south_pole && options.get<bool>( "include_pole" )) || force_include_south_pole;
     bool patch_north_pole   =  possible_north_pole && options.get<bool>( "patch_pole" ) && rg.nx( 1 ) > 0;
     bool patch_south_pole   =  possible_south_pole && options.get<bool>( "patch_pole" ) && rg.nx( rg.ny() - 2 ) > 0;
 

From a7f2e4ccf6412b1e1683d4c7a7962ed14982fd84 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Fri, 27 Apr 2018 14:07:17 +0100
Subject: [PATCH 053/123] Add unit-tests for caching indicating some work todo

---
 src/atlas/option/TransOptions.cc            |   7 +-
 src/atlas/option/TransOptions.h             |  19 +-
 src/atlas/runtime/trace/CallStack.cc        |   4 +-
 src/atlas/runtime/trace/CallStack.h         |   3 +-
 src/atlas/runtime/trace/Nesting.cc          |  12 +-
 src/atlas/runtime/trace/Nesting.h           |   3 +-
 src/atlas/runtime/trace/TraceT.h            |   4 +-
 src/atlas/trans/localopt3/TransLocalopt3.cc |  94 ++++++--
 src/tests/trans/CMakeLists.txt              |   6 +
 src/tests/trans/test_trans_localcache.cc    | 243 ++++++++++++++++++++
 src/tests/trans/test_transgeneral.cc        |   5 +-
 11 files changed, 372 insertions(+), 28 deletions(-)
 create mode 100644 src/tests/trans/test_trans_localcache.cc

diff --git a/src/atlas/option/TransOptions.cc b/src/atlas/option/TransOptions.cc
index b321838fd..82131506f 100644
--- a/src/atlas/option/TransOptions.cc
+++ b/src/atlas/option/TransOptions.cc
@@ -9,6 +9,7 @@
  */
 
 #include "atlas/option/TransOptions.h"
+#include "atlas/grid.h"
 
 // ----------------------------------------------------------------------------
 
@@ -32,7 +33,7 @@ flt::flt( bool flt ) {
 }
 
 fft::fft( FFT fft ) {
-    static const std::map<FFT, std::string> FFT_to_string = {{FFT::FFT992, "FFT992"}, {FFT::FFTW, "FFTW"}};
+    static const std::map<FFT, std::string> FFT_to_string = { {FFT::OFF, "OFF"}, {FFT::FFT992, "FFT992"}, {FFT::FFTW, "FFTW"}};
     set( "fft", FFT_to_string.at( fft ) );
 }
 
@@ -48,6 +49,10 @@ write_legendre::write_legendre( const eckit::PathName& filepath ) {
     set( "write_legendre", filepath );
 }
 
+global_grid::global_grid( const Grid& grid ) {
+    set( "global_grid", grid.spec() );
+}
+
 read_legendre::read_legendre( const eckit::PathName& filepath ) {
     set( "read_legendre", filepath );
 }
diff --git a/src/atlas/option/TransOptions.h b/src/atlas/option/TransOptions.h
index 952ea7465..a43e836c8 100644
--- a/src/atlas/option/TransOptions.h
+++ b/src/atlas/option/TransOptions.h
@@ -14,6 +14,10 @@
 
 // ----------------------------------------------------------------------------
 
+namespace atlas { class Grid; }
+
+// ----------------------------------------------------------------------------
+
 namespace atlas {
 namespace option {
 
@@ -21,8 +25,9 @@ namespace option {
 
 enum class FFT
 {
+    OFF    = 0,
     FFT992 = 1,
-    FFTW   = 2
+    FFTW   = 2,
 };
 
 // ----------------------------------------------------------------------------
@@ -61,6 +66,11 @@ class fft : public util::Config {
     fft( const std::string& );
 };
 
+class no_fft : public fft {
+public:
+    no_fft() : fft( FFT::OFF ) {}
+};
+
 // ----------------------------------------------------------------------------
 
 class split_latitudes : public util::Config {
@@ -77,6 +87,13 @@ class write_legendre : public util::Config {
 
 // ----------------------------------------------------------------------------
 
+class global_grid : public util::Config {
+public:
+    global_grid( const Grid& );
+};
+
+// ----------------------------------------------------------------------------
+
 class read_legendre : public util::Config {
 public:
     read_legendre( const eckit::PathName& );
diff --git a/src/atlas/runtime/trace/CallStack.cc b/src/atlas/runtime/trace/CallStack.cc
index bb2c043d5..41b6366a9 100644
--- a/src/atlas/runtime/trace/CallStack.cc
+++ b/src/atlas/runtime/trace/CallStack.cc
@@ -9,8 +9,8 @@ namespace atlas {
 namespace runtime {
 namespace trace {
 
-void CallStack::push_front( const eckit::CodeLocation& loc ) {
-    stack_.push_front( std::hash<std::string>{}( loc.asString() ) );
+void CallStack::push_front( const eckit::CodeLocation& loc, const std::string& id ) {
+    stack_.push_front( std::hash<std::string>{}( loc.asString()+id ) );
 }
 
 void CallStack::pop_front() {
diff --git a/src/atlas/runtime/trace/CallStack.h b/src/atlas/runtime/trace/CallStack.h
index 793234dd7..72c50d06c 100644
--- a/src/atlas/runtime/trace/CallStack.h
+++ b/src/atlas/runtime/trace/CallStack.h
@@ -2,6 +2,7 @@
 
 #include <cstddef>
 #include <list>
+#include <string>
 
 namespace eckit {
 class CodeLocation;
@@ -19,7 +20,7 @@ class CallStack {
     using const_reverse_iterator = std::list<size_t>::const_reverse_iterator;
 
 public:
-    void push_front( const eckit::CodeLocation& );
+    void push_front( const eckit::CodeLocation&, const std::string& id = "" );
     void pop_front();
 
     const_iterator begin() const { return stack_.begin(); }
diff --git a/src/atlas/runtime/trace/Nesting.cc b/src/atlas/runtime/trace/Nesting.cc
index b34f98513..fe46dbc8c 100644
--- a/src/atlas/runtime/trace/Nesting.cc
+++ b/src/atlas/runtime/trace/Nesting.cc
@@ -29,14 +29,18 @@ class NestingState {
         return state;
     }
     operator CallStack() const { return stack_; }
-    CallStack& push( const eckit::CodeLocation& loc ) {
-        stack_.push_front( loc );
+    CallStack& push( const eckit::CodeLocation& loc, const std::string& id ) {
+        stack_.push_front( loc, id );
         return stack_;
     }
     void pop() { stack_.pop_front(); }
 };
 
-Nesting::Nesting( const eckit::CodeLocation& loc ) : loc_( loc ), stack_( NestingState::instance().push( loc ) ) {}
+Nesting::Nesting( const eckit::CodeLocation& loc, const std::string& id ) : 
+    loc_( loc ),
+    id_( id ),
+    stack_( NestingState::instance().push( loc, id ) ) {
+}
 
 Nesting::~Nesting() {
     stop();
@@ -51,7 +55,7 @@ void Nesting::stop() {
 
 void Nesting::start() {
     if ( not running_ ) {
-        NestingState::instance().push( loc_ );
+        NestingState::instance().push( loc_, id_ );
         running_ = true;
     }
 }
diff --git a/src/atlas/runtime/trace/Nesting.h b/src/atlas/runtime/trace/Nesting.h
index ae8a5effd..ccf64cc49 100644
--- a/src/atlas/runtime/trace/Nesting.h
+++ b/src/atlas/runtime/trace/Nesting.h
@@ -22,7 +22,7 @@ namespace trace {
 
 class Nesting {
 public:
-    Nesting( const eckit::CodeLocation& );
+    Nesting( const eckit::CodeLocation&, const std::string& id = "" );
     ~Nesting();
     operator CallStack() const { return stack_; }
     void stop();
@@ -31,6 +31,7 @@ class Nesting {
 private:
     CallStack stack_;
     eckit::CodeLocation loc_;
+    std::string id_;
     bool running_{true};
 };
 
diff --git a/src/atlas/runtime/trace/TraceT.h b/src/atlas/runtime/trace/TraceT.h
index 672f14af6..8f86e863f 100644
--- a/src/atlas/runtime/trace/TraceT.h
+++ b/src/atlas/runtime/trace/TraceT.h
@@ -90,7 +90,7 @@ template <typename TraceTraits>
 inline TraceT<TraceTraits>::TraceT( const eckit::CodeLocation& loc, const std::string& title ) :
     loc_( loc ),
     title_( title ),
-    nesting_( loc ) {
+    nesting_( loc, title ) {
     start();
 }
 
@@ -106,7 +106,7 @@ template <typename TraceTraits>
 inline TraceT<TraceTraits>::TraceT( const eckit::CodeLocation& loc, const std::string& title, const Labels& labels ) :
     loc_( loc ),
     title_( title ),
-    nesting_( loc ),
+    nesting_( loc, title ),
     labels_( labels ) {
     start();
 }
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index eefe99c22..7e7851bda 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -22,7 +22,9 @@
 #include "eckit/eckit_config.h"
 #include "eckit/linalg/LinearAlgebra.h"
 #include "eckit/linalg/Matrix.h"
+#include "eckit/parser/JSON.h"
 #include "eckit/log/Bytes.h"
+#include "eckit/config/YAMLConfiguration.h"
 #ifdef ECKIT_HAVE_MKL
 #include "mkl.h"
 #endif
@@ -55,8 +57,28 @@ class TransParameters {
 
     std::string write_fft() const { return config_.getString( "write_fft", "" ); }
 
+    Grid global_grid() const {
+        Grid g;
+        util::Config spec;
+        if( config_.get("global_grid",spec) ) {
+            g = Grid( spec );
+        }
+        return g;
+    }
+
     bool global() const { return config_.getBool( "global", false ); }
 
+    int fft() const {
+        static const std::map<std::string, int> string_to_FFT =
+          { { "OFF", (int) option::FFT::OFF }, { "FFTW", (int) option::FFT::FFTW } };
+#ifdef ATLAS_HAVE_FFTW
+        std::string fft_default = "FFTW";
+#else
+        std::string fft_default = "OFF";
+#endif
+        return string_to_FFT.at( config_.getString( "fft", fft_default ) );
+    }
+
 private:
     const eckit::Configuration& config_;
 };
@@ -71,15 +93,27 @@ template <typename T>  T* read(size_t size) {
     pos += size * sizeof(T);
     return v;
 }
+
+Grid read_grid() {
+    long& size = *read<long>(1);
+    char* json = read<char>(size);
+    return Grid( eckit::YAMLConfiguration( std::string( json, size ) ) );
+}
+
 char*  begin;
 size_t pos;
 };
 
 struct WriteCache {
-WriteCache( const eckit::PathName& file_path, long estimated_length = 0 ) : 
+WriteCache( const eckit::PathName& file_path) :
     dh_( file_path.fileHandle( /*overwrite = */ true ) )
 {
-    dh_->openForWrite( estimated_length );
+    if( file_path.exists() ) {
+        std::stringstream err;
+        err << "Cannot open cache file " << file_path << " for writing as it already exists. Remove first.";
+        throw eckit::BadParameter( err.str(), Here() );
+    }
+    dh_->openForWrite(0);
     pos = 0;
 }
 ~WriteCache() {
@@ -89,6 +123,24 @@ template <typename T> void write( const T* v, long size) {
     dh_->write( v , size * sizeof(T) );
     pos += size * sizeof(T);
 }
+
+//void write( long v ) {
+//    dh_->write( &v , sizeof(long) );
+//    pos += sizeof(long);
+//}
+
+//void write( const Grid& grid ) {
+//    std::stringstream s;
+//    eckit::JSON json(s);
+//    json << grid.spec();
+//    std::string grid_spec( s.str() );
+//    long size = grid_spec.size();
+//    write( size );
+//    dh_->write( grid_spec.c_str(), grid_spec.size() );
+//    pos += grid_spec.size();
+//}
+
+
 std::unique_ptr<eckit::DataHandle> dh_;
 size_t pos;
 };
@@ -135,6 +187,11 @@ int num_n( const int truncation, const int m, const bool symmetric ) {
 }
 
 void alloc_aligned( double*& ptr, size_t n ) {
+#warning todo1
+    // If we can assume that posix_memalign gives the same result, we would not need to support mkl_malloc
+    // We can then remove the include of mkl.h above (simplifying things).
+    // As well there is the C++ functions "std::align" (http://en.cppreference.com/w/cpp/memory/align)
+    // that we could look into.
 #ifdef ECKIT_HAVE_MKL
     int al = 64;
     ptr    = (double*)mkl_malloc( sizeof( double ) * n, al );
@@ -216,7 +273,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
     int nlats         = 0;
     int nlonsMax      = 0;
     int neqtr         = 0;
-    useFFT_           = true;
+    useFFT_           = TransParameters(config).fft();
     unstruct_precomp_ = true;
     nlatsNH_          = 0;
     nlatsSH_          = 0;
@@ -247,11 +304,19 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
             nlatsLegDomain_ = nlatsSH_;
         }
 
-        // compute latitudinal location of domain relative to global grid:
-        gridGlobal_ = Grid( grid.name() );
+
+        gridGlobal_ = TransParameters(config).global_grid();
+        if( not gridGlobal_ ) {
+            if ( grid_.domain().global() ) {
+                gridGlobal_ = grid_;
+            } else {
+                throw eckit::BadParameter("A global structured grid is required to be passed in the optional arguments",Here());
+            }
+        }
+
         grid::StructuredGrid gs_global( gridGlobal_ );
-        grid::StructuredGrid* gsLeg = &g;
-        if ( useGlobalLeg ) { gsLeg = &gs_global; };
+        ASSERT( gs_global ); // assert structured grid
+        grid::StructuredGrid gsLeg = ( useGlobalLeg ? gs_global : g );
         nlonsMaxGlobal_ = gs_global.nxmax();
         jlonMin_.resize( 1 );
         jlonMin_[0]  = 0;
@@ -298,7 +363,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
 
         // compute longitudinal location of domain within global grid for using FFT:
         auto wrapAngle = [&]( double angle ) {
-            double result = fmod( angle, 360 );
+            double result = std::fmod( angle, 360. );
             if ( result < 0. ) { result += 360.; }
             return result;
         };
@@ -329,12 +394,12 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         std::vector<double> lons( nlonsMax );
         if ( nlatsNH_ >= nlatsSH_ || useGlobalLeg ) {
             for ( size_t j = 0; j < nlatsLeg_; ++j ) {
-                lats[j] = gsLeg->y( j ) * util::Constants::degreesToRadians();
+                lats[j] = gsLeg.y( j ) * util::Constants::degreesToRadians();
             }
         }
         else {
             for ( size_t j = nlats - 1, idx = 0; idx < nlatsLeg_; --j, ++idx ) {
-                lats[idx] = -gsLeg->y( j ) * util::Constants::degreesToRadians();
+                lats[idx] = -gsLeg.y( j ) * util::Constants::degreesToRadians();
             }
         }
         for ( size_t j = 0; j < nlonsMax; ++j ) {
@@ -379,11 +444,9 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
                 std::string file_path = TransParameters(config).write_legendre();
                 if( file_path.size() ) {
                     ATLAS_TRACE( "write_legendre" );
-                    size_t estimated_length = sizeof(double) * ( size_sym + size_asym );
                     Log::debug() << "Writing Legendre cache file ..." << std::endl;
                     Log::debug() << "    path      = " << file_path << std::endl;
-                    Log::debug() << "    estimated = " << eckit::Bytes(estimated_length) << std::endl;
-                    WriteCache legendre( file_path, estimated_length );
+                    WriteCache legendre( file_path );
                     legendre.write( legendre_sym_,  size_sym  );
                     legendre.write( legendre_asym_, size_asym );
                     Log::debug() << "Cache file size: " << eckit::Bytes(legendre.pos) << std::endl;
@@ -440,7 +503,6 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
                     FILE* file_fftw = fopen( file_path.c_str(), "wb" );
                     fftw_export_wisdom_to_file( file_fftw );
                     fclose( file_fftw );
-
                 }
 //                std::string newWisdom( fftw_export_wisdom_to_string() );
 //                if ( 1.1 * wisdomString.length() < newWisdom.length() ) {
@@ -506,6 +568,10 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
             }
             compute_legendre_polynomials_allopt3( truncation_, grid_.size(), lats.data(), legendre_ );
         }
+        if( TransParameters(config).write_legendre().size() ) {
+            throw eckit::NotImplemented("Caching for unstructured grids not implemented",Here());
+        }
+
     }
 }  // namespace trans
 
diff --git a/src/tests/trans/CMakeLists.txt b/src/tests/trans/CMakeLists.txt
index 98139106c..e9cbad59f 100644
--- a/src/tests/trans/CMakeLists.txt
+++ b/src/tests/trans/CMakeLists.txt
@@ -51,3 +51,9 @@ ecbuild_add_test( TARGET atlas_test_transgeneral
   ENVIRONMENT ATLAS_TRACE_REPORT=1
 )
 
+ecbuild_add_test( TARGET atlas_test_trans_localcache
+  SOURCES   test_trans_localcache.cc
+  LIBS      atlas
+  ENVIRONMENT ATLAS_TRACE_REPORT=1
+)
+
diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc
new file mode 100644
index 000000000..9dce62122
--- /dev/null
+++ b/src/tests/trans/test_trans_localcache.cc
@@ -0,0 +1,243 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#include <algorithm>
+#include <iomanip>
+
+#include "atlas/array/MakeView.h"
+#include "atlas/field/FieldSet.h"
+#include "atlas/functionspace/NodeColumns.h"
+#include "atlas/functionspace/Spectral.h"
+#include "atlas/functionspace/StructuredColumns.h"
+#include "atlas/grid.h"
+#include "atlas/grid/Distribution.h"
+#include "atlas/grid/Partitioner.h"
+#include "atlas/grid/detail/partitioner/EqualRegionsPartitioner.h"
+#include "atlas/grid/detail/partitioner/TransPartitioner.h"
+#include "atlas/library/Library.h"
+#include "atlas/mesh/Mesh.h"
+#include "atlas/mesh/Nodes.h"
+#include "atlas/meshgenerator/StructuredMeshGenerator.h"
+#include "atlas/output/Gmsh.h"
+#include "atlas/parallel/mpi/mpi.h"
+#include "atlas/runtime/Trace.h"
+#include "atlas/trans/Trans.h"
+#include "atlas/trans/local_noopt/FourierTransforms.h"
+#include "atlas/trans/local_noopt/LegendrePolynomials.h"
+#include "atlas/trans/local_noopt/LegendreTransforms.h"
+#include "atlas/util/Constants.h"
+#include "atlas/util/Earth.h"
+#include "eckit/utils/MD5.h"
+
+#include "tests/AtlasTestEnvironment.h"
+
+#if ATLAS_HAVE_TRANS
+#include "transi/trans.h"
+#endif
+
+using namespace eckit;
+
+using atlas::array::Array;
+using atlas::array::ArrayView;
+using atlas::array::make_view;
+
+namespace atlas {
+namespace test {
+
+//-----------------------------------------------------------------------------
+
+struct AtlasTransEnvironment : public AtlasTestEnvironment {
+    AtlasTransEnvironment( int argc, char* argv[] ) : AtlasTestEnvironment( argc, argv ) {
+#if ATLAS_HAVE_TRANS
+        trans_use_mpi( mpi::comm().size() > 1 );
+        trans_init();
+#endif
+    }
+
+    ~AtlasTransEnvironment() {
+#if ATLAS_HAVE_TRANS
+        trans_finalize();
+#endif
+    }
+};
+
+using trans::Trans;
+using trans::LegendreCache;
+using trans::Cache;
+using grid::StructuredGrid;
+using grid::GaussianGrid;
+using XSpace = StructuredGrid::XSpace;
+using YSpace = StructuredGrid::YSpace;
+using LinearSpacing = grid::LinearSpacing;
+
+eckit::PathName CacheFile(const std::string& path) {
+    eckit::PathName cachefile(path);
+    if( cachefile.exists() ) cachefile.unlink();
+    return cachefile;
+}
+
+std::string hash( const trans::Cache& c ) {
+    return eckit::MD5( c.legendre().data(), c.legendre().size() ).digest();
+}
+
+std::string hash( const eckit::PathName& f ) {
+    return hash( LegendreCache(f) );
+}
+
+std::string F(int n)    { return "F"   +std::to_string(n); }
+std::string O(int n)    { return "O"   +std::to_string(n); }
+std::string N(int n)    { return "N"   +std::to_string(n); }
+std::string L(int n)    { return "L"   +std::to_string(n); }
+std::string S(int n)    { return "S"   +std::to_string(n); }
+std::string Slon(int n) { return "Slon"+std::to_string(n); }
+std::string Slat(int n) { return "Slat"+std::to_string(n); }
+
+//-----------------------------------------------------------------------------
+
+CASE( "test_global_grids" ) {
+    // auto resolutions = { 32, 64, 160, 320, 640 };
+    auto resolutions = { 32, 64 };
+    for( int n : resolutions ) {
+        int t = n-1;
+        auto cases = { 
+            std::make_pair(F(n),t),
+            std::make_pair(O(n),t),
+            std::make_pair(N(n),t),
+            std::make_pair(L(n),t),
+            std::make_pair(S(n),t),
+            std::make_pair(Slon(n),t),
+            std::make_pair(Slat(n),t),
+        };
+
+        auto F_cachefile = CacheFile("leg_"+F(n)+"-T"+std::to_string(t)+".bin");
+        Trans( Grid(F(n)), t, option::type("local") | option::write_legendre( F_cachefile ) );
+        Cache F_cache = LegendreCache( F_cachefile );
+        auto F_cache_hash = hash(F_cache);
+
+        Cache cache;
+        for( auto _case : cases )
+        {
+            auto gridname   = _case.first;
+            auto truncation = _case.second;
+            Log::info() << "Case "+gridname+" T"+std::to_string(truncation) << std::endl;
+            ATLAS_TRACE("Case "+gridname+" T"+std::to_string(truncation));
+            Grid grid(gridname);
+            auto cachefile = CacheFile("leg_"+gridname+"-T"+std::to_string(truncation)+".bin");
+            ATLAS_TRACE_SCOPE("create without cache")
+                Trans( grid, truncation, option::type("local") );
+            ATLAS_TRACE_SCOPE("create without cache and write")
+                Trans( grid, truncation, option::type("local") | option::write_legendre( cachefile ) );
+            ATLAS_TRACE_SCOPE("read cache")
+                cache = LegendreCache( cachefile );
+            ATLAS_TRACE_SCOPE("create with cache")
+                Trans( cache, grid, truncation, option::type("local") );
+
+            if( GaussianGrid(grid) ) {
+                ASSERT( hash(cache) == F_cache_hash );
+            }
+        }
+    }
+}
+
+CASE( "test_global_grids_with_subdomain" ) {
+    int n = 64;
+    int t = n-1;
+    auto cases = { 
+        std::make_pair(F(n),t),
+        std::make_pair(O(n),t),
+        std::make_pair(N(n),t),
+        std::make_pair(L(n),t),
+        std::make_pair(S(n),t),
+        std::make_pair(Slon(n),t),
+        std::make_pair(Slat(n),t)
+    };
+    auto domains = std::vector<Domain>{
+        ZonalBandDomain  ( {-10., 5.} ),
+        RectangularDomain( {-1., 1.}, {50., 55.} ),
+        RectangularDomain( {-1., 1.}, {-5., 40.} ),
+    };
+    for( auto _case : cases )
+    {
+        auto gridname   = _case.first;
+        auto truncation = _case.second;
+
+        ATLAS_TRACE("Case "+gridname+" T"+std::to_string(truncation));
+
+        Grid global_grid( gridname );
+
+        auto global_cachefile = CacheFile( "leg_"+gridname+"-T"+std::to_string(truncation)+".bin" );
+        Trans( Grid(gridname), truncation, option::type("local") | option::write_legendre( global_cachefile ) );
+
+        Cache global_cache;
+        ATLAS_TRACE_SCOPE("read cache")
+            global_cache = LegendreCache( global_cachefile );
+        auto global_hash = hash(global_cache);
+
+        for( auto domain : domains ) {
+            Grid grid( gridname, domain );
+            auto cachefile = CacheFile("leg_"+gridname+"-T"+std::to_string(truncation)+"-domain.bin");
+            ATLAS_TRACE_SCOPE("create without cache and write")
+                Trans( Grid(gridname), truncation, option::type("local") | option::global_grid(global_grid) | option::write_legendre( cachefile ) );
+            LegendreCache new_cache = LegendreCache(cachefile);
+            ASSERT( hash(new_cache) == global_hash );
+            ATLAS_TRACE_SCOPE("create with cache")
+                Trans( global_cache, Grid(gridname), truncation, option::type("local") );
+        }
+    }
+}
+
+CASE( "test_regional_grids_nested_in_global" ) {
+    Cache cache;
+    {
+        auto truncation = 89;
+
+        ATLAS_TRACE("regional_lonlat");
+
+        auto cachefile = CacheFile("regional_lonlat.bin");
+        StructuredGrid grid_global( 
+            LinearSpacing( {  0., 360.}, 360, false ), 
+            LinearSpacing( {-90.,  90.}, 181, true  )
+        );
+        ASSERT( grid_global.domain().global() );
+        StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) );
+        ATLAS_TRACE_SCOPE("create without cache")
+            Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) );
+        ATLAS_TRACE_SCOPE("create without cache and write")
+            Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) | option::write_legendre( cachefile ) );
+        ATLAS_TRACE_SCOPE("read cache")
+            cache = LegendreCache( cachefile );
+        ATLAS_TRACE_SCOPE("create with cache")
+            Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) );
+    }
+//    {
+//        StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) );
+//        Trans( grid, 89 );
+//    }
+}
+
+CASE( "test_regional_grids not nested" ) {
+   if (false) {
+       StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) );
+       Trans( grid, 89 );
+   } else {
+       Log::warning() << "This test fails if enabled!!! " << Here() << std::endl;
+   }
+}
+
+CASE( "test_regional_grids with projection" ) {
+    Log::warning() << "TODO" << std::endl;
+}
+
+}  // namespace test
+}  // namespace atlas
+
+int main( int argc, char** argv ) {
+    return atlas::test::run<atlas::test::AtlasTransEnvironment>( argc, argv );
+}
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 1d34fb7fb..e2b6be252 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -972,14 +972,15 @@ CASE( "test_trans_domain" ) {
 
     int trc = 640;
     //Log::info() << "rgp1:" << std::endl;
+    if( eckit::PathName("legcache.bin").exists() ) eckit::PathName("legcache.bin").unlink();
     Trace t1(Here(),"translocal1 construction");
-    trans::Trans transLocal1( g1, trc, option::type("local") | option::write_legendre("legcache.bin") );
+    trans::Trans transLocal1( g1, trc, option::type("local") | option::write_legendre("legcache.bin" ) | option::global_grid( Grid("O640")) );
     t1.stop();
     //Log::info() << "rgp2:" << std::endl;
     trans::Cache cache;
     ATLAS_TRACE_SCOPE("Read cache") cache = trans::LegendreCache("legcache.bin");
     Trace t2(Here(),"translocal2 construction");
-    trans::Trans transLocal2( cache, g2, trc, option::type("local") );
+    trans::Trans transLocal2( cache, g2, trc, option::type("local") | option::global_grid( Grid("O640")) );
     t2.stop();
 
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2

From 89a8f176a08914bc77fa125ed0dec4130e9bd17a Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Tue, 1 May 2018 14:18:42 +0100
Subject: [PATCH 054/123] added some debug output and applied clang-format

---
 src/atlas/trans/localopt3/TransLocalopt3.cc | 243 ++++++++++----------
 src/tests/trans/test_transgeneral.cc        |  16 +-
 2 files changed, 127 insertions(+), 132 deletions(-)

diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index 7e7851bda..f768f6c9e 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -19,12 +19,12 @@
 #include "atlas/trans/local_noopt/LegendrePolynomials.h"
 #include "atlas/trans/localopt3/LegendrePolynomialsopt3.h"
 #include "atlas/util/Constants.h"
+#include "eckit/config/YAMLConfiguration.h"
 #include "eckit/eckit_config.h"
 #include "eckit/linalg/LinearAlgebra.h"
 #include "eckit/linalg/Matrix.h"
-#include "eckit/parser/JSON.h"
 #include "eckit/log/Bytes.h"
-#include "eckit/config/YAMLConfiguration.h"
+#include "eckit/parser/JSON.h"
 #ifdef ECKIT_HAVE_MKL
 #include "mkl.h"
 #endif
@@ -35,7 +35,7 @@ namespace trans {
 namespace {
 static TransBuilderGrid<TransLocalopt3> builder_deprecated( "localopt3" );
 static TransBuilderGrid<TransLocalopt3> builder( "local" );
-}
+}  // namespace
 
 namespace {
 class TransParameters {
@@ -60,17 +60,15 @@ class TransParameters {
     Grid global_grid() const {
         Grid g;
         util::Config spec;
-        if( config_.get("global_grid",spec) ) {
-            g = Grid( spec );
-        }
+        if ( config_.get( "global_grid", spec ) ) { g = Grid( spec ); }
         return g;
     }
 
     bool global() const { return config_.getBool( "global", false ); }
 
     int fft() const {
-        static const std::map<std::string, int> string_to_FFT =
-          { { "OFF", (int) option::FFT::OFF }, { "FFTW", (int) option::FFT::FFTW } };
+        static const std::map<std::string, int> string_to_FFT = {{"OFF", (int)option::FFT::OFF},
+                                                                 {"FFTW", (int)option::FFT::FFTW}};
 #ifdef ATLAS_HAVE_FFTW
         std::string fft_default = "FFTW";
 #else
@@ -84,84 +82,78 @@ class TransParameters {
 };
 
 struct ReadCache {
-ReadCache( const void* cache ) {
-    begin = (char*) cache;
-    pos = 0;
-}
-template <typename T>  T* read(size_t size) {
-    T* v = (T*) (begin + pos);
-    pos += size * sizeof(T);
-    return v;
-}
+    ReadCache( const void* cache ) {
+        begin = (char*)cache;
+        pos   = 0;
+    }
+    template <typename T>
+    T* read( size_t size ) {
+        T* v = (T*)( begin + pos );
+        pos += size * sizeof( T );
+        return v;
+    }
 
-Grid read_grid() {
-    long& size = *read<long>(1);
-    char* json = read<char>(size);
-    return Grid( eckit::YAMLConfiguration( std::string( json, size ) ) );
-}
+    Grid read_grid() {
+        long& size = *read<long>( 1 );
+        char* json = read<char>( size );
+        return Grid( eckit::YAMLConfiguration( std::string( json, size ) ) );
+    }
 
-char*  begin;
-size_t pos;
+    char* begin;
+    size_t pos;
 };
 
 struct WriteCache {
-WriteCache( const eckit::PathName& file_path) :
-    dh_( file_path.fileHandle( /*overwrite = */ true ) )
-{
-    if( file_path.exists() ) {
-        std::stringstream err;
-        err << "Cannot open cache file " << file_path << " for writing as it already exists. Remove first.";
-        throw eckit::BadParameter( err.str(), Here() );
+    WriteCache( const eckit::PathName& file_path ) : dh_( file_path.fileHandle( /*overwrite = */ true ) ) {
+        if ( file_path.exists() ) {
+            std::stringstream err;
+            err << "Cannot open cache file " << file_path << " for writing as it already exists. Remove first.";
+            throw eckit::BadParameter( err.str(), Here() );
+        }
+        dh_->openForWrite( 0 );
+        pos = 0;
+    }
+    ~WriteCache() { dh_->close(); }
+    template <typename T>
+    void write( const T* v, long size ) {
+        dh_->write( v, size * sizeof( T ) );
+        pos += size * sizeof( T );
     }
-    dh_->openForWrite(0);
-    pos = 0;
-}
-~WriteCache() {
-    dh_->close();
-}
-template <typename T> void write( const T* v, long size) {
-    dh_->write( v , size * sizeof(T) );
-    pos += size * sizeof(T);
-}
 
-//void write( long v ) {
-//    dh_->write( &v , sizeof(long) );
-//    pos += sizeof(long);
-//}
-
-//void write( const Grid& grid ) {
-//    std::stringstream s;
-//    eckit::JSON json(s);
-//    json << grid.spec();
-//    std::string grid_spec( s.str() );
-//    long size = grid_spec.size();
-//    write( size );
-//    dh_->write( grid_spec.c_str(), grid_spec.size() );
-//    pos += grid_spec.size();
-//}
-
-
-std::unique_ptr<eckit::DataHandle> dh_;
-size_t pos;
+    //void write( long v ) {
+    //    dh_->write( &v , sizeof(long) );
+    //    pos += sizeof(long);
+    //}
+
+    //void write( const Grid& grid ) {
+    //    std::stringstream s;
+    //    eckit::JSON json(s);
+    //    json << grid.spec();
+    //    std::string grid_spec( s.str() );
+    //    long size = grid_spec.size();
+    //    write( size );
+    //    dh_->write( grid_spec.c_str(), grid_spec.size() );
+    //    pos += grid_spec.size();
+    //}
+
+
+    std::unique_ptr<eckit::DataHandle> dh_;
+    size_t pos;
 };
 
 #if ATLAS_HAVE_FFTW
 struct FFTW_Wisdom {
     char* wisdom;
-    FFTW_Wisdom() {
-        wisdom = fftw_export_wisdom_to_string();
-    }
-    ~FFTW_Wisdom() {
-        free( wisdom );
-    }
+    FFTW_Wisdom() { wisdom = fftw_export_wisdom_to_string(); }
+    ~FFTW_Wisdom() { free( wisdom ); }
 };
-std::ostream& operator<< (std::ostream& out, const FFTW_Wisdom& w) {
+std::ostream& operator<<( std::ostream& out, const FFTW_Wisdom& w ) {
     out << w.wisdom;
     return out;
 }
 #endif
 
-}
+}  // namespace
 
 // --------------------------------------------------------------------------------------------------------------------
 // Helper functions
@@ -188,10 +180,10 @@ int num_n( const int truncation, const int m, const bool symmetric ) {
 
 void alloc_aligned( double*& ptr, size_t n ) {
 #warning todo1
-    // If we can assume that posix_memalign gives the same result, we would not need to support mkl_malloc
-    // We can then remove the include of mkl.h above (simplifying things).
-    // As well there is the C++ functions "std::align" (http://en.cppreference.com/w/cpp/memory/align)
-    // that we could look into.
+// If we can assume that posix_memalign gives the same result, we would not need to support mkl_malloc
+// We can then remove the include of mkl.h above (simplifying things).
+// As well there is the C++ functions "std::align" (http://en.cppreference.com/w/cpp/memory/align)
+// that we could look into.
 #ifdef ECKIT_HAVE_MKL
     int al = 64;
     ptr    = (double*)mkl_malloc( sizeof( double ) * n, al );
@@ -259,8 +251,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
     legendre_cache_( cache.legendre().data() ),
     legendre_cachesize_( cache.legendre().size() ),
     fft_cache_( cache.fft().data() ),
-    fft_cachesize_( cache.fft().size() )
-{
+    fft_cachesize_( cache.fft().size() ) {
     ATLAS_TRACE( "Precompute legendre opt3" );
 #ifdef ECKIT_HAVE_MKL
     eckit::linalg::LinearAlgebra::backend( "mkl" );  // might want to choose backend with this command
@@ -273,7 +264,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
     int nlats         = 0;
     int nlonsMax      = 0;
     int neqtr         = 0;
-    useFFT_           = TransParameters(config).fft();
+    useFFT_           = TransParameters( config ).fft();
     unstruct_precomp_ = true;
     nlatsNH_          = 0;
     nlatsSH_          = 0;
@@ -305,19 +296,19 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         }
 
 
-        gridGlobal_ = TransParameters(config).global_grid();
-        if( not gridGlobal_ ) {
-            if ( grid_.domain().global() ) {
-                gridGlobal_ = grid_;
-            } else {
-                throw eckit::BadParameter("A global structured grid is required to be passed in the optional arguments",Here());
+        gridGlobal_ = TransParameters( config ).global_grid();
+        if ( not gridGlobal_ ) {
+            if ( grid_.domain().global() ) { gridGlobal_ = grid_; }
+            else {
+                throw eckit::BadParameter(
+                    "A global structured grid is required to be passed in the optional arguments", Here() );
             }
         }
 
         grid::StructuredGrid gs_global( gridGlobal_ );
-        ASSERT( gs_global ); // assert structured grid
+        ASSERT( gs_global );  // assert structured grid
         grid::StructuredGrid gsLeg = ( useGlobalLeg ? gs_global : g );
-        nlonsMaxGlobal_ = gs_global.nxmax();
+        nlonsMaxGlobal_            = gs_global.nxmax();
         jlonMin_.resize( 1 );
         jlonMin_[0]  = 0;
         jlatMin_     = 0;
@@ -421,35 +412,35 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
             legendre_sym_begin_[0]  = 0;
             legendre_asym_begin_[0] = 0;
             for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-                size_sym  += add_padding( num_n( truncation_ + 1, jm, true  ) * nlatsLeg_ );
+                size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ );
                 size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ );
                 legendre_sym_begin_[jm + 1]  = size_sym;
                 legendre_asym_begin_[jm + 1] = size_asym;
             }
 
-            if( legendre_cache_ ) {
+            if ( legendre_cache_ ) {
                 ReadCache legendre( legendre_cache_ );
-                legendre_sym_  = legendre.read<double>( size_sym  );
+                legendre_sym_  = legendre.read<double>( size_sym );
                 legendre_asym_ = legendre.read<double>( size_asym );
                 ASSERT( legendre.pos == legendre_cachesize_ );
                 // TODO: check this is all aligned...
-            } else {
-
+            }
+            else {
                 alloc_aligned( legendre_sym_, size_sym );
                 alloc_aligned( legendre_asym_, size_asym );
 
                 compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_,
                                                   legendre_asym_, legendre_sym_begin_.data(),
                                                   legendre_asym_begin_.data() );
-                std::string file_path = TransParameters(config).write_legendre();
-                if( file_path.size() ) {
+                std::string file_path = TransParameters( config ).write_legendre();
+                if ( file_path.size() ) {
                     ATLAS_TRACE( "write_legendre" );
                     Log::debug() << "Writing Legendre cache file ..." << std::endl;
                     Log::debug() << "    path      = " << file_path << std::endl;
                     WriteCache legendre( file_path );
-                    legendre.write( legendre_sym_,  size_sym  );
+                    legendre.write( legendre_sym_, size_sym );
                     legendre.write( legendre_asym_, size_asym );
-                    Log::debug() << "Cache file size: " << eckit::Bytes(legendre.pos) << std::endl;
+                    Log::debug() << "Cache file size: " << eckit::Bytes( legendre.pos ) << std::endl;
                 }
             }
         }
@@ -463,22 +454,22 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
                 fft_in_         = fftw_alloc_complex( nlats * num_complex );
                 fft_out_        = fftw_alloc_real( nlats * nlonsMaxGlobal_ );
 
-                if( fft_cache_ ) {
+                if ( fft_cache_ ) {
                     Log::debug() << "Import FFTW wisdom from cache" << std::endl;
                     fftw_import_wisdom_from_string( (const char*)fft_cache_ );
                 }
-//                std::string wisdomString( "" );
-//                std::ifstream read( "wisdom.bin" );
-//                if ( read.is_open() ) {
-//                    std::getline( read, wisdomString );
-//                    while ( read ) {
-//                        std::string line;
-//                        std::getline( read, line );
-//                        wisdomString += line;
-//                    }
-//                }
-//                read.close();
-//                if ( wisdomString.length() > 0 ) { fftw_import_wisdom_from_string( &wisdomString[0u] ); }
+                //                std::string wisdomString( "" );
+                //                std::ifstream read( "wisdom.bin" );
+                //                if ( read.is_open() ) {
+                //                    std::getline( read, wisdomString );
+                //                    while ( read ) {
+                //                        std::string line;
+                //                        std::getline( read, line );
+                //                        wisdomString += line;
+                //                    }
+                //                }
+                //                read.close();
+                //                if ( wisdomString.length() > 0 ) { fftw_import_wisdom_from_string( &wisdomString[0u] ); }
                 if ( grid::RegularGrid( gridGlobal_ ) ) {
                     plans_.resize( 1 );
                     plans_[0] = fftw_plan_many_dft_c2r( 1, &nlonsMaxGlobal_, nlats, fft_in_, NULL, 1, num_complex,
@@ -492,8 +483,8 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
                         plans_[j] = fftw_plan_dft_c2r_1d( nlonsGlobalj, fft_in_, fft_out_, FFTW_ESTIMATE );
                     }
                 }
-                std::string file_path = TransParameters(config).write_fft();
-                if( file_path.size() ) {
+                std::string file_path = TransParameters( config ).write_fft();
+                if ( file_path.size() ) {
                     Log::debug() << "Write FFTW wisdom to file " << file_path << std::endl;
                     //bool success = fftw_export_wisdom_to_filename( "wisdom.bin" );
                     //ASSERT( success );
@@ -504,18 +495,18 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
                     fftw_export_wisdom_to_file( file_fftw );
                     fclose( file_fftw );
                 }
-//                std::string newWisdom( fftw_export_wisdom_to_string() );
-//                if ( 1.1 * wisdomString.length() < newWisdom.length() ) {
-//                    std::ofstream write( "wisdom.bin" );
-//                    write << newWisdom;
-//                    write.close();
-//                }
+                //                std::string newWisdom( fftw_export_wisdom_to_string() );
+                //                if ( 1.1 * wisdomString.length() < newWisdom.length() ) {
+                //                    std::ofstream write( "wisdom.bin" );
+                //                    write << newWisdom;
+                //                    write.close();
+                //                }
             }
                 // other FFT implementations should be added with #elif statements
 #else
             useFFT_ = false;                             // no FFT implemented => default to dgemm
-            std::string file_path = TransParameters(config).write_fft();
-            if( file_path.size() ) {
+            std::string file_path = TransParameters( config ).write_fft();
+            if ( file_path.size() ) {
                 std::ofstream write( file_path );
                 write << "No cache available, as FFTW is not enabled" << std::endl;
                 write.close();
@@ -568,10 +559,9 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
             }
             compute_legendre_polynomials_allopt3( truncation_, grid_.size(), lats.data(), legendre_ );
         }
-        if( TransParameters(config).write_legendre().size() ) {
-            throw eckit::NotImplemented("Caching for unstructured grids not implemented",Here());
+        if ( TransParameters( config ).write_legendre().size() ) {
+            throw eckit::NotImplemented( "Caching for unstructured grids not implemented", Here() );
         }
-
     }
 }  // namespace trans
 
@@ -584,7 +574,7 @@ TransLocalopt3::TransLocalopt3( const Grid& grid, const long truncation, const e
 
 TransLocalopt3::~TransLocalopt3() {
     if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
-        if( not legendre_cache_ ) {
+        if ( not legendre_cache_ ) {
             free_aligned( legendre_sym_ );
             free_aligned( legendre_asym_ );
         }
@@ -663,6 +653,8 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat
                                             const eckit::Configuration& config ) const {
     // Legendre transform:
     {
+        Log::debug() << "Legendre dgemm: using " << nlatsLegReduced_ - nlat0_[0] << " latitudes out of "
+                     << nlatsGlobal_ / 2 << std::endl;
         ATLAS_TRACE( "opt3 Legendre dgemm" );
         for ( int jm = 0; jm <= truncation_; jm++ ) {
             int size_sym  = num_n( truncation_ + 1, jm, true );
@@ -849,8 +841,8 @@ void TransLocalopt3::invtrans_fourier_regularopt3( const int nlats, const int nl
 #endif
     }
     else {
-
-        throw eckit::SeriousBug("dgemm for Fourier transforms currently broken. Make sure atlas is compiled with FFTW.",Here());
+        throw eckit::SeriousBug(
+            "dgemm for Fourier transforms currently broken. Make sure atlas is compiled with FFTW.", Here() );
 
 #if !TRANSLOCAL_DGEMM2
         // dgemm-method 1
@@ -947,15 +939,15 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid::
 #endif
     }
     else {
-
-        throw eckit::SeriousBug("dgemm for Fourier transforms currently broken. Make sure atlas is compiled with FFTW.",Here());
+        throw eckit::SeriousBug(
+            "dgemm for Fourier transforms currently broken. Make sure atlas is compiled with FFTW.", Here() );
 
 #if !TRANSLOCAL_DGEMM2
         // dgemm-method 1
         {
 #warning dgemm currently broken for Fourier transforms. FFTW required!
-// Noticed that Matrix C is trying to access more than is actually allocated
-// Memory error!!! BEWARE!!!
+            // Noticed that Matrix C is trying to access more than is actually allocated
+            // Memory error!!! BEWARE!!!
             ATLAS_TRACE( "opt3 Fourier dgemm method 1" );
             eckit::linalg::Matrix A( fourier_, nlonsMax, ( truncation_ + 1 ) * 2 );
             eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats );
@@ -993,7 +985,6 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid::
         free_aligned( gp_opt3 );
 #endif
     }
-
 }
 
 // --------------------------------------------------------------------------------------------------------------------
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index e2b6be252..58362327c 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -972,15 +972,17 @@ CASE( "test_trans_domain" ) {
 
     int trc = 640;
     //Log::info() << "rgp1:" << std::endl;
-    if( eckit::PathName("legcache.bin").exists() ) eckit::PathName("legcache.bin").unlink();
-    Trace t1(Here(),"translocal1 construction");
-    trans::Trans transLocal1( g1, trc, option::type("local") | option::write_legendre("legcache.bin" ) | option::global_grid( Grid("O640")) );
+    if ( eckit::PathName( "legcache.bin" ).exists() ) eckit::PathName( "legcache.bin" ).unlink();
+    Trace t1( Here(), "translocal1 construction" );
+    trans::Trans transLocal1(
+        g1, trc,
+        option::type( "local" ) | option::write_legendre( "legcache.bin" ) | option::global_grid( Grid( "O640" ) ) );
     t1.stop();
     //Log::info() << "rgp2:" << std::endl;
     trans::Cache cache;
-    ATLAS_TRACE_SCOPE("Read cache") cache = trans::LegendreCache("legcache.bin");
-    Trace t2(Here(),"translocal2 construction");
-    trans::Trans transLocal2( cache, g2, trc, option::type("local") | option::global_grid( Grid("O640")) );
+    ATLAS_TRACE_SCOPE( "Read cache" ) cache = trans::LegendreCache( "legcache.bin" );
+    Trace t2( Here(), "translocal2 construction" );
+    trans::Trans transLocal2( cache, g2, trc, option::type( "local" ) | option::global_grid( Grid( "O640" ) ) );
     t2.stop();
 
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
@@ -1050,10 +1052,12 @@ CASE( "test_trans_domain" ) {
                                                                   rgp2_analytic.data(), ivar_in, ivar_out );
 
                                 //Log::info() << std::endl << "rgp1:";
+                                ATLAS_TRACE_SCOPE( "translocal1" )
                                 EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
                                                                        div.data(), rgp1.data() ) );
 
                                 //Log::info() << std::endl << "rgp2:";
+                                ATLAS_TRACE_SCOPE( "translocal2" )
                                 EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
                                                                        div.data(), rgp2.data() ) );
 

From 2616b79f2bc593b56f4862f4f233b4f3536c5b25 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Tue, 1 May 2018 14:39:43 +0100
Subject: [PATCH 055/123] added again option to run local transform without
 passing global grid

---
 src/atlas/trans/localopt3/TransLocalopt3.cc | 11 +++++++++--
 src/tests/trans/test_transgeneral.cc        |  3 ++-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index f768f6c9e..91f8ccfa8 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -300,8 +300,15 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         if ( not gridGlobal_ ) {
             if ( grid_.domain().global() ) { gridGlobal_ = grid_; }
             else {
-                throw eckit::BadParameter(
-                    "A global structured grid is required to be passed in the optional arguments", Here() );
+                if ( Grid( grid_.name() ).domain().global() ) {
+                    Log::warning() << Here() << " Deprecated. We should pass a global grid as optional argument"
+                                   << std::endl;
+                    gridGlobal_ = Grid( grid_.name() );
+                }
+                else {
+                    throw eckit::BadParameter(
+                        "A global structured grid is required to be passed in the optional arguments", Here() );
+                }
             }
         }
 
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 58362327c..2c734b432 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -982,7 +982,8 @@ CASE( "test_trans_domain" ) {
     trans::Cache cache;
     ATLAS_TRACE_SCOPE( "Read cache" ) cache = trans::LegendreCache( "legcache.bin" );
     Trace t2( Here(), "translocal2 construction" );
-    trans::Trans transLocal2( cache, g2, trc, option::type( "local" ) | option::global_grid( Grid( "O640" ) ) );
+    //    trans::Trans transLocal2( cache, g2, trc, option::type( "local" ) | option::global_grid( Grid( "O640" ) ) );
+    trans::Trans transLocal2( cache, g2, trc, option::type( "local" ) );
     t2.stop();
 
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2

From 7de89623dab2865cfec76a549537b9f2237f5f0f Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Tue, 1 May 2018 15:03:30 +0100
Subject: [PATCH 056/123] some more debug output

---
 src/atlas/trans/localopt3/TransLocalopt3.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index 91f8ccfa8..3807bfcbe 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -320,6 +320,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         jlonMin_[0]  = 0;
         jlatMin_     = 0;
         nlatsGlobal_ = gs_global.ny();
+        Log::debug() << "Grid has " << nlats << " latitudes. Global grid has " << nlatsGlobal_ << std::endl;
         if ( useGlobalLeg ) { nlatsLeg_ = nlatsGlobal_ / 2; }
         else {
             nlatsLeg_        = nlatsLegDomain_;

From 549ce04d930d162ca0187e5ba4d97d3cb7270331 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Wed, 2 May 2018 09:48:07 +0100
Subject: [PATCH 057/123] updated the debug output

---
 src/atlas/trans/localopt3/TransLocalopt3.cc | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index 3807bfcbe..1e380faa6 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -320,7 +320,10 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         jlonMin_[0]  = 0;
         jlatMin_     = 0;
         nlatsGlobal_ = gs_global.ny();
-        Log::debug() << "Grid has " << nlats << " latitudes. Global grid has " << nlatsGlobal_ << std::endl;
+        if ( grid_.domain().global() ) { Log::debug() << "Global grid with " << nlats << " latitudes." << std::endl; }
+        else {
+            Log::debug() << "Grid has " << nlats << " latitudes. Global grid has " << nlatsGlobal_ << std::endl;
+        }
         if ( useGlobalLeg ) { nlatsLeg_ = nlatsGlobal_ / 2; }
         else {
             nlatsLeg_        = nlatsLegDomain_;

From 96da40e577fdb16534521296db79dd3a02922cc7 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Thu, 3 May 2018 16:01:40 +0100
Subject: [PATCH 058/123] no_fft working for regular grids

---
 src/atlas/trans/localopt3/TransLocalopt3.cc | 49 +--------------------
 src/tests/trans/test_transgeneral.cc        | 13 +++---
 2 files changed, 8 insertions(+), 54 deletions(-)

diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index 1e380faa6..5099add36 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -852,9 +852,6 @@ void TransLocalopt3::invtrans_fourier_regularopt3( const int nlats, const int nl
 #endif
     }
     else {
-        throw eckit::SeriousBug(
-            "dgemm for Fourier transforms currently broken. Make sure atlas is compiled with FFTW.", Here() );
-
 #if !TRANSLOCAL_DGEMM2
         // dgemm-method 1
         {
@@ -950,51 +947,7 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid::
 #endif
     }
     else {
-        throw eckit::SeriousBug(
-            "dgemm for Fourier transforms currently broken. Make sure atlas is compiled with FFTW.", Here() );
-
-#if !TRANSLOCAL_DGEMM2
-        // dgemm-method 1
-        {
-#warning dgemm currently broken for Fourier transforms. FFTW required!
-            // Noticed that Matrix C is trying to access more than is actually allocated
-            // Memory error!!! BEWARE!!!
-            ATLAS_TRACE( "opt3 Fourier dgemm method 1" );
-            eckit::linalg::Matrix A( fourier_, nlonsMax, ( truncation_ + 1 ) * 2 );
-            eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats );
-            eckit::linalg::Matrix C( gp_fields, nlonsMax, nb_fields * nlats );
-            eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-        }
-#else
-        // dgemm-method 2
-        // should be faster for small domains or large truncation
-        // but have not found any significant speedup so far
-        double* gp_opt3;
-        alloc_aligned( gp_opt3, nb_fields * grid_.size() );
-        {
-            ATLAS_TRACE( "opt3 Fourier dgemm method 2" );
-            eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 );
-            eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlonsMax );
-            eckit::linalg::Matrix C( gp_opt3, nb_fields * nlats, nlonsMax );
-            eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-        }
-
-        // Transposition in grid point space:
-        {
-            ATLAS_TRACE( "opt3 transposition in gp-space" );
-            int idx = 0;
-            for ( int jlon = 0; jlon < nlonsMax; jlon++ ) {
-                for ( int jlat = 0; jlat < nlats; jlat++ ) {
-                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                        int pos_tp = jlon + nlonsMax * ( jlat + nlats * ( jfld ) );
-                        //int pos  = jfld + nb_fields * ( jlat + nlats * ( jlon ) );
-                        gp_fields[pos_tp] = gp_opt3[idx++];  // = gp_opt3[pos]
-                    }
-                }
-            }
-        }
-        free_aligned( gp_opt3 );
-#endif
+        throw eckit::SeriousBug( "dgemm for Fourier transforms not implemented for reduced grids", Here() );
     }
 }
 
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 2c734b432..13b8dd711 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -966,7 +966,7 @@ CASE( "test_trans_domain" ) {
     Domain testdomain2 = RectangularDomain( {-1., 1.}, {-5., 40.} );
     // Grid: (Adjust the following line if the test takes too long!)
 
-    std::string gridString = "O640";
+    std::string gridString = "F640";
     Grid g1( gridString, testdomain1 );
     Grid g2( gridString, testdomain2 );
 
@@ -974,16 +974,17 @@ CASE( "test_trans_domain" ) {
     //Log::info() << "rgp1:" << std::endl;
     if ( eckit::PathName( "legcache.bin" ).exists() ) eckit::PathName( "legcache.bin" ).unlink();
     Trace t1( Here(), "translocal1 construction" );
-    trans::Trans transLocal1(
-        g1, trc,
-        option::type( "local" ) | option::write_legendre( "legcache.bin" ) | option::global_grid( Grid( "O640" ) ) );
+    trans::Trans transLocal1( g1, trc,
+                              option::type( "local" ) | option::write_legendre( "legcache.bin" ) |
+                                  option::global_grid( Grid( gridString ) ) );
     t1.stop();
     //Log::info() << "rgp2:" << std::endl;
     trans::Cache cache;
     ATLAS_TRACE_SCOPE( "Read cache" ) cache = trans::LegendreCache( "legcache.bin" );
     Trace t2( Here(), "translocal2 construction" );
-    //    trans::Trans transLocal2( cache, g2, trc, option::type( "local" ) | option::global_grid( Grid( "O640" ) ) );
-    trans::Trans transLocal2( cache, g2, trc, option::type( "local" ) );
+    trans::Trans transLocal2( cache, g2, trc,
+                              option::type( "local" ) | option::global_grid( Grid( gridString ) ) | option::no_fft() );
+    //trans::Trans transLocal2( cache, g2, trc, option::type( "local" ) );
     t2.stop();
 
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2

From 0202fb16204de84c149959dbd59f48591dd2ad59 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Thu, 3 May 2018 16:10:03 +0100
Subject: [PATCH 059/123] added NOTIMP for Fourier dgemm with reduced grids

---
 src/atlas/trans/localopt3/TransLocalopt3.cc | 4 +++-
 src/tests/trans/test_transgeneral.cc        | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index 5099add36..e8d8fc679 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -947,7 +947,9 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid::
 #endif
     }
     else {
-        throw eckit::SeriousBug( "dgemm for Fourier transforms not implemented for reduced grids", Here() );
+        NOTIMP;
+        // Using dgemm in Fourier transform for reduced grids is extremely slow.
+        // Please install and use FFTW!
     }
 }
 
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 13b8dd711..2804192c9 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -966,7 +966,7 @@ CASE( "test_trans_domain" ) {
     Domain testdomain2 = RectangularDomain( {-1., 1.}, {-5., 40.} );
     // Grid: (Adjust the following line if the test takes too long!)
 
-    std::string gridString = "F640";
+    std::string gridString = "O640";
     Grid g1( gridString, testdomain1 );
     Grid g2( gridString, testdomain2 );
 

From ed80d65da2b7de20cf9fbbce4c5976c0b769e28f Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Thu, 3 May 2018 18:58:46 +0100
Subject: [PATCH 060/123] non-nested regular grids are now supported

---
 src/atlas/trans/localopt3/TransLocalopt3.cc | 96 +++++++++++++--------
 src/atlas/trans/localopt3/TransLocalopt3.h  |  1 +
 src/tests/trans/test_transgeneral.cc        | 44 ++++++----
 3 files changed, 89 insertions(+), 52 deletions(-)

diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index e8d8fc679..b878a337c 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -266,12 +266,14 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
     int neqtr         = 0;
     useFFT_           = TransParameters( config ).fft();
     unstruct_precomp_ = true;
+    no_symmetry_      = false;
     nlatsNH_          = 0;
     nlatsSH_          = 0;
     nlatsLeg_         = 0;
     nlatsLegDomain_   = 0;
     nlatsLegReduced_  = 0;
     bool useGlobalLeg = true;
+    bool no_nest      = false;
     if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
         grid::StructuredGrid g( grid_ );
         nlats    = g.ny();
@@ -300,15 +302,28 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         if ( not gridGlobal_ ) {
             if ( grid_.domain().global() ) { gridGlobal_ = grid_; }
             else {
-                if ( Grid( grid_.name() ).domain().global() ) {
+                /*if ( Grid( grid_.name() ).domain().global() ) {
                     Log::warning() << Here() << " Deprecated. We should pass a global grid as optional argument"
                                    << std::endl;
                     gridGlobal_ = Grid( grid_.name() );
                 }
+                else {*/
+                if ( grid::RegularGrid( grid_ ) ) {
+                    // non-nested regular grid
+                    no_nest         = true;
+                    no_symmetry_    = true;
+                    useFFT_         = false;
+                    nlatsNH_        = nlats;
+                    nlatsSH_        = 0;
+                    nlatsLegDomain_ = nlatsNH_;
+                    gridGlobal_     = grid_;
+                    useGlobalLeg    = false;
+                }
                 else {
-                    throw eckit::BadParameter(
-                        "A global structured grid is required to be passed in the optional arguments", Here() );
+                    NOTIMP;
+                    // non-nested reduced grids are not supported
                 }
+                //}
             }
         }
 
@@ -343,21 +358,28 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
 
         // reduce truncation towards the pole for reduced meshes:
         nlat0_.resize( truncation_ + 1 );
-        int nmen0 = -1;
-        for ( int jlat = 0; jlat < nlatsGlobal_ / 2; jlat++ ) {
-            double lat = gs_global.y( jlat ) * util::Constants::degreesToRadians();
-            int nmen   = fourier_truncation( truncation_, gs_global.nx( jlat ), gs_global.nxmax(), nlatsGlobal_, lat,
-                                           grid::RegularGrid( gs_global ) );
-            nmen       = std::max( nmen0, nmen );
-            int ndgluj = nlatsLeg_ - std::min( nlatsLeg_, nlatsLeg_ + jlatMinLeg_ - jlat );
-            if ( useGlobalLeg ) { ndgluj = std::max( jlatMinLeg_, jlat ); }
-            for ( int j = nmen0 + 1; j <= nmen; j++ ) {
-                nlat0_[j] = ndgluj;
+        if ( no_nest ) {
+            for ( int j = 0; j <= truncation_; j++ ) {
+                nlat0_[j] = 0;
             }
-            nmen0 = nmen;
         }
-        for ( int j = nmen0 + 1; j <= truncation_; j++ ) {
-            nlat0_[j] = nlatsLeg_;
+        else {
+            int nmen0 = -1;
+            for ( int jlat = 0; jlat < nlatsGlobal_ / 2; jlat++ ) {
+                double lat = gs_global.y( jlat ) * util::Constants::degreesToRadians();
+                int nmen = fourier_truncation( truncation_, gs_global.nx( jlat ), gs_global.nxmax(), nlatsGlobal_, lat,
+                                               grid::RegularGrid( gs_global ) );
+                nmen     = std::max( nmen0, nmen );
+                int ndgluj = nlatsLeg_ - std::min( nlatsLeg_, nlatsLeg_ + jlatMinLeg_ - jlat );
+                if ( useGlobalLeg ) { ndgluj = std::max( jlatMinLeg_, jlat ); }
+                for ( int j = nmen0 + 1; j <= nmen; j++ ) {
+                    nlat0_[j] = ndgluj;
+                }
+                nmen0 = nmen;
+            }
+            for ( int j = nmen0 + 1; j <= truncation_; j++ ) {
+                nlat0_[j] = nlatsLeg_;
+            }
         }
         /*Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << " jlatMin=" << jlatMin_
                     << " jlatMinLeg=" << jlatMinLeg_ << " nlatsGlobal/2-nlatsLeg=" << nlatsGlobal_ / 2 - nlatsLeg_
@@ -369,24 +391,26 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
             if ( result < 0. ) { result += 360.; }
             return result;
         };
-        double lonmin = wrapAngle( g.x( 0, 0 ) );
-        if ( nlonsMax < fft_threshold * nlonsMaxGlobal_ ) { useFFT_ = false; }
-        else {
-            // need to use FFT with cropped grid
-            if ( grid::RegularGrid( gridGlobal_ ) ) {
-                for ( size_t jlon = 0; jlon < nlonsMaxGlobal_; ++jlon ) {
-                    if ( gs_global.x( jlon, 0 ) < lonmin ) { jlonMin_[0]++; }
-                }
-            }
+        if ( useFFT_ ) {
+            double lonmin = wrapAngle( g.x( 0, 0 ) );
+            if ( nlonsMax < fft_threshold * nlonsMaxGlobal_ ) { useFFT_ = false; }
             else {
-                nlonsGlobal_.resize( nlats );
-                jlonMin_.resize( nlats );
-                for ( size_t jlat = 0; jlat < nlats; jlat++ ) {
-                    double lonmin      = wrapAngle( g.x( 0, jlat ) );
-                    nlonsGlobal_[jlat] = gs_global.nx( jlat + jlatMin_ );
-                    jlonMin_[jlat]     = 0;
-                    for ( size_t jlon = 0; jlon < nlonsGlobal_[jlat]; ++jlon ) {
-                        if ( gs_global.x( jlon, jlat + jlatMin_ ) < lonmin ) { jlonMin_[jlat]++; }
+                // need to use FFT with cropped grid
+                if ( grid::RegularGrid( gridGlobal_ ) ) {
+                    for ( size_t jlon = 0; jlon < nlonsMaxGlobal_; ++jlon ) {
+                        if ( gs_global.x( jlon, 0 ) < lonmin ) { jlonMin_[0]++; }
+                    }
+                }
+                else {
+                    nlonsGlobal_.resize( nlats );
+                    jlonMin_.resize( nlats );
+                    for ( size_t jlat = 0; jlat < nlats; jlat++ ) {
+                        double lonmin      = wrapAngle( g.x( 0, jlat ) );
+                        nlonsGlobal_[jlat] = gs_global.nx( jlat + jlatMin_ );
+                        jlonMin_[jlat]     = 0;
+                        for ( size_t jlon = 0; jlon < nlonsGlobal_[jlat]; ++jlon ) {
+                            if ( gs_global.x( jlon, jlat + jlatMin_ ) < lonmin ) { jlonMin_[jlat]++; }
+                        }
                     }
                 }
             }
@@ -947,9 +971,9 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid::
 #endif
     }
     else {
-        NOTIMP;
-        // Using dgemm in Fourier transform for reduced grids is extremely slow.
-        // Please install and use FFTW!
+        throw eckit::NotImplemented(
+            "Using dgemm in Fourier transform for reduced grids is extremely slow. Please install and use FFTW!",
+            Here() );
     }
 }
 
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h
index 0651abeab..3826dbc9c 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.h
+++ b/src/atlas/trans/localopt3/TransLocalopt3.h
@@ -145,6 +145,7 @@ class TransLocalopt3 : public trans::TransImpl {
     bool useFFT_;
     bool dgemmMethod1_;
     bool unstruct_precomp_;
+    bool no_symmetry_;
     int truncation_;
     int nlatsNH_;
     int nlatsSH_;
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 2804192c9..ff8185943 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -427,7 +427,7 @@ double sphericalharmonics_analytic_point(
 //
 void spectral_transform_grid_analytic(
     const size_t trc,     // truncation (in)
-    const size_t trcFT,   // truncation for Fourier transformation (in)
+    bool trcFT,           // truncation for Fourier transformation (in)
     const double n,       // total wave number (implemented so far for n<4
     const double m,       // zonal wave number (implemented so far for m<4, m<n
     const int imag,       // 0: test real part, 1: test imaginary part
@@ -457,20 +457,26 @@ void spectral_transform_grid_analytic(
 
     if ( grid::StructuredGrid( grid ) ) {
         grid::StructuredGrid g( grid );
-        Grid gridGlobal( grid.name() );
-        grid::StructuredGrid gs_global( gridGlobal );
-        int nlatsGlobal = gs_global.ny();
-        int jlatMin     = 0;
-        for ( int jlat = 0; jlat < nlatsGlobal; jlat++ ) {
-            if ( gs_global.y( jlat ) > g.y( 0 ) ) { jlatMin++; };
+        Grid gridGlobal;
+        grid::StructuredGrid gs_global;
+        int jlatMin = 0;
+        if ( trcFT ) {
+            gridGlobal      = Grid( grid.name() );
+            gs_global       = grid::StructuredGrid( gridGlobal );
+            int nlatsGlobal = gs_global.ny();
+            for ( int jlat = 0; jlat < nlatsGlobal; jlat++ ) {
+                if ( gs_global.y( jlat ) > g.y( 0 ) ) { jlatMin++; };
+            }
         }
 
         int idx = 0;
         for ( size_t j = 0; j < g.ny(); ++j ) {
             double lat = g.y( j ) * util::Constants::degreesToRadians();
-
-            int ftrc = trans::fourier_truncation( trc, gs_global.nx( jlatMin + j ), gs_global.nxmax(), gs_global.ny(),
+            int ftrc   = trc + 1;
+            if ( trcFT ) {
+                ftrc = trans::fourier_truncation( trc, gs_global.nx( jlatMin + j ), gs_global.nxmax(), gs_global.ny(),
                                                   lat, grid::RegularGrid( gs_global ) );
+            }
             /*Log::info() << "j=" << j << " ftrc=" << ftrc << " trc=" << trc << " nx=" << gs_global.nx( jlatMin + j )
                         << " nxmax=" << gs_global.nxmax() << " nlats=" << gs_global.ny() << " lat=" << g.y( j )
                         << " jlatMin=" << jlatMin << std::endl;*/
@@ -545,7 +551,7 @@ double spectral_transform_test( double trc,       // truncation
 
     // compute analytic solution (this also initializes rspecg and needs to be
     // done before the actual transform):
-    spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg, rgp_analytic, 2, 2 );
+    spectral_transform_grid_analytic( trc, true, n, m, imag, g, rspecg, rgp_analytic, 2, 2 );
     // perform spectral transform:
 
     spectral_transform_grid( trc, trc, g, rspecg, rgp, pointwise );
@@ -968,7 +974,13 @@ CASE( "test_trans_domain" ) {
 
     std::string gridString = "O640";
     Grid g1( gridString, testdomain1 );
-    Grid g2( gridString, testdomain2 );
+    //Grid g2( gridString, testdomain2 );
+
+    bool fourierTrc1 = true;
+    bool fourierTrc2 = false;
+    using grid::StructuredGrid;
+    using LinearSpacing = grid::LinearSpacing;
+    StructuredGrid g2( LinearSpacing( {0., 180.}, 181 ), LinearSpacing( {0., 45.}, 46 ) );
 
     int trc = 640;
     //Log::info() << "rgp1:" << std::endl;
@@ -982,9 +994,9 @@ CASE( "test_trans_domain" ) {
     trans::Cache cache;
     ATLAS_TRACE_SCOPE( "Read cache" ) cache = trans::LegendreCache( "legcache.bin" );
     Trace t2( Here(), "translocal2 construction" );
-    trans::Trans transLocal2( cache, g2, trc,
-                              option::type( "local" ) | option::global_grid( Grid( gridString ) ) | option::no_fft() );
-    //trans::Trans transLocal2( cache, g2, trc, option::type( "local" ) );
+    //trans::Trans transLocal2( cache, g2, trc,
+    //                          option::type( "local" ) | option::global_grid( Grid( gridString ) ) | option::no_fft() );
+    trans::Trans transLocal2( g2, trc, option::type( "local" ) );
     t2.stop();
 
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
@@ -1047,10 +1059,10 @@ CASE( "test_trans_domain" ) {
                                     rgp2_analytic[j] = 0.;
                                 }
 
-                                spectral_transform_grid_analytic( trc, trc, n, m, imag, g1, rspecg.data(),
+                                spectral_transform_grid_analytic( trc, fourierTrc1, n, m, imag, g1, rspecg.data(),
                                                                   rgp1_analytic.data(), ivar_in, ivar_out );
 
-                                spectral_transform_grid_analytic( trc, trc, n, m, imag, g2, rspecg.data(),
+                                spectral_transform_grid_analytic( trc, fourierTrc2, n, m, imag, g2, rspecg.data(),
                                                                   rgp2_analytic.data(), ivar_in, ivar_out );
 
                                 //Log::info() << std::endl << "rgp1:";

From b5a7092c4911df6f126b30273d3d0fcac7a344cf Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Fri, 4 May 2018 14:56:46 +0100
Subject: [PATCH 061/123] Cleanup

---
 .../localopt3/LegendrePolynomialsopt3.cc      |   1 -
 src/atlas/trans/localopt3/TransLocalopt3.cc   | 129 ++++++++----------
 src/atlas/trans/localopt3/TransLocalopt3.h    |   8 ++
 src/tests/trans/test_trans_localcache.cc      |  80 ++++++-----
 4 files changed, 113 insertions(+), 105 deletions(-)

diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
index 2f16cc43b..1cddbc18b 100644
--- a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
+++ b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
@@ -158,7 +158,6 @@ void compute_legendre_polynomialsopt3(
     size_t leg_start_sym[],    // start indices for different zonal wave numbers, symmetric part
     size_t leg_start_asym[] )  // start indices for different zonal wave numbers, asymmetric part
 {
-    ATLAS_TRACE();
     auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; };
     std::vector<double> legpol( legendre_size( trc ) );
     std::vector<double> zfn( ( trc + 1 ) * ( trc + 1 ) );
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index b878a337c..50f3ee7a6 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -9,6 +9,7 @@
  */
 
 #include "atlas/trans/localopt3/TransLocalopt3.h"
+#include <cstdlib>
 #include <cmath>
 #include "atlas/array.h"
 #include "atlas/option.h"
@@ -25,9 +26,6 @@
 #include "eckit/linalg/Matrix.h"
 #include "eckit/log/Bytes.h"
 #include "eckit/parser/JSON.h"
-#ifdef ECKIT_HAVE_MKL
-#include "mkl.h"
-#endif
 
 namespace atlas {
 namespace trans {
@@ -179,27 +177,13 @@ int num_n( const int truncation, const int m, const bool symmetric ) {
 }
 
 void alloc_aligned( double*& ptr, size_t n ) {
-#warning todo1
-// If we can assume that posix_memalign gives the same result, we would not need to support mkl_malloc
-// We can then remove the include of mkl.h above (simplifying things).
-// As well there is the C++ functions "std::align" (http://en.cppreference.com/w/cpp/memory/align)
-// that we could look into.
-#ifdef ECKIT_HAVE_MKL
-    int al = 64;
-    ptr    = (double*)mkl_malloc( sizeof( double ) * n, al );
-#else
-    posix_memalign( (void**)&ptr, sizeof( double ) * 64, sizeof( double ) * n );
-    //ptr = (double*)malloc( sizeof( double ) * n );
-    //ptr = new double[n];
-#endif
+    const size_t alignment = 64 * sizeof( double );
+    ptr = (double*) aligned_alloc( alignment, sizeof( double ) * n );
 }
 
 void free_aligned( double*& ptr ) {
-#ifdef ECKIT_HAVE_MKL
-    mkl_free( ptr );
-#else
     free( ptr );
-#endif
+    ptr = nullptr;
 }
 
 int add_padding( int n ) {
@@ -242,6 +226,14 @@ int fourier_truncation( const int truncation,    // truncation
 // Class TransLocalopt3
 // --------------------------------------------------------------------------------------------------------------------
 
+const eckit::linalg::LinearAlgebra& linear_algebra_backend() {
+    if( eckit::linalg::LinearAlgebra::hasBackend("mkl") ) {
+        return eckit::linalg::LinearAlgebra::getBackend("mkl");
+    }
+    // Default backend
+    return eckit::linalg::LinearAlgebra::backend();
+}
+
 TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long truncation,
                                 const eckit::Configuration& config ) :
     grid_( grid ),
@@ -251,13 +243,10 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
     legendre_cache_( cache.legendre().data() ),
     legendre_cachesize_( cache.legendre().size() ),
     fft_cache_( cache.fft().data() ),
-    fft_cachesize_( cache.fft().size() ) {
-    ATLAS_TRACE( "Precompute legendre opt3" );
-#ifdef ECKIT_HAVE_MKL
-    eckit::linalg::LinearAlgebra::backend( "mkl" );  // might want to choose backend with this command
-#else
-    eckit::linalg::LinearAlgebra::backend( "generic" );  // might want to choose backend with this command
-#endif
+    fft_cachesize_( cache.fft().size() ),
+    linalg_( linear_algebra_backend() )
+{
+    ATLAS_TRACE( "TransLocalOpt3 constructor" );
     double fft_threshold = 0.0;  // fraction of latitudes of the full grid down to which FFT is used.
     // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine
     // on which this code is running!
@@ -439,7 +428,6 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
 
         // precomputations for Legendre polynomials:
         {
-            ATLAS_TRACE( "opt3 precomp Legendre" );
             int size_sym  = 0;
             int size_asym = 0;
             legendre_sym_begin_.resize( truncation_ + 3 );
@@ -461,15 +449,18 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
                 // TODO: check this is all aligned...
             }
             else {
-                alloc_aligned( legendre_sym_, size_sym );
-                alloc_aligned( legendre_asym_, size_asym );
+                ATLAS_TRACE_SCOPE( "Legendre precomputations (structured)" ) {
 
-                compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_,
-                                                  legendre_asym_, legendre_sym_begin_.data(),
-                                                  legendre_asym_begin_.data() );
+                    alloc_aligned( legendre_sym_, size_sym );
+                    alloc_aligned( legendre_asym_, size_asym );
+
+                    compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_,
+                                                      legendre_asym_, legendre_sym_begin_.data(),
+                                                      legendre_asym_begin_.data() );
+                }
                 std::string file_path = TransParameters( config ).write_legendre();
                 if ( file_path.size() ) {
-                    ATLAS_TRACE( "write_legendre" );
+                    ATLAS_TRACE( "Write LegendreCache to file" );
                     Log::debug() << "Writing Legendre cache file ..." << std::endl;
                     Log::debug() << "    path      = " << file_path << std::endl;
                     WriteCache legendre( file_path );
@@ -484,7 +475,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
         if ( useFFT_ ) {
 #if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2
             {
-                ATLAS_TRACE( "opt3 precomp FFTW" );
+                ATLAS_TRACE( "Fourier precomputations (FFTW)" );
                 int num_complex = ( nlonsMaxGlobal_ / 2 ) + 1;
                 fft_in_         = fftw_alloc_complex( nlats * num_complex );
                 fft_out_        = fftw_alloc_real( nlats * nlonsMaxGlobal_ );
@@ -553,7 +544,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
             alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlonsMax );
 #if !TRANSLOCAL_DGEMM2
             {
-                ATLAS_TRACE( "opt3 precomp Fourier tp" );
+                ATLAS_TRACE( "Fourier precomputations (NoFFT)" );
                 int idx = 0;
                 for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
                     double factor = 1.;
@@ -585,17 +576,17 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
     else {
         // unstructured grid
         if ( unstruct_precomp_ ) {
-            ATLAS_TRACE( "opt3 precomp unstructured" );
+            ATLAS_TRACE( "Legendre precomputations (unstructured)" );
             std::vector<double> lats( grid_.size() );
             alloc_aligned( legendre_, legendre_size( truncation_ ) * grid_.size() );
             int j( 0 );
-            for ( PointXY p : grid_.xy() ) {
-                lats[j++] = p.y() * util::Constants::degreesToRadians();
+            for ( PointLonLat p : grid_.lonlat() ) {
+                lats[j++] = p.lat() * util::Constants::degreesToRadians();
             }
             compute_legendre_polynomials_allopt3( truncation_, grid_.size(), lats.data(), legendre_ );
         }
         if ( TransParameters( config ).write_legendre().size() ) {
-            throw eckit::NotImplemented( "Caching for unstructured grids not implemented", Here() );
+            throw eckit::NotImplemented( "Caching for unstructured grids or structured grids with projections not yet implemented", Here() );
         }
     }
 }  // namespace trans
@@ -690,7 +681,7 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat
     {
         Log::debug() << "Legendre dgemm: using " << nlatsLegReduced_ - nlat0_[0] << " latitudes out of "
                      << nlatsGlobal_ / 2 << std::endl;
-        ATLAS_TRACE( "opt3 Legendre dgemm" );
+        ATLAS_TRACE( "Inverse Legendre Transform (GEMM)" );
         for ( int jm = 0; jm <= truncation_; jm++ ) {
             int size_sym  = num_n( truncation_ + 1, jm, true );
             int size_asym = num_n( truncation_ + 1, jm, false );
@@ -746,7 +737,7 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat
                         eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm] + nlat0_[jm] * size_sym,
                                                  size_sym, nlatsLegReduced_ - nlat0_[jm] );
                         eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLegReduced_ - nlat0_[jm] );
-                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                        linalg_.gemm( A, B, C );
                         /*Log::info() << "sym: ";
                         for ( int j = 0; j < size_sym * ( nlatsLegReduced_ - nlat0_[jm] ); j++ ) {
                             Log::info() << legendre_sym_[j + legendre_sym_begin_[jm] + nlat0_[jm] * size_sym] << " ";
@@ -758,7 +749,7 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat
                         eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm] + nlat0_[jm] * size_asym,
                                                  size_asym, nlatsLegReduced_ - nlat0_[jm] );
                         eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLegReduced_ - nlat0_[jm] );
-                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                        linalg_.gemm( A, B, C );
                         /*Log::info() << "asym: ";
                         for ( int j = 0; j < size_asym * ( nlatsLegReduced_ - nlat0_[jm] ); j++ ) {
                             Log::info() << legendre_asym_[j + legendre_asym_begin_[jm] + nlat0_[jm] * size_asym] << " ";
@@ -845,7 +836,7 @@ void TransLocalopt3::invtrans_fourier_regularopt3( const int nlats, const int nl
         {
             int num_complex = ( nlonsMaxGlobal_ / 2 ) + 1;
             {
-                ATLAS_TRACE( "opt3 FFTW regular" );
+                ATLAS_TRACE( "Inverse Fourier Transform (FFTW, RegularGrid)" );
                 for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                     int idx = 0;
                     for ( int jlat = 0; jlat < nlats; jlat++ ) {
@@ -879,11 +870,11 @@ void TransLocalopt3::invtrans_fourier_regularopt3( const int nlats, const int nl
 #if !TRANSLOCAL_DGEMM2
         // dgemm-method 1
         {
-            ATLAS_TRACE( "opt3 Fourier dgemm method 1" );
+            ATLAS_TRACE( "Inverse Fourier Transform (NoFFT)" );
             eckit::linalg::Matrix A( fourier_, nlons, ( truncation_ + 1 ) * 2 );
             eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats );
             eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats );
-            eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+            linalg_.gemm( A, B, C );
         }
 #else
         // dgemm-method 2
@@ -896,7 +887,7 @@ void TransLocalopt3::invtrans_fourier_regularopt3( const int nlats, const int nl
             eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 );
             eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlons );
             eckit::linalg::Matrix C( gp_opt3, nb_fields * nlats, nlons );
-            eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+            linalg_.gemm( A, B, C );
         }
 
         // Transposition in grid point space:
@@ -929,7 +920,7 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid::
 #if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2
         {
             {
-                ATLAS_TRACE( "opt3 FFTW reduced" );
+                ATLAS_TRACE( "Inverse Fourier Transform (FFTW, ReducedGid)" );
                 int jgp = 0;
                 for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                     for ( int jlat = 0; jlat < nlats; jlat++ ) {
@@ -984,8 +975,8 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const
                                                     double gp_fields[], const eckit::Configuration& config ) const {
     ATLAS_TRACE( "invtrans_uv unstructured opt3" );
     grid::UnstructuredGrid gu = grid_;
-    int nlats                 = grid_.size();
-    int size_fourier          = nb_fields * 2;
+    const int nlats                 = grid_.size();
+    const int size_fourier          = nb_fields * 2;
     double* legendre;
     double* scl_fourier;
     double* scl_fourier_tp;
@@ -997,32 +988,32 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const
     alloc_aligned( gp_opt, nb_fields );
 
     {
-        ATLAS_TRACE( "opt3 Legendre dgemm" );
+        ATLAS_TRACE( "Inverse Legendre Transform (GEMM)" );
         for ( int jm = 0; jm < truncation; jm++ ) {
-            int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
+            const int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
             eckit::linalg::Matrix A( eckit::linalg::Matrix(
                 const_cast<double*>( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) );
             eckit::linalg::Matrix B( legendre_ + noff * nlats, ns, nlats );
             eckit::linalg::Matrix C( scl_fourier + jm * size_fourier * nlats, nb_fields * 2, nlats );
-            eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+            linalg_.gemm( A, B, C );
         }
     }
 
     // loop over all points:
     {
-        ATLAS_TRACE( "opt3 Fourier dgemm" );
+        ATLAS_TRACE( "Inverse Fourier Transform (NoFFT)" );
 
         for ( int ip = 0; ip < grid_.size(); ip++ ) {
-            PointXY p  = gu.xy( ip );
-            double lon = p.x() * util::Constants::degreesToRadians();
-            double lat = p.y() * util::Constants::degreesToRadians();
+            const PointLonLat p  = gu.lonlat( ip );
+            const double lon = p.lon() * util::Constants::degreesToRadians();
+            const double lat = p.lat() * util::Constants::degreesToRadians();
             {
                 //ATLAS_TRACE( "opt transposition in Fourier" );
                 for ( int jm = 0; jm < truncation; jm++ ) {
                     int idx = nb_fields * 2 * ( ip + nlats * jm );
                     for ( int imag = 0; imag < 2; imag++ ) {
                         for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                            int pos_tp = imag + 2 * ( jm + ( truncation ) * ( jfld ) );
+                            const int pos_tp = imag + 2 * ( jm + ( truncation ) * ( jfld ) );
                             //int pos  = jfld + nb_fields * ( imag + 2 * ( jm ) );
                             scl_fourier_tp[pos_tp] = scl_fourier[idx++];  // = scl_fourier[pos]
                         }
@@ -1046,7 +1037,7 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const
                 eckit::linalg::Matrix A( fouriertp, 1, (truncation)*2 );
                 eckit::linalg::Matrix B( scl_fourier_tp, (truncation)*2, nb_fields );
                 eckit::linalg::Matrix C( gp_opt, 1, nb_fields );
-                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                linalg_.gemm( A, B, C );
                 for ( int j = 0; j < nb_fields; j++ ) {
                     gp_fields[ip + j * grid_.size()] = gp_opt[j];
                 }
@@ -1074,7 +1065,7 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const
 void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_fields, const int nb_vordiv_fields,
                                             const double scalar_spectra[], double gp_fields[],
                                             const eckit::Configuration& config ) const {
-    ATLAS_TRACE( "invtrans_uv unstructured opt3" );
+    ATLAS_TRACE( "invtrans_uv unstructured" );
     grid::UnstructuredGrid gu = grid_;
     double* zfn;
     alloc_aligned( zfn, ( truncation + 1 ) * ( truncation + 1 ) );
@@ -1093,20 +1084,20 @@ void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_f
 
     // loop over all points:
     for ( int ip = 0; ip < grid_.size(); ip++ ) {
-        PointXY p  = gu.xy( ip );
-        double lon = p.x() * util::Constants::degreesToRadians();
-        double lat = p.y() * util::Constants::degreesToRadians();
+        const PointLonLat p  = gu.lonlat( ip );
+        const double lon = p.lon() * util::Constants::degreesToRadians();
+        const double lat = p.lat() * util::Constants::degreesToRadians();
         compute_legendre_polynomials_latopt3( truncation, lat, legendre, zfn );
         // Legendre transform:
         {
             //ATLAS_TRACE( "opt Legendre dgemm" );
             for ( int jm = 0; jm <= truncation; jm++ ) {
-                int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
+                const int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
                 eckit::linalg::Matrix A( eckit::linalg::Matrix(
                     const_cast<double*>( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) );
                 eckit::linalg::Matrix B( legendre + noff, ns, 1 );
                 eckit::linalg::Matrix C( scl_fourier + jm * size_fourier, nb_fields * 2, 1 );
-                eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+                linalg_.gemm( A, B, C );
             }
         }
         {
@@ -1115,7 +1106,7 @@ void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_f
             for ( int jm = 0; jm < truncation + 1; jm++ ) {
                 for ( int imag = 0; imag < 2; imag++ ) {
                     for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                        int pos_tp = imag + 2 * ( jm + ( truncation + 1 ) * ( jfld ) );
+                        const int pos_tp = imag + 2 * ( jm + ( truncation + 1 ) * ( jfld ) );
                         //int pos  = jfld + nb_fields * ( imag + 2 * ( jm ) );
                         scl_fourier_tp[pos_tp] = scl_fourier[idx++];  // = scl_fourier[pos]
                     }
@@ -1136,7 +1127,7 @@ void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_f
             eckit::linalg::Matrix A( fouriertp, 1, ( truncation + 1 ) * 2 );
             eckit::linalg::Matrix B( scl_fourier_tp, ( truncation + 1 ) * 2, nb_fields );
             eckit::linalg::Matrix C( gp_opt, 1, nb_fields );
-            eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
+            linalg_.gemm( A, B, C );
             for ( int j = 0; j < nb_fields; j++ ) {
                 gp_fields[ip + j * grid_.size()] = gp_opt[j];
             }
@@ -1145,7 +1136,7 @@ void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_f
         {
             if ( nb_vordiv_fields > 0 ) {
                 //ATLAS_TRACE( "opt3 u,v from U,V" );
-                double coslat = std::cos( lat );
+                const double coslat = std::cos( lat );
                 for ( int j = 0; j < nb_fields; j++ ) {
                     gp_fields[ip + j * grid_.size()] /= coslat;
                 }
@@ -1182,7 +1173,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
 
         // Transform
         if ( grid::StructuredGrid g = grid_ ) {
-            ATLAS_TRACE( "invtrans_uv structured opt3" );
+            ATLAS_TRACE( "invtrans_uv structured" );
             int nlats            = g.ny();
             int nlons            = g.nxmax();
             int size_fourier_max = nb_fields * 2 * nlats;
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h
index 3826dbc9c..22d77b48b 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.h
+++ b/src/atlas/trans/localopt3/TransLocalopt3.h
@@ -24,6 +24,12 @@
 //-----------------------------------------------------------------------------
 // Forward declarations
 
+namespace eckit {
+namespace linalg {
+class LinearAlgebra;
+}  // namespace linalg
+}  // namespace eckit
+
 namespace atlas {
 class Field;
 class FieldSet;
@@ -179,6 +185,8 @@ class TransLocalopt3 : public trans::TransImpl {
     size_t legendre_cachesize_{0};
     const void* fft_cache_{nullptr};
     size_t fft_cachesize_{0};
+
+    const eckit::linalg::LinearAlgebra& linalg_;
 };
 
 //-----------------------------------------------------------------------------
diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc
index 9dce62122..2a0e442ac 100644
--- a/src/tests/trans/test_trans_localcache.cc
+++ b/src/tests/trans/test_trans_localcache.cc
@@ -106,7 +106,7 @@ CASE( "test_global_grids" ) {
     auto resolutions = { 32, 64 };
     for( int n : resolutions ) {
         int t = n-1;
-        auto cases = { 
+        auto cases = {
             std::make_pair(F(n),t),
             std::make_pair(O(n),t),
             std::make_pair(N(n),t),
@@ -149,7 +149,7 @@ CASE( "test_global_grids" ) {
 CASE( "test_global_grids_with_subdomain" ) {
     int n = 64;
     int t = n-1;
-    auto cases = { 
+    auto cases = {
         std::make_pair(F(n),t),
         std::make_pair(O(n),t),
         std::make_pair(N(n),t),
@@ -193,46 +193,56 @@ CASE( "test_global_grids_with_subdomain" ) {
     }
 }
 
-CASE( "test_regional_grids_nested_in_global" ) {
+CASE( "test_regional_grids nested_in_global" ) {
+    auto cachefile = CacheFile("regional_lonlat.bin");
+    auto truncation = 89;
     Cache cache;
-    {
-        auto truncation = 89;
-
-        ATLAS_TRACE("regional_lonlat");
-
-        auto cachefile = CacheFile("regional_lonlat.bin");
-        StructuredGrid grid_global( 
-            LinearSpacing( {  0., 360.}, 360, false ), 
-            LinearSpacing( {-90.,  90.}, 181, true  )
-        );
-        ASSERT( grid_global.domain().global() );
-        StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) );
-        ATLAS_TRACE_SCOPE("create without cache")
-            Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) );
-        ATLAS_TRACE_SCOPE("create without cache and write")
-            Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) | option::write_legendre( cachefile ) );
-        ATLAS_TRACE_SCOPE("read cache")
-            cache = LegendreCache( cachefile );
-        ATLAS_TRACE_SCOPE("create with cache")
-            Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) );
-    }
-//    {
-//        StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) );
-//        Trans( grid, 89 );
-//    }
+    StructuredGrid grid_global(
+        LinearSpacing( {  0., 360.}, 360, false ),
+        LinearSpacing( {-90.,  90.}, 181, true  )
+    );
+    ASSERT( grid_global.domain().global() );
+    StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) );
+    ATLAS_TRACE_SCOPE("create without cache")
+        Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) );
+    ATLAS_TRACE_SCOPE("create without cache and write")
+        Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) | option::write_legendre( cachefile ) );
+    ATLAS_TRACE_SCOPE("read cache")
+        cache = LegendreCache( cachefile );
+    ATLAS_TRACE_SCOPE("create with cache")
+        Trans( cache, grid, truncation, option::type("local") | option::global_grid( grid_global ) );
 }
 
 CASE( "test_regional_grids not nested" ) {
-   if (false) {
-       StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) );
-       Trans( grid, 89 );
-   } else {
-       Log::warning() << "This test fails if enabled!!! " << Here() << std::endl;
-   }
+    auto cachefile = CacheFile("cache-regional.bin");
+    auto truncation = 89;
+    Cache cache;
+
+    StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) );
+    ATLAS_TRACE_SCOPE("create without cache")
+        Trans( grid, truncation, option::type("local") );
+    ATLAS_TRACE_SCOPE("create without cache and write")
+        Trans( grid, truncation, option::type("local") | option::write_legendre( cachefile ) );
+    ATLAS_TRACE_SCOPE("read cache")
+        cache = LegendreCache( cachefile );
+    ATLAS_TRACE_SCOPE("create with cache")
+        Trans( cache, grid, truncation, option::type("local") );
 }
 
 CASE( "test_regional_grids with projection" ) {
-    Log::warning() << "TODO" << std::endl;
+    auto cachefile = CacheFile("cache-regional.bin");
+    auto truncation = 89;
+    Cache cache;
+
+    Projection projection( util::Config
+       ( "type",      "rotated_lonlat")
+       ("north_pole", std::vector<double>{ 4., 54.} ) );
+
+    StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ), projection );
+    ATLAS_TRACE_SCOPE("create without cache")
+        Trans( grid, truncation, option::type("local") );
+
+    // Note: caching not yet implemented for unstructured and projected grids
 }
 
 }  // namespace test

From b237a8ce8ac83c18df17679a3cec7b73455cbb15 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Fri, 4 May 2018 18:39:20 +0100
Subject: [PATCH 062/123] Introduce trans::LegendreCacheCreator

---
 src/atlas/CMakeLists.txt                      |   4 +
 src/atlas/trans/LegendreCacheCreator.cc       | 148 ++++++++++++++++++
 src/atlas/trans/LegendreCacheCreator.h        | 108 +++++++++++++
 .../localopt3/LegendreCacheCreatorLocal.cc    |  74 +++++++++
 .../localopt3/LegendreCacheCreatorLocal.h     |  42 +++++
 src/tests/trans/test_trans_localcache.cc      |  27 ++++
 6 files changed, 403 insertions(+)
 create mode 100644 src/atlas/trans/LegendreCacheCreator.cc
 create mode 100644 src/atlas/trans/LegendreCacheCreator.h
 create mode 100644 src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc
 create mode 100644 src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h

diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt
index 6a10f2ff1..341cfa21a 100644
--- a/src/atlas/CMakeLists.txt
+++ b/src/atlas/CMakeLists.txt
@@ -321,6 +321,8 @@ trans/Trans.h
 trans/Trans.cc
 trans/VorDivToUV.h
 trans/VorDivToUV.cc
+trans/LegendreCacheCreator.h
+trans/LegendreCacheCreator.cc
 trans/local_noopt/TransLocal.h
 trans/local_noopt/TransLocal.cc
 trans/local_noopt/LegendrePolynomials.h
@@ -357,6 +359,8 @@ trans/localopt3/LegendrePolynomialsopt3.h
 trans/localopt3/LegendrePolynomialsopt3.cc
 trans/localopt3/VorDivToUVLocalopt3.h
 trans/localopt3/VorDivToUVLocalopt3.cc
+trans/localopt3/LegendreCacheCreatorLocal.h
+trans/localopt3/LegendreCacheCreatorLocal.cc
 
 )
 if( ATLAS_HAVE_TRANS )
diff --git a/src/atlas/trans/LegendreCacheCreator.cc b/src/atlas/trans/LegendreCacheCreator.cc
new file mode 100644
index 000000000..9a0932e5c
--- /dev/null
+++ b/src/atlas/trans/LegendreCacheCreator.cc
@@ -0,0 +1,148 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#include "eckit/exception/Exceptions.h"
+#include "eckit/thread/AutoLock.h"
+#include "eckit/thread/Mutex.h"
+
+#include "atlas/grid/Grid.h"
+#include "atlas/library/defines.h"
+#include "atlas/runtime/Log.h"
+#include "atlas/trans/LegendreCacheCreator.h"
+
+// For factory registration only:
+#if ATLAS_HAVE_TRANS
+#define TRANS_DEFAULT "ifs"
+#else
+#define TRANS_DEFAULT "local"
+#endif
+#include "atlas/trans/localopt3/LegendreCacheCreatorLocal.h"
+
+namespace atlas {
+namespace trans {
+
+LegendreCacheCreatorImpl::~LegendreCacheCreatorImpl() {}
+
+namespace {
+
+static eckit::Mutex* local_mutex               = 0;
+static std::map<std::string, LegendreCacheCreatorFactory*>* m = 0;
+static pthread_once_t once                     = PTHREAD_ONCE_INIT;
+
+static void init() {
+    local_mutex = new eckit::Mutex();
+    m           = new std::map<std::string, LegendreCacheCreatorFactory*>();
+}
+
+template <typename T>
+void load_builder() {
+    LegendreCacheCreatorBuilder<T>( "tmp" );
+}
+
+struct force_link {
+    force_link() {
+#if ATLAS_HAVE_TRANS
+        //load_builder<LegendreCacheCreatorIFS>();
+#endif
+        load_builder<LegendreCacheCreatorLocal>();
+    }
+};
+
+LegendreCacheCreatorFactory& factory( const std::string& name ) {
+    std::map<std::string, LegendreCacheCreatorFactory*>::const_iterator j = m->find( name );
+    if ( j == m->end() ) {
+        Log::error() << "No LegendreCacheCreatorFactory for [" << name << "]" << std::endl;
+        Log::error() << "TransFactories are:" << std::endl;
+        for ( j = m->begin(); j != m->end(); ++j )
+            Log::error() << "   " << ( *j ).first << std::endl;
+        throw eckit::SeriousBug( std::string( "No LegendreCacheCreatorFactory called " ) + name );
+    }
+    return *j->second;
+}
+
+}  // namespace
+
+LegendreCacheCreatorFactory::LegendreCacheCreatorFactory( const std::string& name ) : name_( name ) {
+    pthread_once( &once, init );
+
+    eckit::AutoLock<eckit::Mutex> lock( local_mutex );
+
+    ASSERT( m->find( name ) == m->end() );
+    ( *m )[name] = this;
+}
+
+LegendreCacheCreatorFactory::~LegendreCacheCreatorFactory() {
+    eckit::AutoLock<eckit::Mutex> lock( local_mutex );
+    m->erase( name_ );
+}
+
+bool LegendreCacheCreatorFactory::has( const std::string& name ) {
+    pthread_once( &once, init );
+
+    eckit::AutoLock<eckit::Mutex> lock( local_mutex );
+
+    static force_link static_linking;
+
+    return ( m->find( name ) != m->end() );
+}
+
+void LegendreCacheCreatorFactory::list( std::ostream& out ) {
+    pthread_once( &once, init );
+
+    eckit::AutoLock<eckit::Mutex> lock( local_mutex );
+
+    static force_link static_linking;
+
+    const char* sep = "";
+    for ( std::map<std::string, LegendreCacheCreatorFactory*>::const_iterator j = m->begin(); j != m->end(); ++j ) {
+        out << sep << ( *j ).first;
+        sep = ", ";
+    }
+}
+
+LegendreCacheCreator::Implementation* LegendreCacheCreatorFactory::build( const Grid& grid, int truncation,
+                                            const eckit::Configuration& config ) {
+    pthread_once( &once, init );
+
+    eckit::AutoLock<eckit::Mutex> lock( local_mutex );
+
+    static force_link static_linking;
+
+    std::string name = config.getString( "type", TRANS_DEFAULT );
+
+    Log::debug() << "Looking for LegendreCacheCreatorFactory [" << name << "]" << std::endl;
+
+    if ( not config.has( "type" ) and not has( name ) ) {
+        name = std::string( "local" );
+        Log::debug() << "Looking for LegendreCacheCreatorFactory [" << name << "]" << std::endl;
+    }
+
+    return factory( name ).make( grid, truncation, config );
+}
+
+LegendreCacheCreator::LegendreCacheCreator() {}
+
+LegendreCacheCreator::LegendreCacheCreator( Implementation* impl ) : impl_( impl ) {}
+
+LegendreCacheCreator::LegendreCacheCreator( const Grid& grid, int truncation, const eckit::Configuration& config ) :
+    impl_( LegendreCacheCreatorFactory::build( grid, truncation, config ) ) {}
+
+LegendreCacheCreator::LegendreCacheCreator( const LegendreCacheCreator& creator ) : impl_( creator.impl_ ) {}
+
+void LegendreCacheCreator::create( const std::string& path ) const {
+    impl_->create( path );
+}
+
+std::string LegendreCacheCreator::uid() const {
+    return impl_->uid();
+}
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/LegendreCacheCreator.h b/src/atlas/trans/LegendreCacheCreator.h
new file mode 100644
index 000000000..97a21250a
--- /dev/null
+++ b/src/atlas/trans/LegendreCacheCreator.h
@@ -0,0 +1,108 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#pragma once
+
+#include <memory>
+
+#include "eckit/config/Configuration.h"
+#include "eckit/memory/Owned.h"
+#include "eckit/memory/SharedPtr.h"
+
+#include "atlas/util/Config.h"
+
+//-----------------------------------------------------------------------------
+// Forward declarations
+
+namespace atlas {
+class Grid;
+}  // namespace atlas
+
+//-----------------------------------------------------------------------------
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+
+class LegendreCacheCreatorImpl : public eckit::Owned {
+public:
+    virtual ~LegendreCacheCreatorImpl() = 0;
+
+    virtual std::string uid() const = 0;
+
+    virtual void create( const std::string& path ) const = 0;
+};
+
+// ------------------------------------------------------------------
+
+class LegendreCacheCreator {
+public:
+    using Implementation = LegendreCacheCreatorImpl;
+
+private:
+    eckit::SharedPtr<Implementation> impl_;
+
+public:
+    LegendreCacheCreator();
+    LegendreCacheCreator( Implementation* );
+    LegendreCacheCreator( const LegendreCacheCreator& );
+
+    LegendreCacheCreator( const Grid&, int truncation, const eckit::Configuration& = util::NoConfig() );
+
+    const Implementation* get() const { return impl_.get(); }
+    operator bool() const { return impl_.owners(); }
+
+    std::string uid() const;
+    void create( const std::string& path ) const;
+};
+
+//----------------------------------------------------------------------------------------------------------------------
+
+class LegendreCacheCreatorFactory {
+public:
+    /*!
+   * \brief build Trans
+   * \return TransImpl
+   */
+    static LegendreCacheCreatorImpl* build( const Grid&, int truncation, const eckit::Configuration& = util::Config() );
+
+    /*!
+   * \brief list all registered trans implementations
+   */
+    static void list( std::ostream& );
+
+    static bool has( const std::string& name );
+
+private:
+    std::string name_;
+    virtual LegendreCacheCreatorImpl* make( const Grid& gp, int truncation, const eckit::Configuration& ) { return nullptr; }
+
+protected:
+    LegendreCacheCreatorFactory( const std::string& );
+    virtual ~LegendreCacheCreatorFactory();
+};
+
+//----------------------------------------------------------------------------------------------------------------------
+
+template <class T>
+class LegendreCacheCreatorBuilder : public LegendreCacheCreatorFactory {
+    virtual LegendreCacheCreatorImpl* make( const Grid& grid, int truncation, const eckit::Configuration& config ) {
+        return new T( grid, truncation, config );
+    }
+
+public:
+    LegendreCacheCreatorBuilder( const std::string& name ) : LegendreCacheCreatorFactory( name ) {}
+};
+
+//----------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc
new file mode 100644
index 000000000..e67609e8d
--- /dev/null
+++ b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc
@@ -0,0 +1,74 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#include "atlas/trans/localopt3/LegendreCacheCreatorLocal.h"
+#include <string>
+#include <sstream>
+#include "eckit/utils/MD5.h"
+#include "atlas/grid.h"
+#include "atlas/option.h"
+#include "atlas/trans/Trans.h"
+
+namespace atlas {
+namespace trans {
+
+namespace {
+static LegendreCacheCreatorBuilder<LegendreCacheCreatorLocal> builder( "local" );
+}
+
+namespace {
+std::string hash( const Grid& grid ) {
+  eckit::MD5 h;
+  if( grid::StructuredGrid( grid ) && not grid.projection() ) {
+    auto g = grid::StructuredGrid( grid );
+    h.add( g.y().data(), g.y().size() * sizeof(double) );
+  } else {
+    grid.hash( h );
+  }
+  return h.digest();
+}
+}
+
+std::string LegendreCacheCreatorLocal::uid() const {
+  if( unique_identifier_.empty() ) {
+    std::ostringstream stream;
+    stream << "local-T" << truncation_ << "-";
+    if( grid::GaussianGrid( grid_ ) ) {
+      // Same cache for any global Gaussian grid
+      stream << "F" << grid::GaussianGrid( grid_ ).N();
+    } else if( grid::RegularLonLatGrid( grid_ ) ) {
+      // Same cache for any global regular grid
+      auto g = grid::RegularLonLatGrid( grid_ );
+      stream << ( g.shiftedLat() ? "S" : "L" ) << "+x" << g.ny();
+      // The above '+' is a placeholder for any g.nx()
+    } else {
+      // We cannot make more assumptions on reusability for different grids
+      stream << hash( grid_ );
+    }
+    unique_identifier_ = stream.str();
+  }
+  return unique_identifier_;
+}
+
+LegendreCacheCreatorLocal::~LegendreCacheCreatorLocal() {}
+
+LegendreCacheCreatorLocal::LegendreCacheCreatorLocal( const Grid& grid, int truncation, const eckit::Configuration& ) :
+  grid_(grid),
+  truncation_(truncation) {
+}
+
+void LegendreCacheCreatorLocal::create( const std::string& path ) const {
+  Trans( grid_, truncation_, option::type("local") | option::write_legendre( path ) );
+}
+
+
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h
new file mode 100644
index 000000000..d10df945a
--- /dev/null
+++ b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h
@@ -0,0 +1,42 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#pragma once
+
+#include "atlas/trans/LegendreCacheCreator.h"
+#include "atlas/grid/Grid.h"
+
+//-----------------------------------------------------------------------------
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+
+class LegendreCacheCreatorLocal : public trans::LegendreCacheCreatorImpl {
+public:
+    LegendreCacheCreatorLocal( const Grid&, int truncation, const eckit::Configuration& = util::NoConfig() );
+
+    virtual ~LegendreCacheCreatorLocal();
+
+    virtual std::string uid() const override;
+
+    virtual void create(const std::string &path) const override;
+
+private:
+    Grid grid_;
+    int truncation_;
+    mutable std::string unique_identifier_;
+};
+
+// ------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc
index 2a0e442ac..153956766 100644
--- a/src/tests/trans/test_trans_localcache.cc
+++ b/src/tests/trans/test_trans_localcache.cc
@@ -29,6 +29,7 @@
 #include "atlas/parallel/mpi/mpi.h"
 #include "atlas/runtime/Trace.h"
 #include "atlas/trans/Trans.h"
+#include "atlas/trans/LegendreCacheCreator.h"
 #include "atlas/trans/local_noopt/FourierTransforms.h"
 #include "atlas/trans/local_noopt/LegendrePolynomials.h"
 #include "atlas/trans/local_noopt/LegendreTransforms.h"
@@ -70,6 +71,7 @@ struct AtlasTransEnvironment : public AtlasTestEnvironment {
 
 using trans::Trans;
 using trans::LegendreCache;
+using trans::LegendreCacheCreator;
 using trans::Cache;
 using grid::StructuredGrid;
 using grid::GaussianGrid;
@@ -245,6 +247,31 @@ CASE( "test_regional_grids with projection" ) {
     // Note: caching not yet implemented for unstructured and projected grids
 }
 
+
+CASE( "test_regional_grids nested_in_global NEW" ) {
+
+    auto truncation = 89;
+    StructuredGrid grid_global(
+        LinearSpacing( {  0., 360.}, 360, false ),
+        LinearSpacing( {-90.,  90.}, 181, true  )
+    );
+
+    LegendreCacheCreator legendre_cache_creator( grid_global, truncation, option::type("local") );
+    auto cachefile = CacheFile( legendre_cache_creator.uid() );
+    ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) )
+      legendre_cache_creator.create( cachefile );
+
+    Cache cache;
+    ASSERT( grid_global.domain().global() );
+    StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) );
+    ATLAS_TRACE_SCOPE("create without cache")
+        Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) );
+    ATLAS_TRACE_SCOPE("read cache")
+        cache = LegendreCache( cachefile );
+    ATLAS_TRACE_SCOPE("create with cache")
+        Trans( cache, grid, truncation, option::type("local") | option::global_grid( grid_global ) );
+}
+
 }  // namespace test
 }  // namespace atlas
 

From 96f96775f1f8c152a2242012ec788148f629813b Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Tue, 8 May 2018 15:03:54 +0100
Subject: [PATCH 063/123] LegendreCacheCreator also for TransIFS completed

---
 src/atlas/CMakeLists.txt                      |   2 +
 src/atlas/trans/LegendreCacheCreator.cc       |  11 +-
 src/atlas/trans/LegendreCacheCreator.h        |   5 +
 .../trans/ifs/LegendreCacheCreatorIFS.cc      | 111 ++++++++++++++++++
 src/atlas/trans/ifs/LegendreCacheCreatorIFS.h |  46 ++++++++
 .../localopt3/LegendreCacheCreatorLocal.cc    |  34 +++++-
 .../localopt3/LegendreCacheCreatorLocal.h     |   8 +-
 src/tests/trans/test_trans_localcache.cc      |  79 +++++++------
 8 files changed, 249 insertions(+), 47 deletions(-)
 create mode 100644 src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc
 create mode 100644 src/atlas/trans/ifs/LegendreCacheCreatorIFS.h

diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt
index 341cfa21a..119b81c24 100644
--- a/src/atlas/CMakeLists.txt
+++ b/src/atlas/CMakeLists.txt
@@ -365,6 +365,8 @@ trans/localopt3/LegendreCacheCreatorLocal.cc
 )
 if( ATLAS_HAVE_TRANS )
 list( APPEND atlas_numerics_srcs
+  trans/ifs/LegendreCacheCreatorIFS.h
+  trans/ifs/LegendreCacheCreatorIFS.cc
   trans/ifs/TransIFS.h
   trans/ifs/TransIFS.cc
   trans/ifs/TransIFSNodeColumns.h
diff --git a/src/atlas/trans/LegendreCacheCreator.cc b/src/atlas/trans/LegendreCacheCreator.cc
index 9a0932e5c..66e842323 100644
--- a/src/atlas/trans/LegendreCacheCreator.cc
+++ b/src/atlas/trans/LegendreCacheCreator.cc
@@ -20,6 +20,7 @@
 // For factory registration only:
 #if ATLAS_HAVE_TRANS
 #define TRANS_DEFAULT "ifs"
+#include "atlas/trans/ifs/LegendreCacheCreatorIFS.h"
 #else
 #define TRANS_DEFAULT "local"
 #endif
@@ -49,7 +50,7 @@ void load_builder() {
 struct force_link {
     force_link() {
 #if ATLAS_HAVE_TRANS
-        //load_builder<LegendreCacheCreatorIFS>();
+        load_builder<LegendreCacheCreatorIFS>();
 #endif
         load_builder<LegendreCacheCreatorLocal>();
     }
@@ -136,13 +137,17 @@ LegendreCacheCreator::LegendreCacheCreator( const Grid& grid, int truncation, co
 
 LegendreCacheCreator::LegendreCacheCreator( const LegendreCacheCreator& creator ) : impl_( creator.impl_ ) {}
 
-void LegendreCacheCreator::create( const std::string& path ) const {
-    impl_->create( path );
+bool LegendreCacheCreator::supported() const {
+    return impl_->supported();
 }
 
 std::string LegendreCacheCreator::uid() const {
     return impl_->uid();
 }
 
+void LegendreCacheCreator::create( const std::string& path ) const {
+    impl_->create( path );
+}
+
 }  // namespace trans
 }  // namespace atlas
diff --git a/src/atlas/trans/LegendreCacheCreator.h b/src/atlas/trans/LegendreCacheCreator.h
index 97a21250a..e552c5c80 100644
--- a/src/atlas/trans/LegendreCacheCreator.h
+++ b/src/atlas/trans/LegendreCacheCreator.h
@@ -36,6 +36,8 @@ class LegendreCacheCreatorImpl : public eckit::Owned {
 public:
     virtual ~LegendreCacheCreatorImpl() = 0;
 
+    virtual bool supported() const = 0;
+
     virtual std::string uid() const = 0;
 
     virtual void create( const std::string& path ) const = 0;
@@ -60,7 +62,10 @@ class LegendreCacheCreator {
     const Implementation* get() const { return impl_.get(); }
     operator bool() const { return impl_.owners(); }
 
+    bool supported() const;
+
     std::string uid() const;
+
     void create( const std::string& path ) const;
 };
 
diff --git a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc
new file mode 100644
index 000000000..7d7bffa47
--- /dev/null
+++ b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc
@@ -0,0 +1,111 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#include "atlas/trans/ifs/LegendreCacheCreatorIFS.h"
+#include <string>
+#include <sstream>
+#include "eckit/utils/MD5.h"
+#include "atlas/grid.h"
+#include "atlas/option.h"
+#include "atlas/trans/Trans.h"
+
+namespace atlas {
+namespace trans {
+
+namespace {
+static LegendreCacheCreatorBuilder<LegendreCacheCreatorIFS> builder( "ifs" );
+}
+
+namespace {
+
+std::string truncate( const std::string& str ) {
+  const int trunc = std::min(10ul,str.size());
+  return str.substr( 0, trunc );
+}
+
+std::string hash( const Grid& grid ) {
+  eckit::MD5 h;
+  if( grid::StructuredGrid( grid ) && not grid.projection() ) {
+    auto g = grid::StructuredGrid( grid );
+    h.add( g.y().data(), g.y().size() * sizeof(double) );
+  } else {
+    grid.hash( h );
+  }
+  return truncate( h.digest() );
+}
+
+std::string hash( const eckit::Configuration& config ) {
+  eckit::MD5 h;
+
+  // Add options and other unique keys
+  h << "flt" << config.getBool( "flt", false );
+
+  return truncate( h.digest() );
+}
+
+}
+
+std::string LegendreCacheCreatorIFS::uid() const {
+  if( unique_identifier_.empty() ) {
+    std::ostringstream stream;
+    stream << "ifs-T" << truncation_ << "-";
+    if( grid::GaussianGrid( grid_ ) ) {
+      if( grid::RegularGaussianGrid( grid_ ) ) {
+        stream << "RegularGaussianN" << grid::GaussianGrid( grid_ ).N();
+      } else {
+        stream << "ReducedGaussianN" << grid::GaussianGrid( grid_ ).N() << "-PL";
+        stream << hash( grid_ );
+      }
+    } else if( grid::RegularLonLatGrid( grid_ ) ) {
+      auto g = grid::RegularLonLatGrid( grid_ );
+      if( g.standard() || g.shifted() ) {
+          stream << ( g.standard() ? "L" : "S" ) << g.nx() << "x" << g.ny();
+      } else {
+        // We cannot make more assumptions on reusability for different grids
+        stream << "grid-" << hash( grid_ );
+      }
+    } else {
+      // We cannot make more assumptions on reusability for different grids
+      stream << "grid-" << hash( grid_ );
+    }
+    stream << "-OPT" << hash( config_ );
+    unique_identifier_ = stream.str();
+  }
+  return unique_identifier_;
+}
+
+LegendreCacheCreatorIFS::~LegendreCacheCreatorIFS() {}
+
+bool LegendreCacheCreatorIFS::supported() const {
+  if( grid::GaussianGrid( grid_ ) ) {
+    return true;
+  } else if( grid::RegularLonLatGrid( grid_ ) ) {
+    auto g = grid::RegularLonLatGrid( grid_ );
+    if( g.standard() || g.shifted() ) {
+        return true;
+    }
+  }
+  return false;
+}
+
+LegendreCacheCreatorIFS::LegendreCacheCreatorIFS( const Grid& grid, int truncation, const eckit::Configuration& config ) :
+  grid_(grid),
+  truncation_(truncation),
+  config_(config) {
+}
+
+void LegendreCacheCreatorIFS::create( const std::string& path ) const {
+  Trans( grid_, truncation_, config_ | option::type("ifs") | option::write_legendre( path ) );
+}
+
+
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h
new file mode 100644
index 000000000..bc76cf00d
--- /dev/null
+++ b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h
@@ -0,0 +1,46 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#pragma once
+
+#include "atlas/trans/LegendreCacheCreator.h"
+#include "atlas/grid/Grid.h"
+#include "atlas/util/Config.h"
+
+//-----------------------------------------------------------------------------
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+
+class LegendreCacheCreatorIFS : public trans::LegendreCacheCreatorImpl {
+public:
+    LegendreCacheCreatorIFS( const Grid&, int truncation, const eckit::Configuration& = util::NoConfig() );
+
+    virtual ~LegendreCacheCreatorIFS();
+
+    virtual bool supported() const override;
+
+    virtual std::string uid() const override;
+
+    virtual void create(const std::string &path) const override;
+
+private:
+    const Grid grid_;
+    const int truncation_;
+    const util::Config config_;
+    mutable std::string unique_identifier_;
+};
+
+// ------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc
index e67609e8d..14bf740a7 100644
--- a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc
+++ b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc
@@ -24,6 +24,12 @@ static LegendreCacheCreatorBuilder<LegendreCacheCreatorLocal> builder( "local" )
 }
 
 namespace {
+
+std::string truncate( const std::string& str ) {
+  const int trunc = std::min(10ul,str.size());
+  return str.substr( 0, trunc );
+}
+
 std::string hash( const Grid& grid ) {
   eckit::MD5 h;
   if( grid::StructuredGrid( grid ) && not grid.projection() ) {
@@ -32,8 +38,18 @@ std::string hash( const Grid& grid ) {
   } else {
     grid.hash( h );
   }
-  return h.digest();
+  return truncate( h.digest() );
 }
+
+std::string hash( const eckit::Configuration& config ) {
+  eckit::MD5 h;
+
+  // Add options and other unique keys
+  h << "flt" << config.getBool( "flt", false );
+
+  return truncate( h.digest() );
+}
+
 }
 
 std::string LegendreCacheCreatorLocal::uid() const {
@@ -42,7 +58,7 @@ std::string LegendreCacheCreatorLocal::uid() const {
     stream << "local-T" << truncation_ << "-";
     if( grid::GaussianGrid( grid_ ) ) {
       // Same cache for any global Gaussian grid
-      stream << "F" << grid::GaussianGrid( grid_ ).N();
+      stream << "GaussianN" << grid::GaussianGrid( grid_ ).N();
     } else if( grid::RegularLonLatGrid( grid_ ) ) {
       // Same cache for any global regular grid
       auto g = grid::RegularLonLatGrid( grid_ );
@@ -50,8 +66,9 @@ std::string LegendreCacheCreatorLocal::uid() const {
       // The above '+' is a placeholder for any g.nx()
     } else {
       // We cannot make more assumptions on reusability for different grids
-      stream << hash( grid_ );
+      stream << "grid-" << hash( grid_ );
     }
+    stream << "-OPT" << hash( config_ );
     unique_identifier_ = stream.str();
   }
   return unique_identifier_;
@@ -59,13 +76,18 @@ std::string LegendreCacheCreatorLocal::uid() const {
 
 LegendreCacheCreatorLocal::~LegendreCacheCreatorLocal() {}
 
-LegendreCacheCreatorLocal::LegendreCacheCreatorLocal( const Grid& grid, int truncation, const eckit::Configuration& ) :
+LegendreCacheCreatorLocal::LegendreCacheCreatorLocal( const Grid& grid, int truncation, const eckit::Configuration& config ) :
   grid_(grid),
-  truncation_(truncation) {
+  truncation_(truncation),
+  config_(config) {
+}
+
+bool LegendreCacheCreatorLocal::supported() const {
+  return true;
 }
 
 void LegendreCacheCreatorLocal::create( const std::string& path ) const {
-  Trans( grid_, truncation_, option::type("local") | option::write_legendre( path ) );
+  Trans( grid_, truncation_, config_ | option::type("local") | option::write_legendre( path ) );
 }
 
 
diff --git a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h
index d10df945a..8ef10295e 100644
--- a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h
+++ b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h
@@ -12,6 +12,7 @@
 
 #include "atlas/trans/LegendreCacheCreator.h"
 #include "atlas/grid/Grid.h"
+#include "atlas/util/Config.h"
 
 //-----------------------------------------------------------------------------
 
@@ -26,13 +27,16 @@ class LegendreCacheCreatorLocal : public trans::LegendreCacheCreatorImpl {
 
     virtual ~LegendreCacheCreatorLocal();
 
+    virtual bool supported() const override;
+
     virtual std::string uid() const override;
 
     virtual void create(const std::string &path) const override;
 
 private:
-    Grid grid_;
-    int truncation_;
+    const Grid grid_;
+    const int truncation_;
+    const util::Config config_;
     mutable std::string unique_identifier_;
 };
 
diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc
index 153956766..8dd7aaafb 100644
--- a/src/tests/trans/test_trans_localcache.cc
+++ b/src/tests/trans/test_trans_localcache.cc
@@ -118,12 +118,13 @@ CASE( "test_global_grids" ) {
             std::make_pair(Slat(n),t),
         };
 
-        auto F_cachefile = CacheFile("leg_"+F(n)+"-T"+std::to_string(t)+".bin");
-        Trans( Grid(F(n)), t, option::type("local") | option::write_legendre( F_cachefile ) );
+        LegendreCacheCreator F_cache_creator( Grid(F(n)), t, option::type("local") );
+        EXPECT( F_cache_creator.supported() );
+        auto F_cachefile = CacheFile("leg_"+F_cache_creator.uid()+".bin");
+        F_cache_creator.create( F_cachefile );
         Cache F_cache = LegendreCache( F_cachefile );
         auto F_cache_hash = hash(F_cache);
 
-        Cache cache;
         for( auto _case : cases )
         {
             auto gridname   = _case.first;
@@ -131,19 +132,24 @@ CASE( "test_global_grids" ) {
             Log::info() << "Case "+gridname+" T"+std::to_string(truncation) << std::endl;
             ATLAS_TRACE("Case "+gridname+" T"+std::to_string(truncation));
             Grid grid(gridname);
-            auto cachefile = CacheFile("leg_"+gridname+"-T"+std::to_string(truncation)+".bin");
+
+            LegendreCacheCreator cache_creator( grid, truncation, option::type("local") );
+            EXPECT( cache_creator.supported() );
+            auto cachefile = CacheFile("leg_"+cache_creator.uid()+".bin");
+            cache_creator.create( cachefile );
+            if( GaussianGrid(grid) ) {
+                EXPECT( hash(cachefile) == F_cache_hash );
+            }
+
             ATLAS_TRACE_SCOPE("create without cache")
                 Trans( grid, truncation, option::type("local") );
-            ATLAS_TRACE_SCOPE("create without cache and write")
-                Trans( grid, truncation, option::type("local") | option::write_legendre( cachefile ) );
+
+            Cache cache;
             ATLAS_TRACE_SCOPE("read cache")
                 cache = LegendreCache( cachefile );
             ATLAS_TRACE_SCOPE("create with cache")
                 Trans( cache, grid, truncation, option::type("local") );
 
-            if( GaussianGrid(grid) ) {
-                ASSERT( hash(cache) == F_cache_hash );
-            }
         }
     }
 }
@@ -174,8 +180,11 @@ CASE( "test_global_grids_with_subdomain" ) {
 
         Grid global_grid( gridname );
 
-        auto global_cachefile = CacheFile( "leg_"+gridname+"-T"+std::to_string(truncation)+".bin" );
-        Trans( Grid(gridname), truncation, option::type("local") | option::write_legendre( global_cachefile ) );
+        LegendreCacheCreator global_cache_creator( Grid(gridname), truncation, option::type("local") );
+        EXPECT( global_cache_creator.supported() );
+        auto global_cachefile = CacheFile( "leg_" + global_cache_creator.uid() + ".bin" );
+        ATLAS_TRACE_SCOPE( "Creating cache " + std::string( global_cachefile ) )
+            global_cache_creator.create( global_cachefile );
 
         Cache global_cache;
         ATLAS_TRACE_SCOPE("read cache")
@@ -184,13 +193,8 @@ CASE( "test_global_grids_with_subdomain" ) {
 
         for( auto domain : domains ) {
             Grid grid( gridname, domain );
-            auto cachefile = CacheFile("leg_"+gridname+"-T"+std::to_string(truncation)+"-domain.bin");
-            ATLAS_TRACE_SCOPE("create without cache and write")
-                Trans( Grid(gridname), truncation, option::type("local") | option::global_grid(global_grid) | option::write_legendre( cachefile ) );
-            LegendreCache new_cache = LegendreCache(cachefile);
-            ASSERT( hash(new_cache) == global_hash );
             ATLAS_TRACE_SCOPE("create with cache")
-                Trans( global_cache, Grid(gridname), truncation, option::type("local") );
+                Trans( global_cache, grid, truncation, option::type("local") | option::global_grid( global_grid) );
         }
     }
 }
@@ -201,16 +205,25 @@ CASE( "test_regional_grids nested_in_global" ) {
     Cache cache;
     StructuredGrid grid_global(
         LinearSpacing( {  0., 360.}, 360, false ),
-        LinearSpacing( {-90.,  90.}, 181, true  )
+        LinearSpacing( { 90., -90.}, 181, true  )
     );
-    ASSERT( grid_global.domain().global() );
+    EXPECT( grid_global.domain().global() );
+
+    LegendreCacheCreator global_cache_creator( grid_global, truncation, option::type("local") );
+    EXPECT( global_cache_creator.supported() );
+    auto global_cachefile = CacheFile( "leg_" + global_cache_creator.uid() + ".bin" );
+    ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) )
+        global_cache_creator.create( global_cachefile );
+
+
+
     StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) );
+
+
     ATLAS_TRACE_SCOPE("create without cache")
         Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) );
-    ATLAS_TRACE_SCOPE("create without cache and write")
-        Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) | option::write_legendre( cachefile ) );
     ATLAS_TRACE_SCOPE("read cache")
-        cache = LegendreCache( cachefile );
+        cache = LegendreCache( global_cachefile );
     ATLAS_TRACE_SCOPE("create with cache")
         Trans( cache, grid, truncation, option::type("local") | option::global_grid( grid_global ) );
 }
@@ -221,10 +234,14 @@ CASE( "test_regional_grids not nested" ) {
     Cache cache;
 
     StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) );
+
+    LegendreCacheCreator cache_creator( grid, truncation, option::type("local") );
+    EXPECT( cache_creator.supported() );
+    ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) )
+        cache_creator.create( cachefile );
+
     ATLAS_TRACE_SCOPE("create without cache")
         Trans( grid, truncation, option::type("local") );
-    ATLAS_TRACE_SCOPE("create without cache and write")
-        Trans( grid, truncation, option::type("local") | option::write_legendre( cachefile ) );
     ATLAS_TRACE_SCOPE("read cache")
         cache = LegendreCache( cachefile );
     ATLAS_TRACE_SCOPE("create with cache")
@@ -248,28 +265,18 @@ CASE( "test_regional_grids with projection" ) {
 }
 
 
-CASE( "test_regional_grids nested_in_global NEW" ) {
+CASE( "test cache creator" ) {
 
     auto truncation = 89;
     StructuredGrid grid_global(
         LinearSpacing( {  0., 360.}, 360, false ),
-        LinearSpacing( {-90.,  90.}, 181, true  )
+        LinearSpacing( { 90., -90.}, 181, true  )
     );
 
     LegendreCacheCreator legendre_cache_creator( grid_global, truncation, option::type("local") );
     auto cachefile = CacheFile( legendre_cache_creator.uid() );
     ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) )
       legendre_cache_creator.create( cachefile );
-
-    Cache cache;
-    ASSERT( grid_global.domain().global() );
-    StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) );
-    ATLAS_TRACE_SCOPE("create without cache")
-        Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) );
-    ATLAS_TRACE_SCOPE("read cache")
-        cache = LegendreCache( cachefile );
-    ATLAS_TRACE_SCOPE("create with cache")
-        Trans( cache, grid, truncation, option::type("local") | option::global_grid( grid_global ) );
 }
 
 }  // namespace test

From 75e1061c27bfe6d3a1fd4a9fa839d3222fbc9ef9 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Tue, 8 May 2018 16:09:13 +0100
Subject: [PATCH 064/123] Grid cropping made easier

---
 src/atlas/grid/Grid.cc                  |  9 +++++++++
 src/atlas/grid/Grid.h                   |  2 ++
 src/atlas/grid/detail/grid/Grid.cc      | 10 ++++++++++
 src/atlas/grid/detail/grid/Grid.h       |  2 ++
 src/atlas/grid/detail/grid/Structured.h |  1 +
 src/tests/grid/test_grids.cc            | 11 +++++++++++
 6 files changed, 35 insertions(+)

diff --git a/src/atlas/grid/Grid.cc b/src/atlas/grid/Grid.cc
index 889d3d11c..1c6a03b22 100644
--- a/src/atlas/grid/Grid.cc
+++ b/src/atlas/grid/Grid.cc
@@ -36,6 +36,11 @@ Grid::Grid( const std::string& shortname, const Domain& domain ) {
     grid_ = Grid::Implementation::create( shortname, Config( "domain", domain.spec() ) );
 }
 
+Grid::Grid( const Grid& grid, const Grid::Domain& domain ) {
+    ASSERT( grid );
+    grid_ = Grid::Implementation::create( *grid.get(), domain );
+}
+
 Grid::Grid( const Config& p ) {
     grid_ = Grid::Implementation::create( p );
 }
@@ -89,6 +94,10 @@ StructuredGrid::StructuredGrid( const XSpace& xspace, const YSpace& yspace, cons
     Grid( new detail::grid::Structured( xspace, yspace, projection, domain ) ),
     grid_( structured_grid( get() ) ) {}
 
+StructuredGrid::StructuredGrid( const Grid& grid , const Grid::Domain& domain ) :
+    Grid( grid, domain ),
+    grid_( structured_grid( get() ) ) {}
+
 ReducedGaussianGrid::ReducedGaussianGrid( const std::vector<long>& nx, const Domain& domain ) :
     ReducedGaussianGrid::grid_t( detail::grid::reduced_gaussian( nx, domain ) ) {}
 
diff --git a/src/atlas/grid/Grid.h b/src/atlas/grid/Grid.h
index 9ac565825..e8d497878 100644
--- a/src/atlas/grid/Grid.h
+++ b/src/atlas/grid/Grid.h
@@ -80,6 +80,7 @@ class Grid {
     Grid( const Grid& );
     Grid( const Implementation* );
     Grid( const std::string& name, const Domain& = Domain() );
+    Grid( const Grid&, const Domain& );
     Grid( const Config& );
 
     operator bool() const { return grid_; }
@@ -186,6 +187,7 @@ class StructuredGrid : public Grid {
     StructuredGrid( const std::string& name, const Domain& = Domain() );
     StructuredGrid( const Config& );
     StructuredGrid( const XSpace&, const YSpace&, const Projection& = Projection(), const Domain& = Domain() );
+    StructuredGrid( const Grid&, const Domain& );
 
     operator bool() const { return valid(); }
 
diff --git a/src/atlas/grid/detail/grid/Grid.cc b/src/atlas/grid/detail/grid/Grid.cc
index 1903335b9..68e937678 100644
--- a/src/atlas/grid/detail/grid/Grid.cc
+++ b/src/atlas/grid/detail/grid/Grid.cc
@@ -76,6 +76,16 @@ const Grid* Grid::create( const std::string& name, const Grid::Config& config )
     //    return GridBuilder::createNamed(name);
 }
 
+const Grid* Grid::create( const Grid& grid, const Domain& domain) {
+    if( grid.type() == "structured" ) {
+        const Structured& g = dynamic_cast<const Structured&>(grid);
+        return new Structured( g.name(), g.xspace(), g.yspace(), g.projection(), domain );
+    } else {
+        NOTIMP;
+    }
+}
+
+
 Grid::Grid() {
     checkSizeOfPoint();
 }
diff --git a/src/atlas/grid/detail/grid/Grid.h b/src/atlas/grid/detail/grid/Grid.h
index b93ffba73..3b2011b28 100644
--- a/src/atlas/grid/detail/grid/Grid.h
+++ b/src/atlas/grid/detail/grid/Grid.h
@@ -66,6 +66,8 @@ class Grid : public eckit::Owned {
 
     static const Grid* create( const std::string& name, const Config& = Config() );
 
+    static const Grid* create( const Grid&, const Domain& );
+
     /// ctor (default)
     Grid();
 
diff --git a/src/atlas/grid/detail/grid/Structured.h b/src/atlas/grid/detail/grid/Structured.h
index 20ca91e12..13be697a4 100644
--- a/src/atlas/grid/detail/grid/Structured.h
+++ b/src/atlas/grid/detail/grid/Structured.h
@@ -289,6 +289,7 @@ class Structured : public Grid {
 public:
     Structured( const std::string&, XSpace, YSpace, Projection, Domain );
     Structured( XSpace, YSpace, Projection, Domain );
+    Structured( const Structured&, Domain );
 
     virtual ~Structured();
 
diff --git a/src/tests/grid/test_grids.cc b/src/tests/grid/test_grids.cc
index 37f63e750..64f87095d 100644
--- a/src/tests/grid/test_grids.cc
+++ b/src/tests/grid/test_grids.cc
@@ -137,6 +137,17 @@ CASE( "test_reducedgaussian" ) {
     EXPECT( N640.size() == custom.size() );
 }
 
+CASE( "test_cropping previous case" ) {
+    StructuredGrid grid( "N32" );
+    EXPECT( grid.ny() == 64 );
+    EXPECT( grid.size() == 6114 );
+
+    StructuredGrid cropped( grid, RectangularDomain( {-27, 45}, {33, 73} ) );
+    EXPECT( cropped.ny() == 14 );
+    EXPECT( cropped.size() == 267 );
+}
+
+
 //-----------------------------------------------------------------------------
 
 }  // namespace test

From 1d8024bf85ed21684040ca6e009b5ebaeb80f366 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Tue, 8 May 2018 15:29:55 +0100
Subject: [PATCH 065/123] New Trans API for passing Grid and Domain

---
 src/atlas/option/TransOptions.cc            |  4 --
 src/atlas/option/TransOptions.h             |  7 ---
 src/atlas/trans/Trans.cc                    | 20 ++++++-
 src/atlas/trans/Trans.h                     | 35 +++++-------
 src/atlas/trans/ifs/TransIFS.cc             | 12 ++++
 src/atlas/trans/ifs/TransIFS.h              |  6 +-
 src/atlas/trans/local_noopt/TransLocal.cc   |  6 ++
 src/atlas/trans/local_noopt/TransLocal.h    |  6 +-
 src/atlas/trans/localopt/TransLocalopt.cc   |  6 ++
 src/atlas/trans/localopt/TransLocalopt.h    |  6 +-
 src/atlas/trans/localopt2/TransLocalopt2.cc |  6 ++
 src/atlas/trans/localopt2/TransLocalopt2.h  |  7 ++-
 src/atlas/trans/localopt3/TransLocalopt3.cc | 61 ++++++++-------------
 src/atlas/trans/localopt3/TransLocalopt3.h  |  7 ++-
 src/tests/trans/test_trans_localcache.cc    |  8 +--
 src/tests/trans/test_transgeneral.cc        | 11 ++--
 16 files changed, 117 insertions(+), 91 deletions(-)

diff --git a/src/atlas/option/TransOptions.cc b/src/atlas/option/TransOptions.cc
index 82131506f..a80582a78 100644
--- a/src/atlas/option/TransOptions.cc
+++ b/src/atlas/option/TransOptions.cc
@@ -49,10 +49,6 @@ write_legendre::write_legendre( const eckit::PathName& filepath ) {
     set( "write_legendre", filepath );
 }
 
-global_grid::global_grid( const Grid& grid ) {
-    set( "global_grid", grid.spec() );
-}
-
 read_legendre::read_legendre( const eckit::PathName& filepath ) {
     set( "read_legendre", filepath );
 }
diff --git a/src/atlas/option/TransOptions.h b/src/atlas/option/TransOptions.h
index a43e836c8..d9c11593d 100644
--- a/src/atlas/option/TransOptions.h
+++ b/src/atlas/option/TransOptions.h
@@ -87,13 +87,6 @@ class write_legendre : public util::Config {
 
 // ----------------------------------------------------------------------------
 
-class global_grid : public util::Config {
-public:
-    global_grid( const Grid& );
-};
-
-// ----------------------------------------------------------------------------
-
 class read_legendre : public util::Config {
 public:
     read_legendre( const eckit::PathName& );
diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc
index af6b325eb..2bf1a0096 100644
--- a/src/atlas/trans/Trans.cc
+++ b/src/atlas/trans/Trans.cc
@@ -153,8 +153,17 @@ Trans::Implementation* TransFactory::build( const Grid& grid, int truncation, co
     return build( Cache(), grid, truncation, config );
 }
 
+Trans::Implementation* TransFactory::build( const Grid& grid, const Domain& domain, int truncation, const eckit::Configuration& config ) {
+    return build( Cache(), grid, domain, truncation, config );
+}
+
 Trans::Implementation* TransFactory::build( const Cache& cache, const Grid& grid, int truncation,
                                             const eckit::Configuration& config ) {
+    return build( cache, grid, grid.domain(), truncation, config );
+}
+
+Trans::Implementation* TransFactory::build( const Cache& cache, const Grid& grid, const Domain& domain, int truncation,
+                                            const eckit::Configuration& config ) {
     pthread_once( &once, init );
 
     eckit::AutoLock<eckit::Mutex> lock( local_mutex );
@@ -170,9 +179,12 @@ Trans::Implementation* TransFactory::build( const Cache& cache, const Grid& grid
         Log::debug() << "Looking for TransFactory [" << name << "]" << std::endl;
     }
 
-    return factory( name ).make( cache, grid, truncation, config );
+    return factory( name ).make( cache, grid, domain, truncation, config );
 }
 
+
+
+
 Trans::Trans() {}
 
 Trans::Trans( Implementation* impl ) : impl_( impl ) {}
@@ -183,6 +195,9 @@ Trans::Trans( const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Con
 Trans::Trans( const Grid& grid, int truncation, const eckit::Configuration& config ) :
     impl_( TransFactory::build( grid, truncation, config ) ) {}
 
+Trans::Trans( const Grid& grid, const Domain& domain, int truncation, const eckit::Configuration& config ) :
+    impl_( TransFactory::build( grid, domain, truncation, config ) ) {}
+
 Trans::Trans( const Cache& cache, const FunctionSpace& gp, const FunctionSpace& sp,
               const eckit::Configuration& config ) :
     impl_( TransFactory::build( cache, gp, sp, config ) ) {}
@@ -190,6 +205,9 @@ Trans::Trans( const Cache& cache, const FunctionSpace& gp, const FunctionSpace&
 Trans::Trans( const Cache& cache, const Grid& grid, int truncation, const eckit::Configuration& config ) :
     impl_( TransFactory::build( cache, grid, truncation, config ) ) {}
 
+Trans::Trans( const Cache& cache, const Grid& grid, const Domain& domain, int truncation, const eckit::Configuration& config ) :
+    impl_( TransFactory::build( cache, grid, domain, truncation, config ) ) {}
+
 Trans::Trans( const Trans& trans ) : impl_( trans.impl_ ) {}
 
 int Trans::truncation() const {
diff --git a/src/atlas/trans/Trans.h b/src/atlas/trans/Trans.h
index a78cc1e49..d3f690f8e 100644
--- a/src/atlas/trans/Trans.h
+++ b/src/atlas/trans/Trans.h
@@ -30,6 +30,7 @@ class Field;
 class FieldSet;
 class FunctionSpace;
 class Grid;
+class Domain;
 }  // namespace atlas
 
 //-----------------------------------------------------------------------------
@@ -223,10 +224,15 @@ class TransFactory {
                              const eckit::Configuration& = util::Config() );
     static TransImpl* build( const Grid&, int truncation, const eckit::Configuration& = util::Config() );
 
+    static TransImpl* build( const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::Config() );
+
     static TransImpl* build( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp,
                              const eckit::Configuration& = util::Config() );
+
     static TransImpl* build( const Cache&, const Grid&, int truncation, const eckit::Configuration& = util::Config() );
 
+    static TransImpl* build( const Cache&, const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::Config() );
+
     /*!
    * \brief list all registered trans implementations
    */
@@ -236,15 +242,10 @@ class TransFactory {
 
 private:
     std::string name_;
-    virtual TransImpl* make( const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& ) {
+    virtual TransImpl* make( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& ) {
         return nullptr;
     }
-    virtual TransImpl* make( const Grid& gp, int truncation, const eckit::Configuration& ) { return nullptr; }
-    virtual TransImpl* make( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp,
-                             const eckit::Configuration& ) {
-        return nullptr;
-    }
-    virtual TransImpl* make( const Cache&, const Grid& gp, int truncation, const eckit::Configuration& ) {
+    virtual TransImpl* make( const Cache&, const Grid& gp, const Domain&, int truncation, const eckit::Configuration& ) {
         return nullptr;
     }
 
@@ -257,17 +258,11 @@ class TransFactory {
 
 template <class T>
 class TransBuilderFunctionSpace : public TransFactory {
-    virtual TransImpl* make( const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& config ) {
-        return new T( gp, sp, config );
-    }
     virtual TransImpl* make( const Cache& cache, const FunctionSpace& gp, const FunctionSpace& sp,
                              const eckit::Configuration& config ) {
         return new T( cache, gp, sp, config );
     }
-    virtual TransImpl* make( const Grid&, int, const eckit::Configuration& ) {
-        throw eckit::SeriousBug( "This function should not be called", Here() );
-    }
-    virtual TransImpl* make( const Cache&, const Grid&, int, const eckit::Configuration& ) {
+    virtual TransImpl* make( const Cache&, const Grid&, const Domain&, int, const eckit::Configuration& ) {
         throw eckit::SeriousBug( "This function should not be called", Here() );
     }
 
@@ -277,15 +272,9 @@ class TransBuilderFunctionSpace : public TransFactory {
 
 template <class T>
 class TransBuilderGrid : public TransFactory {
-    virtual TransImpl* make( const Grid& grid, int truncation, const eckit::Configuration& config ) {
-        return new T( grid, truncation, config );
-    }
-    virtual TransImpl* make( const Cache& cache, const Grid& grid, int truncation,
+    virtual TransImpl* make( const Cache& cache, const Grid& grid, const Domain& domain, int truncation,
                              const eckit::Configuration& config ) {
-        return new T( cache, grid, truncation, config );
-    }
-    virtual TransImpl* make( const FunctionSpace&, const FunctionSpace&, const eckit::Configuration& ) {
-        throw eckit::SeriousBug( "This function should not be called", Here() );
+        return new T( cache, grid, domain, truncation, config );
     }
     virtual TransImpl* make( const Cache&, const FunctionSpace&, const FunctionSpace&, const eckit::Configuration& ) {
         throw eckit::SeriousBug( "This function should not be called", Here() );
@@ -311,10 +300,12 @@ class Trans {
 
     Trans( const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& = util::NoConfig() );
     Trans( const Grid&, int truncation, const eckit::Configuration& = util::NoConfig() );
+    Trans( const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::NoConfig() );
 
     Trans( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp,
            const eckit::Configuration& = util::NoConfig() );
     Trans( const Cache&, const Grid&, int truncation, const eckit::Configuration& = util::NoConfig() );
+    Trans( const Cache&, const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::NoConfig() );
 
     void hash( eckit::Hash& ) const;
     const Implementation* get() const { return impl_.get(); }
diff --git a/src/atlas/trans/ifs/TransIFS.cc b/src/atlas/trans/ifs/TransIFS.cc
index 0d80fcdf6..7532f237d 100644
--- a/src/atlas/trans/ifs/TransIFS.cc
+++ b/src/atlas/trans/ifs/TransIFS.cc
@@ -637,6 +637,18 @@ TransIFS::TransIFS( const Grid& grid, const long truncation, const eckit::Config
 TransIFS::TransIFS( const Grid& grid, const eckit::Configuration& config ) :
     TransIFS( grid, /*grid-only*/ -1, config ) {}
 
+
+
+TransIFS::TransIFS( const Grid& grid, const Domain& domain, const long truncation, const eckit::Configuration& config ) :
+    TransIFS( Cache(), grid, truncation, config ) {
+    ASSERT( domain.global() );
+}
+
+TransIFS::TransIFS( const Cache& cache, const Grid& grid, const Domain& domain, const long truncation, const eckit::Configuration& config ) :
+    TransIFS( cache, grid, truncation, config ) {
+    ASSERT( domain.global() );
+}
+
 TransIFS::~TransIFS() {}
 
 void TransIFS::ctor( const Grid& grid, long truncation, const eckit::Configuration& config ) {
diff --git a/src/atlas/trans/ifs/TransIFS.h b/src/atlas/trans/ifs/TransIFS.h
index 8d4835180..ee6bf8909 100644
--- a/src/atlas/trans/ifs/TransIFS.h
+++ b/src/atlas/trans/ifs/TransIFS.h
@@ -75,8 +75,10 @@ class TransIFS : public trans::TransImpl {
     typedef struct ::Trans_t Trans_t;
 
 public:
-    TransIFS( const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() );
-    TransIFS( const Cache&, const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransIFS( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransIFS( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransIFS( const Cache&, const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransIFS( const Cache&, const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() );
 
     virtual ~TransIFS();
     operator ::Trans_t*() const { return trans(); }
diff --git a/src/atlas/trans/local_noopt/TransLocal.cc b/src/atlas/trans/local_noopt/TransLocal.cc
index 33947d15f..77d3af78f 100644
--- a/src/atlas/trans/local_noopt/TransLocal.cc
+++ b/src/atlas/trans/local_noopt/TransLocal.cc
@@ -87,6 +87,12 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const long truncat
 TransLocal::TransLocal( const Grid& grid, const long truncation, const eckit::Configuration& config ) :
     TransLocal( Cache(), grid, truncation, config ) {}
 
+TransLocal::TransLocal( const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) :
+    TransLocal( Cache(), grid, truncation, config ) {}
+
+TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) :
+    TransLocal( cache, grid, truncation, config ) {}
+
 // --------------------------------------------------------------------------------------------------------------------
 
 TransLocal::~TransLocal() {}
diff --git a/src/atlas/trans/local_noopt/TransLocal.h b/src/atlas/trans/local_noopt/TransLocal.h
index 29f8984ff..9ad2cb5db 100644
--- a/src/atlas/trans/local_noopt/TransLocal.h
+++ b/src/atlas/trans/local_noopt/TransLocal.h
@@ -44,8 +44,10 @@ namespace trans {
 ///        the grid is global. There are no plans to support this at the moment.
 class TransLocal : public trans::TransImpl {
 public:
-    TransLocal( const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() );
-    TransLocal( const Cache&, const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransLocal( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransLocal( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransLocal( const Cache&, const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransLocal( const Cache&, const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() );
 
     virtual ~TransLocal();
 
diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
index 29ac535c1..9b139fc4f 100644
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ b/src/atlas/trans/localopt/TransLocalopt.cc
@@ -268,6 +268,12 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t
 TransLocalopt::TransLocalopt( const Grid& grid, const long truncation, const eckit::Configuration& config ) :
     TransLocalopt( Cache(), grid, truncation, config ) {}
 
+TransLocalopt::TransLocalopt( const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) :
+    TransLocalopt( Cache(), grid, truncation, config ) {}
+
+TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) :
+    TransLocalopt( cache, grid, truncation, config ) {}
+
 // --------------------------------------------------------------------------------------------------------------------
 
 TransLocalopt::~TransLocalopt() {
diff --git a/src/atlas/trans/localopt/TransLocalopt.h b/src/atlas/trans/localopt/TransLocalopt.h
index d81445f5b..19bbab862 100644
--- a/src/atlas/trans/localopt/TransLocalopt.h
+++ b/src/atlas/trans/localopt/TransLocalopt.h
@@ -48,8 +48,10 @@ namespace trans {
 ///        the grid is global. There are no plans to support this at the moment.
 class TransLocalopt : public trans::TransImpl {
 public:
-    TransLocalopt( const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() );
-    TransLocalopt( const Cache&, const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransLocalopt( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransLocalopt( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransLocalopt( const Cache&, const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransLocalopt( const Cache&, const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() );
 
     virtual ~TransLocalopt();
 
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
index 7a46c7245..073ba312e 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ b/src/atlas/trans/localopt2/TransLocalopt2.cc
@@ -251,6 +251,12 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long
 TransLocalopt2::TransLocalopt2( const Grid& grid, const long truncation, const eckit::Configuration& config ) :
     TransLocalopt2( Cache(), grid, truncation, config ) {}
 
+TransLocalopt2::TransLocalopt2( const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) :
+    TransLocalopt2( Cache(), grid, truncation, config ) {}
+
+TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) :
+    TransLocalopt2( cache, grid, truncation, config ) {}
+
 // --------------------------------------------------------------------------------------------------------------------
 
 TransLocalopt2::~TransLocalopt2() {
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.h b/src/atlas/trans/localopt2/TransLocalopt2.h
index 2bc500ea6..1a2e20801 100644
--- a/src/atlas/trans/localopt2/TransLocalopt2.h
+++ b/src/atlas/trans/localopt2/TransLocalopt2.h
@@ -48,8 +48,11 @@ namespace trans {
 ///        the grid is global. There are no plans to support this at the moment.
 class TransLocalopt2 : public trans::TransImpl {
 public:
-    TransLocalopt2( const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() );
-    TransLocalopt2( const Cache&, const Grid& g, const long truncation,
+    TransLocalopt2( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransLocalopt2( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransLocalopt2( const Cache&, const Grid&, const long truncation,
+                    const eckit::Configuration& = util::NoConfig() );
+    TransLocalopt2( const Cache&, const Grid&, const Domain&, const long truncation,
                     const eckit::Configuration& = util::NoConfig() );
 
     virtual ~TransLocalopt2();
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index 50f3ee7a6..08ecbcb70 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -55,13 +55,6 @@ class TransParameters {
 
     std::string write_fft() const { return config_.getString( "write_fft", "" ); }
 
-    Grid global_grid() const {
-        Grid g;
-        util::Config spec;
-        if ( config_.get( "global_grid", spec ) ) { g = Grid( spec ); }
-        return g;
-    }
-
     bool global() const { return config_.getBool( "global", false ); }
 
     int fft() const {
@@ -234,9 +227,9 @@ const eckit::linalg::LinearAlgebra& linear_algebra_backend() {
     return eckit::linalg::LinearAlgebra::backend();
 }
 
-TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long truncation,
+TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const Domain& domain, const long truncation,
                                 const eckit::Configuration& config ) :
-    grid_( grid ),
+    grid_( grid, domain ),
     truncation_( truncation ),
     precompute_( config.getBool( "precompute", true ) ),
     cache_( cache ),
@@ -286,33 +279,21 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
             nlatsLegDomain_ = nlatsSH_;
         }
 
-
-        gridGlobal_ = TransParameters( config ).global_grid();
-        if ( not gridGlobal_ ) {
-            if ( grid_.domain().global() ) { gridGlobal_ = grid_; }
-            else {
-                /*if ( Grid( grid_.name() ).domain().global() ) {
-                    Log::warning() << Here() << " Deprecated. We should pass a global grid as optional argument"
-                                   << std::endl;
-                    gridGlobal_ = Grid( grid_.name() );
-                }
-                else {*/
-                if ( grid::RegularGrid( grid_ ) ) {
-                    // non-nested regular grid
-                    no_nest         = true;
-                    no_symmetry_    = true;
-                    useFFT_         = false;
-                    nlatsNH_        = nlats;
-                    nlatsSH_        = 0;
-                    nlatsLegDomain_ = nlatsNH_;
-                    gridGlobal_     = grid_;
-                    useGlobalLeg    = false;
-                }
-                else {
-                    NOTIMP;
-                    // non-nested reduced grids are not supported
-                }
-                //}
+        gridGlobal_ = grid;
+        if( not gridGlobal_.domain().global() ) {
+            if ( grid::RegularGrid( grid_ ) ) {
+                // non-nested regular grid
+                no_nest         = true;
+                no_symmetry_    = true;
+                useFFT_         = false;
+                nlatsNH_        = nlats;
+                nlatsSH_        = 0;
+                nlatsLegDomain_ = nlatsNH_;
+                gridGlobal_     = grid_;
+                useGlobalLeg    = false;
+            } else {
+                NOTIMP;
+                // non-nested reduced grids are not supported
             }
         }
 
@@ -594,7 +575,13 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long
 // --------------------------------------------------------------------------------------------------------------------
 
 TransLocalopt3::TransLocalopt3( const Grid& grid, const long truncation, const eckit::Configuration& config ) :
-    TransLocalopt3( Cache(), grid, truncation, config ) {}
+    TransLocalopt3( Cache(), grid, grid.domain(), truncation, config ) {}
+
+TransLocalopt3::TransLocalopt3( const Grid& grid, const Domain& domain, const long truncation, const eckit::Configuration& config ) :
+    TransLocalopt3( Cache(), grid, domain, truncation, config ) {}
+
+TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long truncation, const eckit::Configuration& config ) :
+    TransLocalopt3( cache, grid, grid.domain(), truncation, config ) {}
 
 // --------------------------------------------------------------------------------------------------------------------
 
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h
index 22d77b48b..14d8d4753 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.h
+++ b/src/atlas/trans/localopt3/TransLocalopt3.h
@@ -56,8 +56,11 @@ namespace trans {
 ///        the grid is global. There are no plans to support this at the moment.
 class TransLocalopt3 : public trans::TransImpl {
 public:
-    TransLocalopt3( const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() );
-    TransLocalopt3( const Cache&, const Grid& g, const long truncation,
+    TransLocalopt3( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransLocalopt3( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransLocalopt3( const Cache&, const Grid&, const long truncation,
+                    const eckit::Configuration& = util::NoConfig() );
+    TransLocalopt3( const Cache&, const Grid&, const Domain&, const long truncation,
                     const eckit::Configuration& = util::NoConfig() );
 
     virtual ~TransLocalopt3();
diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc
index 8dd7aaafb..1b31af0eb 100644
--- a/src/tests/trans/test_trans_localcache.cc
+++ b/src/tests/trans/test_trans_localcache.cc
@@ -194,7 +194,7 @@ CASE( "test_global_grids_with_subdomain" ) {
         for( auto domain : domains ) {
             Grid grid( gridname, domain );
             ATLAS_TRACE_SCOPE("create with cache")
-                Trans( global_cache, grid, truncation, option::type("local") | option::global_grid( global_grid) );
+                Trans( global_cache, global_grid, domain, truncation, option::type("local") );
         }
     }
 }
@@ -217,15 +217,15 @@ CASE( "test_regional_grids nested_in_global" ) {
 
 
 
-    StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) );
+    StructuredGrid regional( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) );
 
 
     ATLAS_TRACE_SCOPE("create without cache")
-        Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) );
+        Trans( grid_global, regional.domain(), truncation, option::type("local") );
     ATLAS_TRACE_SCOPE("read cache")
         cache = LegendreCache( global_cachefile );
     ATLAS_TRACE_SCOPE("create with cache")
-        Trans( cache, grid, truncation, option::type("local") | option::global_grid( grid_global ) );
+        Trans( cache, grid_global, regional.domain(), truncation, option::type("local") );
 }
 
 CASE( "test_regional_grids not nested" ) {
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index ff8185943..9c2fb2801 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -972,8 +972,8 @@ CASE( "test_trans_domain" ) {
     Domain testdomain2 = RectangularDomain( {-1., 1.}, {-5., 40.} );
     // Grid: (Adjust the following line if the test takes too long!)
 
-    std::string gridString = "O640";
-    Grid g1( gridString, testdomain1 );
+    Grid global_grid( "O640" );
+    Grid g1( global_grid, testdomain1 );
     //Grid g2( gridString, testdomain2 );
 
     bool fourierTrc1 = true;
@@ -986,16 +986,15 @@ CASE( "test_trans_domain" ) {
     //Log::info() << "rgp1:" << std::endl;
     if ( eckit::PathName( "legcache.bin" ).exists() ) eckit::PathName( "legcache.bin" ).unlink();
     Trace t1( Here(), "translocal1 construction" );
-    trans::Trans transLocal1( g1, trc,
-                              option::type( "local" ) | option::write_legendre( "legcache.bin" ) |
-                                  option::global_grid( Grid( gridString ) ) );
+    trans::Trans transLocal1( global_grid, g1.domain(), trc,
+                              option::type( "local" ) | option::write_legendre( "legcache.bin" ) );
     t1.stop();
     //Log::info() << "rgp2:" << std::endl;
     trans::Cache cache;
     ATLAS_TRACE_SCOPE( "Read cache" ) cache = trans::LegendreCache( "legcache.bin" );
     Trace t2( Here(), "translocal2 construction" );
     //trans::Trans transLocal2( cache, g2, trc,
-    //                          option::type( "local" ) | option::global_grid( Grid( gridString ) ) | option::no_fft() );
+    //                          option::type( "local" ) | option::no_fft() );
     trans::Trans transLocal2( g2, trc, option::type( "local" ) );
     t2.stop();
 

From f0c1b7def5c8d0ad7d8d669d6685e563736a7bd5 Mon Sep 17 00:00:00 2001
From: Pedro Maciel <Pedro.Maciel@ecmwf.int>
Date: Tue, 8 May 2018 19:11:00 +0100
Subject: [PATCH 066/123] MIR-178, MIR-191, MIR-192, MIR-193, MIR-270: support
 caching for "local" transforms

---
 src/atlas/grid/detail/grid/Structured.cc               | 2 ++
 src/atlas/trans/LegendreCacheCreator.cc                | 8 ++++++++
 src/atlas/trans/LegendreCacheCreator.h                 | 9 +++++++++
 src/atlas/trans/Trans.h                                | 2 ++
 src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc         | 7 +++++++
 src/atlas/trans/ifs/LegendreCacheCreatorIFS.h          | 4 ++++
 src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc | 7 +++++++
 src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h  | 4 ++++
 8 files changed, 43 insertions(+)

diff --git a/src/atlas/grid/detail/grid/Structured.cc b/src/atlas/grid/detail/grid/Structured.cc
index 76fbc0a41..a2c6f0dcb 100644
--- a/src/atlas/grid/detail/grid/Structured.cc
+++ b/src/atlas/grid/detail/grid/Structured.cc
@@ -363,6 +363,8 @@ void Structured::crop( const Domain& dom ) {
                 jmax = std::max( j, jmax );
             }
         }
+        ASSERT(jmax >= jmin);
+
         size_t cropped_ny = jmax - jmin + 1;
         std::vector<double> cropped_y( y_.begin() + jmin, y_.begin() + jmin + cropped_ny );
         std::vector<double> cropped_dx( dx_.begin() + jmin, dx_.begin() + jmin + cropped_ny );
diff --git a/src/atlas/trans/LegendreCacheCreator.cc b/src/atlas/trans/LegendreCacheCreator.cc
index 66e842323..6d65c76c1 100644
--- a/src/atlas/trans/LegendreCacheCreator.cc
+++ b/src/atlas/trans/LegendreCacheCreator.cc
@@ -149,5 +149,13 @@ void LegendreCacheCreator::create( const std::string& path ) const {
     impl_->create( path );
 }
 
+Cache LegendreCacheCreator::create() const {
+    return impl_->create();
+}
+
+size_t LegendreCacheCreator::estimate() const {
+    return impl_->estimate();
+}
+
 }  // namespace trans
 }  // namespace atlas
diff --git a/src/atlas/trans/LegendreCacheCreator.h b/src/atlas/trans/LegendreCacheCreator.h
index e552c5c80..db0f71aa8 100644
--- a/src/atlas/trans/LegendreCacheCreator.h
+++ b/src/atlas/trans/LegendreCacheCreator.h
@@ -17,6 +17,7 @@
 #include "eckit/memory/SharedPtr.h"
 
 #include "atlas/util/Config.h"
+#include "atlas/trans/Trans.h"
 
 //-----------------------------------------------------------------------------
 // Forward declarations
@@ -41,6 +42,10 @@ class LegendreCacheCreatorImpl : public eckit::Owned {
     virtual std::string uid() const = 0;
 
     virtual void create( const std::string& path ) const = 0;
+
+    virtual Cache create() const = 0;
+
+    virtual size_t estimate() const = 0;
 };
 
 // ------------------------------------------------------------------
@@ -67,6 +72,10 @@ class LegendreCacheCreator {
     std::string uid() const;
 
     void create( const std::string& path ) const;
+
+    Cache create() const;
+
+    size_t estimate() const;
 };
 
 //----------------------------------------------------------------------------------------------------------------------
diff --git a/src/atlas/trans/Trans.h b/src/atlas/trans/Trans.h
index d3f690f8e..20cb8ae65 100644
--- a/src/atlas/trans/Trans.h
+++ b/src/atlas/trans/Trans.h
@@ -96,6 +96,8 @@ class Cache {
         fft_( fft ) {}
 
 public:
+    operator bool() const { return bool(legendre()) ; }
+
     const TransCacheEntry& legendre() const { return *legendre_; }
     const TransCacheEntry& fft() const { return *fft_; }
 
diff --git a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc
index 7d7bffa47..2c9276270 100644
--- a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc
+++ b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc
@@ -105,6 +105,13 @@ void LegendreCacheCreatorIFS::create( const std::string& path ) const {
   Trans( grid_, truncation_, config_ | option::type("ifs") | option::write_legendre( path ) );
 }
 
+Cache LegendreCacheCreatorIFS::create() const {
+    NOTIMP;
+}
+
+size_t LegendreCacheCreatorIFS::estimate() const {
+    return size_t(truncation_ * truncation_ * truncation_) / 2 * sizeof(double);
+}
 
 
 }  // namespace trans
diff --git a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h
index bc76cf00d..e6629ebea 100644
--- a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h
+++ b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h
@@ -33,6 +33,10 @@ class LegendreCacheCreatorIFS : public trans::LegendreCacheCreatorImpl {
 
     virtual void create(const std::string &path) const override;
 
+    virtual Cache create() const override;
+
+    virtual size_t estimate() const override;
+
 private:
     const Grid grid_;
     const int truncation_;
diff --git a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc
index 14bf740a7..c87bb4641 100644
--- a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc
+++ b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc
@@ -90,6 +90,13 @@ void LegendreCacheCreatorLocal::create( const std::string& path ) const {
   Trans( grid_, truncation_, config_ | option::type("local") | option::write_legendre( path ) );
 }
 
+Cache LegendreCacheCreatorLocal::create() const {
+  NOTIMP;
+}
+
+size_t LegendreCacheCreatorLocal::estimate() const {
+    return size_t(truncation_ * truncation_ * truncation_) / 2 * sizeof(double);
+}
 
 
 }  // namespace trans
diff --git a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h
index 8ef10295e..b21237e5b 100644
--- a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h
+++ b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h
@@ -33,6 +33,10 @@ class LegendreCacheCreatorLocal : public trans::LegendreCacheCreatorImpl {
 
     virtual void create(const std::string &path) const override;
 
+    virtual Cache create() const override;
+
+    size_t estimate() const override;
+
 private:
     const Grid grid_;
     const int truncation_;

From 38e02bf162c520204743aeec98774a1bf9ee8f0b Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Wed, 9 May 2018 12:20:26 +0100
Subject: [PATCH 067/123] MIR-178, MIR-191, MIR-192, MIR-193, MIR-270: support
 in memory caching

---
 src/atlas/CMakeLists.txt                      |   2 +
 src/atlas/trans/Cache.cc                      | 132 +++++++++++++++++
 src/atlas/trans/Cache.h                       | 135 ++++++++++++++++++
 src/atlas/trans/Trans.cc                      |  10 ++
 src/atlas/trans/Trans.h                       | 127 +++-------------
 .../trans/ifs/LegendreCacheCreatorIFS.cc      |   3 +-
 .../localopt3/LegendreCacheCreatorLocal.cc    |   8 +-
 src/atlas/trans/localopt3/TransLocalopt3.cc   |  17 ++-
 src/atlas/trans/localopt3/TransLocalopt3.h    |   5 +
 src/tests/trans/test_trans_localcache.cc      |  25 +++-
 10 files changed, 347 insertions(+), 117 deletions(-)
 create mode 100644 src/atlas/trans/Cache.cc
 create mode 100644 src/atlas/trans/Cache.h

diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt
index 119b81c24..2e6384ef0 100644
--- a/src/atlas/CMakeLists.txt
+++ b/src/atlas/CMakeLists.txt
@@ -317,6 +317,8 @@ numerics/fvm/Method.cc
 numerics/fvm/Nabla.h
 numerics/fvm/Nabla.cc
 
+trans/Cache.h
+trans/Cache.cc
 trans/Trans.h
 trans/Trans.cc
 trans/VorDivToUV.h
diff --git a/src/atlas/trans/Cache.cc b/src/atlas/trans/Cache.cc
new file mode 100644
index 000000000..14e3726d1
--- /dev/null
+++ b/src/atlas/trans/Cache.cc
@@ -0,0 +1,132 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#include <cstdlib>
+#include "atlas/trans/Cache.h"
+
+#include "eckit/io/DataHandle.h"
+#include "eckit/exception/Exceptions.h"
+#include "eckit/thread/AutoLock.h"
+#include "eckit/thread/Mutex.h"
+
+#include "atlas/runtime/Trace.h"
+#include "atlas/runtime/Log.h"
+#include "atlas/trans/Trans.h"
+
+namespace {
+static eckit::Mutex* local_mutex               = 0;
+static pthread_once_t once                     = PTHREAD_ONCE_INIT;
+static void init() {
+    local_mutex = new eckit::Mutex();
+}
+}
+
+namespace atlas {
+namespace trans {
+
+TransCacheFileEntry::TransCacheFileEntry(const eckit::PathName& path) : buffer_( path.size() ) {
+  ATLAS_TRACE();
+  Log::debug() << "Loading cache from file " << path << std::endl;
+  std::unique_ptr<eckit::DataHandle> dh( path.fileHandle() );
+  dh->openForRead();
+  dh->read( buffer_.data(), buffer_.size() );
+  dh->close();
+}
+
+TransCacheMemoryEntry::TransCacheMemoryEntry(const void* data, size_t size) : data_(data), size_(size) {
+  ASSERT(data_);
+  ASSERT(size_);
+}
+
+LegendreFFTCache::LegendreFFTCache( const void* legendre_address, size_t legendre_size, const void* fft_address, size_t fft_size ) :
+  Cache( std::make_shared<TransCacheMemoryEntry>( legendre_address, legendre_size ),
+         std::make_shared<TransCacheMemoryEntry>( fft_address, fft_size ) ) {
+}
+
+LegendreFFTCache::LegendreFFTCache( const eckit::PathName& legendre_path, const eckit::PathName& fft_path ) :
+  Cache( std::shared_ptr<TransCacheEntry>( new TransCacheFileEntry( legendre_path ) ),
+         std::shared_ptr<TransCacheEntry>( new TransCacheFileEntry( fft_path ) ) ) {
+}
+
+LegendreCache::LegendreCache( const eckit::PathName& path ) :
+  Cache( std::shared_ptr<TransCacheEntry>( new TransCacheFileEntry( path ) ) ) {
+}
+
+LegendreCache::LegendreCache( size_t size) :
+  Cache( std::make_shared<TransCacheOwnedMemoryEntry>( size ) ) {
+}
+
+LegendreCache::LegendreCache( const void* address, size_t size ) :
+  Cache( std::make_shared<TransCacheMemoryEntry>( address, size ) ) {
+}
+
+Cache::Cache(const std::shared_ptr<TransCacheEntry>& legendre) :
+  legendre_( legendre ),
+  fft_( new EmptyCacheEntry() ) {}
+
+Cache::Cache(const std::shared_ptr<TransCacheEntry>& legendre, const std::shared_ptr<TransCacheEntry>& fft) :
+  legendre_( legendre ),
+  fft_( fft ) {}
+
+Cache::Cache( const TransImpl* trans ) :
+  trans_( trans ),
+  legendre_( new EmptyCacheEntry() ),
+  fft_( new EmptyCacheEntry() ) {
+  if( trans_ )
+    trans_->attach();
+}
+
+Cache::Cache() :
+  legendre_( new EmptyCacheEntry() ),
+  fft_( new EmptyCacheEntry() ) {}
+
+Cache::Cache(const Cache& other) :
+  trans_( other.trans_ ),
+  legendre_( other.legendre_ ),
+  fft_( other.fft_ ) {
+  if( trans_ )
+    trans_->attach();
+}
+
+Cache::operator bool() const {
+  return trans_ || bool(legendre()) ;
+}
+
+Cache::~Cache() {
+  pthread_once( &once, init );
+  eckit::AutoLock<eckit::Mutex> lock( local_mutex );
+  if( trans_ ) {
+    trans_->detach();
+    if( trans_->owners() == 0 ) {
+      delete trans_;
+    }
+    trans_ = nullptr;
+  }
+}
+
+TransCache::TransCache( const Trans& trans ) :
+  Cache( trans.get() ) {
+}
+
+TransCacheOwnedMemoryEntry::TransCacheOwnedMemoryEntry(size_t size) :
+  size_(size) {
+  if( size_ ) {
+    data_ = std::malloc( size_ );
+  }
+}
+
+TransCacheOwnedMemoryEntry::~TransCacheOwnedMemoryEntry() {
+  if( size_ ) {
+    std::free( data_ );
+  }
+}
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/Cache.h b/src/atlas/trans/Cache.h
new file mode 100644
index 000000000..e362d6c34
--- /dev/null
+++ b/src/atlas/trans/Cache.h
@@ -0,0 +1,135 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#pragma once
+
+#include <memory>
+
+#include "eckit/filesystem/PathName.h"
+#include "eckit/io/Buffer.h"
+
+//-----------------------------------------------------------------------------
+// Forward declarations
+
+namespace atlas {
+class Field;
+class FieldSet;
+class FunctionSpace;
+class Grid;
+class Domain;
+namespace trans {
+class TransImpl;
+class Trans;
+}  // namespace trans
+}  // namespace atlas
+
+//-----------------------------------------------------------------------------
+
+namespace atlas {
+namespace trans {
+
+//-----------------------------------------------------------------------------
+
+class TransCacheEntry {
+public:
+    operator bool() const { return size() != 0; }
+    virtual size_t size() const      = 0;
+    virtual const void* data() const = 0;
+};
+
+//-----------------------------------------------------------------------------
+
+class EmptyCacheEntry final : public TransCacheEntry {
+public:
+    virtual size_t size() const override { return 0; }
+    virtual const void* data() const override { return nullptr; }
+};
+
+//-----------------------------------------------------------------------------
+
+class TransCacheFileEntry final : public TransCacheEntry {
+private:
+  eckit::Buffer buffer_;
+public:
+    TransCacheFileEntry( const eckit::PathName& path );
+    virtual size_t size() const override { return buffer_.size(); }
+    virtual const void* data() const override { return buffer_.data(); }
+};
+
+//-----------------------------------------------------------------------------
+
+class TransCacheMemoryEntry final : public TransCacheEntry {
+public:
+    TransCacheMemoryEntry(const void* data, size_t size);
+    virtual const void* data() const override { return data_; }
+    virtual size_t size() const override { return size_; }
+private:
+    const void* data_;
+    const size_t size_;
+};
+
+//-----------------------------------------------------------------------------
+
+class TransCacheOwnedMemoryEntry final : public TransCacheEntry {
+public:
+    TransCacheOwnedMemoryEntry(size_t size);
+    ~TransCacheOwnedMemoryEntry();
+    virtual const void* data() const override { return data_; }
+    virtual size_t size() const override { return size_; }
+private:
+    void* data_ = nullptr;
+    const size_t size_ = 0;
+};
+
+//-----------------------------------------------------------------------------
+
+class Cache {
+public:
+    Cache();
+    Cache( const Cache& other );
+    operator bool() const;
+    const TransImpl* trans() const { return trans_; }
+    const TransCacheEntry& legendre() const { return *legendre_; }
+    const TransCacheEntry& fft() const { return *fft_; }
+    ~Cache();
+protected:
+    Cache( const std::shared_ptr<TransCacheEntry>& legendre );
+    Cache( const std::shared_ptr<TransCacheEntry>& legendre, const std::shared_ptr<TransCacheEntry>& fft );
+    Cache( const TransImpl* );
+private:
+    const TransImpl*                 trans_ = nullptr;
+    std::shared_ptr<TransCacheEntry> legendre_;
+    std::shared_ptr<TransCacheEntry> fft_;
+};
+
+class TransCache : public Cache {
+public:
+    TransCache( const Trans& );
+};
+
+
+class LegendreCache : public Cache {
+public:
+    LegendreCache( size_t size );
+    LegendreCache( const void* address, size_t size );
+    LegendreCache( const eckit::PathName& path );
+};
+
+class LegendreFFTCache : public Cache {
+public:
+    LegendreFFTCache( const void* legendre_address, size_t legendre_size,
+                      const void* fft_address, size_t fft_size );
+    LegendreFFTCache( const eckit::PathName& legendre_path, const eckit::PathName& fft_path );
+};
+
+//----------------------------------------------------------------------------------------------------------------------
+
+}  // namespace trans
+}  // namespace atlas
diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc
index 2bf1a0096..cb9452d8e 100644
--- a/src/atlas/trans/Trans.cc
+++ b/src/atlas/trans/Trans.cc
@@ -130,6 +130,11 @@ Trans::Implementation* TransFactory::build( const FunctionSpace& gp, const Funct
 
 Trans::Implementation* TransFactory::build( const Cache& cache, const FunctionSpace& gp, const FunctionSpace& sp,
                                             const eckit::Configuration& config ) {
+    if( cache.trans() ) {
+        Log::debug() << "Creating Trans from cache, ignoring any other arguments" << std::endl;
+        return cache.trans();
+    }
+
     pthread_once( &once, init );
 
     eckit::AutoLock<eckit::Mutex> lock( local_mutex );
@@ -164,6 +169,11 @@ Trans::Implementation* TransFactory::build( const Cache& cache, const Grid& grid
 
 Trans::Implementation* TransFactory::build( const Cache& cache, const Grid& grid, const Domain& domain, int truncation,
                                             const eckit::Configuration& config ) {
+    if( cache.trans() ) {
+        Log::debug() << "Creating Trans from cache, ignoring any other arguments" << std::endl;
+        return cache.trans();
+    }
+
     pthread_once( &once, init );
 
     eckit::AutoLock<eckit::Mutex> lock( local_mutex );
diff --git a/src/atlas/trans/Trans.h b/src/atlas/trans/Trans.h
index 20cb8ae65..63a38817b 100644
--- a/src/atlas/trans/Trans.h
+++ b/src/atlas/trans/Trans.h
@@ -10,17 +10,11 @@
 
 #pragma once
 
-#include <memory>
-
-#include "eckit/config/Configuration.h"
-#include "eckit/io/Buffer.h"
-#include "eckit/io/DataHandle.h"
 #include "eckit/memory/Owned.h"
 #include "eckit/memory/SharedPtr.h"
 
 #include "atlas/util/Config.h"
-#include "atlas/runtime/Trace.h"
-#include "atlas/runtime/Log.h"
+#include "atlas/trans/Cache.h"
 
 //-----------------------------------------------------------------------------
 // Forward declarations
@@ -40,95 +34,6 @@ namespace trans {
 
 //-----------------------------------------------------------------------------
 
-class TransCacheEntry {
-public:
-    operator bool() const { return size() != 0; }
-    virtual size_t size() const      = 0;
-    virtual const void* data() const = 0;
-};
-
-class EmptyCacheEntry final : public TransCacheEntry {
-public:
-    virtual size_t size() const override { return 0; }
-    virtual const void* data() const override { return nullptr; }
-};
-
-class TransCacheFileEntry final : public TransCacheEntry {
-    eckit::Buffer buffer_;
-
-public:
-    TransCacheFileEntry( const eckit::PathName& path ) : buffer_( path.size() ) {
-        ATLAS_TRACE();
-        Log::debug() << "Loading cache from file " << path << std::endl;
-        std::unique_ptr<eckit::DataHandle> dh( path.fileHandle() );
-        dh->openForRead();
-        dh->read( buffer_.data(), buffer_.size() );
-        dh->close();
-    }
-    virtual size_t size() const override { return buffer_.size(); }
-    virtual const void* data() const override { return buffer_.data(); }
-};
-
-class TransCacheMemoryEntry final : public TransCacheEntry {
-public:
-    TransCacheMemoryEntry(const void* data, size_t size) : data_(data), size_(size) {
-        ASSERT(data_);
-        ASSERT(size_);
-    }
-    virtual const void* data() const override { return data_; }
-    virtual size_t size() const override { return size_; }
-private:
-    const void* data_;
-    const size_t size_;
-};
-
-//-----------------------------------------------------------------------------
-
-class Cache {
-public:
-    Cache() : legendre_( new EmptyCacheEntry() ), fft_( new EmptyCacheEntry() ) {}
-    Cache( const Cache& other ) = default;
-
-protected:
-    Cache( const std::shared_ptr<TransCacheEntry>& legendre ) : legendre_( legendre ), fft_( new EmptyCacheEntry() ) {}
-    Cache( const std::shared_ptr<TransCacheEntry>& legendre, const std::shared_ptr<TransCacheEntry>& fft ) :
-        legendre_( legendre ),
-        fft_( fft ) {}
-
-public:
-    operator bool() const { return bool(legendre()) ; }
-
-    const TransCacheEntry& legendre() const { return *legendre_; }
-    const TransCacheEntry& fft() const { return *fft_; }
-
-private:
-    std::shared_ptr<TransCacheEntry> legendre_;
-    std::shared_ptr<TransCacheEntry> fft_;
-};
-
-class LegendreCache : public Cache {
-public:
-    LegendreCache(const void* address, size_t size) :
-        Cache( std::make_shared<TransCacheMemoryEntry>( address, size ) ) {
-    }
-    LegendreCache( const eckit::PathName& path ) :
-        Cache( std::shared_ptr<TransCacheEntry>( new TransCacheFileEntry( path ) ) ) {
-    }
-};
-
-class LegendreFFTCache : public Cache {
-public:
-    LegendreFFTCache( const void* legendre_address, size_t legendre_size,
-                      const void* fft_address, size_t fft_size ) :
-        Cache( std::make_shared<TransCacheMemoryEntry>( legendre_address, legendre_size ),
-               std::make_shared<TransCacheMemoryEntry>( fft_address, fft_size ) ) {
-    }
-    LegendreFFTCache( const eckit::PathName& legendre_path, const eckit::PathName& fft_path ) :
-        Cache( std::shared_ptr<TransCacheEntry>( new TransCacheFileEntry( legendre_path ) ),
-               std::shared_ptr<TransCacheEntry>( new TransCacheFileEntry( fft_path ) ) ) {
-    }
-};
-
 class TransImpl : public eckit::Owned {
 public:
     virtual ~TransImpl() = 0;
@@ -217,23 +122,25 @@ class TransImpl : public eckit::Owned {
 // ------------------------------------------------------------------
 
 class TransFactory {
+protected:
+    using Trans_t = const TransImpl;
 public:
     /*!
    * \brief build Trans
    * \return TransImpl
    */
-    static TransImpl* build( const FunctionSpace& gp, const FunctionSpace& sp,
+    static Trans_t* build( const FunctionSpace& gp, const FunctionSpace& sp,
                              const eckit::Configuration& = util::Config() );
-    static TransImpl* build( const Grid&, int truncation, const eckit::Configuration& = util::Config() );
+    static Trans_t* build( const Grid&, int truncation, const eckit::Configuration& = util::Config() );
 
-    static TransImpl* build( const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::Config() );
+    static Trans_t* build( const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::Config() );
 
-    static TransImpl* build( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp,
+    static Trans_t* build( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp,
                              const eckit::Configuration& = util::Config() );
 
-    static TransImpl* build( const Cache&, const Grid&, int truncation, const eckit::Configuration& = util::Config() );
+    static Trans_t* build( const Cache&, const Grid&, int truncation, const eckit::Configuration& = util::Config() );
 
-    static TransImpl* build( const Cache&, const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::Config() );
+    static Trans_t* build( const Cache&, const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::Config() );
 
     /*!
    * \brief list all registered trans implementations
@@ -244,10 +151,10 @@ class TransFactory {
 
 private:
     std::string name_;
-    virtual TransImpl* make( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& ) {
+    virtual Trans_t* make( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& ) {
         return nullptr;
     }
-    virtual TransImpl* make( const Cache&, const Grid& gp, const Domain&, int truncation, const eckit::Configuration& ) {
+    virtual Trans_t* make( const Cache&, const Grid& gp, const Domain&, int truncation, const eckit::Configuration& ) {
         return nullptr;
     }
 
@@ -260,11 +167,11 @@ class TransFactory {
 
 template <class T>
 class TransBuilderFunctionSpace : public TransFactory {
-    virtual TransImpl* make( const Cache& cache, const FunctionSpace& gp, const FunctionSpace& sp,
+    virtual Trans_t* make( const Cache& cache, const FunctionSpace& gp, const FunctionSpace& sp,
                              const eckit::Configuration& config ) {
         return new T( cache, gp, sp, config );
     }
-    virtual TransImpl* make( const Cache&, const Grid&, const Domain&, int, const eckit::Configuration& ) {
+    virtual Trans_t* make( const Cache&, const Grid&, const Domain&, int, const eckit::Configuration& ) {
         throw eckit::SeriousBug( "This function should not be called", Here() );
     }
 
@@ -274,11 +181,11 @@ class TransBuilderFunctionSpace : public TransFactory {
 
 template <class T>
 class TransBuilderGrid : public TransFactory {
-    virtual TransImpl* make( const Cache& cache, const Grid& grid, const Domain& domain, int truncation,
+    virtual Trans_t* make( const Cache& cache, const Grid& grid, const Domain& domain, int truncation,
                              const eckit::Configuration& config ) {
         return new T( cache, grid, domain, truncation, config );
     }
-    virtual TransImpl* make( const Cache&, const FunctionSpace&, const FunctionSpace&, const eckit::Configuration& ) {
+    virtual Trans_t* make( const Cache&, const FunctionSpace&, const FunctionSpace&, const eckit::Configuration& ) {
         throw eckit::SeriousBug( "This function should not be called", Here() );
     }
 
@@ -290,7 +197,7 @@ class TransBuilderGrid : public TransFactory {
 
 class Trans {
 public:
-    using Implementation = TransImpl;
+    using Implementation = const TransImpl;
 
 private:
     eckit::SharedPtr<Implementation> impl_;
@@ -310,7 +217,7 @@ class Trans {
     Trans( const Cache&, const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::NoConfig() );
 
     void hash( eckit::Hash& ) const;
-    const Implementation* get() const { return impl_.get(); }
+    Implementation* get() const { return impl_.get(); }
     operator bool() const { return impl_.owners(); }
 
     int truncation() const;
diff --git a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc
index 2c9276270..7ac8e74b0 100644
--- a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc
+++ b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc
@@ -106,7 +106,8 @@ void LegendreCacheCreatorIFS::create( const std::string& path ) const {
 }
 
 Cache LegendreCacheCreatorIFS::create() const {
-    NOTIMP;
+  return TransCache(
+      Trans( grid_, truncation_, config_ | option::type("ifs") ) );
 }
 
 size_t LegendreCacheCreatorIFS::estimate() const {
diff --git a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc
index c87bb4641..a004348b9 100644
--- a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc
+++ b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc
@@ -15,6 +15,7 @@
 #include "atlas/grid.h"
 #include "atlas/option.h"
 #include "atlas/trans/Trans.h"
+#include "atlas/trans/localopt3/TransLocalopt3.h"
 
 namespace atlas {
 namespace trans {
@@ -87,11 +88,14 @@ bool LegendreCacheCreatorLocal::supported() const {
 }
 
 void LegendreCacheCreatorLocal::create( const std::string& path ) const {
-  Trans( grid_, truncation_, config_ | option::type("local") | option::write_legendre( path ) );
+  Trans tmp( grid_, truncation_, config_ | option::type("local") | option::write_legendre( path ) );
 }
 
 Cache LegendreCacheCreatorLocal::create() const {
-  NOTIMP;
+  util::Config export_legendre("export_legendre",true);
+  Trans tmp( grid_, truncation_, config_ | option::type("local") | export_legendre );
+  auto impl = dynamic_cast<const TransLocalopt3*>( tmp.get() );
+  return impl->export_legendre_;
 }
 
 size_t LegendreCacheCreatorLocal::estimate() const {
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index 08ecbcb70..1effd3584 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -51,6 +51,8 @@ class TransParameters {
 
     std::string write_legendre() const { return config_.getString( "write_legendre", "" ); }
 
+    bool export_legendre() const { return config_.getBool( "export_legendre", false ); }
+
     std::string read_fft() const { return config_.getString( "read_fft", "" ); }
 
     std::string write_fft() const { return config_.getString( "write_fft", "" ); }
@@ -432,8 +434,19 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const Doma
             else {
                 ATLAS_TRACE_SCOPE( "Legendre precomputations (structured)" ) {
 
-                    alloc_aligned( legendre_sym_, size_sym );
-                    alloc_aligned( legendre_asym_, size_asym );
+                    if( TransParameters(config).export_legendre() ) {
+                        ASSERT( not cache_.legendre() );
+                        export_legendre_ = LegendreCache( sizeof(double) * ( size_sym + size_asym ) );
+                        legendre_cachesize_ = export_legendre_.legendre().size();
+                        legendre_cache_ = export_legendre_.legendre().data();
+                        legendre_cache_ = std::malloc( legendre_cachesize_ );
+                        ReadCache legendre( legendre_cache_ );
+                        legendre_sym_  = legendre.read<double>( size_sym );
+                        legendre_asym_ = legendre.read<double>( size_asym );
+                    } else {
+                        alloc_aligned( legendre_sym_, size_sym );
+                        alloc_aligned( legendre_asym_, size_asym );
+                    }
 
                     compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_,
                                                       legendre_asym_, legendre_sym_begin_.data(),
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h
index 14d8d4753..dda875ae5 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.h
+++ b/src/atlas/trans/localopt3/TransLocalopt3.h
@@ -40,6 +40,8 @@ class FieldSet;
 namespace atlas {
 namespace trans {
 
+class LegendreCacheCreatorLocal;
+
 //-----------------------------------------------------------------------------
 
 /// @class TransLocalopt3
@@ -148,6 +150,8 @@ class TransLocalopt3 : public trans::TransImpl {
                       const double scalar_spectra[], double gp_fields[],
                       const eckit::Configuration& = util::NoConfig() ) const;
 
+friend class LegendreCacheCreatorLocal;
+
 private:
     Grid grid_;
     Grid gridGlobal_;
@@ -184,6 +188,7 @@ class TransLocalopt3 : public trans::TransImpl {
 #endif
 
     Cache cache_;
+    Cache export_legendre_;
     const void* legendre_cache_{nullptr};
     size_t legendre_cachesize_{0};
     const void* fft_cache_{nullptr};
diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc
index 1b31af0eb..247200315 100644
--- a/src/tests/trans/test_trans_localcache.cc
+++ b/src/tests/trans/test_trans_localcache.cc
@@ -264,8 +264,7 @@ CASE( "test_regional_grids with projection" ) {
     // Note: caching not yet implemented for unstructured and projected grids
 }
 
-
-CASE( "test cache creator" ) {
+CASE( "test cache creator to file" ) {
 
     auto truncation = 89;
     StructuredGrid grid_global(
@@ -277,6 +276,28 @@ CASE( "test cache creator" ) {
     auto cachefile = CacheFile( legendre_cache_creator.uid() );
     ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) )
       legendre_cache_creator.create( cachefile );
+
+    Cache c = legendre_cache_creator.create();
+    auto trans1 = Trans( c, grid_global, truncation, option::type("local") );
+    auto trans2 = Trans( c, grid_global, truncation, option::type("local") );
+}
+
+CASE( "test cache creator in memory" ) {
+
+    auto truncation = 89;
+    StructuredGrid grid_global(
+        LinearSpacing( {  0., 360.}, 360, false ),
+        LinearSpacing( { 90., -90.}, 181, true  )
+    );
+
+    LegendreCacheCreator legendre_cache_creator( grid_global, truncation, option::type("local") );
+
+    Cache cache;
+    ATLAS_TRACE_SCOPE( "Creating cache in memory" )
+      cache = legendre_cache_creator.create();
+
+    auto trans1 = Trans( cache, grid_global, truncation, option::type("local") );
+    auto trans2 = Trans( cache, grid_global, truncation, option::type("local") );
 }
 
 }  // namespace test

From 389280b0b8522fbf911536e6a536c29de241e7ef Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Wed, 9 May 2018 15:00:24 +0100
Subject: [PATCH 068/123] Removed trans/localopt trans/localopt2

---
 src/atlas/CMakeLists.txt                      |  20 -
 src/atlas/trans/Trans.cc                      |   4 -
 src/atlas/trans/VorDivToUV.cc                 |   4 -
 .../trans/localopt/FourierTransformsopt.cc    |  78 --
 .../trans/localopt/FourierTransformsopt.h     |  38 -
 .../trans/localopt/LegendrePolynomialsopt.cc  | 198 -----
 .../trans/localopt/LegendrePolynomialsopt.h   |  47 --
 .../trans/localopt/LegendreTransformsopt.cc   |  62 --
 .../trans/localopt/LegendreTransformsopt.h    |  36 -
 src/atlas/trans/localopt/TransLocalopt.cc     | 704 -----------------
 src/atlas/trans/localopt/TransLocalopt.h      | 141 ----
 .../trans/localopt/VorDivToUVLocalopt.cc      | 184 -----
 src/atlas/trans/localopt/VorDivToUVLocalopt.h |  67 --
 .../trans/localopt2/FourierTransformsopt2.cc  |  78 --
 .../trans/localopt2/FourierTransformsopt2.h   |  38 -
 .../localopt2/LegendrePolynomialsopt2.cc      | 237 ------
 .../trans/localopt2/LegendrePolynomialsopt2.h |  59 --
 .../trans/localopt2/LegendreTransformsopt2.cc |  62 --
 .../trans/localopt2/LegendreTransformsopt2.h  |  37 -
 src/atlas/trans/localopt2/TransLocalopt2.cc   | 746 ------------------
 src/atlas/trans/localopt2/TransLocalopt2.h    | 143 ----
 .../trans/localopt2/VorDivToUVLocalopt2.cc    | 184 -----
 .../trans/localopt2/VorDivToUVLocalopt2.h     |  67 --
 src/atlas/trans/localopt3/TransLocalopt3.cc   |   1 -
 src/tests/trans/test_trans_localcache.cc      |  26 +-
 25 files changed, 3 insertions(+), 3258 deletions(-)
 delete mode 100644 src/atlas/trans/localopt/FourierTransformsopt.cc
 delete mode 100644 src/atlas/trans/localopt/FourierTransformsopt.h
 delete mode 100644 src/atlas/trans/localopt/LegendrePolynomialsopt.cc
 delete mode 100644 src/atlas/trans/localopt/LegendrePolynomialsopt.h
 delete mode 100644 src/atlas/trans/localopt/LegendreTransformsopt.cc
 delete mode 100644 src/atlas/trans/localopt/LegendreTransformsopt.h
 delete mode 100644 src/atlas/trans/localopt/TransLocalopt.cc
 delete mode 100644 src/atlas/trans/localopt/TransLocalopt.h
 delete mode 100644 src/atlas/trans/localopt/VorDivToUVLocalopt.cc
 delete mode 100644 src/atlas/trans/localopt/VorDivToUVLocalopt.h
 delete mode 100644 src/atlas/trans/localopt2/FourierTransformsopt2.cc
 delete mode 100644 src/atlas/trans/localopt2/FourierTransformsopt2.h
 delete mode 100644 src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
 delete mode 100644 src/atlas/trans/localopt2/LegendrePolynomialsopt2.h
 delete mode 100644 src/atlas/trans/localopt2/LegendreTransformsopt2.cc
 delete mode 100644 src/atlas/trans/localopt2/LegendreTransformsopt2.h
 delete mode 100644 src/atlas/trans/localopt2/TransLocalopt2.cc
 delete mode 100644 src/atlas/trans/localopt2/TransLocalopt2.h
 delete mode 100644 src/atlas/trans/localopt2/VorDivToUVLocalopt2.cc
 delete mode 100644 src/atlas/trans/localopt2/VorDivToUVLocalopt2.h

diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt
index 2e6384ef0..4ed6679f4 100644
--- a/src/atlas/CMakeLists.txt
+++ b/src/atlas/CMakeLists.txt
@@ -335,26 +335,6 @@ trans/local_noopt/FourierTransforms.h
 trans/local_noopt/FourierTransforms.cc
 trans/local_noopt/VorDivToUVLocal.h
 trans/local_noopt/VorDivToUVLocal.cc
-trans/localopt/TransLocalopt.h
-trans/localopt/TransLocalopt.cc
-trans/localopt/LegendrePolynomialsopt.h
-trans/localopt/LegendrePolynomialsopt.cc
-trans/localopt/LegendreTransformsopt.h
-trans/localopt/LegendreTransformsopt.cc
-trans/localopt/FourierTransformsopt.h
-trans/localopt/FourierTransformsopt.cc
-trans/localopt/VorDivToUVLocalopt.h
-trans/localopt/VorDivToUVLocalopt.cc
-trans/localopt2/TransLocalopt2.h
-trans/localopt2/TransLocalopt2.cc
-trans/localopt2/LegendrePolynomialsopt2.h
-trans/localopt2/LegendrePolynomialsopt2.cc
-trans/localopt2/LegendreTransformsopt2.h
-trans/localopt2/LegendreTransformsopt2.cc
-trans/localopt2/FourierTransformsopt2.h
-trans/localopt2/FourierTransformsopt2.cc
-trans/localopt2/VorDivToUVLocalopt2.h
-trans/localopt2/VorDivToUVLocalopt2.cc
 trans/localopt3/TransLocalopt3.h
 trans/localopt3/TransLocalopt3.cc
 trans/localopt3/LegendrePolynomialsopt3.h
diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc
index cb9452d8e..d14660060 100644
--- a/src/atlas/trans/Trans.cc
+++ b/src/atlas/trans/Trans.cc
@@ -28,8 +28,6 @@
 #define TRANS_DEFAULT "local"
 #endif
 #include "atlas/trans/local_noopt/TransLocal.h"
-#include "atlas/trans/localopt/TransLocalopt.h"
-#include "atlas/trans/localopt2/TransLocalopt2.h"
 #include "atlas/trans/localopt3/TransLocalopt3.h" // --> recommended "local"
 
 namespace atlas {
@@ -65,8 +63,6 @@ struct force_link {
         load_builder_grid<TransIFS>();
 #endif
         load_builder_grid<TransLocal>();
-        load_builder_grid<TransLocalopt>();
-        load_builder_grid<TransLocalopt2>();
         load_builder_grid<TransLocalopt3>();
     }
 };
diff --git a/src/atlas/trans/VorDivToUV.cc b/src/atlas/trans/VorDivToUV.cc
index 70feef4d8..aa8fc5781 100644
--- a/src/atlas/trans/VorDivToUV.cc
+++ b/src/atlas/trans/VorDivToUV.cc
@@ -27,8 +27,6 @@
 #define TRANS_DEFAULT "local"
 #endif
 #include "atlas/trans/local_noopt/VorDivToUVLocal.h"
-#include "atlas/trans/localopt/VorDivToUVLocalopt.h"
-#include "atlas/trans/localopt2/VorDivToUVLocalopt2.h"
 #include "atlas/trans/localopt3/VorDivToUVLocalopt3.h"  // --> recommended "local"
 
 namespace atlas {
@@ -58,8 +56,6 @@ struct force_link {
         load_builder<VorDivToUVIFS>();
 #endif
         load_builder<VorDivToUVLocal>();
-        load_builder<VorDivToUVLocalopt>();
-        load_builder<VorDivToUVLocalopt2>();
         load_builder<VorDivToUVLocalopt3>();
     }
 };
diff --git a/src/atlas/trans/localopt/FourierTransformsopt.cc b/src/atlas/trans/localopt/FourierTransformsopt.cc
deleted file mode 100644
index d854f5aaa..000000000
--- a/src/atlas/trans/localopt/FourierTransformsopt.cc
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor
- * does it submit to any jurisdiction.
- */
-
-#include <algorithm>
-#include <cmath>
-#include <iostream>
-
-#include "atlas/trans/localopt/FourierTransformsopt.h"
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-
-void invtrans_fourieropt( const size_t trcFT,
-                          const double lon,         // longitude in radians (in)
-                          const int nb_fields,      // Number of fields
-                          const double rlegReal[],  // associated Legendre functions, size (trc+1)*trc/2 (in)
-                          const double rlegImag[],  // associated Legendre functions, size (trc+1)*trc/2 (in)
-                          double rgp[] )            // gridpoint
-{
-    for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
-        rgp[jfld] = 0.;
-    }
-    // local Fourier transformation:
-    for ( int jm = 0; jm <= trcFT; ++jm ) {
-        const double cos = std::cos( jm * lon );
-        const double sin = std::sin( jm * lon );
-        for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
-            double real = cos * rlegReal[jm * nb_fields + jfld];
-            double imag = sin * rlegImag[jm * nb_fields + jfld];
-            rgp[jfld] += real - imag;
-        }
-    }
-}
-
-int fourier_truncationopt( const int truncation,    // truncation
-                           const int nx,            // number of longitudes
-                           const int nxmax,         // maximum nx
-                           const int ndgl,          // number of latitudes
-                           const double lat,        // latitude in radian
-                           const bool fullgrid ) {  // regular grid
-    int trc     = truncation;
-    int trclin  = ndgl - 1;
-    int trcquad = ndgl * 2 / 3 - 1;
-    if ( truncation >= trclin || fullgrid ) {
-        // linear
-        trc = ( nx - 1 ) / 2;
-    }
-    else if ( truncation >= trcquad ) {
-        // quadratic
-        double weight = 3 * ( trclin - truncation ) / ndgl;
-        double sqcos  = std::pow( std::cos( lat ), 2 );
-
-        trc = ( nx - 1 ) / ( 2 + weight * sqcos );
-    }
-    else {
-        // cubic
-        double sqcos = std::pow( std::cos( lat ), 2 );
-
-        trc = ( nx - 1 ) / ( 2 + sqcos ) - 1;
-    }
-    trc = std::min( truncation, trc );
-    return trc;
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt/FourierTransformsopt.h b/src/atlas/trans/localopt/FourierTransformsopt.h
deleted file mode 100644
index 0ddfdfcdf..000000000
--- a/src/atlas/trans/localopt/FourierTransformsopt.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#pragma once
-
-#include <cstddef>
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-// Routine to compute the local Fourier transformation
-//
-// Author:
-// Andreas Mueller *ECMWF*
-//
-
-void invtrans_fourieropt( const size_t trcFT,
-                       const double lon,         // longitude in radians (in)
-                       const int nb_fields,      // Number of fields
-                       const double rlegReal[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
-                       const double rlegImag[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
-                       double rgp[] );           // gridpoint
-
-int fourier_truncationopt( const int truncation, const int nx, const int nxmax, const int ndgl, const double lat,
-                        const bool fullgrid );
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
deleted file mode 100644
index 9c5562fa2..000000000
--- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor
- * does it submit to any jurisdiction.
- */
-
-#include <cmath>
-#include <limits>
-
-#include "atlas/array.h"
-#include "atlas/parallel/mpi/mpi.h"
-#include "atlas/trans/localopt/LegendrePolynomialsopt.h"
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-
-void compute_legendre_polynomialsopt(
-    const size_t trc,          // truncation (in)
-    const int nlats,           // number of latitudes
-    const double lats[],       // latitudes in radians (in)
-    double leg_sym[],          // values of associated Legendre functions, symmetric part
-    double leg_asym[],         // values of associated Legendre functions, asymmetric part
-    size_t leg_start_sym[],    // start indices for different zonal wave numbers, symmetric part
-    size_t leg_start_asym[] )  // start indices for different zonal wave numbers, asymmetric part
-{
-    auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; };
-    array::ArrayT<double> zfn_( trc + 1, trc + 1 );
-    array::ArrayView<double, 2> zfn = array::make_view<double, 2>( zfn_ );
-    std::vector<double> legpol( legendre_size( trc ) );
-    auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; };
-    int iodd;
-
-    // Compute coefficients for Taylor series in Belousov (19) and (21)
-    // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.)
-    // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1
-    zfn( 0, 0 ) = 2.;
-    for ( int jn = 1; jn <= trc; ++jn ) {
-        double zfnn = zfn( 0, 0 );
-        for ( int jgl = 1; jgl <= jn; ++jgl ) {
-            zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) );
-        }
-        iodd          = jn % 2;
-        zfn( jn, jn ) = zfnn;
-        for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) {
-            double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) );  // new factor numerator
-            double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) );           // new factor denominator
-
-            zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd;
-        }
-    }
-
-    // Loop over latitudes:
-    for ( int jlat = 0; jlat < nlats; ++jlat ) {
-        {
-            //ATLAS_TRACE( "compute Legendre polynomials" );
-            // --------------------
-            // 1. First two columns
-            // --------------------
-            double lat     = lats[jlat];
-            double zdlx1   = ( M_PI_2 - lat );               // theta
-            double zdlx    = std::cos( zdlx1 );              // cos(theta)
-            double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
-
-            legpol[idxmn( 0, 0 )] = 1.;
-            double vsin[trc + 1], vcos[trc + 1];
-            for ( int j = 1; j <= trc; j++ ) {
-                vsin[j] = std::sin( j * zdlx1 );
-            }
-            for ( int j = 1; j <= trc; j++ ) {
-                vcos[j] = std::cos( j * zdlx1 );
-            }
-
-            double zdl1sita = 0.;
-            // if we are less than 1 meter from the pole,
-            if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits<double>::epsilon() ) ) {
-                zdlx    = 1.;
-                zdlsita = 0.;
-            }
-            else {
-                zdl1sita = 1. / zdlsita;
-            }
-
-            // ordinary Legendre polynomials from series expansion
-            // ---------------------------------------------------
-
-            // even N
-            for ( int jn = 2; jn <= trc; jn += 2 ) {
-                double zdlk   = 0.5 * zfn( jn, 0 );
-                double zdlldn = 0.0;
-                double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
-                // represented by only even k
-                for ( int jk = 2; jk <= jn; jk += 2 ) {
-                    // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-                    zdlk = zdlk + zfn( jn, jk ) * vcos[jk];
-                    // normalised associated Legendre polynomial == \overbar{P_n}^1
-                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk];
-                }
-                legpol[idxmn( 0, jn )] = zdlk;
-                legpol[idxmn( 1, jn )] = zdlldn;
-            }
-
-            // odd N
-            for ( int jn = 1; jn <= trc; jn += 2 ) {
-                zfn( jn, 0 )  = 0.;
-                double zdlk   = 0.;
-                double zdlldn = 0.0;
-                double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
-                // represented by only even k
-                for ( int jk = 1; jk <= jn; jk += 2 ) {
-                    // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-                    zdlk = zdlk + zfn( jn, jk ) * vcos[jk];
-                    // normalised associated Legendre polynomial == \overbar{P_n}^1
-                    zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk];
-                }
-                legpol[idxmn( 0, jn )] = zdlk;
-                legpol[idxmn( 1, jn )] = zdlldn;
-            }
-
-            // --------------------------------------------------------------
-            // 2. Diagonal (the terms 0,0 and 1,1 have already been computed)
-            //    Belousov, equation (23)
-            // --------------------------------------------------------------
-
-            double zdls = zdl1sita * std::numeric_limits<double>::min();
-            for ( int jn = 2; jn <= trc; ++jn ) {
-                double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) );
-
-                legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq;
-                if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0;
-            }
-
-            // ---------------------------------------------
-            // 3. General recurrence (Belousov, equation 17)
-            // ---------------------------------------------
-
-            for ( int jn = 3; jn <= trc; ++jn ) {
-                for ( int jm = 2; jm < jn; ++jm ) {
-                    double cn =
-                        ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) );     // numerator of c in Belousov
-                    double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) );  // denominator of c in Belousov
-                    double dn =
-                        ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) );     // numerator of d in Belousov
-                    double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) );  // denominator of d in Belousov
-                    double en = ( ( 2. * jn + 1. ) * ( jn - jm ) );                     // numerator of e in Belousov
-                    double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) );                     // denominator of e in Belousov
-
-                    legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] -
-                                              std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx +
-                                              std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx;
-                }
-            }
-        }
-
-        {
-            //ATLAS_TRACE( "add to global arrays" );
-
-            for ( int jm = 0; jm <= trc; jm++ ) {
-                int is1 = 0, ia1 = 0;
-                for ( int jn = jm; jn <= trc; jn++ ) {
-                    if ( ( jn - jm ) % 2 == 0 ) { is1++; }
-                    else {
-                        ia1++;
-                    }
-                }
-                int is2 = 0, ia2 = 0;
-                // the choice between the following two code lines determines whether
-                // total wavenumbers are summed in an ascending or descending order.
-                // The trans library in IFS uses descending order because it should
-                // be more accurate (higher wavenumbers have smaller contributions).
-                // This also needs to be changed when splitting the spectral data in
-                // TransLocalopt::invtrans_uv!
-                //for ( int jn = jm; jn <= trc; jn++ ) {
-                for ( int jn = trc; jn >= jm; jn-- ) {
-                    if ( ( jn - jm ) % 2 == 0 ) {
-                        int is      = leg_start_sym[jm] + is1 * jlat + is2++;
-                        leg_sym[is] = legpol[idxmn( jm, jn )];
-                    }
-                    else {
-                        int ia       = leg_start_asym[jm] + ia1 * jlat + ia2++;
-                        leg_asym[ia] = legpol[idxmn( jm, jn )];
-                    }
-                }
-            }
-        }
-    }
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.h b/src/atlas/trans/localopt/LegendrePolynomialsopt.h
deleted file mode 100644
index 4a2f004c7..000000000
--- a/src/atlas/trans/localopt/LegendrePolynomialsopt.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#pragma once
-
-#include <cstddef>
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-// Routine to compute the Legendre polynomials in serial according to Belousov
-// (using correction by Swarztrauber)
-//
-// Reference:
-// S.L. Belousov, Tables of normalized associated Legendre Polynomials, Pergamon
-// Press (1962)
-// P.N. Swarztrauber, On computing the points and weights for Gauss-Legendre
-// quadrature,
-//      SIAM J. Sci. Comput. Vol. 24 (3) pp. 945-954 (2002)
-//
-// Author of Fortran version:
-// Mats Hamrud, Philippe Courtier, Nils Wedi *ECMWF*
-//
-// Ported to C++ by:
-// Andreas Mueller *ECMWF*
-//
-void compute_legendre_polynomialsopt(
-    const size_t trc,           // truncation (in)
-    const int nlats,            // number of latitudes
-    const double lats[],        // latitudes in radians (in)
-    double legendre_sym[],      // values of associated Legendre functions, symmetric part
-    double legendre_asym[],     // values of associated Legendre functions, asymmetric part
-    size_t leg_start_sym[],     // start indices for different zonal wave numbers, symmetric part
-    size_t leg_start_asym[] );  // start indices for different zonal wave numbers, asymmetric part
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt/LegendreTransformsopt.cc b/src/atlas/trans/localopt/LegendreTransformsopt.cc
deleted file mode 100644
index 29dd5dad4..000000000
--- a/src/atlas/trans/localopt/LegendreTransformsopt.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#include <cstddef>
-
-#include "atlas/trans/localopt/LegendreTransformsopt.h"
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-
-void invtrans_legendreopt(
-    const size_t trc,       // truncation (in)
-    const size_t trcFT,     // truncation for Fourier transformation (in)
-    const size_t trcLP,     // truncation of Legendre polynomials data legpol. Needs to be >= trc (in)
-    const double legpol[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
-    const int nb_fields,    // number of fields
-    const double spec[],    // spectral data, size (trc+1)*trc (in)
-    double leg_real[],      // values of associated Legendre functions, size (trc+1)*trc/2 (out)
-    double leg_imag[] )     // values of associated Legendre functions, size (trc+1)*trc/2 (out)
-{
-    // Legendre transformation:
-    int k = 0, klp = 0;
-    for ( int jm = 0; jm <= trcFT; ++jm ) {
-        for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
-            leg_real[jm * nb_fields + jfld] = 0.;
-            leg_imag[jm * nb_fields + jfld] = 0.;
-        }
-        for ( int jn = jm; jn <= trcLP; ++jn, ++klp ) {
-            if ( jn <= trc ) {
-                for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
-                    // not completely sure where this factor 2 comes from. One possible
-                    // explanation:
-                    // normalization of trigonometric functions in the spherical harmonics
-                    // integral over square of trig function is 1 for m=0 and 0.5 (?) for
-                    // m>0
-                    leg_real[jm * nb_fields + jfld] += 2. * spec[( 2 * k ) * nb_fields + jfld] * legpol[klp];
-                    leg_imag[jm * nb_fields + jfld] += 2. * spec[( 2 * k + 1 ) * nb_fields + jfld] * legpol[klp];
-                }
-                ++k;
-            }
-        }
-    }
-    // Undo factor 2 for (jm == 0)
-    for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
-        leg_real[jfld] /= 2.;
-        leg_imag[jfld] /= 2.;
-    }
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt/LegendreTransformsopt.h b/src/atlas/trans/localopt/LegendreTransformsopt.h
deleted file mode 100644
index 55dcf0212..000000000
--- a/src/atlas/trans/localopt/LegendreTransformsopt.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#pragma once
-
-#include <cstddef>
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-// Routine to compute the Legendre transformation
-//
-// Author:
-// Andreas Mueller *ECMWF*
-//
-void invtrans_legendreopt( const size_t trc,    // truncation (in)
-                        const size_t trcFT,  // truncation for Fourier transformation (in)
-                        const size_t trcLP,  // truncation of Legendre polynomials data legpol. Needs to be >= trc (in)
-                        const double legpol[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
-                        const int nb_fields,    // number of fields
-                        const double spec[],    // spectral data, size (trc+1)*trc (in)
-                        double leg_real[],      // values of associated Legendre functions, size (trc+1)*trc/2 (out)
-                        double leg_imag[] );    // values of associated Legendre functions, size (trc+1)*trc/2 (out)
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc
deleted file mode 100644
index 9b139fc4f..000000000
--- a/src/atlas/trans/localopt/TransLocalopt.cc
+++ /dev/null
@@ -1,704 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#include "atlas/trans/localopt/TransLocalopt.h"
-#include <math.h>
-#include "atlas/array.h"
-#include "atlas/option.h"
-#include "atlas/parallel/mpi/mpi.h"
-#include "atlas/runtime/ErrorHandling.h"
-#include "atlas/runtime/Log.h"
-#include "atlas/trans/VorDivToUV.h"
-#include "atlas/trans/local_noopt/LegendrePolynomials.h"
-#include "atlas/trans/localopt/FourierTransformsopt.h"
-#include "atlas/trans/localopt/LegendrePolynomialsopt.h"
-#include "atlas/trans/localopt/LegendreTransformsopt.h"
-#include "atlas/util/Constants.h"
-#include "eckit/linalg/LinearAlgebra.h"
-#include "eckit/linalg/Matrix.h"
-#include "eckit/eckit_config.h"
-#ifdef ECKIT_HAVE_MKL
-#include "mkl.h"
-#endif
-
-namespace atlas {
-namespace trans {
-
-namespace {
-static TransBuilderGrid<TransLocalopt> builder( "localopt" );
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-// Helper functions
-// --------------------------------------------------------------------------------------------------------------------
-namespace {  // anonymous
-
-size_t legendre_size( const size_t truncation ) {
-    return ( truncation + 2 ) * ( truncation + 1 ) / 2;
-}
-
-int nlats_northernHemisphere( const int nlats ) {
-    return ceil( nlats / 2. );
-    // using ceil here should make it possible to have odd number of latitudes (with the centre latitude being the equator)
-}
-
-int num_n( const int truncation, const int m, const bool symmetric ) {
-    int len = 0;
-    if ( symmetric ) { len = ( truncation - m + 2 ) / 2; }
-    else {
-        len = ( truncation - m + 1 ) / 2;
-    }
-    return len;
-}
-
-void alloc_aligned( double*& ptr, size_t n ) {
-#ifdef ECKIT_HAVE_MKL
-    int al = 64;
-    ptr    = (double*)mkl_malloc( sizeof( double ) * n, al );
-#else
-    posix_memalign( (void**)&ptr, sizeof( double ) * 64, sizeof( double ) * n );
-    //ptr = (double*)malloc( sizeof( double ) * n );
-    //ptr = new double[n];
-#endif
-}
-
-void free_aligned( double*& ptr ) {
-#ifdef ECKIT_HAVE_MKL
-    mkl_free( ptr );
-#else
-    free( ptr );
-#endif
-}
-
-int add_padding( int n ) {
-    return std::ceil( n / 8. ) * 8;
-}
-}  // namespace
-
-// --------------------------------------------------------------------------------------------------------------------
-// Class TransLocalopt
-// --------------------------------------------------------------------------------------------------------------------
-
-TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long truncation,
-                              const eckit::Configuration& config ) :
-    grid_( grid ),
-    truncation_( truncation ),
-    precompute_( config.getBool( "precompute", true ) ) {
-    ATLAS_TRACE( "Precompute legendre opt" );
-#ifdef ECKIT_HAVE_MKL
-    eckit::linalg::LinearAlgebra::backend( "mkl" );  // might want to choose backend with this command
-#else
-    eckit::linalg::LinearAlgebra::backend( "generic" );  // might want to choose backend with this command
-#endif
-    double fft_threshold = 0.;  // 0.05;  // fraction of latitudes of the full grid up to which FFT is used.
-    // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine
-    // on which this code is running!
-    int nlats     = 0;
-    int nlons     = 0;
-    int neqtr     = 0;
-    useFFT_       = true;
-    dgemmMethod1_ = true;
-    nlatsNH_      = 0;
-    nlatsSH_      = 0;
-    nlatsLeg_     = 0;
-    if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
-        grid::StructuredGrid g( grid_ );
-        nlats = g.ny();
-        nlons = g.nxmax();
-        for ( size_t j = 0; j < nlats; ++j ) {
-            // assumptions: latitudes in g.y(j) are monotone and decreasing
-            // no assumption on whether we have 0, 1 or 2 latitudes at the equator
-            double lat = g.y( j );
-            if ( lat > 0. ) { nlatsNH_++; }
-            if ( lat == 0. ) { neqtr++; }
-            if ( lat < 0. ) { nlatsSH_++; }
-        }
-        if ( neqtr > 0 ) {
-            nlatsNH_++;
-            nlatsSH_++;
-        }
-        if ( nlatsNH_ >= nlatsSH_ ) { nlatsLeg_ = nlatsNH_; }
-        else {
-            nlatsLeg_ = nlatsSH_;
-        }
-        Grid g_global( grid.name() );
-        grid::StructuredGrid gs_global( g_global );
-        nlonsGlobal_  = gs_global.nxmax();
-        jlonMin_      = 0;
-        double lonmin = fmod( g.x( 0, 0 ), 360 );
-        if ( lonmin < 0. ) { lonmin += 360.; }
-        if ( nlons < fft_threshold * nlonsGlobal_ ) { useFFT_ = false; }
-        else {
-            if ( nlons < nlonsGlobal_ ) {
-                // need to use FFT with cropped grid
-                for ( size_t j = 0; j < nlonsGlobal_; ++j ) {
-                    if ( gs_global.x( j, 0 ) == lonmin ) { jlonMin_ = j; }
-                }
-            }
-        }
-        //Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << std::endl;
-    }
-    else {
-        // unstructured grid
-        useFFT_   = false;
-        nlats     = grid_.size();
-        nlons     = grid_.size();
-        nlatsNH_  = nlats;
-        nlatsLeg_ = nlats;
-    }
-    std::vector<double> lats( nlatsLeg_ );
-    std::vector<double> lons( nlons );
-    if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
-        grid::StructuredGrid g( grid_ );
-        if ( nlatsNH_ >= nlatsSH_ ) {
-            for ( size_t j = 0; j < nlatsLeg_; ++j ) {
-                lats[j] = g.y( j ) * util::Constants::degreesToRadians();
-            }
-        }
-        else {
-            for ( size_t j = nlats - 1, idx = 0; idx < nlatsLeg_; --j, ++idx ) {
-                lats[idx] = -g.y( j ) * util::Constants::degreesToRadians();
-            }
-        }
-        for ( size_t j = 0; j < nlons; ++j ) {
-            lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians();
-        }
-    }
-    else {
-        int j( 0 );
-        for ( PointXY p : grid_.xy() ) {
-            lats[j++] = p.y() * util::Constants::degreesToRadians();
-            lons[j++] = p.x() * util::Constants::degreesToRadians();
-        }
-    }
-    // precomputations for Legendre polynomials:
-    {
-        ATLAS_TRACE( "opt precomp Legendre" );
-        int size_sym  = 0;
-        int size_asym = 0;
-        legendre_sym_begin_.resize( truncation_ + 3 );
-        legendre_asym_begin_.resize( truncation_ + 3 );
-        legendre_sym_begin_[0]  = 0;
-        legendre_asym_begin_[0] = 0;
-        for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-            size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ );
-            size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ );
-            legendre_sym_begin_[jm + 1]  = size_sym;
-            legendre_asym_begin_[jm + 1] = size_asym;
-        }
-        alloc_aligned( legendre_sym_, size_sym );
-        alloc_aligned( legendre_asym_, size_asym );
-        FILE* file_leg;
-        file_leg = fopen( "legendre.bin", "r" );
-        if ( file_leg ) {
-            fread( legendre_sym_, sizeof( double ), size_sym, file_leg );
-            fread( legendre_asym_, sizeof( double ), size_asym, file_leg );
-            fclose( file_leg );
-        }
-        else {
-            compute_legendre_polynomialsopt( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_,
-                                             legendre_sym_begin_.data(), legendre_asym_begin_.data() );
-            file_leg = fopen( "legendre.bin", "wb" );
-            fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg );
-            fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg );
-            fclose( file_leg );
-        }
-    }
-
-    // precomputations for Fourier transformations:
-    if ( useFFT_ ) {
-#if ATLAS_HAVE_FFTW
-        {
-            ATLAS_TRACE( "opt precomp FFTW" );
-            int num_complex = ( nlonsGlobal_ / 2 ) + 1;
-            fft_in_         = fftw_alloc_complex( nlats * num_complex );
-            fft_out_        = fftw_alloc_real( nlats * nlonsGlobal_ );
-            plan_ = fftw_plan_many_dft_c2r( 1, &nlonsGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1,
-                                            nlonsGlobal_, FFTW_ESTIMATE );
-        }
-            // other FFT implementations should be added with #elif statements
-#else
-        useFFT_ = false;                                 // no FFT implemented => default to dgemm
-#endif
-    }
-    if ( !useFFT_ ) {
-        alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons );
-        if ( dgemmMethod1_ ) {
-            {
-                ATLAS_TRACE( "opt precomp Fourier" );
-                int idx = 0;
-                for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                    double factor = 1.;
-                    for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                        if ( jm > 0 ) { factor = 2.; }
-                        fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
-                        fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
-                    }
-                }
-            }
-        }
-        else {
-            {
-                ATLAS_TRACE( "opt precomp Fourier tp" );
-                int idx = 0;
-                for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                    double factor = 1.;
-                    if ( jm > 0 ) { factor = 2.; }
-                    for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                        fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
-                    }
-                    for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                        fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
-                    }
-                }
-            }
-        }
-    }
-}  // namespace trans
-
-// --------------------------------------------------------------------------------------------------------------------
-
-TransLocalopt::TransLocalopt( const Grid& grid, const long truncation, const eckit::Configuration& config ) :
-    TransLocalopt( Cache(), grid, truncation, config ) {}
-
-TransLocalopt::TransLocalopt( const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) :
-    TransLocalopt( Cache(), grid, truncation, config ) {}
-
-TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) :
-    TransLocalopt( cache, grid, truncation, config ) {}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-TransLocalopt::~TransLocalopt() {
-    free_aligned( legendre_sym_ );
-    free_aligned( legendre_asym_ );
-    if ( useFFT_ ) {
-#if ATLAS_HAVE_FFTW
-        fftw_destroy_plan( plan_ );
-        fftw_free( fft_in_ );
-        fftw_free( fft_out_ );
-#endif
-    }
-    else {
-        free_aligned( fourier_ );
-    }
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt::invtrans( const Field& spfield, Field& gpfield, const eckit::Configuration& config ) const {
-    NOTIMP;
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt::invtrans( const FieldSet& spfields, FieldSet& gpfields, const eckit::Configuration& config ) const {
-    NOTIMP;
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt::invtrans_grad( const Field& spfield, Field& gradfield, const eckit::Configuration& config ) const {
-    NOTIMP;
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields,
-                                   const eckit::Configuration& config ) const {
-    NOTIMP;
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind,
-                                          const eckit::Configuration& config ) const {
-    NOTIMP;
-}
-
-void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[],
-                              const eckit::Configuration& config ) const {
-    invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields, config );
-}
-
-void gp_transposeopt( const int nb_size, const int nb_fields, const double gp_tmp[], double gp_fields[] ) {
-    for ( int jgp = 0; jgp < nb_size; jgp++ ) {
-        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-            gp_fields[jfld * nb_size + jgp] = gp_tmp[jgp * nb_fields + jfld];
-        }
-    }
-}
-
-//-----------------------------------------------------------------------------
-// Routine to compute the spectral transform by using a localopt Fourier transformation
-// for a grid (same latitude for all longitudes, allows to compute Legendre functions
-// once for all longitudes). U and v components are divided by cos(latitude) for
-// nb_vordiv_fields > 0.
-//
-// Legendre polynomials are computed up to truncation_+1 to be accurate for vorticity and
-// divergence computation. The parameter truncation is the truncation used in storing the
-// spectral data scalar_spectra and can be different from truncation_. If truncation is
-// larger than truncation_+1 the transform will behave as if the spectral data was truncated
-// to truncation_+1.
-//
-// Author:
-// Andreas Mueller *ECMWF*
-//
-void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
-                                 const double scalar_spectra[], double gp_fields[],
-                                 const eckit::Configuration& config ) const {
-    if ( nb_scalar_fields > 0 ) {
-        int nb_fields = nb_scalar_fields;
-
-        // Transform
-        if ( grid::StructuredGrid g = grid_ ) {
-            ATLAS_TRACE( "invtrans_uv structured opt" );
-            int nlats      = g.ny();
-            int nlons      = g.nxmax();
-            auto posMethod = [&]( int jfld, int imag, int jlat, int jm ) {
-                if ( useFFT_ || !dgemmMethod1_ ) {
-                    return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
-                }
-                else {
-                    return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) );
-                };
-            };
-            int size_fourier_max = nb_fields * 2 * nlats;
-            double* scl_fourier;
-            alloc_aligned( scl_fourier, size_fourier_max * ( truncation_ + 1 ) );
-
-            // Legendre transform:
-            {
-                ATLAS_TRACE( "opt Legendre dgemm" );
-                for ( int jm = 0; jm <= truncation_; jm++ ) {
-                    int size_sym  = num_n( truncation_ + 1, jm, true );
-                    int size_asym = num_n( truncation_ + 1, jm, false );
-                    int n_imag    = 2;
-                    if ( jm == 0 ) { n_imag = 1; }
-                    int size_fourier = nb_fields * n_imag * nlatsLeg_;
-                    auto posFourier  = [&]( int jfld, int imag, int jlat, int jm, int nlatsH ) {
-                        return jfld + nb_fields * ( imag + n_imag * ( nlatsLeg_ - nlatsH + jlat ) );
-                    };
-                    double* scalar_sym;
-                    double* scalar_asym;
-                    double* scl_fourier_sym;
-                    double* scl_fourier_asym;
-                    alloc_aligned( scalar_sym, n_imag * nb_fields * size_sym );
-                    alloc_aligned( scalar_asym, n_imag * nb_fields * size_asym );
-                    alloc_aligned( scl_fourier_sym, size_fourier );
-                    alloc_aligned( scl_fourier_asym, size_fourier );
-                    {
-                        //ATLAS_TRACE( "opt Legendre split" );
-                        int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2;
-                        // the choice between the following two code lines determines whether
-                        // total wavenumbers are summed in an ascending or descending order.
-                        // The trans library in IFS uses descending order because it should
-                        // be more accurate (higher wavenumbers have smaller contributions).
-                        // This also needs to be changed when splitting the spectral data in
-                        // compute_legendre_polynomialsopt!
-                        //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) {
-                        for ( int jn = truncation_ + 1; jn >= jm; jn-- ) {
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                    idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) );
-                                    if ( jn <= truncation && jm < truncation ) {
-                                        if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
-                                        else {
-                                            scalar_asym[ia++] = scalar_spectra[idx + ioff];
-                                        }
-                                    }
-                                    else {
-                                        if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = 0.; }
-                                        else {
-                                            scalar_asym[ia++] = 0.;
-                                        }
-                                    }
-                                }
-                            }
-                        }
-                        ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym );
-                    }
-                    {
-                        eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym );
-                        eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsLeg_ );
-                        eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLeg_ );
-                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                    }
-                    if ( size_asym > 0 ) {
-                        eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym );
-                        eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsLeg_ );
-                        eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ );
-                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                    }
-                    {
-                        //ATLAS_TRACE( "opt merge spheres" );
-                        // northern hemisphere:
-                        for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) {
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                    int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ );
-                                    scl_fourier[posMethod( jfld, imag, jlat, jm )] =
-                                        scl_fourier_sym[idx] + scl_fourier_asym[idx];
-                                }
-                            }
-                        }
-                        // southern hemisphere:
-                        for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) {
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                    int idx   = posFourier( jfld, imag, jlat, jm, nlatsSH_ );
-                                    int jslat = nlats - jlat - 1;
-                                    scl_fourier[posMethod( jfld, imag, jslat, jm )] =
-                                        scl_fourier_sym[idx] - scl_fourier_asym[idx];
-                                }
-                            }
-                        }
-                    }
-                    free_aligned( scalar_sym );
-                    free_aligned( scalar_asym );
-                    free_aligned( scl_fourier_sym );
-                    free_aligned( scl_fourier_asym );
-                }
-            }
-            // Fourier transformation:
-            if ( useFFT_ ) {
-#if ATLAS_HAVE_FFTW
-                {
-                    int num_complex = ( nlonsGlobal_ / 2 ) + 1;
-                    {
-                        ATLAS_TRACE( "opt FFTW" );
-                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                            int idx = 0;
-                            for ( int jlat = 0; jlat < nlats; jlat++ ) {
-                                fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0 )];
-                                for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
-                                    for ( int imag = 0; imag < 2; imag++ ) {
-                                        if ( jm <= truncation_ ) {
-                                            fft_in_[idx][imag] = scl_fourier[posMethod( jfld, imag, jlat, jm )];
-                                        }
-                                        else {
-                                            fft_in_[idx][imag] = 0.;
-                                        }
-                                    }
-                                }
-                            }
-                            fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ );
-                            for ( int jlat = 0; jlat < nlats; jlat++ ) {
-                                for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                                    int j = jlon + jlonMin_;
-                                    if ( j >= nlonsGlobal_ ) { j -= nlonsGlobal_; }
-                                    gp_fields[jlon + nlons * ( jlat + nlats * jfld )] =
-                                        fft_out_[j + nlonsGlobal_ * jlat];
-                                }
-                            }
-                        }
-                    }
-                }
-#endif
-            }
-            else {
-                if ( dgemmMethod1_ ) {
-                    // dgemm-method 1
-                    // should be faster for small domains or large truncation
-                    double* gp_opt;
-                    alloc_aligned( gp_opt, nb_fields * grid_.size() );
-                    {
-                        ATLAS_TRACE( "opt Fourier dgemm method 1" );
-                        eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 );
-                        eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlons );
-                        eckit::linalg::Matrix C( gp_opt, nb_fields * nlats, nlons );
-                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                    }
-
-                    // Transposition in grid point space:
-                    {
-                        ATLAS_TRACE( "opt transposition in gp-space" );
-                        int idx = 0;
-                        for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                            for ( int jlat = 0; jlat < nlats; jlat++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                    int pos_tp = jlon + nlons * ( jlat + nlats * ( jfld ) );
-                                    //int pos  = jfld + nb_fields * ( jlat + nlats * ( jlon ) );
-                                    gp_fields[pos_tp] = gp_opt[idx++];  // = gp_opt[pos]
-                                }
-                            }
-                        }
-                    }
-                    free_aligned( gp_opt );
-                }
-                else {
-                    // dgemm-method 2
-                    {
-                        ATLAS_TRACE( "opt Fourier dgemm method 2" );
-                        eckit::linalg::Matrix A( fourier_, nlons, ( truncation_ + 1 ) * 2 );
-                        eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats );
-                        eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats );
-                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                    }
-                }
-            }  // namespace trans
-            // Computing u,v from U,V:
-            {
-                if ( nb_vordiv_fields > 0 ) {
-                    ATLAS_TRACE( "opt u,v from U,V" );
-                    std::vector<double> coslats( nlats );
-                    for ( size_t j = 0; j < nlats; ++j ) {
-                        coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
-                    }
-                    int idx = 0;
-                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                        for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                            for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
-                                gp_fields[idx] /= coslats[jlat];
-                                idx++;
-                            }
-                        }
-                    }
-                }
-            }
-            free_aligned( scl_fourier );
-        }  // namespace atlas
-        else {
-            ATLAS_TRACE( "invtrans_uv unstructured opt" );
-            int idx = 0;
-            for ( PointXY p : grid_.xy() ) {
-                double lon   = p.x() * util::Constants::degreesToRadians();
-                double lat   = p.y() * util::Constants::degreesToRadians();
-                double trcFT = truncation;
-
-                // Legendre transform:
-                //invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra,
-                //                      legReal.data(), legImag.data() );
-
-                // Fourier transform:
-                //invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
-                //                     gp_tmp.data() + ( nb_fields * idx ) );
-                for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-                    //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
-                }
-                ++idx;
-            }
-        }
-    }  // namespace trans
-}  // namespace atlas
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
-                              const double divergence_spectra[], double gp_fields[],
-                              const eckit::Configuration& config ) const {
-    invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
-}
-
-void extend_truncationopt( const int old_truncation, const int nb_fields, const double old_spectra[],
-                           double new_spectra[] ) {
-    int k = 0, k_old = 0;
-    for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
-        for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
-            for ( int imag = 0; imag < 2; imag++ ) {              // imaginary/real part
-                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {  // field
-                    if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; }
-                    else {
-                        new_spectra[k++] = old_spectra[k_old++];
-                    }
-                }
-            }
-        }
-    }
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
-                              const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
-                              const eckit::Configuration& config ) const {
-    ATLAS_TRACE( "TransLocalopt::invtrans" );
-    int nb_gp              = grid_.size();
-    int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
-    if ( nb_vordiv_fields > 0 ) {
-        std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
-        std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
-        std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
-        std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
-
-        {
-            ATLAS_TRACE( "opt extend vordiv" );
-            // increase truncation in vorticity_spectra and divergence_spectra:
-            extend_truncationopt( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() );
-            extend_truncationopt( truncation_, nb_vordiv_fields, divergence_spectra,
-                                  divergence_spectra_extended.data() );
-        }
-
-        {
-            ATLAS_TRACE( "vordiv to UV opt" );
-            // call vd2uv to compute u and v in spectral space
-            trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) );
-            vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
-                                      divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
-        }
-
-        // perform spectral transform to compute all fields in grid point space
-        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
-        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
-                     gp_fields + nb_gp * nb_vordiv_fields, config );
-    }
-    if ( nb_scalar_fields > 0 ) {
-        invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields,
-                     config );
-    }
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt::dirtrans( const FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
-                                          const eckit::Configuration& config ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
-                              const eckit::Configuration& ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
-                              double divergence_spectra[], const eckit::Configuration& ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt/TransLocalopt.h b/src/atlas/trans/localopt/TransLocalopt.h
deleted file mode 100644
index 19bbab862..000000000
--- a/src/atlas/trans/localopt/TransLocalopt.h
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#pragma once
-
-#include <vector>
-
-#include "atlas/array.h"
-#include "atlas/grid/Grid.h"
-#include "atlas/trans/Trans.h"
-#if ATLAS_HAVE_FFTW
-#include <fftw3.h>
-#endif
-
-//-----------------------------------------------------------------------------
-// Forward declarations
-
-namespace atlas {
-class Field;
-class FieldSet;
-}  // namespace atlas
-
-//-----------------------------------------------------------------------------
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-
-/// @class TransLocalopt
-///
-/// Localopt spherical harmonics transformations to any grid
-/// Optimisations are present for structured grids
-/// For global grids, please consider using TransIFS instead.
-///
-/// @todo:
-///  - support multiple fields
-///  - support atlas::Field and atlas::FieldSet based on function spaces
-///
-/// @note: Direct transforms are not implemented and cannot be unless
-///        the grid is global. There are no plans to support this at the moment.
-class TransLocalopt : public trans::TransImpl {
-public:
-    TransLocalopt( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() );
-    TransLocalopt( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() );
-    TransLocalopt( const Cache&, const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() );
-    TransLocalopt( const Cache&, const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() );
-
-    virtual ~TransLocalopt();
-
-    virtual int truncation() const override { return truncation_; }
-    virtual size_t spectralCoefficients() const override { return ( truncation_ + 1 ) * ( truncation_ + 2 ); }
-
-    virtual const Grid& grid() const override { return grid_; }
-
-    virtual void invtrans( const Field& spfield, Field& gpfield,
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void invtrans( const FieldSet& spfields, FieldSet& gpfields,
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void invtrans_grad( const Field& spfield, Field& gradfield,
-                                const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void invtrans_grad( const FieldSet& spfields, FieldSet& gradfields,
-                                const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind,
-                                       const eckit::Configuration& = util::NoConfig() ) const override;
-
-    // -- IFS style API --
-
-    virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
-                           const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[],
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
-                           const double divergence_spectra[], double gp_fields[],
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    // -- NOT SUPPORTED -- //
-
-    virtual void dirtrans( const Field& gpfield, Field& spfield,
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void dirtrans( const FieldSet& gpfields, FieldSet& spfields,
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
-                                       const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
-                           double divergence_spectra[], const eckit::Configuration& = util::NoConfig() ) const override;
-
-private:
-    void invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
-                      const double scalar_spectra[], double gp_fields[],
-                      const eckit::Configuration& = util::NoConfig() ) const;
-
-private:
-    Grid grid_;
-    bool useFFT_;
-    bool dgemmMethod1_;
-    int truncation_;
-    int nlatsNH_;
-    int nlatsSH_;
-    int nlatsLeg_;
-    int jlonMin_;
-    int nlonsGlobal_;
-    bool precompute_;
-    double* legendre_sym_;
-    double* legendre_asym_;
-    double* fourier_;
-    double* fouriertp_;
-    std::vector<size_t> legendre_begin_;
-    std::vector<size_t> legendre_sym_begin_;
-    std::vector<size_t> legendre_asym_begin_;
-#if ATLAS_HAVE_FFTW
-    fftw_complex* fft_in_;
-    double* fft_out_;
-    fftw_plan plan_;
-#endif
-};
-
-//-----------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt/VorDivToUVLocalopt.cc b/src/atlas/trans/localopt/VorDivToUVLocalopt.cc
deleted file mode 100644
index 032aa6073..000000000
--- a/src/atlas/trans/localopt/VorDivToUVLocalopt.cc
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#include "atlas/trans/localopt/VorDivToUVLocalopt.h"
-#include <cmath>  // for std::sqrt
-#include "atlas/functionspace/Spectral.h"
-#include "atlas/runtime/Log.h"
-#include "atlas/util/Earth.h"
-
-using atlas::FunctionSpace;
-using atlas::functionspace::Spectral;
-
-namespace atlas {
-namespace trans {
-
-namespace {
-static VorDivToUVBuilder<VorDivToUVLocalopt> builder( "localopt" );
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-// Routine to copy spectral data into internal storage form of IFS trans
-// Ported to C++ by: Andreas Mueller *ECMWF*
-void prfi1bopt( const int truncation,
-                const int km,          // zonal wavenumber
-                const int nb_fields,   // number of fields
-                const double rspec[],  // spectral data
-                double pia[] )         // spectral components in data layout of trans library
-{
-    int ilcm = truncation + 1 - km, ioff = ( 2 * truncation - km + 3 ) * km,
-        nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2;
-    for ( int j = 1; j <= ilcm; j++ ) {
-        int inm = ioff + ( ilcm - j ) * 2;
-        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-            int ir = 2 * jfld, ii = ir + 1;
-            pia[ir * nlei1 + j + 1] = rspec[inm * nb_fields + jfld];
-            pia[ii * nlei1 + j + 1] = rspec[( inm + 1 ) * nb_fields + jfld];
-        }
-    }
-
-    for ( int jfld = 0; jfld < 2 * nb_fields; jfld++ ) {
-        pia[jfld * nlei1]            = 0.;
-        pia[jfld * nlei1 + 1]        = 0.;
-        pia[jfld * nlei1 + ilcm + 2] = 0.;
-    }
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-// Routine to compute spectral velocities (*cos(latitude)) out of spectral
-// vorticity and divergence
-// Reference:
-//        ECMWF Research Department documentation of the IFS
-//        Temperton, 1991, MWR 119 p1303
-// Ported to C++ by: Andreas Mueller *ECMWF*
-void vd2uvopt( const int truncation,               // truncation
-               const int km,                       // zonal wavenumber
-               const int nb_vordiv_fields,         // number of vorticity and divergence fields
-               const double vorticity_spectra[],   // spectral data of vorticity
-               const double divergence_spectra[],  // spectral data of divergence
-               double U[],                         // spectral data of U
-               double V[],                         // spectral data of V
-               const eckit::Configuration& config ) {
-    int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2;
-
-    // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991]
-    std::vector<double> repsnm( ( truncation + 1 ) * ( truncation + 6 ) / 2 );
-    int idx = 0;
-    for ( int jm = 0; jm <= truncation; ++jm ) {
-        for ( int jn = jm; jn <= truncation + 2; ++jn, ++idx ) {
-            repsnm[idx] = std::sqrt( ( jn * jn - jm * jm ) / ( 4. * jn * jn - 1. ) );
-        }
-    }
-    repsnm[0] = 0.;
-
-    // rlapin: constant factor from eq.(2.2) and (2.3) in [Temperton 1991]
-    double ra = util::Earth::radius();
-    std::vector<double> rlapin( truncation + 3 );
-    for ( int jn = 1; jn <= truncation + 2; ++jn ) {
-        rlapin[jn] = -ra * ra / ( jn * ( jn + 1. ) );
-    }
-    rlapin[0] = 0.;
-
-    // inverse the order of repsnm and rlapin for improved accuracy
-    std::vector<double> zepsnm( truncation + 6 );
-    std::vector<double> zlapin( truncation + 6 );
-    std::vector<double> zn( truncation + 6 );
-    for ( int jn = km - 1; jn <= truncation + 2; ++jn ) {
-        int ij = truncation + 3 - jn;
-        if ( jn >= 0 ) {
-            zlapin[ij] = rlapin[jn];
-            if ( jn < km ) { zepsnm[ij] = 0.; }
-            else {
-                zepsnm[ij] = repsnm[jn + ( 2 * truncation - km + 5 ) * km / 2];
-            }
-        }
-        else {
-            zlapin[ij] = 0.;
-            zepsnm[ij] = 0.;
-        }
-        zn[ij] = jn;
-    }
-    zn[0] = truncation + 3;
-
-    // copy spectral data into internal trans storage:
-    std::vector<double> rvor( 2 * nb_vordiv_fields * nlei1 );
-    std::vector<double> rdiv( 2 * nb_vordiv_fields * nlei1 );
-    std::vector<double> ru( 2 * nb_vordiv_fields * nlei1 );
-    std::vector<double> rv( 2 * nb_vordiv_fields * nlei1 );
-    prfi1bopt( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() );
-    prfi1bopt( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() );
-
-    // compute eq.(2.12) and (2.13) in [Temperton 1991]:
-    if ( km == 0 ) {
-        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-            int ir = 2 * jfld * nlei1 - 1;
-            for ( int ji = 2; ji < truncation + 4 - km; ++ji ) {
-                double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1];
-                double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1];
-                ru[ir + ji]  = +psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1];
-                rv[ir + ji]  = -psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1];
-            }
-        }
-    }
-    else {
-        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-            int ir = 2 * jfld * nlei1 - 1, ii = ir + nlei1;
-            for ( int ji = 2; ji < truncation + 4 - km; ++ji ) {
-                double chiIm = km * zlapin[ji];
-                double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1];
-                double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1];
-                ru[ir + ji]  = -chiIm * rdiv[ii + ji] + psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1];
-                ru[ii + ji]  = +chiIm * rdiv[ir + ji] + psiM1 * rvor[ii + ji + 1] - psiP1 * rvor[ii + ji - 1];
-                rv[ir + ji]  = -chiIm * rvor[ii + ji] - psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1];
-                rv[ii + ji]  = +chiIm * rvor[ir + ji] - psiM1 * rdiv[ii + ji + 1] + psiP1 * rdiv[ii + ji - 1];
-            }
-        }
-    }
-
-    // copy data from internal storage back to external spectral data:
-    int ilcm = truncation - km;
-    int ioff = ( 2 * truncation - km + 3 ) * km;
-    // ioff: start index of zonal wavenumber km in spectral data
-    double za_r = 1. / util::Earth::radius();
-    for ( int j = 0; j <= ilcm; ++j ) {
-        // ilcm-j = total wavenumber
-        int inm = ioff + ( ilcm - j ) * 2;
-        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-            int ir = 2 * jfld * nlei1, ii = ir + nlei1;
-            int idx = inm * nb_vordiv_fields + jfld;
-            // real part:
-            U[idx] = ru[ir + j + 2] * za_r;
-            V[idx] = rv[ir + j + 2] * za_r;
-            idx += nb_vordiv_fields;
-            // imaginary part:
-            U[idx] = ru[ii + j + 2] * za_r;
-            V[idx] = rv[ii + j + 2] * za_r;
-        }
-    }
-}
-
-void VorDivToUVLocalopt::execute( const int nb_coeff, const int nb_fields, const double vorticity[],
-                                  const double divergence[], double U[], double V[],
-                                  const eckit::Configuration& config ) const {
-    for ( int jm = 0; jm <= truncation_; ++jm ) {
-        vd2uvopt( truncation_, jm, nb_fields, vorticity, divergence, U, V, config );
-    }
-}
-
-VorDivToUVLocalopt::VorDivToUVLocalopt( const int truncation, const eckit::Configuration& config ) :
-    truncation_( truncation ) {}
-
-VorDivToUVLocalopt::VorDivToUVLocalopt( const FunctionSpace& fs, const eckit::Configuration& config ) :
-    truncation_( Spectral( fs ).truncation() ) {}
-
-VorDivToUVLocalopt::~VorDivToUVLocalopt() {}
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt/VorDivToUVLocalopt.h b/src/atlas/trans/localopt/VorDivToUVLocalopt.h
deleted file mode 100644
index 4bb7eda88..000000000
--- a/src/atlas/trans/localopt/VorDivToUVLocalopt.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#pragma once
-
-#include "atlas/trans/VorDivToUV.h"
-
-//-----------------------------------------------------------------------------
-// Forward declarations
-
-namespace atlas {
-class FunctionSpace;
-}
-
-//-----------------------------------------------------------------------------
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-
-class VorDivToUVLocalopt : public trans::VorDivToUVImpl {
-public:
-    VorDivToUVLocalopt( const FunctionSpace&, const eckit::Configuration& = util::NoConfig() );
-    VorDivToUVLocalopt( int truncation, const eckit::Configuration& = util::NoConfig() );
-
-    virtual ~VorDivToUVLocalopt();
-
-    virtual int truncation() const override { return truncation_; }
-
-    // pure virtual interface
-
-    // -- IFS style API --
-    // These fields have special interpretation required. You need to know what
-    // you're doing.
-    // See IFS trans library.
-
-    /*!
- * @brief Compute spectral wind (U/V) from spectral vorticity/divergence
- *
- * U = u*cos(lat)
- * V = v*cos(lat)
- *
- * @param nb_fields [in] Number of fields
- * @param vorticity [in] Spectral vorticity
- * @param divergence [in] Spectral divergence
- * @param U [out] Spectral wind U = u*cos(lat)
- * @param V [out] Spectral wind V = v*cos(lat)
- */
-    virtual void execute( const int nb_coeff, const int nb_fields, const double vorticity[], const double divergence[],
-                          double U[], double V[], const eckit::Configuration& = util::NoConfig() ) const override;
-
-private:
-    int truncation_;
-};
-
-// ------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt2/FourierTransformsopt2.cc b/src/atlas/trans/localopt2/FourierTransformsopt2.cc
deleted file mode 100644
index 71d3202dd..000000000
--- a/src/atlas/trans/localopt2/FourierTransformsopt2.cc
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor
- * does it submit to any jurisdiction.
- */
-
-#include <algorithm>
-#include <cmath>
-#include <iostream>
-
-#include "atlas/trans/localopt2/FourierTransformsopt2.h"
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-
-void invtrans_fourieropt2( const size_t trcFT,
-                           const double lon,         // longitude in radians (in)
-                           const int nb_fields,      // Number of fields
-                           const double rlegReal[],  // associated Legendre functions, size (trc+1)*trc/2 (in)
-                           const double rlegImag[],  // associated Legendre functions, size (trc+1)*trc/2 (in)
-                           double rgp[] )            // gridpoint
-{
-    for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
-        rgp[jfld] = 0.;
-    }
-    // local Fourier transformation:
-    for ( int jm = 0; jm <= trcFT; ++jm ) {
-        const double cos = std::cos( jm * lon );
-        const double sin = std::sin( jm * lon );
-        for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
-            double real = cos * rlegReal[jm * nb_fields + jfld];
-            double imag = sin * rlegImag[jm * nb_fields + jfld];
-            rgp[jfld] += real - imag;
-        }
-    }
-}
-
-int fourier_truncationopt2( const int truncation,    // truncation
-                            const int nx,            // number of longitudes
-                            const int nxmax,         // maximum nx
-                            const int ndgl,          // number of latitudes
-                            const double lat,        // latitude in radian
-                            const bool fullgrid ) {  // regular grid
-    int trc     = truncation;
-    int trclin  = ndgl - 1;
-    int trcquad = ndgl * 2 / 3 - 1;
-    if ( truncation >= trclin || fullgrid ) {
-        // linear
-        trc = ( nx - 1 ) / 2;
-    }
-    else if ( truncation >= trcquad ) {
-        // quadratic
-        double weight = 3 * ( trclin - truncation ) / ndgl;
-        double sqcos  = std::pow( std::cos( lat ), 2 );
-
-        trc = ( nx - 1 ) / ( 2 + weight * sqcos );
-    }
-    else {
-        // cubic
-        double sqcos = std::pow( std::cos( lat ), 2 );
-
-        trc = ( nx - 1 ) / ( 2 + sqcos ) - 1;
-    }
-    trc = std::min( truncation, trc );
-    return trc;
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt2/FourierTransformsopt2.h b/src/atlas/trans/localopt2/FourierTransformsopt2.h
deleted file mode 100644
index 4281a92cc..000000000
--- a/src/atlas/trans/localopt2/FourierTransformsopt2.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#pragma once
-
-#include <cstddef>
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-// Routine to compute the local Fourier transformation
-//
-// Author:
-// Andreas Mueller *ECMWF*
-//
-
-void invtrans_fourieropt2( const size_t trcFT,
-                           const double lon,         // longitude in radians (in)
-                           const int nb_fields,      // Number of fields
-                           const double rlegReal[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
-                           const double rlegImag[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
-                           double rgp[] );           // gridpoint
-
-int fourier_truncationopt2( const int truncation, const int nx, const int nxmax, const int ndgl, const double lat,
-                            const bool fullgrid );
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
deleted file mode 100644
index c3e3fe2f6..000000000
--- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor
- * does it submit to any jurisdiction.
- */
-
-#include <cmath>
-#include <limits>
-
-#include "atlas/array.h"
-#include "atlas/parallel/mpi/mpi.h"
-#include "atlas/trans/localopt2/LegendrePolynomialsopt2.h"
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-
-void compute_zfnopt2( const size_t trc, double zfn[] ) {
-    auto idxzfn = [&]( int jn, int jk ) { return jk + ( trc + 1 ) * jn; };
-    int iodd    = 0;
-    // Compute coefficients for Taylor series in Belousov (19) and (21)
-    // Belousov, Swarztrauber use zfn[0]=std::sqrt(2.)
-    // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1
-    zfn[idxzfn( 0, 0 )] = 2.;
-    for ( int jn = 1; jn <= trc; ++jn ) {
-        double zfnn = zfn[idxzfn( 0, 0 )];
-        for ( int jgl = 1; jgl <= jn; ++jgl ) {
-            zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) );
-        }
-        iodd                  = jn % 2;
-        zfn[idxzfn( jn, jn )] = zfnn;
-        for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) {
-            double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) );  // new factor numerator
-            double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) );           // new factor denominator
-
-            zfn[idxzfn( jn, jn - jgl )] = zfn[idxzfn( jn, jn - jgl + 2 )] * zfjn / zfjd;
-        }
-    }
-}
-
-
-void compute_legendre_polynomials_latopt2( const size_t trc,  // truncation (in)
-                                           const double lat,  // latitude in radians (in)
-                                           double legpol[],   // legendre polynomials
-                                           double zfn[] ) {
-    auto idxmn  = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; };
-    auto idxzfn = [&]( int jn, int jk ) { return jk + ( trc + 1 ) * jn; };
-    {  //ATLAS_TRACE( "compute Legendre polynomials" );
-        // --------------------
-        // 1. First two columns
-        // --------------------
-        double zdlx1   = ( M_PI_2 - lat );               // theta
-        double zdlx    = std::cos( zdlx1 );              // cos(theta)
-        double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
-
-        legpol[idxmn( 0, 0 )] = 1.;
-        double vsin[trc + 1], vcos[trc + 1];
-        for ( int j = 1; j <= trc; j++ ) {
-            vsin[j] = std::sin( j * zdlx1 );
-        }
-        for ( int j = 1; j <= trc; j++ ) {
-            vcos[j] = std::cos( j * zdlx1 );
-        }
-
-        double zdl1sita = 0.;
-        // if we are less than 1 meter from the pole,
-        if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits<double>::epsilon() ) ) {
-            zdlx    = 1.;
-            zdlsita = 0.;
-        }
-        else {
-            zdl1sita = 1. / zdlsita;
-        }
-
-        // ordinary Legendre polynomials from series expansion
-        // ---------------------------------------------------
-
-        // even N
-        for ( int jn = 2; jn <= trc; jn += 2 ) {
-            double zdlk   = 0.5 * zfn[idxzfn( jn, 0 )];
-            double zdlldn = 0.0;
-            double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
-            // represented by only even k
-            for ( int jk = 2; jk <= jn; jk += 2 ) {
-                // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-                zdlk = zdlk + zfn[idxzfn( jn, jk )] * vcos[jk];
-                // normalised associated Legendre polynomial == \overbar{P_n}^1
-                zdlldn = zdlldn + zdsq * zfn[idxzfn( jn, jk )] * jk * vsin[jk];
-            }
-            legpol[idxmn( 0, jn )] = zdlk;
-            legpol[idxmn( 1, jn )] = zdlldn;
-        }
-
-        // odd N
-        for ( int jn = 1; jn <= trc; jn += 2 ) {
-            zfn[idxzfn( jn, 0 )] = 0.;
-            double zdlk          = 0.;
-            double zdlldn        = 0.0;
-            double zdsq          = 1. / std::sqrt( jn * ( jn + 1. ) );
-            // represented by only even k
-            for ( int jk = 1; jk <= jn; jk += 2 ) {
-                // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-                zdlk = zdlk + zfn[idxzfn( jn, jk )] * vcos[jk];
-                // normalised associated Legendre polynomial == \overbar{P_n}^1
-                zdlldn = zdlldn + zdsq * zfn[idxzfn( jn, jk )] * jk * vsin[jk];
-            }
-            legpol[idxmn( 0, jn )] = zdlk;
-            legpol[idxmn( 1, jn )] = zdlldn;
-        }
-
-        // --------------------------------------------------------------
-        // 2. Diagonal (the terms 0,0 and 1,1 have already been computed)
-        //    Belousov, equation (23)
-        // --------------------------------------------------------------
-
-        double zdls = zdl1sita * std::numeric_limits<double>::min();
-        for ( int jn = 2; jn <= trc; ++jn ) {
-            double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) );
-
-            legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq;
-            if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0;
-        }
-
-        // ---------------------------------------------
-        // 3. General recurrence (Belousov, equation 17)
-        // ---------------------------------------------
-
-        for ( int jn = 3; jn <= trc; ++jn ) {
-            for ( int jm = 2; jm < jn; ++jm ) {
-                double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) );  // numerator of c in Belousov
-                double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of c in Belousov
-                double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) );  // numerator of d in Belousov
-                double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of d in Belousov
-                double en = ( ( 2. * jn + 1. ) * ( jn - jm ) );                          // numerator of e in Belousov
-                double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) );                          // denominator of e in Belousov
-
-                legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] -
-                                          std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx +
-                                          std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx;
-            }
-        }
-    }
-}
-
-
-void compute_legendre_polynomialsopt2(
-    const size_t trc,          // truncation (in)
-    const int nlats,           // number of latitudes
-    const double lats[],       // latitudes in radians (in)
-    double leg_sym[],          // values of associated Legendre functions, symmetric part
-    double leg_asym[],         // values of associated Legendre functions, asymmetric part
-    size_t leg_start_sym[],    // start indices for different zonal wave numbers, symmetric part
-    size_t leg_start_asym[] )  // start indices for different zonal wave numbers, asymmetric part
-{
-    auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; };
-    std::vector<double> legpol( legendre_size( trc ) );
-    std::vector<double> zfn( ( trc + 1 ) * ( trc + 1 ) );
-    auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; };
-    compute_zfnopt2( trc, zfn.data() );
-
-    // Loop over latitudes:
-    for ( int jlat = 0; jlat < nlats; ++jlat ) {
-        // compute legendre polynomials for current latitude:
-        compute_legendre_polynomials_latopt2( trc, lats[jlat], legpol.data(), zfn.data() );
-
-        // split polynomials into symmetric and antisymmetric parts:
-        {
-            //ATLAS_TRACE( "add to global arrays" );
-
-            for ( int jm = 0; jm <= trc; jm++ ) {
-                int is1 = 0, ia1 = 0;
-                for ( int jn = jm; jn <= trc; jn++ ) {
-                    if ( ( jn - jm ) % 2 == 0 ) { is1++; }
-                    else {
-                        ia1++;
-                    }
-                }
-                int is2 = 0, ia2 = 0;
-                // the choice between the following two code lines determines whether
-                // total wavenumbers are summed in an ascending or descending order.
-                // The trans library in IFS uses descending order because it should
-                // be more accurate (higher wavenumbers have smaller contributions).
-                // This also needs to be changed when splitting the spectral data in
-                // TransLocalopt2::invtrans_uv!
-                //for ( int jn = jm; jn <= trc; jn++ ) {
-                for ( int jn = trc; jn >= jm; jn-- ) {
-                    if ( ( jn - jm ) % 2 == 0 ) {
-                        int is      = leg_start_sym[jm] + is1 * jlat + is2++;
-                        leg_sym[is] = legpol[idxmn( jm, jn )];
-                    }
-                    else {
-                        int ia       = leg_start_asym[jm] + ia1 * jlat + ia2++;
-                        leg_asym[ia] = legpol[idxmn( jm, jn )];
-                    }
-                }
-            }
-        }
-    }
-}
-
-void compute_legendre_polynomials_allopt2( const size_t trc,     // truncation (in)
-                                           const int nlats,      // number of latitudes
-                                           const double lats[],  // latitudes in radians (in)
-                                           double legendre[] )   // legendre polynomials for all latitudes
-{
-    auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; };
-    std::vector<double> legpol( legendre_size( trc ) );
-    std::vector<double> zfn( ( trc + 1 ) * ( trc + 1 ) );
-    auto idxmn  = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; };
-    auto idxmnl = [&]( int jm, int jn, int jlat ) {
-        return ( 2 * trc + 3 - jm ) * jm / 2 * nlats + jlat * ( trc - jm + 1 ) + jn - jm;
-    };
-    compute_zfnopt2( trc, zfn.data() );
-
-    // Loop over latitudes:
-    for ( int jlat = 0; jlat < nlats; ++jlat ) {
-        // compute legendre polynomials for current latitude:
-        compute_legendre_polynomials_latopt2( trc, lats[jlat], legpol.data(), zfn.data() );
-
-        for ( int jm = 1; jm <= trc; ++jm ) {
-            for ( int jn = jm; jn <= trc; ++jn ) {
-                legendre[idxmnl( jm, jn, jlat )] = legpol[idxmn( jm, jn )];
-            }
-        }
-    }
-}  // namespace trans
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h
deleted file mode 100644
index f79438aea..000000000
--- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#pragma once
-
-#include <cstddef>
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-// Routine to compute the Legendre polynomials in serial according to Belousov
-// (using correction by Swarztrauber)
-//
-// Reference:
-// S.L. Belousov, Tables of normalized associated Legendre Polynomials, Pergamon
-// Press (1962)
-// P.N. Swarztrauber, On computing the points and weights for Gauss-Legendre
-// quadrature,
-//      SIAM J. Sci. Comput. Vol. 24 (3) pp. 945-954 (2002)
-//
-// Author of Fortran version:
-// Mats Hamrud, Philippe Courtier, Nils Wedi *ECMWF*
-//
-// Ported to C++ by:
-// Andreas Mueller *ECMWF*
-//
-void compute_zfnopt2( const size_t trc, double zfn[] );
-
-void compute_legendre_polynomials_latopt2( const size_t trc,  // truncation (in)
-                                           const double lat,  // latitude in radians (in)
-                                           double legpol[],   // legendre polynomials
-                                           double zfn[] );
-
-void compute_legendre_polynomialsopt2(
-    const size_t trc,           // truncation (in)
-    const int nlats,            // number of latitudes
-    const double lats[],        // latitudes in radians (in)
-    double legendre_sym[],      // values of associated Legendre functions, symmetric part
-    double legendre_asym[],     // values of associated Legendre functions, asymmetric part
-    size_t leg_start_sym[],     // start indices for different zonal wave numbers, symmetric part
-    size_t leg_start_asym[] );  // start indices for different zonal wave numbers, asymmetric part
-
-void compute_legendre_polynomials_allopt2( const size_t trc,     // truncation (in)
-                                           const int nlats,      // number of latitudes
-                                           const double lats[],  // latitudes in radians (in)
-                                           double legendre[] );  // legendre polynomials for all latitudes
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt2/LegendreTransformsopt2.cc b/src/atlas/trans/localopt2/LegendreTransformsopt2.cc
deleted file mode 100644
index ddb71a967..000000000
--- a/src/atlas/trans/localopt2/LegendreTransformsopt2.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#include <cstddef>
-
-#include "atlas/trans/localopt2/LegendreTransformsopt2.h"
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-
-void invtrans_legendreopt2(
-    const size_t trc,       // truncation (in)
-    const size_t trcFT,     // truncation for Fourier transformation (in)
-    const size_t trcLP,     // truncation of Legendre polynomials data legpol. Needs to be >= trc (in)
-    const double legpol[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
-    const int nb_fields,    // number of fields
-    const double spec[],    // spectral data, size (trc+1)*trc (in)
-    double leg_real[],      // values of associated Legendre functions, size (trc+1)*trc/2 (out)
-    double leg_imag[] )     // values of associated Legendre functions, size (trc+1)*trc/2 (out)
-{
-    // Legendre transformation:
-    int k = 0, klp = 0;
-    for ( int jm = 0; jm <= trcFT; ++jm ) {
-        for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
-            leg_real[jm * nb_fields + jfld] = 0.;
-            leg_imag[jm * nb_fields + jfld] = 0.;
-        }
-        for ( int jn = jm; jn <= trcLP; ++jn, ++klp ) {
-            if ( jn <= trc ) {
-                for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
-                    // not completely sure where this factor 2 comes from. One possible
-                    // explanation:
-                    // normalization of trigonometric functions in the spherical harmonics
-                    // integral over square of trig function is 1 for m=0 and 0.5 (?) for
-                    // m>0
-                    leg_real[jm * nb_fields + jfld] += 2. * spec[( 2 * k ) * nb_fields + jfld] * legpol[klp];
-                    leg_imag[jm * nb_fields + jfld] += 2. * spec[( 2 * k + 1 ) * nb_fields + jfld] * legpol[klp];
-                }
-                ++k;
-            }
-        }
-    }
-    // Undo factor 2 for (jm == 0)
-    for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
-        leg_real[jfld] /= 2.;
-        leg_imag[jfld] /= 2.;
-    }
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt2/LegendreTransformsopt2.h b/src/atlas/trans/localopt2/LegendreTransformsopt2.h
deleted file mode 100644
index ef10eb885..000000000
--- a/src/atlas/trans/localopt2/LegendreTransformsopt2.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#pragma once
-
-#include <cstddef>
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-// Routine to compute the Legendre transformation
-//
-// Author:
-// Andreas Mueller *ECMWF*
-//
-void invtrans_legendreopt2(
-    const size_t trc,       // truncation (in)
-    const size_t trcFT,     // truncation for Fourier transformation (in)
-    const size_t trcLP,     // truncation of Legendre polynomials data legpol. Needs to be >= trc (in)
-    const double legpol[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
-    const int nb_fields,    // number of fields
-    const double spec[],    // spectral data, size (trc+1)*trc (in)
-    double leg_real[],      // values of associated Legendre functions, size (trc+1)*trc/2 (out)
-    double leg_imag[] );    // values of associated Legendre functions, size (trc+1)*trc/2 (out)
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc
deleted file mode 100644
index 073ba312e..000000000
--- a/src/atlas/trans/localopt2/TransLocalopt2.cc
+++ /dev/null
@@ -1,746 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#include "atlas/trans/localopt2/TransLocalopt2.h"
-#include <math.h>
-#include "atlas/array.h"
-#include "atlas/option.h"
-#include "atlas/parallel/mpi/mpi.h"
-#include "atlas/runtime/ErrorHandling.h"
-#include "atlas/runtime/Log.h"
-#include "atlas/trans/VorDivToUV.h"
-#include "atlas/trans/local_noopt/LegendrePolynomials.h"
-#include "atlas/trans/localopt2/FourierTransformsopt2.h"
-#include "atlas/trans/localopt2/LegendrePolynomialsopt2.h"
-#include "atlas/trans/localopt2/LegendreTransformsopt2.h"
-#include "atlas/util/Constants.h"
-#include "eckit/linalg/LinearAlgebra.h"
-#include "eckit/linalg/Matrix.h"
-#include "eckit/eckit_config.h"
-#ifdef ECKIT_HAVE_MKL
-#include "mkl.h"
-#endif
-
-namespace atlas {
-namespace trans {
-
-namespace {
-static TransBuilderGrid<TransLocalopt2> builder( "localopt2" );
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-// Helper functions
-// --------------------------------------------------------------------------------------------------------------------
-namespace {  // anonymous
-
-size_t legendre_size( const size_t truncation ) {
-    return ( truncation + 2 ) * ( truncation + 1 ) / 2;
-}
-
-int nlats_northernHemisphere( const int nlats ) {
-    return ceil( nlats / 2. );
-    // using ceil here should make it possible to have odd number of latitudes (with the centre latitude being the equator)
-}
-
-int num_n( const int truncation, const int m, const bool symmetric ) {
-    int len = 0;
-    if ( symmetric ) { len = ( truncation - m + 2 ) / 2; }
-    else {
-        len = ( truncation - m + 1 ) / 2;
-    }
-    return len;
-}
-
-void alloc_aligned( double*& ptr, size_t n ) {
-#ifdef ECKIT_HAVE_MKL
-    int al = 64;
-    ptr    = (double*)mkl_malloc( sizeof( double ) * n, al );
-#else
-    posix_memalign( (void**)&ptr, sizeof( double ) * 64, sizeof( double ) * n );
-    //ptr = (double*)malloc( sizeof( double ) * n );
-    //ptr = new double[n];
-#endif
-}
-
-void free_aligned( double*& ptr ) {
-#ifdef ECKIT_HAVE_MKL
-    mkl_free( ptr );
-#else
-    free( ptr );
-#endif
-}
-
-int add_padding( int n ) {
-    return std::ceil( n / 8. ) * 8;
-}
-}  // namespace
-
-// --------------------------------------------------------------------------------------------------------------------
-// Class TransLocalopt2
-// --------------------------------------------------------------------------------------------------------------------
-
-TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long truncation,
-                                const eckit::Configuration& config ) :
-    grid_( grid ),
-    truncation_( truncation ),
-    precompute_( config.getBool( "precompute", true ) ) {
-    ATLAS_TRACE( "Precompute legendre opt2" );
-#ifdef ECKIT_HAVE_MKL
-    eckit::linalg::LinearAlgebra::backend( "mkl" );  // might want to choose backend with this command
-#else
-    eckit::linalg::LinearAlgebra::backend( "generic" );  // might want to choose backend with this command
-#endif
-    double fft_threshold = 0.05;  // fraction of latitudes of the full grid up to which FFT is used.
-    // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine
-    // on which this code is running!
-    int nlats     = 0;
-    int nlons     = 0;
-    int neqtr     = 0;
-    useFFT_       = true;
-    dgemmMethod1_ = false;
-    nlatsNH_      = 0;
-    nlatsSH_      = 0;
-    nlatsLeg_     = 0;
-    if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
-        grid::StructuredGrid g( grid_ );
-        nlats = g.ny();
-        nlons = g.nxmax();
-        for ( size_t j = 0; j < nlats; ++j ) {
-            // assumptions: latitudes in g.y(j) are monotone and decreasing
-            // no assumption on whether we have 0, 1 or 2 latitudes at the equator
-            double lat = g.y( j );
-            if ( lat > 0. ) { nlatsNH_++; }
-            if ( lat == 0. ) { neqtr++; }
-            if ( lat < 0. ) { nlatsSH_++; }
-        }
-        if ( neqtr > 0 ) {
-            nlatsNH_++;
-            nlatsSH_++;
-        }
-        if ( nlatsNH_ >= nlatsSH_ ) { nlatsLeg_ = nlatsNH_; }
-        else {
-            nlatsLeg_ = nlatsSH_;
-        }
-        Grid g_global( grid.name() );
-        grid::StructuredGrid gs_global( g_global );
-        nlonsGlobal_  = gs_global.nxmax();
-        jlonMin_      = 0;
-        double lonmin = fmod( g.x( 0, 0 ), 360 );
-        if ( lonmin < 0. ) { lonmin += 360.; }
-        if ( nlons < fft_threshold * nlonsGlobal_ ) { useFFT_ = false; }
-        else {
-            if ( nlons < nlonsGlobal_ ) {
-                // need to use FFT with cropped grid
-                for ( size_t j = 0; j < nlonsGlobal_; ++j ) {
-                    if ( gs_global.x( j, 0 ) == lonmin ) { jlonMin_ = j; }
-                }
-            }
-        }
-        //Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << std::endl;
-        std::vector<double> lats( nlatsLeg_ );
-        std::vector<double> lons( nlons );
-        if ( nlatsNH_ >= nlatsSH_ ) {
-            for ( size_t j = 0; j < nlatsLeg_; ++j ) {
-                lats[j] = g.y( j ) * util::Constants::degreesToRadians();
-            }
-        }
-        else {
-            for ( size_t j = nlats - 1, idx = 0; idx < nlatsLeg_; --j, ++idx ) {
-                lats[idx] = -g.y( j ) * util::Constants::degreesToRadians();
-            }
-        }
-        for ( size_t j = 0; j < nlons; ++j ) {
-            lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians();
-        }
-        // precomputations for Legendre polynomials:
-        {
-            ATLAS_TRACE( "opt2 precomp Legendre" );
-            int size_sym  = 0;
-            int size_asym = 0;
-            legendre_sym_begin_.resize( truncation_ + 3 );
-            legendre_asym_begin_.resize( truncation_ + 3 );
-            legendre_sym_begin_[0]  = 0;
-            legendre_asym_begin_[0] = 0;
-            for ( int jm = 0; jm <= truncation_ + 1; jm++ ) {
-                size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ );
-                size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ );
-                legendre_sym_begin_[jm + 1]  = size_sym;
-                legendre_asym_begin_[jm + 1] = size_asym;
-            }
-            alloc_aligned( legendre_sym_, size_sym );
-            alloc_aligned( legendre_asym_, size_asym );
-            FILE* file_leg;
-            file_leg = fopen( "legendre.bin", "r" );
-            if ( false ) {  //if ( file_leg ) {
-                fread( legendre_sym_, sizeof( double ), size_sym, file_leg );
-                fread( legendre_asym_, sizeof( double ), size_asym, file_leg );
-                fclose( file_leg );
-            }
-            else {
-                compute_legendre_polynomialsopt2( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_,
-                                                  legendre_asym_, legendre_sym_begin_.data(),
-                                                  legendre_asym_begin_.data() );
-                /*file_leg = fopen( "legendre.bin", "wb" );
-                fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg );
-                fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg );
-                fclose( file_leg );*/
-            }
-        }
-
-        // precomputations for Fourier transformations:
-        if ( useFFT_ ) {
-#if ATLAS_HAVE_FFTW
-            {
-                ATLAS_TRACE( "opt2 precomp FFTW" );
-                int num_complex = ( nlonsGlobal_ / 2 ) + 1;
-                fft_in_         = fftw_alloc_complex( nlats * num_complex );
-                fft_out_        = fftw_alloc_real( nlats * nlonsGlobal_ );
-                plan_ = fftw_plan_many_dft_c2r( 1, &nlonsGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL,
-                                                1, nlonsGlobal_, FFTW_ESTIMATE );
-            }
-                // other FFT implementations should be added with #elif statements
-#else
-            useFFT_ = false;                             // no FFT implemented => default to dgemm
-#endif
-        }
-        if ( !useFFT_ ) {
-            alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons );
-            if ( dgemmMethod1_ ) {
-                {
-                    ATLAS_TRACE( "opt2 precomp Fourier" );
-                    int idx = 0;
-                    for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                        double factor = 1.;
-                        for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                            if ( jm > 0 ) { factor = 2.; }
-                            fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
-                            fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
-                        }
-                    }
-                }
-            }
-            else {
-                {
-                    ATLAS_TRACE( "opt2 precomp Fourier tp" );
-                    int idx = 0;
-                    for ( int jm = 0; jm < truncation_ + 1; jm++ ) {
-                        double factor = 1.;
-                        if ( jm > 0 ) { factor = 2.; }
-                        for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                            fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor;  // real part
-                        }
-                        for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                            fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor;  // imaginary part
-                        }
-                    }
-                }
-            }
-        }
-    }
-}  // namespace trans
-
-// --------------------------------------------------------------------------------------------------------------------
-
-TransLocalopt2::TransLocalopt2( const Grid& grid, const long truncation, const eckit::Configuration& config ) :
-    TransLocalopt2( Cache(), grid, truncation, config ) {}
-
-TransLocalopt2::TransLocalopt2( const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) :
-    TransLocalopt2( Cache(), grid, truncation, config ) {}
-
-TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) :
-    TransLocalopt2( cache, grid, truncation, config ) {}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-TransLocalopt2::~TransLocalopt2() {
-    if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
-        free_aligned( legendre_sym_ );
-        free_aligned( legendre_asym_ );
-        if ( useFFT_ ) {
-#if ATLAS_HAVE_FFTW
-            fftw_destroy_plan( plan_ );
-            fftw_free( fft_in_ );
-            fftw_free( fft_out_ );
-#endif
-        }
-        else {
-            free_aligned( fourier_ );
-        }
-    }
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt2::invtrans( const Field& spfield, Field& gpfield, const eckit::Configuration& config ) const {
-    NOTIMP;
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt2::invtrans( const FieldSet& spfields, FieldSet& gpfields,
-                               const eckit::Configuration& config ) const {
-    NOTIMP;
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt2::invtrans_grad( const Field& spfield, Field& gradfield, const eckit::Configuration& config ) const {
-    NOTIMP;
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt2::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields,
-                                    const eckit::Configuration& config ) const {
-    NOTIMP;
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt2::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind,
-                                           const eckit::Configuration& config ) const {
-    NOTIMP;
-}
-
-void TransLocalopt2::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[],
-                               const eckit::Configuration& config ) const {
-    invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields, config );
-}
-
-void gp_transposeopt2( const int nb_size, const int nb_fields, const double gp_tmp[], double gp_fields[] ) {
-    for ( int jgp = 0; jgp < nb_size; jgp++ ) {
-        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-            gp_fields[jfld * nb_size + jgp] = gp_tmp[jgp * nb_fields + jfld];
-        }
-    }
-}
-
-//-----------------------------------------------------------------------------
-// Routine to compute the spectral transform by using a localopt2 Fourier transformation
-// for a grid (same latitude for all longitudes, allows to compute Legendre functions
-// once for all longitudes). U and v components are divided by cos(latitude) for
-// nb_vordiv_fields > 0.
-//
-// Legendre polynomials are computed up to truncation_+1 to be accurate for vorticity and
-// divergence computation. The parameter truncation is the truncation used in storing the
-// spectral data scalar_spectra and can be different from truncation_. If truncation is
-// larger than truncation_+1 the transform will behave as if the spectral data was truncated
-// to truncation_+1.
-//
-// Author:
-// Andreas Mueller *ECMWF*
-//
-void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
-                                  const double scalar_spectra[], double gp_fields[],
-                                  const eckit::Configuration& config ) const {
-    if ( nb_scalar_fields > 0 ) {
-        int nb_fields = nb_scalar_fields;
-
-        // Transform
-        if ( grid::StructuredGrid g = grid_ ) {
-            ATLAS_TRACE( "invtrans_uv structured opt2" );
-            int nlats      = g.ny();
-            int nlons      = g.nxmax();
-            auto posMethod = [&]( int jfld, int imag, int jlat, int jm ) {
-                if ( useFFT_ || !dgemmMethod1_ ) {
-                    return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) );
-                }
-                else {
-                    return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) );
-                };
-            };
-            int size_fourier_max = nb_fields * 2 * nlats;
-            double* scl_fourier;
-            alloc_aligned( scl_fourier, size_fourier_max * ( truncation_ + 1 ) );
-
-            // Legendre transform:
-            {
-                ATLAS_TRACE( "opt2 Legendre dgemm" );
-                for ( int jm = 0; jm <= truncation_; jm++ ) {
-                    int size_sym  = num_n( truncation_ + 1, jm, true );
-                    int size_asym = num_n( truncation_ + 1, jm, false );
-                    int n_imag    = 2;
-                    if ( jm == 0 ) { n_imag = 1; }
-                    int size_fourier = nb_fields * n_imag * nlatsLeg_;
-                    auto posFourier  = [&]( int jfld, int imag, int jlat, int jm, int nlatsH ) {
-                        return jfld + nb_fields * ( imag + n_imag * ( nlatsLeg_ - nlatsH + jlat ) );
-                    };
-                    double* scalar_sym;
-                    double* scalar_asym;
-                    double* scl_fourier_sym;
-                    double* scl_fourier_asym;
-                    alloc_aligned( scalar_sym, n_imag * nb_fields * size_sym );
-                    alloc_aligned( scalar_asym, n_imag * nb_fields * size_asym );
-                    alloc_aligned( scl_fourier_sym, size_fourier );
-                    alloc_aligned( scl_fourier_asym, size_fourier );
-                    {
-                        //ATLAS_TRACE( "opt2 Legendre split" );
-                        int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2;
-                        // the choice between the following two code lines determines whether
-                        // total wavenumbers are summed in an ascending or descending order.
-                        // The trans library in IFS uses descending order because it should
-                        // be more accurate (higher wavenumbers have smaller contributions).
-                        // This also needs to be changed when splitting the spectral data in
-                        // compute_legendre_polynomialsopt2!
-                        //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) {
-                        for ( int jn = truncation_ + 1; jn >= jm; jn-- ) {
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                    idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) );
-                                    if ( jn <= truncation && jm < truncation ) {
-                                        if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; }
-                                        else {
-                                            scalar_asym[ia++] = scalar_spectra[idx + ioff];
-                                        }
-                                    }
-                                    else {
-                                        if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = 0.; }
-                                        else {
-                                            scalar_asym[ia++] = 0.;
-                                        }
-                                    }
-                                }
-                            }
-                        }
-                        ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym );
-                    }
-                    {
-                        eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym );
-                        eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsLeg_ );
-                        eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLeg_ );
-                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                    }
-                    if ( size_asym > 0 ) {
-                        eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym );
-                        eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsLeg_ );
-                        eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ );
-                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                    }
-                    {
-                        //ATLAS_TRACE( "opt2 merge spheres" );
-                        // northern hemisphere:
-                        for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) {
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                    int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ );
-                                    scl_fourier[posMethod( jfld, imag, jlat, jm )] =
-                                        scl_fourier_sym[idx] + scl_fourier_asym[idx];
-                                }
-                            }
-                        }
-                        // southern hemisphere:
-                        for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) {
-                            for ( int imag = 0; imag < n_imag; imag++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                    int idx   = posFourier( jfld, imag, jlat, jm, nlatsSH_ );
-                                    int jslat = nlats - jlat - 1;
-                                    scl_fourier[posMethod( jfld, imag, jslat, jm )] =
-                                        scl_fourier_sym[idx] - scl_fourier_asym[idx];
-                                }
-                            }
-                        }
-                    }
-                    free_aligned( scalar_sym );
-                    free_aligned( scalar_asym );
-                    free_aligned( scl_fourier_sym );
-                    free_aligned( scl_fourier_asym );
-                }
-            }
-            // Fourier transformation:
-            if ( useFFT_ ) {
-#if ATLAS_HAVE_FFTW
-                {
-                    int num_complex = ( nlonsGlobal_ / 2 ) + 1;
-                    {
-                        ATLAS_TRACE( "opt2 FFTW" );
-                        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                            int idx = 0;
-                            for ( int jlat = 0; jlat < nlats; jlat++ ) {
-                                fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0 )];
-                                for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
-                                    for ( int imag = 0; imag < 2; imag++ ) {
-                                        if ( jm <= truncation_ ) {
-                                            fft_in_[idx][imag] = scl_fourier[posMethod( jfld, imag, jlat, jm )];
-                                        }
-                                        else {
-                                            fft_in_[idx][imag] = 0.;
-                                        }
-                                    }
-                                }
-                            }
-                            fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ );
-                            for ( int jlat = 0; jlat < nlats; jlat++ ) {
-                                for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                                    int j = jlon + jlonMin_;
-                                    if ( j >= nlonsGlobal_ ) { j -= nlonsGlobal_; }
-                                    gp_fields[jlon + nlons * ( jlat + nlats * jfld )] =
-                                        fft_out_[j + nlonsGlobal_ * jlat];
-                                }
-                            }
-                        }
-                    }
-                }
-#endif
-            }
-            else {
-                if ( dgemmMethod1_ ) {
-                    // dgemm-method 1
-                    // should be faster for small domains or large truncation
-                    double* gp_opt2;
-                    alloc_aligned( gp_opt2, nb_fields * grid_.size() );
-                    {
-                        ATLAS_TRACE( "opt2 Fourier dgemm method 1" );
-                        eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 );
-                        eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlons );
-                        eckit::linalg::Matrix C( gp_opt2, nb_fields * nlats, nlons );
-                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                    }
-
-                    // Transposition in grid point space:
-                    {
-                        ATLAS_TRACE( "opt2 transposition in gp-space" );
-                        int idx = 0;
-                        for ( int jlon = 0; jlon < nlons; jlon++ ) {
-                            for ( int jlat = 0; jlat < nlats; jlat++ ) {
-                                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                    int pos_tp = jlon + nlons * ( jlat + nlats * ( jfld ) );
-                                    //int pos  = jfld + nb_fields * ( jlat + nlats * ( jlon ) );
-                                    gp_fields[pos_tp] = gp_opt2[idx++];  // = gp_opt2[pos]
-                                }
-                            }
-                        }
-                    }
-                    free_aligned( gp_opt2 );
-                }
-                else {
-                    // dgemm-method 2
-                    {
-                        ATLAS_TRACE( "opt2 Fourier dgemm method 2" );
-                        eckit::linalg::Matrix A( fourier_, nlons, ( truncation_ + 1 ) * 2 );
-                        eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats );
-                        eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats );
-                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                    }
-                }
-            }  // namespace trans
-            // Computing u,v from U,V:
-            {
-                if ( nb_vordiv_fields > 0 ) {
-                    ATLAS_TRACE( "opt2 u,v from U,V" );
-                    std::vector<double> coslats( nlats );
-                    for ( size_t j = 0; j < nlats; ++j ) {
-                        coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
-                    }
-                    int idx = 0;
-                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                        for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                            for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
-                                gp_fields[idx] /= coslats[jlat];
-                                idx++;
-                            }
-                        }
-                    }
-                }
-            }
-            free_aligned( scl_fourier );
-        }  // namespace atlas
-        else {
-            ATLAS_TRACE( "invtrans_uv unstructured opt2" );
-            grid::UnstructuredGrid gu = grid_;
-            double* zfn;
-            alloc_aligned( zfn, ( truncation + 1 ) * ( truncation + 1 ) );
-            compute_zfnopt2( truncation, zfn );
-            int size_fourier = nb_fields * 2;
-            double* legendre;
-            double* scl_fourier;
-            double* scl_fourier_tp;
-            double* fouriertp;
-            double* gp_opt;
-            alloc_aligned( legendre, legendre_size( truncation + 1 ) );
-            alloc_aligned( scl_fourier, size_fourier * ( truncation + 1 ) );
-            alloc_aligned( scl_fourier_tp, size_fourier * ( truncation + 1 ) );
-            alloc_aligned( fouriertp, 2 * ( truncation + 1 ) );
-            alloc_aligned( gp_opt, nb_fields );
-
-            // loop over all points:
-            for ( int ip = 0; ip < grid_.size(); ip++ ) {
-                PointXY p  = gu.xy( ip );
-                double lon = p.x() * util::Constants::degreesToRadians();
-                double lat = p.y() * util::Constants::degreesToRadians();
-                compute_legendre_polynomials_latopt2( truncation, lat, legendre, zfn );
-                // Legendre transform:
-                {
-                    //ATLAS_TRACE( "opt Legendre dgemm" );
-                    for ( int jm = 0; jm <= truncation; jm++ ) {
-                        int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1;
-                        eckit::linalg::Matrix A( eckit::linalg::Matrix(
-                            const_cast<double*>( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) );
-                        eckit::linalg::Matrix B( legendre + noff, ns, 1 );
-                        eckit::linalg::Matrix C( scl_fourier + jm * size_fourier, nb_fields * 2, 1 );
-                        eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                    }
-                }
-                {
-                    //ATLAS_TRACE( "opt transposition in Fourier" );
-                    int idx = 0;
-                    for ( int jm = 0; jm < truncation + 1; jm++ ) {
-                        for ( int imag = 0; imag < 2; imag++ ) {
-                            for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-                                int pos_tp = imag + 2 * ( jm + ( truncation + 1 ) * ( jfld ) );
-                                //int pos  = jfld + nb_fields * ( imag + 2 * ( jm ) );
-                                scl_fourier_tp[pos_tp] = scl_fourier[idx++];  // = scl_fourier[pos]
-                            }
-                        }
-                    }
-                }
-
-                // Fourier transformation:
-                int idx          = 0;
-                fouriertp[idx++] = 1.;  // real part
-                fouriertp[idx++] = 0.;  // imaginary part
-                for ( int jm = 1; jm < truncation + 1; jm++ ) {
-                    fouriertp[idx++] = +2. * std::cos( jm * lon );  // real part
-                    fouriertp[idx++] = -2. * std::sin( jm * lon );  // imaginary part
-                }
-                {
-                    //ATLAS_TRACE( "opt Fourier dgemm" );
-                    eckit::linalg::Matrix A( fouriertp, 1, ( truncation + 1 ) * 2 );
-                    eckit::linalg::Matrix B( scl_fourier_tp, ( truncation + 1 ) * 2, nb_fields );
-                    eckit::linalg::Matrix C( gp_opt, 1, nb_fields );
-                    eckit::linalg::LinearAlgebra::backend().gemm( A, B, C );
-                    for ( int j = 0; j < nb_fields; j++ ) {
-                        gp_fields[ip + j * grid_.size()] = gp_opt[j];
-                    }
-                }
-            }
-            free_aligned( legendre );
-            free_aligned( scl_fourier );
-            free_aligned( scl_fourier_tp );
-            free_aligned( fouriertp );
-            free_aligned( gp_opt );
-        }
-    }  // namespace trans
-}  // namespace atlas
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt2::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
-                               const double divergence_spectra[], double gp_fields[],
-                               const eckit::Configuration& config ) const {
-    invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
-}
-
-void extend_truncationopt2( const int old_truncation, const int nb_fields, const double old_spectra[],
-                            double new_spectra[] ) {
-    int k = 0, k_old = 0;
-    for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
-        for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
-            for ( int imag = 0; imag < 2; imag++ ) {              // imaginary/real part
-                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {  // field
-                    if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; }
-                    else {
-                        new_spectra[k++] = old_spectra[k_old++];
-                    }
-                }
-            }
-        }
-    }
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt2::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
-                               const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
-                               const eckit::Configuration& config ) const {
-    ATLAS_TRACE( "TransLocalopt2::invtrans" );
-    int nb_gp              = grid_.size();
-    int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
-    if ( nb_vordiv_fields > 0 ) {
-        std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
-        std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
-        std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
-        std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
-
-        {
-            ATLAS_TRACE( "opt2 extend vordiv" );
-            // increase truncation in vorticity_spectra and divergence_spectra:
-            extend_truncationopt2( truncation_, nb_vordiv_fields, vorticity_spectra,
-                                   vorticity_spectra_extended.data() );
-            extend_truncationopt2( truncation_, nb_vordiv_fields, divergence_spectra,
-                                   divergence_spectra_extended.data() );
-        }
-
-        {
-            ATLAS_TRACE( "vordiv to UV opt2" );
-            // call vd2uv to compute u and v in spectral space
-            trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt2" ) );
-            vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
-                                      divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
-        }
-
-        // perform spectral transform to compute all fields in grid point space
-        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
-        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
-                     gp_fields + nb_gp * nb_vordiv_fields, config );
-    }
-    if ( nb_scalar_fields > 0 ) {
-        invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields,
-                     config );
-    }
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt2::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt2::dirtrans( const FieldSet& gpfields, FieldSet& spfields,
-                               const eckit::Configuration& config ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt2::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
-                                           const eckit::Configuration& config ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt2::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
-                               const eckit::Configuration& ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocalopt2::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
-                               double divergence_spectra[], const eckit::Configuration& ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt2/TransLocalopt2.h b/src/atlas/trans/localopt2/TransLocalopt2.h
deleted file mode 100644
index 1a2e20801..000000000
--- a/src/atlas/trans/localopt2/TransLocalopt2.h
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#pragma once
-
-#include <vector>
-
-#include "atlas/array.h"
-#include "atlas/grid/Grid.h"
-#include "atlas/trans/Trans.h"
-#if ATLAS_HAVE_FFTW
-#include <fftw3.h>
-#endif
-
-//-----------------------------------------------------------------------------
-// Forward declarations
-
-namespace atlas {
-class Field;
-class FieldSet;
-}  // namespace atlas
-
-//-----------------------------------------------------------------------------
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-
-/// @class TransLocalopt2
-///
-/// Localopt2 spherical harmonics transformations to any grid
-/// Optimisations are present for structured grids
-/// For global grids, please consider using TransIFS instead.
-///
-/// @todo:
-///  - support multiple fields
-///  - support atlas::Field and atlas::FieldSet based on function spaces
-///
-/// @note: Direct transforms are not implemented and cannot be unless
-///        the grid is global. There are no plans to support this at the moment.
-class TransLocalopt2 : public trans::TransImpl {
-public:
-    TransLocalopt2( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() );
-    TransLocalopt2( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() );
-    TransLocalopt2( const Cache&, const Grid&, const long truncation,
-                    const eckit::Configuration& = util::NoConfig() );
-    TransLocalopt2( const Cache&, const Grid&, const Domain&, const long truncation,
-                    const eckit::Configuration& = util::NoConfig() );
-
-    virtual ~TransLocalopt2();
-
-    virtual int truncation() const override { return truncation_; }
-    virtual size_t spectralCoefficients() const override { return ( truncation_ + 1 ) * ( truncation_ + 2 ); }
-
-    virtual const Grid& grid() const override { return grid_; }
-
-    virtual void invtrans( const Field& spfield, Field& gpfield,
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void invtrans( const FieldSet& spfields, FieldSet& gpfields,
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void invtrans_grad( const Field& spfield, Field& gradfield,
-                                const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void invtrans_grad( const FieldSet& spfields, FieldSet& gradfields,
-                                const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind,
-                                       const eckit::Configuration& = util::NoConfig() ) const override;
-
-    // -- IFS style API --
-
-    virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
-                           const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[],
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
-                           const double divergence_spectra[], double gp_fields[],
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    // -- NOT SUPPORTED -- //
-
-    virtual void dirtrans( const Field& gpfield, Field& spfield,
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void dirtrans( const FieldSet& gpfields, FieldSet& spfields,
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
-                                       const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
-                           double divergence_spectra[], const eckit::Configuration& = util::NoConfig() ) const override;
-
-private:
-    void invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
-                      const double scalar_spectra[], double gp_fields[],
-                      const eckit::Configuration& = util::NoConfig() ) const;
-
-private:
-    Grid grid_;
-    bool useFFT_;
-    bool dgemmMethod1_;
-    int truncation_;
-    int nlatsNH_;
-    int nlatsSH_;
-    int nlatsLeg_;
-    int jlonMin_;
-    int nlonsGlobal_;
-    bool precompute_;
-    double* legendre_sym_;
-    double* legendre_asym_;
-    double* fourier_;
-    double* fouriertp_;
-    std::vector<size_t> legendre_begin_;
-    std::vector<size_t> legendre_sym_begin_;
-    std::vector<size_t> legendre_asym_begin_;
-#if ATLAS_HAVE_FFTW
-    fftw_complex* fft_in_;
-    double* fft_out_;
-    fftw_plan plan_;
-#endif
-};
-
-//-----------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt2/VorDivToUVLocalopt2.cc b/src/atlas/trans/localopt2/VorDivToUVLocalopt2.cc
deleted file mode 100644
index 72c5a3ac9..000000000
--- a/src/atlas/trans/localopt2/VorDivToUVLocalopt2.cc
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#include "atlas/trans/localopt2/VorDivToUVLocalopt2.h"
-#include <cmath>  // for std::sqrt
-#include "atlas/functionspace/Spectral.h"
-#include "atlas/runtime/Log.h"
-#include "atlas/util/Earth.h"
-
-using atlas::FunctionSpace;
-using atlas::functionspace::Spectral;
-
-namespace atlas {
-namespace trans {
-
-namespace {
-static VorDivToUVBuilder<VorDivToUVLocalopt2> builder( "localopt2" );
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-// Routine to copy spectral data into internal storage form of IFS trans
-// Ported to C++ by: Andreas Mueller *ECMWF*
-void prfi1bopt2( const int truncation,
-                 const int km,          // zonal wavenumber
-                 const int nb_fields,   // number of fields
-                 const double rspec[],  // spectral data
-                 double pia[] )         // spectral components in data layout of trans library
-{
-    int ilcm = truncation + 1 - km, ioff = ( 2 * truncation - km + 3 ) * km,
-        nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2;
-    for ( int j = 1; j <= ilcm; j++ ) {
-        int inm = ioff + ( ilcm - j ) * 2;
-        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-            int ir = 2 * jfld, ii = ir + 1;
-            pia[ir * nlei1 + j + 1] = rspec[inm * nb_fields + jfld];
-            pia[ii * nlei1 + j + 1] = rspec[( inm + 1 ) * nb_fields + jfld];
-        }
-    }
-
-    for ( int jfld = 0; jfld < 2 * nb_fields; jfld++ ) {
-        pia[jfld * nlei1]            = 0.;
-        pia[jfld * nlei1 + 1]        = 0.;
-        pia[jfld * nlei1 + ilcm + 2] = 0.;
-    }
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-// Routine to compute spectral velocities (*cos(latitude)) out of spectral
-// vorticity and divergence
-// Reference:
-//        ECMWF Research Department documentation of the IFS
-//        Temperton, 1991, MWR 119 p1303
-// Ported to C++ by: Andreas Mueller *ECMWF*
-void vd2uvopt2( const int truncation,               // truncation
-                const int km,                       // zonal wavenumber
-                const int nb_vordiv_fields,         // number of vorticity and divergence fields
-                const double vorticity_spectra[],   // spectral data of vorticity
-                const double divergence_spectra[],  // spectral data of divergence
-                double U[],                         // spectral data of U
-                double V[],                         // spectral data of V
-                const eckit::Configuration& config ) {
-    int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2;
-
-    // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991]
-    std::vector<double> repsnm( ( truncation + 1 ) * ( truncation + 6 ) / 2 );
-    int idx = 0;
-    for ( int jm = 0; jm <= truncation; ++jm ) {
-        for ( int jn = jm; jn <= truncation + 2; ++jn, ++idx ) {
-            repsnm[idx] = std::sqrt( ( jn * jn - jm * jm ) / ( 4. * jn * jn - 1. ) );
-        }
-    }
-    repsnm[0] = 0.;
-
-    // rlapin: constant factor from eq.(2.2) and (2.3) in [Temperton 1991]
-    double ra = util::Earth::radius();
-    std::vector<double> rlapin( truncation + 3 );
-    for ( int jn = 1; jn <= truncation + 2; ++jn ) {
-        rlapin[jn] = -ra * ra / ( jn * ( jn + 1. ) );
-    }
-    rlapin[0] = 0.;
-
-    // inverse the order of repsnm and rlapin for improved accuracy
-    std::vector<double> zepsnm( truncation + 6 );
-    std::vector<double> zlapin( truncation + 6 );
-    std::vector<double> zn( truncation + 6 );
-    for ( int jn = km - 1; jn <= truncation + 2; ++jn ) {
-        int ij = truncation + 3 - jn;
-        if ( jn >= 0 ) {
-            zlapin[ij] = rlapin[jn];
-            if ( jn < km ) { zepsnm[ij] = 0.; }
-            else {
-                zepsnm[ij] = repsnm[jn + ( 2 * truncation - km + 5 ) * km / 2];
-            }
-        }
-        else {
-            zlapin[ij] = 0.;
-            zepsnm[ij] = 0.;
-        }
-        zn[ij] = jn;
-    }
-    zn[0] = truncation + 3;
-
-    // copy spectral data into internal trans storage:
-    std::vector<double> rvor( 2 * nb_vordiv_fields * nlei1 );
-    std::vector<double> rdiv( 2 * nb_vordiv_fields * nlei1 );
-    std::vector<double> ru( 2 * nb_vordiv_fields * nlei1 );
-    std::vector<double> rv( 2 * nb_vordiv_fields * nlei1 );
-    prfi1bopt2( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() );
-    prfi1bopt2( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() );
-
-    // compute eq.(2.12) and (2.13) in [Temperton 1991]:
-    if ( km == 0 ) {
-        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-            int ir = 2 * jfld * nlei1 - 1;
-            for ( int ji = 2; ji < truncation + 4 - km; ++ji ) {
-                double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1];
-                double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1];
-                ru[ir + ji]  = +psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1];
-                rv[ir + ji]  = -psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1];
-            }
-        }
-    }
-    else {
-        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-            int ir = 2 * jfld * nlei1 - 1, ii = ir + nlei1;
-            for ( int ji = 2; ji < truncation + 4 - km; ++ji ) {
-                double chiIm = km * zlapin[ji];
-                double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1];
-                double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1];
-                ru[ir + ji]  = -chiIm * rdiv[ii + ji] + psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1];
-                ru[ii + ji]  = +chiIm * rdiv[ir + ji] + psiM1 * rvor[ii + ji + 1] - psiP1 * rvor[ii + ji - 1];
-                rv[ir + ji]  = -chiIm * rvor[ii + ji] - psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1];
-                rv[ii + ji]  = +chiIm * rvor[ir + ji] - psiM1 * rdiv[ii + ji + 1] + psiP1 * rdiv[ii + ji - 1];
-            }
-        }
-    }
-
-    // copy data from internal storage back to external spectral data:
-    int ilcm = truncation - km;
-    int ioff = ( 2 * truncation - km + 3 ) * km;
-    // ioff: start index of zonal wavenumber km in spectral data
-    double za_r = 1. / util::Earth::radius();
-    for ( int j = 0; j <= ilcm; ++j ) {
-        // ilcm-j = total wavenumber
-        int inm = ioff + ( ilcm - j ) * 2;
-        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-            int ir = 2 * jfld * nlei1, ii = ir + nlei1;
-            int idx = inm * nb_vordiv_fields + jfld;
-            // real part:
-            U[idx] = ru[ir + j + 2] * za_r;
-            V[idx] = rv[ir + j + 2] * za_r;
-            idx += nb_vordiv_fields;
-            // imaginary part:
-            U[idx] = ru[ii + j + 2] * za_r;
-            V[idx] = rv[ii + j + 2] * za_r;
-        }
-    }
-}
-
-void VorDivToUVLocalopt2::execute( const int nb_coeff, const int nb_fields, const double vorticity[],
-                                   const double divergence[], double U[], double V[],
-                                   const eckit::Configuration& config ) const {
-    for ( int jm = 0; jm <= truncation_; ++jm ) {
-        vd2uvopt2( truncation_, jm, nb_fields, vorticity, divergence, U, V, config );
-    }
-}
-
-VorDivToUVLocalopt2::VorDivToUVLocalopt2( const int truncation, const eckit::Configuration& config ) :
-    truncation_( truncation ) {}
-
-VorDivToUVLocalopt2::VorDivToUVLocalopt2( const FunctionSpace& fs, const eckit::Configuration& config ) :
-    truncation_( Spectral( fs ).truncation() ) {}
-
-VorDivToUVLocalopt2::~VorDivToUVLocalopt2() {}
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt2/VorDivToUVLocalopt2.h b/src/atlas/trans/localopt2/VorDivToUVLocalopt2.h
deleted file mode 100644
index 30c386a92..000000000
--- a/src/atlas/trans/localopt2/VorDivToUVLocalopt2.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#pragma once
-
-#include "atlas/trans/VorDivToUV.h"
-
-//-----------------------------------------------------------------------------
-// Forward declarations
-
-namespace atlas {
-class FunctionSpace;
-}
-
-//-----------------------------------------------------------------------------
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-
-class VorDivToUVLocalopt2 : public trans::VorDivToUVImpl {
-public:
-    VorDivToUVLocalopt2( const FunctionSpace&, const eckit::Configuration& = util::NoConfig() );
-    VorDivToUVLocalopt2( int truncation, const eckit::Configuration& = util::NoConfig() );
-
-    virtual ~VorDivToUVLocalopt2();
-
-    virtual int truncation() const override { return truncation_; }
-
-    // pure virtual interface
-
-    // -- IFS style API --
-    // These fields have special interpretation required. You need to know what
-    // you're doing.
-    // See IFS trans library.
-
-    /*!
- * @brief Compute spectral wind (U/V) from spectral vorticity/divergence
- *
- * U = u*cos(lat)
- * V = v*cos(lat)
- *
- * @param nb_fields [in] Number of fields
- * @param vorticity [in] Spectral vorticity
- * @param divergence [in] Spectral divergence
- * @param U [out] Spectral wind U = u*cos(lat)
- * @param V [out] Spectral wind V = v*cos(lat)
- */
-    virtual void execute( const int nb_coeff, const int nb_fields, const double vorticity[], const double divergence[],
-                          double U[], double V[], const eckit::Configuration& = util::NoConfig() ) const override;
-
-private:
-    int truncation_;
-};
-
-// ------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index 1effd3584..5d2e50dd9 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -17,7 +17,6 @@
 #include "atlas/runtime/ErrorHandling.h"
 #include "atlas/runtime/Log.h"
 #include "atlas/trans/VorDivToUV.h"
-#include "atlas/trans/local_noopt/LegendrePolynomials.h"
 #include "atlas/trans/localopt3/LegendrePolynomialsopt3.h"
 #include "atlas/util/Constants.h"
 #include "eckit/config/YAMLConfiguration.h"
diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc
index 247200315..1011719f0 100644
--- a/src/tests/trans/test_trans_localcache.cc
+++ b/src/tests/trans/test_trans_localcache.cc
@@ -11,31 +11,17 @@
 #include <algorithm>
 #include <iomanip>
 
-#include "atlas/array/MakeView.h"
-#include "atlas/field/FieldSet.h"
-#include "atlas/functionspace/NodeColumns.h"
-#include "atlas/functionspace/Spectral.h"
-#include "atlas/functionspace/StructuredColumns.h"
+#include "eckit/utils/MD5.h"
+
 #include "atlas/grid.h"
-#include "atlas/grid/Distribution.h"
-#include "atlas/grid/Partitioner.h"
-#include "atlas/grid/detail/partitioner/EqualRegionsPartitioner.h"
-#include "atlas/grid/detail/partitioner/TransPartitioner.h"
+#include "atlas/option.h"
 #include "atlas/library/Library.h"
-#include "atlas/mesh/Mesh.h"
-#include "atlas/mesh/Nodes.h"
 #include "atlas/meshgenerator/StructuredMeshGenerator.h"
-#include "atlas/output/Gmsh.h"
 #include "atlas/parallel/mpi/mpi.h"
 #include "atlas/runtime/Trace.h"
 #include "atlas/trans/Trans.h"
 #include "atlas/trans/LegendreCacheCreator.h"
-#include "atlas/trans/local_noopt/FourierTransforms.h"
-#include "atlas/trans/local_noopt/LegendrePolynomials.h"
-#include "atlas/trans/local_noopt/LegendreTransforms.h"
 #include "atlas/util/Constants.h"
-#include "atlas/util/Earth.h"
-#include "eckit/utils/MD5.h"
 
 #include "tests/AtlasTestEnvironment.h"
 
@@ -43,12 +29,6 @@
 #include "transi/trans.h"
 #endif
 
-using namespace eckit;
-
-using atlas::array::Array;
-using atlas::array::ArrayView;
-using atlas::array::make_view;
-
 namespace atlas {
 namespace test {
 

From ff9c8513d0a4480c8ccccd9c582d85cab4ed8bae Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Wed, 9 May 2018 15:17:53 +0100
Subject: [PATCH 069/123] Removed trans/local_noopt

---
 src/atlas/CMakeLists.txt                      |  10 -
 src/atlas/trans/Trans.cc                      |   2 -
 src/atlas/trans/VorDivToUV.cc                 |   2 -
 .../trans/local_noopt/FourierTransforms.cc    |  78 ----
 .../trans/local_noopt/FourierTransforms.h     |  38 --
 .../trans/local_noopt/LegendrePolynomials.cc  | 153 --------
 .../trans/local_noopt/LegendrePolynomials.h   |  43 ---
 .../trans/local_noopt/LegendreTransforms.cc   |  61 ---
 .../trans/local_noopt/LegendreTransforms.h    |  36 --
 src/atlas/trans/local_noopt/TransLocal.cc     | 349 ------------------
 src/atlas/trans/local_noopt/TransLocal.h      | 123 ------
 .../trans/local_noopt/VorDivToUVLocal.cc      | 184 ---------
 src/atlas/trans/local_noopt/VorDivToUVLocal.h |  67 ----
 src/atlas/trans/localopt3/TransLocalopt3.cc   |   4 +-
 src/atlas/trans/localopt3/TransLocalopt3.h    |   6 +
 src/tests/trans/test_transgeneral.cc          |   9 +-
 16 files changed, 13 insertions(+), 1152 deletions(-)
 delete mode 100644 src/atlas/trans/local_noopt/FourierTransforms.cc
 delete mode 100644 src/atlas/trans/local_noopt/FourierTransforms.h
 delete mode 100644 src/atlas/trans/local_noopt/LegendrePolynomials.cc
 delete mode 100644 src/atlas/trans/local_noopt/LegendrePolynomials.h
 delete mode 100644 src/atlas/trans/local_noopt/LegendreTransforms.cc
 delete mode 100644 src/atlas/trans/local_noopt/LegendreTransforms.h
 delete mode 100644 src/atlas/trans/local_noopt/TransLocal.cc
 delete mode 100644 src/atlas/trans/local_noopt/TransLocal.h
 delete mode 100644 src/atlas/trans/local_noopt/VorDivToUVLocal.cc
 delete mode 100644 src/atlas/trans/local_noopt/VorDivToUVLocal.h

diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt
index 4ed6679f4..64dd7e1cc 100644
--- a/src/atlas/CMakeLists.txt
+++ b/src/atlas/CMakeLists.txt
@@ -325,16 +325,6 @@ trans/VorDivToUV.h
 trans/VorDivToUV.cc
 trans/LegendreCacheCreator.h
 trans/LegendreCacheCreator.cc
-trans/local_noopt/TransLocal.h
-trans/local_noopt/TransLocal.cc
-trans/local_noopt/LegendrePolynomials.h
-trans/local_noopt/LegendrePolynomials.cc
-trans/local_noopt/LegendreTransforms.h
-trans/local_noopt/LegendreTransforms.cc
-trans/local_noopt/FourierTransforms.h
-trans/local_noopt/FourierTransforms.cc
-trans/local_noopt/VorDivToUVLocal.h
-trans/local_noopt/VorDivToUVLocal.cc
 trans/localopt3/TransLocalopt3.h
 trans/localopt3/TransLocalopt3.cc
 trans/localopt3/LegendrePolynomialsopt3.h
diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc
index d14660060..57a7e707e 100644
--- a/src/atlas/trans/Trans.cc
+++ b/src/atlas/trans/Trans.cc
@@ -27,7 +27,6 @@
 #else
 #define TRANS_DEFAULT "local"
 #endif
-#include "atlas/trans/local_noopt/TransLocal.h"
 #include "atlas/trans/localopt3/TransLocalopt3.h" // --> recommended "local"
 
 namespace atlas {
@@ -62,7 +61,6 @@ struct force_link {
         load_builder_functionspace<TransIFSStructuredColumns>();
         load_builder_grid<TransIFS>();
 #endif
-        load_builder_grid<TransLocal>();
         load_builder_grid<TransLocalopt3>();
     }
 };
diff --git a/src/atlas/trans/VorDivToUV.cc b/src/atlas/trans/VorDivToUV.cc
index aa8fc5781..c01076f6e 100644
--- a/src/atlas/trans/VorDivToUV.cc
+++ b/src/atlas/trans/VorDivToUV.cc
@@ -26,7 +26,6 @@
 #else
 #define TRANS_DEFAULT "local"
 #endif
-#include "atlas/trans/local_noopt/VorDivToUVLocal.h"
 #include "atlas/trans/localopt3/VorDivToUVLocalopt3.h"  // --> recommended "local"
 
 namespace atlas {
@@ -55,7 +54,6 @@ struct force_link {
 #if ATLAS_HAVE_TRANS
         load_builder<VorDivToUVIFS>();
 #endif
-        load_builder<VorDivToUVLocal>();
         load_builder<VorDivToUVLocalopt3>();
     }
 };
diff --git a/src/atlas/trans/local_noopt/FourierTransforms.cc b/src/atlas/trans/local_noopt/FourierTransforms.cc
deleted file mode 100644
index c9f6f2974..000000000
--- a/src/atlas/trans/local_noopt/FourierTransforms.cc
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor
- * does it submit to any jurisdiction.
- */
-
-#include <algorithm>
-#include <cmath>
-#include <iostream>
-
-#include "atlas/trans/local_noopt/FourierTransforms.h"
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-
-void invtrans_fourier( const size_t trcFT,
-                       const double lon,         // longitude in radians (in)
-                       const int nb_fields,      // Number of fields
-                       const double rlegReal[],  // associated Legendre functions, size (trc+1)*trc/2 (in)
-                       const double rlegImag[],  // associated Legendre functions, size (trc+1)*trc/2 (in)
-                       double rgp[] )            // gridpoint
-{
-    for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
-        rgp[jfld] = 0.;
-    }
-    // local Fourier transformation:
-    for ( int jm = 0; jm <= trcFT; ++jm ) {
-        const double cos = std::cos( jm * lon );
-        const double sin = std::sin( jm * lon );
-        for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
-            double real = cos * rlegReal[jm * nb_fields + jfld];
-            double imag = sin * rlegImag[jm * nb_fields + jfld];
-            rgp[jfld] += real - imag;
-        }
-    }
-}
-
-int fourier_truncation( const int truncation,    // truncation
-                        const int nx,            // number of longitudes
-                        const int nxmax,         // maximum nx
-                        const int ndgl,          // number of latitudes
-                        const double lat,        // latitude in radian
-                        const bool fullgrid ) {  // regular grid
-    int trc     = truncation;
-    int trclin  = ndgl - 1;
-    int trcquad = ndgl * 2 / 3 - 1;
-    if ( truncation >= trclin || fullgrid ) {
-        // linear
-        trc = ( nx - 1 ) / 2;
-    }
-    else if ( truncation >= trcquad ) {
-        // quadratic
-        double weight = 3 * ( trclin - truncation ) / ndgl;
-        double sqcos  = std::pow( std::cos( lat ), 2 );
-
-        trc = ( nx - 1 ) / ( 2 + weight * sqcos );
-    }
-    else {
-        // cubic
-        double sqcos = std::pow( std::cos( lat ), 2 );
-
-        trc = ( nx - 1 ) / ( 2 + sqcos ) - 1;
-    }
-    trc = std::min( truncation, trc );
-    return trc;
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/local_noopt/FourierTransforms.h b/src/atlas/trans/local_noopt/FourierTransforms.h
deleted file mode 100644
index 8b47a8dd9..000000000
--- a/src/atlas/trans/local_noopt/FourierTransforms.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#pragma once
-
-#include <cstddef>
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-// Routine to compute the local Fourier transformation
-//
-// Author:
-// Andreas Mueller *ECMWF*
-//
-
-void invtrans_fourier( const size_t trcFT,
-                       const double lon,         // longitude in radians (in)
-                       const int nb_fields,      // Number of fields
-                       const double rlegReal[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
-                       const double rlegImag[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
-                       double rgp[] );           // gridpoint
-
-int fourier_truncation( const int truncation, const int nx, const int nxmax, const int ndgl, const double lat,
-                        const bool fullgrid );
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/local_noopt/LegendrePolynomials.cc b/src/atlas/trans/local_noopt/LegendrePolynomials.cc
deleted file mode 100644
index 26854fd69..000000000
--- a/src/atlas/trans/local_noopt/LegendrePolynomials.cc
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor
- * does it submit to any jurisdiction.
- */
-
-#include <cmath>
-#include <limits>
-
-#include "atlas/array.h"
-#include "atlas/trans/local_noopt/LegendrePolynomials.h"
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-
-void compute_legendre_polynomials(
-    const size_t trc,  // truncation (in)
-    const double lat,  // latitude in radians (in)
-    double legpol[] )  // values of associated Legendre functions, size (trc+1)*trc/2 (out)
-{
-    array::ArrayT<int> idxmn_( trc + 1, trc + 1 );
-    array::ArrayView<int, 2> idxmn = array::make_view<int, 2>( idxmn_ );
-
-    int j = 0;
-    for ( int jm = 0; jm <= trc; ++jm ) {
-        for ( int jn = jm; jn <= trc; ++jn ) {
-            idxmn( jm, jn ) = j++;
-        }
-    }
-
-    array::ArrayT<double> zfn_( trc + 1, trc + 1 );
-    array::ArrayView<double, 2> zfn = array::make_view<double, 2>( zfn_ );
-
-    int iodd;
-
-    // Compute coefficients for Taylor series in Belousov (19) and (21)
-    // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.)
-    // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1
-    zfn( 0, 0 ) = 2.;
-    for ( int jn = 1; jn <= trc; ++jn ) {
-        double zfnn = zfn( 0, 0 );
-        for ( int jgl = 1; jgl <= jn; ++jgl ) {
-            zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) );
-        }
-        iodd          = jn % 2;
-        zfn( jn, jn ) = zfnn;
-        for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) {
-            double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) );  // new factor numerator
-            double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) );           // new factor denominator
-
-            zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd;
-        }
-    }
-
-    // --------------------
-    // 1. First two columns
-    // --------------------
-    double zdlx1   = ( M_PI_2 - lat );               // theta
-    double zdlx    = std::cos( zdlx1 );              // cos(theta)
-    double zdlsita = std::sqrt( 1. - zdlx * zdlx );  // sin(theta) (this is how trans library does it)
-
-    legpol[0]       = 1.;
-    double zdl1sita = 0.;
-
-    // if we are less than 1 meter from the pole,
-    if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits<double>::epsilon() ) ) {
-        zdlx    = 1.;
-        zdlsita = 0.;
-    }
-    else {
-        zdl1sita = 1. / zdlsita;
-    }
-
-    // ordinary Legendre polynomials from series expansion
-    // ---------------------------------------------------
-
-    // even N
-    for ( int jn = 2; jn <= trc; jn += 2 ) {
-        double zdlk   = 0.5 * zfn( jn, 0 );
-        double zdlldn = 0.0;
-        double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
-        // represented by only even k
-        for ( int jk = 2; jk <= jn; jk += 2 ) {
-            // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-            zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
-            // normalised associated Legendre polynomial == \overbar{P_n}^1
-            zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
-        }
-        legpol[idxmn( 0, jn )] = zdlk;
-        legpol[idxmn( 1, jn )] = zdlldn;
-    }
-
-    // odd N
-    for ( int jn = 1; jn <= trc; jn += 2 ) {
-        zfn( jn, 0 )  = 0.;
-        double zdlk   = 0.;
-        double zdlldn = 0.0;
-        double zdsq   = 1. / std::sqrt( jn * ( jn + 1. ) );
-        // represented by only even k
-        for ( int jk = 1; jk <= jn; jk += 2 ) {
-            // normalised ordinary Legendre polynomial == \overbar{P_n}^0
-            zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
-            // normalised associated Legendre polynomial == \overbar{P_n}^1
-            zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
-        }
-        legpol[idxmn( 0, jn )] = zdlk;
-        legpol[idxmn( 1, jn )] = zdlldn;
-    }
-
-    // --------------------------------------------------------------
-    // 2. Diagonal (the terms 0,0 and 1,1 have already been computed)
-    //    Belousov, equation (23)
-    // --------------------------------------------------------------
-
-    double zdls = zdl1sita * std::numeric_limits<double>::min();
-    for ( int jn = 2; jn <= trc; ++jn ) {
-        double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) );
-
-        legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq;
-        if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0;
-    }
-
-    // ---------------------------------------------
-    // 3. General recurrence (Belousov, equation 17)
-    // ---------------------------------------------
-
-    for ( int jn = 3; jn <= trc; ++jn ) {
-        for ( int jm = 2; jm < jn; ++jm ) {
-            double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) );  // numerator of c in Belousov
-            double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of c in Belousov
-            double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) );  // numerator of d in Belousov
-            double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of d in Belousov
-            double en = ( ( 2. * jn + 1. ) * ( jn - jm ) );                          // numerator of e in Belousov
-            double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) );                          // denominator of e in Belousov
-
-            legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] -
-                                      std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx +
-                                      std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx;
-        }
-    }
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/local_noopt/LegendrePolynomials.h b/src/atlas/trans/local_noopt/LegendrePolynomials.h
deleted file mode 100644
index 56a3e7443..000000000
--- a/src/atlas/trans/local_noopt/LegendrePolynomials.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#pragma once
-
-#include <cstddef>
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-// Routine to compute the Legendre polynomials in serial according to Belousov
-// (using correction by Swarztrauber)
-//
-// Reference:
-// S.L. Belousov, Tables of normalized associated Legendre Polynomials, Pergamon
-// Press (1962)
-// P.N. Swarztrauber, On computing the points and weights for Gauss-Legendre
-// quadrature,
-//      SIAM J. Sci. Comput. Vol. 24 (3) pp. 945-954 (2002)
-//
-// Author of Fortran version:
-// Mats Hamrud, Philippe Courtier, Nils Wedi *ECMWF*
-//
-// Ported to C++ by:
-// Andreas Mueller *ECMWF*
-//
-void compute_legendre_polynomials(
-    const size_t trc,   // truncation (in)
-    const double lat,   // latitude in radians (in)
-    double legpol[] );  // values of associated Legendre functions, size (trc+1)*trc/2 (out)
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/local_noopt/LegendreTransforms.cc b/src/atlas/trans/local_noopt/LegendreTransforms.cc
deleted file mode 100644
index f82d9f401..000000000
--- a/src/atlas/trans/local_noopt/LegendreTransforms.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#include <cstddef>
-
-#include "atlas/trans/local_noopt/LegendreTransforms.h"
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-
-void invtrans_legendre( const size_t trc,    // truncation (in)
-                        const size_t trcFT,  // truncation for Fourier transformation (in)
-                        const size_t trcLP,  // truncation of Legendre polynomials data legpol. Needs to be >= trc (in)
-                        const double legpol[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
-                        const int nb_fields,    // number of fields
-                        const double spec[],    // spectral data, size (trc+1)*trc (in)
-                        double leg_real[],      // values of associated Legendre functions, size (trc+1)*trc/2 (out)
-                        double leg_imag[] )     // values of associated Legendre functions, size (trc+1)*trc/2 (out)
-{
-    // Legendre transformation:
-    int k = 0, klp = 0;
-    for ( int jm = 0; jm <= trcFT; ++jm ) {
-        for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
-            leg_real[jm * nb_fields + jfld] = 0.;
-            leg_imag[jm * nb_fields + jfld] = 0.;
-        }
-        for ( int jn = jm; jn <= trcLP; ++jn, ++klp ) {
-            if ( jn <= trc ) {
-                for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
-                    // not completely sure where this factor 2 comes from. One possible
-                    // explanation:
-                    // normalization of trigonometric functions in the spherical harmonics
-                    // integral over square of trig function is 1 for m=0 and 0.5 (?) for
-                    // m>0
-                    leg_real[jm * nb_fields + jfld] += 2. * spec[( 2 * k ) * nb_fields + jfld] * legpol[klp];
-                    leg_imag[jm * nb_fields + jfld] += 2. * spec[( 2 * k + 1 ) * nb_fields + jfld] * legpol[klp];
-                }
-                ++k;
-            }
-        }
-    }
-    // Undo factor 2 for (jm == 0)
-    for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
-        leg_real[jfld] /= 2.;
-        leg_imag[jfld] /= 2.;
-    }
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/local_noopt/LegendreTransforms.h b/src/atlas/trans/local_noopt/LegendreTransforms.h
deleted file mode 100644
index c3152e1f5..000000000
--- a/src/atlas/trans/local_noopt/LegendreTransforms.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#pragma once
-
-#include <cstddef>
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-// Routine to compute the Legendre transformation
-//
-// Author:
-// Andreas Mueller *ECMWF*
-//
-void invtrans_legendre( const size_t trc,    // truncation (in)
-                        const size_t trcFT,  // truncation for Fourier transformation (in)
-                        const size_t trcLP,  // truncation of Legendre polynomials data legpol. Needs to be >= trc (in)
-                        const double legpol[],  // values of associated Legendre functions, size (trc+1)*trc/2 (in)
-                        const int nb_fields,    // number of fields
-                        const double spec[],    // spectral data, size (trc+1)*trc (in)
-                        double leg_real[],      // values of associated Legendre functions, size (trc+1)*trc/2 (out)
-                        double leg_imag[] );    // values of associated Legendre functions, size (trc+1)*trc/2 (out)
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/local_noopt/TransLocal.cc b/src/atlas/trans/local_noopt/TransLocal.cc
deleted file mode 100644
index 77d3af78f..000000000
--- a/src/atlas/trans/local_noopt/TransLocal.cc
+++ /dev/null
@@ -1,349 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#include "atlas/trans/local_noopt/TransLocal.h"
-#include "atlas/array.h"
-#include "atlas/option.h"
-#include "atlas/parallel/mpi/mpi.h"
-#include "atlas/runtime/ErrorHandling.h"
-#include "atlas/runtime/Log.h"
-#include "atlas/trans/VorDivToUV.h"
-#include "atlas/trans/local_noopt/FourierTransforms.h"
-#include "atlas/trans/local_noopt/LegendrePolynomials.h"
-#include "atlas/trans/local_noopt/LegendreTransforms.h"
-#include "atlas/util/Constants.h"
-
-namespace atlas {
-namespace trans {
-
-namespace {
-static TransBuilderGrid<TransLocal> builder( "local_noopt" );
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-// Helper functions
-// --------------------------------------------------------------------------------------------------------------------
-namespace {  // anonymous
-
-size_t legendre_size( const size_t truncation ) {
-    return ( truncation + 2 ) * ( truncation + 1 ) / 2;
-}
-
-}  // namespace
-
-// --------------------------------------------------------------------------------------------------------------------
-// Class TransLocal
-// --------------------------------------------------------------------------------------------------------------------
-
-TransLocal::TransLocal( const Cache& cache, const Grid& grid, const long truncation,
-                        const eckit::Configuration& config ) :
-    grid_( grid ),
-    truncation_( truncation ),
-    precompute_( config.getBool( "precompute", true ) ) {
-    if ( precompute_ ) {
-        if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
-            ATLAS_TRACE( "Precompute legendre structured" );
-            grid::StructuredGrid g( grid_ );
-            size_t size( 0 );
-            legendre_begin_.resize( g.ny() );
-            for ( size_t j = 0; j < g.ny(); ++j ) {
-                legendre_begin_[j] = size;
-                size += legendre_size( truncation_ + 1 );
-            }
-            legendre_.resize( size );
-
-            for ( size_t j = 0; j < g.ny(); ++j ) {
-                double lat = g.y( j ) * util::Constants::degreesToRadians();
-                compute_legendre_polynomials( truncation_ + 1, lat, legendre_data( j ) );
-            }
-        }
-        else {
-            ATLAS_TRACE( "Precompute legendre unstructured" );
-            size_t size( 0 );
-            legendre_begin_.resize( grid_.size() );
-            for ( size_t j = 0; j < grid_.size(); ++j ) {
-                legendre_begin_[j] = size;
-                size += legendre_size( truncation_ + 1 );
-            }
-            legendre_.resize( size );
-            int j( 0 );
-            for ( PointXY p : grid_.xy() ) {
-                double lat = p.y() * util::Constants::degreesToRadians();
-                compute_legendre_polynomials( truncation_ + 1, lat, legendre_data( j++ ) );
-            }
-        }
-    }
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-TransLocal::TransLocal( const Grid& grid, const long truncation, const eckit::Configuration& config ) :
-    TransLocal( Cache(), grid, truncation, config ) {}
-
-TransLocal::TransLocal( const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) :
-    TransLocal( Cache(), grid, truncation, config ) {}
-
-TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) :
-    TransLocal( cache, grid, truncation, config ) {}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-TransLocal::~TransLocal() {}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocal::invtrans( const Field& spfield, Field& gpfield, const eckit::Configuration& config ) const {
-    NOTIMP;
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocal::invtrans( const FieldSet& spfields, FieldSet& gpfields, const eckit::Configuration& config ) const {
-    NOTIMP;
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocal::invtrans_grad( const Field& spfield, Field& gradfield, const eckit::Configuration& config ) const {
-    NOTIMP;
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocal::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields,
-                                const eckit::Configuration& config ) const {
-    NOTIMP;
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocal::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind,
-                                       const eckit::Configuration& config ) const {
-    NOTIMP;
-}
-
-void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[],
-                           const eckit::Configuration& config ) const {
-    invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields, config );
-}
-
-void gp_transpose( const int nb_size, const int nb_fields, const double gp_tmp[], double gp_fields[] ) {
-    ATLAS_TRACE( "gp_transpose" );
-    for ( int jgp = 0; jgp < nb_size; jgp++ ) {
-        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-            gp_fields[jfld * nb_size + jgp] = gp_tmp[jgp * nb_fields + jfld];
-        }
-    }
-}
-
-//-----------------------------------------------------------------------------
-// Routine to compute the spectral transform by using a local Fourier
-// transformation
-// for a grid (same latitude for all longitudes, allows to compute Legendre
-// functions
-// once for all longitudes). U and v components are divided by cos(latitude) for
-// nb_vordiv_fields > 0.
-//
-// Author:
-// Andreas Mueller *ECMWF*
-//
-void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
-                              const double scalar_spectra[], double gp_fields[],
-                              const eckit::Configuration& config ) const {
-    if ( nb_scalar_fields > 0 ) {
-        int nb_fields = nb_scalar_fields;
-
-        // Depending on "precompute_legendre_", we have to compute the
-        // legendre polynomials for every latitute
-        std::vector<double> recomputed_legendre_;
-
-        auto legPol = [&]( double lat, int j ) -> const double* {
-            if ( precompute_ ) { return legendre_data( j ); }
-            else {
-                recomputed_legendre_.resize( legendre_size( truncation ) );
-                compute_legendre_polynomials( truncation, lat, recomputed_legendre_.data() );
-                return recomputed_legendre_.data();
-            }
-        };
-
-        // Temporary storage for legendre space
-        std::vector<double> legReal( nb_fields * ( truncation + 1 ) );
-        std::vector<double> legImag( nb_fields * ( truncation + 1 ) );
-        std::vector<double> gp_tmp( nb_fields * grid_.size(), 0. );
-
-        // Transform
-        if ( grid::StructuredGrid g = grid_ ) {
-            ATLAS_TRACE( "invtrans_uv structured" );
-            int idx = 0;
-            for ( size_t j = 0; j < g.ny(); ++j ) {
-                double lat = g.y( j ) * util::Constants::degreesToRadians();
-                double trcFT =
-                    fourier_truncation( truncation, g.nx( j ), g.nxmax(), g.ny(), lat, grid::RegularGrid( grid_ ) );
-
-                // Legendre transform:
-                {
-                    ATLAS_TRACE( "invtrans_legendre" );
-                    invtrans_legendre( truncation, trcFT, truncation_ + 1, legPol( lat, j ), nb_fields, scalar_spectra,
-                                       legReal.data(), legImag.data() );
-                }
-
-                // Fourier transform:
-                {
-                    ATLAS_TRACE( "invtrans_fourier" );
-                    for ( size_t i = 0; i < g.nx( j ); ++i ) {
-                        double lon = g.x( i, j ) * util::Constants::degreesToRadians();
-                        invtrans_fourier( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
-                                          gp_tmp.data() + ( nb_fields * idx ) );
-                        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-                            gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
-                        }
-                        ++idx;
-                    }
-                }
-            }
-        }
-        else {
-            ATLAS_TRACE( "invtrans_uv unstructured" );
-            int idx = 0;
-            for ( PointXY p : grid_.xy() ) {
-                double lon   = p.x() * util::Constants::degreesToRadians();
-                double lat   = p.y() * util::Constants::degreesToRadians();
-                double trcFT = truncation;
-
-                // Legendre transform:
-                {
-                    ATLAS_TRACE( "invtrans_legendre" );
-                    invtrans_legendre( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields,
-                                       scalar_spectra, legReal.data(), legImag.data() );
-                }
-
-                // Fourier transform:
-                {
-                    ATLAS_TRACE( "invtrans_fourier" );
-                    invtrans_fourier( trcFT, lon, nb_fields, legReal.data(), legImag.data(),
-                                      gp_tmp.data() + ( nb_fields * idx ) );
-                    for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-                        gp_tmp[nb_fields * idx + jfld] /= std::cos( lat );
-                    }
-                }
-                ++idx;
-            }
-        }
-
-        // transpose result (gp_tmp: jfld is fastest index. gp_fields: jfld needs to
-        // be slowest index)
-        gp_transpose( grid_.size(), nb_fields, gp_tmp.data(), gp_fields );
-    }
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocal::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
-                           const double divergence_spectra[], double gp_fields[],
-                           const eckit::Configuration& config ) const {
-    invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
-}
-
-void extend_truncation( const int old_truncation, const int nb_fields, const double old_spectra[],
-                        double new_spectra[] ) {
-    int k = 0, k_old = 0;
-    for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
-        for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
-            for ( int imag = 0; imag < 2; imag++ ) {              // imaginary/real part
-                for ( int jfld = 0; jfld < nb_fields; jfld++ ) {  // field
-                    if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; }
-                    else {
-                        new_spectra[k++] = old_spectra[k_old++];
-                    }
-                }
-            }
-        }
-    }
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
-                           const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
-                           const eckit::Configuration& config ) const {
-    ATLAS_TRACE( "TransLocal::invtrans" );
-    int nb_gp              = grid_.size();
-    int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
-    std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
-    std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
-    std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
-    std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
-
-    {
-        ATLAS_TRACE( "vordiv to UV" );
-        // increase truncation in vorticity_spectra and divergence_spectra:
-        extend_truncation( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() );
-        extend_truncation( truncation_, nb_vordiv_fields, divergence_spectra, divergence_spectra_extended.data() );
-
-        // call vd2uv to compute u and v in spectral space
-        trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) );
-        vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
-                                  divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
-    }
-
-    // perform spectral transform to compute all fields in grid point space
-    invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
-    invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
-                 gp_fields + nb_gp * nb_vordiv_fields, config );
-    invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields, config );
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocal::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocal::dirtrans( const FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocal::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
-                                       const eckit::Configuration& config ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocal::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
-                           const eckit::Configuration& ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-void TransLocal::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
-                           double divergence_spectra[], const eckit::Configuration& ) const {
-    NOTIMP;
-    // Not implemented and not planned.
-    // Use the TransIFS implementation instead.
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/local_noopt/TransLocal.h b/src/atlas/trans/local_noopt/TransLocal.h
deleted file mode 100644
index 9ad2cb5db..000000000
--- a/src/atlas/trans/local_noopt/TransLocal.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#pragma once
-
-#include <vector>
-
-#include "atlas/grid/Grid.h"
-#include "atlas/trans/Trans.h"
-
-//-----------------------------------------------------------------------------
-// Forward declarations
-
-namespace atlas {
-class Field;
-class FieldSet;
-}  // namespace atlas
-
-//-----------------------------------------------------------------------------
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-
-/// @class TransLocal
-///
-/// Local spherical harmonics transformations to any grid
-/// Optimisations are present for structured grids
-/// For global grids, please consider using TransIFS instead.
-///
-/// @todo:
-///  - support multiple fields
-///  - support atlas::Field and atlas::FieldSet based on function spaces
-///
-/// @note: Direct transforms are not implemented and cannot be unless
-///        the grid is global. There are no plans to support this at the moment.
-class TransLocal : public trans::TransImpl {
-public:
-    TransLocal( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() );
-    TransLocal( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() );
-    TransLocal( const Cache&, const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() );
-    TransLocal( const Cache&, const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() );
-
-    virtual ~TransLocal();
-
-    virtual int truncation() const override { return truncation_; }
-    virtual size_t spectralCoefficients() const override { return ( truncation_ + 1 ) * ( truncation_ + 2 ); }
-
-    virtual const Grid& grid() const override { return grid_; }
-
-    virtual void invtrans( const Field& spfield, Field& gpfield,
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void invtrans( const FieldSet& spfields, FieldSet& gpfields,
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void invtrans_grad( const Field& spfield, Field& gradfield,
-                                const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void invtrans_grad( const FieldSet& spfields, FieldSet& gradfields,
-                                const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind,
-                                       const eckit::Configuration& = util::NoConfig() ) const override;
-
-    // -- IFS style API --
-
-    virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
-                           const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[],
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
-                           const double divergence_spectra[], double gp_fields[],
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    // -- NOT SUPPORTED -- //
-
-    virtual void dirtrans( const Field& gpfield, Field& spfield,
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void dirtrans( const FieldSet& gpfields, FieldSet& spfields,
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
-                                       const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
-                           const eckit::Configuration& = util::NoConfig() ) const override;
-
-    virtual void dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
-                           double divergence_spectra[], const eckit::Configuration& = util::NoConfig() ) const override;
-
-private:
-    const double* legendre_data( int j ) const { return legendre_.data() + legendre_begin_[j]; }
-    double* legendre_data( int j ) { return legendre_.data() + legendre_begin_[j]; }
-
-    void invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
-                      const double scalar_spectra[], double gp_fields[],
-                      const eckit::Configuration& = util::NoConfig() ) const;
-
-private:
-    Grid grid_;
-    int truncation_;
-    bool precompute_;
-    std::vector<double> legendre_;
-    std::vector<size_t> legendre_begin_;
-};
-
-//-----------------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/local_noopt/VorDivToUVLocal.cc b/src/atlas/trans/local_noopt/VorDivToUVLocal.cc
deleted file mode 100644
index 0db63e792..000000000
--- a/src/atlas/trans/local_noopt/VorDivToUVLocal.cc
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#include <cmath>  // for std::sqrt
-#include "atlas/trans/local_noopt/VorDivToUVLocal.h"
-#include "atlas/functionspace/Spectral.h"
-#include "atlas/runtime/Log.h"
-#include "atlas/util/Earth.h"
-
-using atlas::FunctionSpace;
-using atlas::functionspace::Spectral;
-
-namespace atlas {
-namespace trans {
-
-namespace {
-static VorDivToUVBuilder<VorDivToUVLocal> builder( "local_noopt" );
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-// Routine to copy spectral data into internal storage form of IFS trans
-// Ported to C++ by: Andreas Mueller *ECMWF*
-void prfi1b( const int truncation,
-             const int km,          // zonal wavenumber
-             const int nb_fields,   // number of fields
-             const double rspec[],  // spectral data
-             double pia[] )         // spectral components in data layout of trans library
-{
-    int ilcm = truncation + 1 - km, ioff = ( 2 * truncation - km + 3 ) * km,
-        nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2;
-    for ( int j = 1; j <= ilcm; j++ ) {
-        int inm = ioff + ( ilcm - j ) * 2;
-        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
-            int ir = 2 * jfld, ii = ir + 1;
-            pia[ir * nlei1 + j + 1] = rspec[inm * nb_fields + jfld];
-            pia[ii * nlei1 + j + 1] = rspec[( inm + 1 ) * nb_fields + jfld];
-        }
-    }
-
-    for ( int jfld = 0; jfld < 2 * nb_fields; jfld++ ) {
-        pia[jfld * nlei1]            = 0.;
-        pia[jfld * nlei1 + 1]        = 0.;
-        pia[jfld * nlei1 + ilcm + 2] = 0.;
-    }
-}
-
-// --------------------------------------------------------------------------------------------------------------------
-// Routine to compute spectral velocities (*cos(latitude)) out of spectral
-// vorticity and divergence
-// Reference:
-//        ECMWF Research Department documentation of the IFS
-//        Temperton, 1991, MWR 119 p1303
-// Ported to C++ by: Andreas Mueller *ECMWF*
-void vd2uv( const int truncation,               // truncation
-            const int km,                       // zonal wavenumber
-            const int nb_vordiv_fields,         // number of vorticity and divergence fields
-            const double vorticity_spectra[],   // spectral data of vorticity
-            const double divergence_spectra[],  // spectral data of divergence
-            double U[],                         // spectral data of U
-            double V[],                         // spectral data of V
-            const eckit::Configuration& config ) {
-    int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2;
-
-    // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991]
-    std::vector<double> repsnm( ( truncation + 1 ) * ( truncation + 6 ) / 2 );
-    int idx = 0;
-    for ( int jm = 0; jm <= truncation; ++jm ) {
-        for ( int jn = jm; jn <= truncation + 2; ++jn, ++idx ) {
-            repsnm[idx] = std::sqrt( ( jn * jn - jm * jm ) / ( 4. * jn * jn - 1. ) );
-        }
-    }
-    repsnm[0] = 0.;
-
-    // rlapin: constant factor from eq.(2.2) and (2.3) in [Temperton 1991]
-    double ra = util::Earth::radius();
-    std::vector<double> rlapin( truncation + 3 );
-    for ( int jn = 1; jn <= truncation + 2; ++jn ) {
-        rlapin[jn] = -ra * ra / ( jn * ( jn + 1. ) );
-    }
-    rlapin[0] = 0.;
-
-    // inverse the order of repsnm and rlapin for improved accuracy
-    std::vector<double> zepsnm( truncation + 6 );
-    std::vector<double> zlapin( truncation + 6 );
-    std::vector<double> zn( truncation + 6 );
-    for ( int jn = km - 1; jn <= truncation + 2; ++jn ) {
-        int ij = truncation + 3 - jn;
-        if ( jn >= 0 ) {
-            zlapin[ij] = rlapin[jn];
-            if ( jn < km ) { zepsnm[ij] = 0.; }
-            else {
-                zepsnm[ij] = repsnm[jn + ( 2 * truncation - km + 5 ) * km / 2];
-            }
-        }
-        else {
-            zlapin[ij] = 0.;
-            zepsnm[ij] = 0.;
-        }
-        zn[ij] = jn;
-    }
-    zn[0] = truncation + 3;
-
-    // copy spectral data into internal trans storage:
-    std::vector<double> rvor( 2 * nb_vordiv_fields * nlei1 );
-    std::vector<double> rdiv( 2 * nb_vordiv_fields * nlei1 );
-    std::vector<double> ru( 2 * nb_vordiv_fields * nlei1 );
-    std::vector<double> rv( 2 * nb_vordiv_fields * nlei1 );
-    prfi1b( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() );
-    prfi1b( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() );
-
-    // compute eq.(2.12) and (2.13) in [Temperton 1991]:
-    if ( km == 0 ) {
-        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-            int ir = 2 * jfld * nlei1 - 1;
-            for ( int ji = 2; ji < truncation + 4 - km; ++ji ) {
-                double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1];
-                double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1];
-                ru[ir + ji]  = +psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1];
-                rv[ir + ji]  = -psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1];
-            }
-        }
-    }
-    else {
-        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-            int ir = 2 * jfld * nlei1 - 1, ii = ir + nlei1;
-            for ( int ji = 2; ji < truncation + 4 - km; ++ji ) {
-                double chiIm = km * zlapin[ji];
-                double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1];
-                double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1];
-                ru[ir + ji]  = -chiIm * rdiv[ii + ji] + psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1];
-                ru[ii + ji]  = +chiIm * rdiv[ir + ji] + psiM1 * rvor[ii + ji + 1] - psiP1 * rvor[ii + ji - 1];
-                rv[ir + ji]  = -chiIm * rvor[ii + ji] - psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1];
-                rv[ii + ji]  = +chiIm * rvor[ir + ji] - psiM1 * rdiv[ii + ji + 1] + psiP1 * rdiv[ii + ji - 1];
-            }
-        }
-    }
-
-    // copy data from internal storage back to external spectral data:
-    int ilcm = truncation - km;
-    int ioff = ( 2 * truncation - km + 3 ) * km;
-    // ioff: start index of zonal wavenumber km in spectral data
-    double za_r = 1. / util::Earth::radius();
-    for ( int j = 0; j <= ilcm; ++j ) {
-        // ilcm-j = total wavenumber
-        int inm = ioff + ( ilcm - j ) * 2;
-        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-            int ir = 2 * jfld * nlei1, ii = ir + nlei1;
-            int idx = inm * nb_vordiv_fields + jfld;
-            // real part:
-            U[idx] = ru[ir + j + 2] * za_r;
-            V[idx] = rv[ir + j + 2] * za_r;
-            idx += nb_vordiv_fields;
-            // imaginary part:
-            U[idx] = ru[ii + j + 2] * za_r;
-            V[idx] = rv[ii + j + 2] * za_r;
-        }
-    }
-}
-
-void VorDivToUVLocal::execute( const int nb_coeff, const int nb_fields, const double vorticity[],
-                               const double divergence[], double U[], double V[],
-                               const eckit::Configuration& config ) const {
-    for ( int jm = 0; jm <= truncation_; ++jm ) {
-        vd2uv( truncation_, jm, nb_fields, vorticity, divergence, U, V, config );
-    }
-}
-
-VorDivToUVLocal::VorDivToUVLocal( const int truncation, const eckit::Configuration& config ) :
-    truncation_( truncation ) {}
-
-VorDivToUVLocal::VorDivToUVLocal( const FunctionSpace& fs, const eckit::Configuration& config ) :
-    truncation_( Spectral( fs ).truncation() ) {}
-
-VorDivToUVLocal::~VorDivToUVLocal() {}
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/local_noopt/VorDivToUVLocal.h b/src/atlas/trans/local_noopt/VorDivToUVLocal.h
deleted file mode 100644
index ee3903de1..000000000
--- a/src/atlas/trans/local_noopt/VorDivToUVLocal.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * (C) Copyright 2013 ECMWF.
- *
- * This software is licensed under the terms of the Apache Licence Version 2.0
- * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
- * In applying this licence, ECMWF does not waive the privileges and immunities
- * granted to it by virtue of its status as an intergovernmental organisation
- * nor does it submit to any jurisdiction.
- */
-
-#pragma once
-
-#include "atlas/trans/VorDivToUV.h"
-
-//-----------------------------------------------------------------------------
-// Forward declarations
-
-namespace atlas {
-class FunctionSpace;
-}
-
-//-----------------------------------------------------------------------------
-
-namespace atlas {
-namespace trans {
-
-//-----------------------------------------------------------------------------
-
-class VorDivToUVLocal : public trans::VorDivToUVImpl {
-public:
-    VorDivToUVLocal( const FunctionSpace&, const eckit::Configuration& = util::NoConfig() );
-    VorDivToUVLocal( int truncation, const eckit::Configuration& = util::NoConfig() );
-
-    virtual ~VorDivToUVLocal();
-
-    virtual int truncation() const override { return truncation_; }
-
-    // pure virtual interface
-
-    // -- IFS style API --
-    // These fields have special interpretation required. You need to know what
-    // you're doing.
-    // See IFS trans library.
-
-    /*!
- * @brief Compute spectral wind (U/V) from spectral vorticity/divergence
- *
- * U = u*cos(lat)
- * V = v*cos(lat)
- *
- * @param nb_fields [in] Number of fields
- * @param vorticity [in] Spectral vorticity
- * @param divergence [in] Spectral divergence
- * @param U [out] Spectral wind U = u*cos(lat)
- * @param V [out] Spectral wind V = v*cos(lat)
- */
-    virtual void execute( const int nb_coeff, const int nb_fields, const double vorticity[], const double divergence[],
-                          double U[], double V[], const eckit::Configuration& = util::NoConfig() ) const override;
-
-private:
-    int truncation_;
-};
-
-// ------------------------------------------------------------------
-
-}  // namespace trans
-}  // namespace atlas
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc
index 5d2e50dd9..30995afb1 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/localopt3/TransLocalopt3.cc
@@ -184,6 +184,8 @@ int add_padding( int n ) {
     return std::ceil( n / 8. ) * 8;
 }
 
+}  // namespace
+
 int fourier_truncation( const int truncation,    // truncation
                         const int nx,            // number of longitudes
                         const int nxmax,         // maximum nx
@@ -214,8 +216,6 @@ int fourier_truncation( const int truncation,    // truncation
     return trc;
 }
 
-}  // namespace
-
 // --------------------------------------------------------------------------------------------------------------------
 // Class TransLocalopt3
 // --------------------------------------------------------------------------------------------------------------------
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h
index dda875ae5..bcc80b1d7 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.h
+++ b/src/atlas/trans/localopt3/TransLocalopt3.h
@@ -41,6 +41,12 @@ namespace atlas {
 namespace trans {
 
 class LegendreCacheCreatorLocal;
+int fourier_truncation( const int truncation,    // truncation
+                        const int nx,            // number of longitudes
+                        const int nxmax,         // maximum nx
+                        const int ndgl,          // number of latitudes
+                        const double lat,        // latitude in radian
+                        const bool regular );    // regular grid
 
 //-----------------------------------------------------------------------------
 
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 9c2fb2801..92b03f489 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -29,11 +29,9 @@
 #include "atlas/parallel/mpi/mpi.h"
 #include "atlas/runtime/Trace.h"
 #include "atlas/trans/Trans.h"
-#include "atlas/trans/local_noopt/FourierTransforms.h"
-#include "atlas/trans/local_noopt/LegendrePolynomials.h"
-#include "atlas/trans/local_noopt/LegendreTransforms.h"
 #include "atlas/util/Constants.h"
 #include "atlas/util/Earth.h"
+#include "atlas/trans/localopt3/TransLocalopt3.h"
 
 #include "tests/AtlasTestEnvironment.h"
 
@@ -69,6 +67,7 @@ struct AtlasTransEnvironment : public AtlasTestEnvironment {
 
 //-----------------------------------------------------------------------------
 
+#if 0
 void compute_legendre( const size_t trc,                      // truncation (in)
                        const double& lat,                     // latitude in radians (in)
                        array::ArrayView<double, 1>& zlfpol )  // values of
@@ -216,7 +215,7 @@ void spectral_transform_grid( const size_t trc,       // truncation (in)
 
     EXPECT( idx == grid.size() );
 }
-
+#endif
 //-----------------------------------------------------------------------------
 // Routine to compute the spherical harmonics analytically at one point
 // (up to wave number 3)
@@ -528,6 +527,7 @@ double compute_rms( const size_t N,    // length of the arrays
     return rms;
 }
 
+#if 0
 //-----------------------------------------------------------------------------
 // Routine to test the spectral transform by comparing it with the analytically
 // derived spherical harmonics
@@ -566,6 +566,7 @@ double spectral_transform_test( double trc,       // truncation
 
     return rms;
 }
+#endif
 
 //-----------------------------------------------------------------------------
 #if 0

From 9d79e036c6194d1f8ab283157a0517a358c574ae Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Wed, 9 May 2018 15:39:11 +0100
Subject: [PATCH 070/123] rename trans/localopt3 trans/local

---
 src/atlas/CMakeLists.txt                      | 16 ++---
 src/atlas/trans/LegendreCacheCreator.cc       |  2 +-
 src/atlas/trans/Trans.cc                      |  4 +-
 src/atlas/trans/VorDivToUV.cc                 |  4 +-
 .../LegendreCacheCreatorLocal.cc              |  6 +-
 .../LegendreCacheCreatorLocal.h               |  0
 .../LegendrePolynomials.cc}                   |  4 +-
 .../LegendrePolynomials.h}                    |  0
 .../TransLocalopt3.cc => local/TransLocal.cc} | 71 +++++++++----------
 .../TransLocalopt3.h => local/TransLocal.h}   | 16 ++---
 .../VorDivToUVLocal.cc}                       | 13 ++--
 .../VorDivToUVLocal.h}                        |  8 +--
 src/tests/trans/test_transgeneral.cc          | 16 ++---
 13 files changed, 79 insertions(+), 81 deletions(-)
 rename src/atlas/trans/{localopt3 => local}/LegendreCacheCreatorLocal.cc (94%)
 rename src/atlas/trans/{localopt3 => local}/LegendreCacheCreatorLocal.h (100%)
 rename src/atlas/trans/{localopt3/LegendrePolynomialsopt3.cc => local/LegendrePolynomials.cc} (99%)
 rename src/atlas/trans/{localopt3/LegendrePolynomialsopt3.h => local/LegendrePolynomials.h} (100%)
 rename src/atlas/trans/{localopt3/TransLocalopt3.cc => local/TransLocal.cc} (94%)
 rename src/atlas/trans/{localopt3/TransLocalopt3.h => local/TransLocal.h} (93%)
 rename src/atlas/trans/{localopt3/VorDivToUVLocalopt3.cc => local/VorDivToUVLocal.cc} (93%)
 rename src/atlas/trans/{localopt3/VorDivToUVLocalopt3.h => local/VorDivToUVLocal.h} (86%)

diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt
index 64dd7e1cc..9bf883d18 100644
--- a/src/atlas/CMakeLists.txt
+++ b/src/atlas/CMakeLists.txt
@@ -325,14 +325,14 @@ trans/VorDivToUV.h
 trans/VorDivToUV.cc
 trans/LegendreCacheCreator.h
 trans/LegendreCacheCreator.cc
-trans/localopt3/TransLocalopt3.h
-trans/localopt3/TransLocalopt3.cc
-trans/localopt3/LegendrePolynomialsopt3.h
-trans/localopt3/LegendrePolynomialsopt3.cc
-trans/localopt3/VorDivToUVLocalopt3.h
-trans/localopt3/VorDivToUVLocalopt3.cc
-trans/localopt3/LegendreCacheCreatorLocal.h
-trans/localopt3/LegendreCacheCreatorLocal.cc
+trans/local/TransLocal.h
+trans/local/TransLocal.cc
+trans/local/LegendrePolynomials.h
+trans/local/LegendrePolynomials.cc
+trans/local/VorDivToUVLocal.h
+trans/local/VorDivToUVLocal.cc
+trans/local/LegendreCacheCreatorLocal.h
+trans/local/LegendreCacheCreatorLocal.cc
 
 )
 if( ATLAS_HAVE_TRANS )
diff --git a/src/atlas/trans/LegendreCacheCreator.cc b/src/atlas/trans/LegendreCacheCreator.cc
index 6d65c76c1..8c6b402a3 100644
--- a/src/atlas/trans/LegendreCacheCreator.cc
+++ b/src/atlas/trans/LegendreCacheCreator.cc
@@ -24,7 +24,7 @@
 #else
 #define TRANS_DEFAULT "local"
 #endif
-#include "atlas/trans/localopt3/LegendreCacheCreatorLocal.h"
+#include "atlas/trans/local/LegendreCacheCreatorLocal.h"
 
 namespace atlas {
 namespace trans {
diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc
index 57a7e707e..6f67b57bb 100644
--- a/src/atlas/trans/Trans.cc
+++ b/src/atlas/trans/Trans.cc
@@ -27,7 +27,7 @@
 #else
 #define TRANS_DEFAULT "local"
 #endif
-#include "atlas/trans/localopt3/TransLocalopt3.h" // --> recommended "local"
+#include "atlas/trans/local/TransLocal.h" // --> recommended "local"
 
 namespace atlas {
 namespace trans {
@@ -61,7 +61,7 @@ struct force_link {
         load_builder_functionspace<TransIFSStructuredColumns>();
         load_builder_grid<TransIFS>();
 #endif
-        load_builder_grid<TransLocalopt3>();
+        load_builder_grid<TransLocal>();
     }
 };
 
diff --git a/src/atlas/trans/VorDivToUV.cc b/src/atlas/trans/VorDivToUV.cc
index c01076f6e..e160062b5 100644
--- a/src/atlas/trans/VorDivToUV.cc
+++ b/src/atlas/trans/VorDivToUV.cc
@@ -26,7 +26,7 @@
 #else
 #define TRANS_DEFAULT "local"
 #endif
-#include "atlas/trans/localopt3/VorDivToUVLocalopt3.h"  // --> recommended "local"
+#include "atlas/trans/local/VorDivToUVLocal.h"  // --> recommended "local"
 
 namespace atlas {
 namespace trans {
@@ -54,7 +54,7 @@ struct force_link {
 #if ATLAS_HAVE_TRANS
         load_builder<VorDivToUVIFS>();
 #endif
-        load_builder<VorDivToUVLocalopt3>();
+        load_builder<VorDivToUVLocal>();
     }
 };
 
diff --git a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc b/src/atlas/trans/local/LegendreCacheCreatorLocal.cc
similarity index 94%
rename from src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc
rename to src/atlas/trans/local/LegendreCacheCreatorLocal.cc
index a004348b9..9424ea52f 100644
--- a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc
+++ b/src/atlas/trans/local/LegendreCacheCreatorLocal.cc
@@ -8,14 +8,14 @@
  * nor does it submit to any jurisdiction.
  */
 
-#include "atlas/trans/localopt3/LegendreCacheCreatorLocal.h"
+#include "atlas/trans/local/LegendreCacheCreatorLocal.h"
 #include <string>
 #include <sstream>
 #include "eckit/utils/MD5.h"
 #include "atlas/grid.h"
 #include "atlas/option.h"
 #include "atlas/trans/Trans.h"
-#include "atlas/trans/localopt3/TransLocalopt3.h"
+#include "atlas/trans/local/TransLocal.h"
 
 namespace atlas {
 namespace trans {
@@ -94,7 +94,7 @@ void LegendreCacheCreatorLocal::create( const std::string& path ) const {
 Cache LegendreCacheCreatorLocal::create() const {
   util::Config export_legendre("export_legendre",true);
   Trans tmp( grid_, truncation_, config_ | option::type("local") | export_legendre );
-  auto impl = dynamic_cast<const TransLocalopt3*>( tmp.get() );
+  auto impl = dynamic_cast<const TransLocal*>( tmp.get() );
   return impl->export_legendre_;
 }
 
diff --git a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h b/src/atlas/trans/local/LegendreCacheCreatorLocal.h
similarity index 100%
rename from src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h
rename to src/atlas/trans/local/LegendreCacheCreatorLocal.h
diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc b/src/atlas/trans/local/LegendrePolynomials.cc
similarity index 99%
rename from src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
rename to src/atlas/trans/local/LegendrePolynomials.cc
index 1cddbc18b..4bd0d67a1 100644
--- a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc
+++ b/src/atlas/trans/local/LegendrePolynomials.cc
@@ -14,7 +14,7 @@
 
 #include "atlas/array.h"
 #include "atlas/parallel/mpi/mpi.h"
-#include "atlas/trans/localopt3/LegendrePolynomialsopt3.h"
+#include "atlas/trans/local/LegendrePolynomials.h"
 
 namespace atlas {
 namespace trans {
@@ -187,7 +187,7 @@ void compute_legendre_polynomialsopt3(
                 // The trans library in IFS uses descending order because it should
                 // be more accurate (higher wavenumbers have smaller contributions).
                 // This also needs to be changed when splitting the spectral data in
-                // TransLocalopt3::invtrans_uv!
+                // TransLocal::invtrans_uv!
                 //for ( int jn = jm; jn <= trc; jn++ ) {
                 for ( int jn = trc; jn >= jm; jn-- ) {
                     if ( ( jn - jm ) % 2 == 0 ) {
diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h b/src/atlas/trans/local/LegendrePolynomials.h
similarity index 100%
rename from src/atlas/trans/localopt3/LegendrePolynomialsopt3.h
rename to src/atlas/trans/local/LegendrePolynomials.h
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/local/TransLocal.cc
similarity index 94%
rename from src/atlas/trans/localopt3/TransLocalopt3.cc
rename to src/atlas/trans/local/TransLocal.cc
index 30995afb1..ac8fd9ac4 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.cc
+++ b/src/atlas/trans/local/TransLocal.cc
@@ -8,7 +8,7 @@
  * nor does it submit to any jurisdiction.
  */
 
-#include "atlas/trans/localopt3/TransLocalopt3.h"
+#include "atlas/trans/local/TransLocal.h"
 #include <cstdlib>
 #include <cmath>
 #include "atlas/array.h"
@@ -17,7 +17,7 @@
 #include "atlas/runtime/ErrorHandling.h"
 #include "atlas/runtime/Log.h"
 #include "atlas/trans/VorDivToUV.h"
-#include "atlas/trans/localopt3/LegendrePolynomialsopt3.h"
+#include "atlas/trans/local/LegendrePolynomials.h"
 #include "atlas/util/Constants.h"
 #include "eckit/config/YAMLConfiguration.h"
 #include "eckit/eckit_config.h"
@@ -30,8 +30,7 @@ namespace atlas {
 namespace trans {
 
 namespace {
-static TransBuilderGrid<TransLocalopt3> builder_deprecated( "localopt3" );
-static TransBuilderGrid<TransLocalopt3> builder( "local" );
+static TransBuilderGrid<TransLocal> builder( "local" );
 }  // namespace
 
 namespace {
@@ -217,7 +216,7 @@ int fourier_truncation( const int truncation,    // truncation
 }
 
 // --------------------------------------------------------------------------------------------------------------------
-// Class TransLocalopt3
+// Class TransLocal
 // --------------------------------------------------------------------------------------------------------------------
 
 const eckit::linalg::LinearAlgebra& linear_algebra_backend() {
@@ -228,7 +227,7 @@ const eckit::linalg::LinearAlgebra& linear_algebra_backend() {
     return eckit::linalg::LinearAlgebra::backend();
 }
 
-TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const Domain& domain, const long truncation,
+TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& domain, const long truncation,
                                 const eckit::Configuration& config ) :
     grid_( grid, domain ),
     truncation_( truncation ),
@@ -240,7 +239,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const Doma
     fft_cachesize_( cache.fft().size() ),
     linalg_( linear_algebra_backend() )
 {
-    ATLAS_TRACE( "TransLocalOpt3 constructor" );
+    ATLAS_TRACE( "TransLocal constructor" );
     double fft_threshold = 0.0;  // fraction of latitudes of the full grid down to which FFT is used.
     // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine
     // on which this code is running!
@@ -586,18 +585,18 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const Doma
 
 // --------------------------------------------------------------------------------------------------------------------
 
-TransLocalopt3::TransLocalopt3( const Grid& grid, const long truncation, const eckit::Configuration& config ) :
-    TransLocalopt3( Cache(), grid, grid.domain(), truncation, config ) {}
+TransLocal::TransLocal( const Grid& grid, const long truncation, const eckit::Configuration& config ) :
+    TransLocal( Cache(), grid, grid.domain(), truncation, config ) {}
 
-TransLocalopt3::TransLocalopt3( const Grid& grid, const Domain& domain, const long truncation, const eckit::Configuration& config ) :
-    TransLocalopt3( Cache(), grid, domain, truncation, config ) {}
+TransLocal::TransLocal( const Grid& grid, const Domain& domain, const long truncation, const eckit::Configuration& config ) :
+    TransLocal( Cache(), grid, domain, truncation, config ) {}
 
-TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long truncation, const eckit::Configuration& config ) :
-    TransLocalopt3( cache, grid, grid.domain(), truncation, config ) {}
+TransLocal::TransLocal( const Cache& cache, const Grid& grid, const long truncation, const eckit::Configuration& config ) :
+    TransLocal( cache, grid, grid.domain(), truncation, config ) {}
 
 // --------------------------------------------------------------------------------------------------------------------
 
-TransLocalopt3::~TransLocalopt3() {
+TransLocal::~TransLocal() {
     if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
         if ( not legendre_cache_ ) {
             free_aligned( legendre_sym_ );
@@ -623,40 +622,40 @@ TransLocalopt3::~TransLocalopt3() {
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::invtrans( const Field& spfield, Field& gpfield, const eckit::Configuration& config ) const {
+void TransLocal::invtrans( const Field& spfield, Field& gpfield, const eckit::Configuration& config ) const {
     NOTIMP;
 }
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::invtrans( const FieldSet& spfields, FieldSet& gpfields,
+void TransLocal::invtrans( const FieldSet& spfields, FieldSet& gpfields,
                                const eckit::Configuration& config ) const {
     NOTIMP;
 }
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::invtrans_grad( const Field& spfield, Field& gradfield, const eckit::Configuration& config ) const {
+void TransLocal::invtrans_grad( const Field& spfield, Field& gradfield, const eckit::Configuration& config ) const {
     NOTIMP;
 }
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields,
+void TransLocal::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields,
                                     const eckit::Configuration& config ) const {
     NOTIMP;
 }
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind,
+void TransLocal::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind,
                                            const eckit::Configuration& config ) const {
     NOTIMP;
 }
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[],
+void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[],
                                const eckit::Configuration& config ) const {
     invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields, config );
 }
@@ -673,7 +672,7 @@ void gp_transposeopt3( const int nb_size, const int nb_fields, const double gp_t
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlats, const int nb_fields,
+void TransLocal::invtrans_legendreopt3( const int truncation, const int nlats, const int nb_fields,
                                             const double scalar_spectra[], double scl_fourier[],
                                             const eckit::Configuration& config ) const {
     // Legendre transform:
@@ -826,7 +825,7 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::invtrans_fourier_regularopt3( const int nlats, const int nlons, const int nb_fields,
+void TransLocal::invtrans_fourier_regularopt3( const int nlats, const int nlons, const int nb_fields,
                                                    double scl_fourier[], double gp_fields[],
                                                    const eckit::Configuration& config ) const {
     // Fourier transformation:
@@ -910,7 +909,7 @@ void TransLocalopt3::invtrans_fourier_regularopt3( const int nlats, const int nl
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid::StructuredGrid g, const int nb_fields,
+void TransLocal::invtrans_fourier_reducedopt3( const int nlats, const grid::StructuredGrid g, const int nb_fields,
                                                    double scl_fourier[], double gp_fields[],
                                                    const eckit::Configuration& config ) const {
     // Fourier transformation:
@@ -969,7 +968,7 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid::
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const int nb_fields,
+void TransLocal::invtrans_unstructured_precomp( const int truncation, const int nb_fields,
                                                     const int nb_vordiv_fields, const double scalar_spectra[],
                                                     double gp_fields[], const eckit::Configuration& config ) const {
     ATLAS_TRACE( "invtrans_uv unstructured opt3" );
@@ -1061,7 +1060,7 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_fields, const int nb_vordiv_fields,
+void TransLocal::invtrans_unstructured( const int truncation, const int nb_fields, const int nb_vordiv_fields,
                                             const double scalar_spectra[], double gp_fields[],
                                             const eckit::Configuration& config ) const {
     ATLAS_TRACE( "invtrans_uv unstructured" );
@@ -1150,7 +1149,7 @@ void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_f
 }
 
 //-----------------------------------------------------------------------------
-// Routine to compute the spectral transform by using a localopt3 Fourier transformation
+// Routine to compute the spectral transform by using a Local Fourier transformation
 // for a grid (same latitude for all longitudes, allows to compute Legendre functions
 // once for all longitudes). U and v components are divided by cos(latitude) for
 // nb_vordiv_fields > 0.
@@ -1164,7 +1163,7 @@ void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_f
 // Author:
 // Andreas Mueller *ECMWF*
 //
-void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
+void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
                                   const double scalar_spectra[], double gp_fields[],
                                   const eckit::Configuration& config ) const {
     if ( nb_scalar_fields > 0 ) {
@@ -1226,7 +1225,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
+void TransLocal::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
                                const double divergence_spectra[], double gp_fields[],
                                const eckit::Configuration& config ) const {
     invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
@@ -1253,10 +1252,10 @@ void extend_truncationopt3( const int old_truncation, const int nb_fields, const
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
+void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
                                const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
                                const eckit::Configuration& config ) const {
-    ATLAS_TRACE( "TransLocalopt3::invtrans" );
+    ATLAS_TRACE( "TransLocal::invtrans" );
     int nb_gp              = grid_.size();
     int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
     if ( nb_vordiv_fields > 0 ) {
@@ -1277,7 +1276,7 @@ void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_s
         {
             ATLAS_TRACE( "vordiv to UV opt3" );
             // call vd2uv to compute u and v in spectral space
-            trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt3" ) );
+            trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "Local" ) );
             vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
                                       divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
         }
@@ -1295,7 +1294,7 @@ void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_s
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const {
+void TransLocal::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const {
     NOTIMP;
     // Not implemented and not planned.
     // Use the TransIFS implementation instead.
@@ -1303,7 +1302,7 @@ void TransLocalopt3::dirtrans( const Field& gpfield, Field& spfield, const eckit
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::dirtrans( const FieldSet& gpfields, FieldSet& spfields,
+void TransLocal::dirtrans( const FieldSet& gpfields, FieldSet& spfields,
                                const eckit::Configuration& config ) const {
     NOTIMP;
     // Not implemented and not planned.
@@ -1312,7 +1311,7 @@ void TransLocalopt3::dirtrans( const FieldSet& gpfields, FieldSet& spfields,
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
+void TransLocal::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
                                            const eckit::Configuration& config ) const {
     NOTIMP;
     // Not implemented and not planned.
@@ -1321,7 +1320,7 @@ void TransLocalopt3::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Fi
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
+void TransLocal::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
                                const eckit::Configuration& ) const {
     NOTIMP;
     // Not implemented and not planned.
@@ -1330,7 +1329,7 @@ void TransLocalopt3::dirtrans( const int nb_fields, const double scalar_fields[]
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocalopt3::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
+void TransLocal::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
                                double divergence_spectra[], const eckit::Configuration& ) const {
     NOTIMP;
     // Not implemented and not planned.
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/local/TransLocal.h
similarity index 93%
rename from src/atlas/trans/localopt3/TransLocalopt3.h
rename to src/atlas/trans/local/TransLocal.h
index bcc80b1d7..8dd80359d 100644
--- a/src/atlas/trans/localopt3/TransLocalopt3.h
+++ b/src/atlas/trans/local/TransLocal.h
@@ -50,9 +50,9 @@ int fourier_truncation( const int truncation,    // truncation
 
 //-----------------------------------------------------------------------------
 
-/// @class TransLocalopt3
+/// @class TransLocal
 ///
-/// Localopt3 spherical harmonics transformations to any grid
+/// Local spherical harmonics transformations to any grid
 /// Optimisations are present for structured grids
 /// For global grids, please consider using TransIFS instead.
 ///
@@ -62,16 +62,16 @@ int fourier_truncation( const int truncation,    // truncation
 ///
 /// @note: Direct transforms are not implemented and cannot be unless
 ///        the grid is global. There are no plans to support this at the moment.
-class TransLocalopt3 : public trans::TransImpl {
+class TransLocal : public trans::TransImpl {
 public:
-    TransLocalopt3( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() );
-    TransLocalopt3( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() );
-    TransLocalopt3( const Cache&, const Grid&, const long truncation,
+    TransLocal( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransLocal( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransLocal( const Cache&, const Grid&, const long truncation,
                     const eckit::Configuration& = util::NoConfig() );
-    TransLocalopt3( const Cache&, const Grid&, const Domain&, const long truncation,
+    TransLocal( const Cache&, const Grid&, const Domain&, const long truncation,
                     const eckit::Configuration& = util::NoConfig() );
 
-    virtual ~TransLocalopt3();
+    virtual ~TransLocal();
 
     virtual int truncation() const override { return truncation_; }
     virtual size_t spectralCoefficients() const override { return ( truncation_ + 1 ) * ( truncation_ + 2 ); }
diff --git a/src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc b/src/atlas/trans/local/VorDivToUVLocal.cc
similarity index 93%
rename from src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc
rename to src/atlas/trans/local/VorDivToUVLocal.cc
index b7b9474af..9d23a9db7 100644
--- a/src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc
+++ b/src/atlas/trans/local/VorDivToUVLocal.cc
@@ -8,7 +8,7 @@
  * nor does it submit to any jurisdiction.
  */
 
-#include "atlas/trans/localopt3/VorDivToUVLocalopt3.h"
+#include "atlas/trans/local/VorDivToUVLocal.h"
 #include <cmath>  // for std::sqrt
 #include "atlas/functionspace/Spectral.h"
 #include "atlas/runtime/Log.h"
@@ -21,8 +21,7 @@ namespace atlas {
 namespace trans {
 
 namespace {
-static VorDivToUVBuilder<VorDivToUVLocalopt3> builder_deprecated( "localopt3" );
-static VorDivToUVBuilder<VorDivToUVLocalopt3> builder( "local" );
+static VorDivToUVBuilder<VorDivToUVLocal> builder( "local" );
 }
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -165,7 +164,7 @@ void vd2uvopt3( const int truncation,               // truncation
     }
 }
 
-void VorDivToUVLocalopt3::execute( const int nb_coeff, const int nb_fields, const double vorticity[],
+void VorDivToUVLocal::execute( const int nb_coeff, const int nb_fields, const double vorticity[],
                                    const double divergence[], double U[], double V[],
                                    const eckit::Configuration& config ) const {
     for ( int jm = 0; jm <= truncation_; ++jm ) {
@@ -173,13 +172,13 @@ void VorDivToUVLocalopt3::execute( const int nb_coeff, const int nb_fields, cons
     }
 }
 
-VorDivToUVLocalopt3::VorDivToUVLocalopt3( const int truncation, const eckit::Configuration& config ) :
+VorDivToUVLocal::VorDivToUVLocal( const int truncation, const eckit::Configuration& config ) :
     truncation_( truncation ) {}
 
-VorDivToUVLocalopt3::VorDivToUVLocalopt3( const FunctionSpace& fs, const eckit::Configuration& config ) :
+VorDivToUVLocal::VorDivToUVLocal( const FunctionSpace& fs, const eckit::Configuration& config ) :
     truncation_( Spectral( fs ).truncation() ) {}
 
-VorDivToUVLocalopt3::~VorDivToUVLocalopt3() {}
+VorDivToUVLocal::~VorDivToUVLocal() {}
 
 }  // namespace trans
 }  // namespace atlas
diff --git a/src/atlas/trans/localopt3/VorDivToUVLocalopt3.h b/src/atlas/trans/local/VorDivToUVLocal.h
similarity index 86%
rename from src/atlas/trans/localopt3/VorDivToUVLocalopt3.h
rename to src/atlas/trans/local/VorDivToUVLocal.h
index 44fdc98fe..ee3903de1 100644
--- a/src/atlas/trans/localopt3/VorDivToUVLocalopt3.h
+++ b/src/atlas/trans/local/VorDivToUVLocal.h
@@ -26,12 +26,12 @@ namespace trans {
 
 //-----------------------------------------------------------------------------
 
-class VorDivToUVLocalopt3 : public trans::VorDivToUVImpl {
+class VorDivToUVLocal : public trans::VorDivToUVImpl {
 public:
-    VorDivToUVLocalopt3( const FunctionSpace&, const eckit::Configuration& = util::NoConfig() );
-    VorDivToUVLocalopt3( int truncation, const eckit::Configuration& = util::NoConfig() );
+    VorDivToUVLocal( const FunctionSpace&, const eckit::Configuration& = util::NoConfig() );
+    VorDivToUVLocal( int truncation, const eckit::Configuration& = util::NoConfig() );
 
-    virtual ~VorDivToUVLocalopt3();
+    virtual ~VorDivToUVLocal();
 
     virtual int truncation() const override { return truncation_; }
 
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 92b03f489..fb976bd25 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -31,7 +31,7 @@
 #include "atlas/trans/Trans.h"
 #include "atlas/util/Constants.h"
 #include "atlas/util/Earth.h"
-#include "atlas/trans/localopt3/TransLocalopt3.h"
+#include "atlas/trans/local/TransLocal.h"
 
 #include "tests/AtlasTestEnvironment.h"
 
@@ -744,7 +744,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
     double rav = 0.;  // compute average rms error of trans library in rav
 #endif
     trans::Trans transLocal1( g, trc, util::Config( "type", "localopt2" ) );
-    trans::Trans transLocal2( g, trc, util::Config( "type", "localopt3" ) );
+    trans::Trans transLocal2( g, trc, util::Config( "type", "Local" ) );
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
 
     functionspace::Spectral spectral( trc );
@@ -882,10 +882,10 @@ CASE( "test_trans_hires" ) {
     double tolerance  = 1.e-13;
 
 #if ATLAS_HAVE_TRANS
-    //std::string transTypes[4] = {"localopt", "localopt2", "localopt3", "ifs"};
-    //std::string transTypes[2] = {"localopt2", "localopt3"};
-    std::string transTypes[3] = {"localopt3", "localopt2", "localopt"};
-    //std::string transTypes[1] = {"localopt3"};
+    //std::string transTypes[4] = {"localopt", "localopt2", "Local", "ifs"};
+    //std::string transTypes[2] = {"localopt2", "Local"};
+    std::string transTypes[3] = {"Local", "localopt2", "localopt"};
+    //std::string transTypes[1] = {"Local"};
 #else
     std::string transTypes[1] = {"localopt2"};
 #endif
@@ -1176,8 +1176,8 @@ CASE( "test_trans_unstructured" ) {
     std::vector<double> rgp_analytic1( g.size() );
     std::vector<double> rgp_analytic2( gu.size() );
 
-    trans::Trans transLocal1( g, trc, util::Config( "type", "localopt3" ) );
-    trans::Trans transLocal2( gu, trc, util::Config( "type", "localopt3" ) );
+    trans::Trans transLocal1( g, trc, util::Config( "type", "Local" ) );
+    trans::Trans transLocal2( gu, trc, util::Config( "type", "Local" ) );
 
     int icase = 0;
     for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) {         // vorticity, divergence, scalar

From 160fe1d0caa8eee9d4421b3e7426f3ffbd0ed5cd Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Wed, 9 May 2018 18:02:10 +0100
Subject: [PATCH 071/123] Improve uid for Legendre cache files

---
 .../trans/local/LegendreCacheCreatorLocal.cc  | 52 +++++++++++++++----
 src/atlas/trans/local/TransLocal.cc           | 42 +++++++--------
 src/tests/trans/test_trans_localcache.cc      |  6 ++-
 3 files changed, 68 insertions(+), 32 deletions(-)

diff --git a/src/atlas/trans/local/LegendreCacheCreatorLocal.cc b/src/atlas/trans/local/LegendreCacheCreatorLocal.cc
index 9424ea52f..e4440e0d4 100644
--- a/src/atlas/trans/local/LegendreCacheCreatorLocal.cc
+++ b/src/atlas/trans/local/LegendreCacheCreatorLocal.cc
@@ -12,6 +12,7 @@
 #include <string>
 #include <sstream>
 #include "eckit/utils/MD5.h"
+#include "eckit/types/FloatCompare.h"
 #include "atlas/grid.h"
 #include "atlas/option.h"
 #include "atlas/trans/Trans.h"
@@ -56,18 +57,47 @@ std::string hash( const eckit::Configuration& config ) {
 std::string LegendreCacheCreatorLocal::uid() const {
   if( unique_identifier_.empty() ) {
     std::ostringstream stream;
-    stream << "local-T" << truncation_ << "-";
-    if( grid::GaussianGrid( grid_ ) ) {
-      // Same cache for any global Gaussian grid
-      stream << "GaussianN" << grid::GaussianGrid( grid_ ).N();
-    } else if( grid::RegularLonLatGrid( grid_ ) ) {
-      // Same cache for any global regular grid
-      auto g = grid::RegularLonLatGrid( grid_ );
-      stream << ( g.shiftedLat() ? "S" : "L" ) << "+x" << g.ny();
-      // The above '+' is a placeholder for any g.nx()
-    } else {
+    auto give_up = [&]() {
       // We cannot make more assumptions on reusability for different grids
       stream << "grid-" << hash( grid_ );
+    };
+    stream << "local-T" << truncation_ << "-";
+    grid::StructuredGrid structured ( grid_ );
+    if( grid_.domain().global() ) {
+      if( grid::GaussianGrid( grid_ ) ) {
+        // Same cache for any global Gaussian grid
+        stream << "GaussianN" << grid::GaussianGrid( grid_ ).N();
+      } else if( grid::RegularLonLatGrid( grid_ ) ) {
+        // Same cache for any global regular grid
+        auto g = grid::RegularLonLatGrid( grid_ );
+
+        const double dy_2 = 90. / double(g.ny());
+        bool shifted_lat = eckit::types::is_approximately_equal( g.y().front(), 90. - dy_2 ) &&
+                           eckit::types::is_approximately_equal( g.y().back(), -90. + dy_2 );
+        bool standard_lat = eckit::types::is_approximately_equal( g.y().front(), 90. ) &&
+                            eckit::types::is_approximately_equal( g.y().back(), -90. );
+
+        if( standard_lat ) {
+          stream << "L" << "-ny" << g.ny();
+        } else if( shifted_lat ) {
+          stream << "S" << "-ny" << g.ny();
+        } else { // I don't think we get here, but just in case, give up
+          give_up();
+        }
+      } else { // global but not gaussian or regularlonlat
+        give_up();
+      }
+    } else { // regional grid
+      if( grid::RegularGrid( grid_ ) && not grid_.projection() && structured.yspace().type() == "linear" ) {
+        RectangularDomain domain( grid_.domain() );
+        ASSERT( domain );
+        stream << "Regional";
+        stream << "-south" << domain.ymin();
+        stream << "-north" << domain.ymax();
+        stream << "-ny" << structured.ny();
+      } else { // It gets too complicated, so let's not be smart
+        give_up();
+      }
     }
     stream << "-OPT" << hash( config_ );
     unique_identifier_ = stream.str();
@@ -84,6 +114,8 @@ LegendreCacheCreatorLocal::LegendreCacheCreatorLocal( const Grid& grid, int trun
 }
 
 bool LegendreCacheCreatorLocal::supported() const {
+  if( not grid::StructuredGrid( grid_ ) ) return false;
+  if( grid_.projection() ) return false;
   return true;
 }
 
diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc
index ac8fd9ac4..f47893e71 100644
--- a/src/atlas/trans/local/TransLocal.cc
+++ b/src/atlas/trans/local/TransLocal.cc
@@ -228,7 +228,7 @@ const eckit::linalg::LinearAlgebra& linear_algebra_backend() {
 }
 
 TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& domain, const long truncation,
-                                const eckit::Configuration& config ) :
+                        const eckit::Configuration& config ) :
     grid_( grid, domain ),
     truncation_( truncation ),
     precompute_( config.getBool( "precompute", true ) ),
@@ -430,22 +430,22 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
                 // TODO: check this is all aligned...
             }
             else {
-                ATLAS_TRACE_SCOPE( "Legendre precomputations (structured)" ) {
-
-                    if( TransParameters(config).export_legendre() ) {
-                        ASSERT( not cache_.legendre() );
-                        export_legendre_ = LegendreCache( sizeof(double) * ( size_sym + size_asym ) );
-                        legendre_cachesize_ = export_legendre_.legendre().size();
-                        legendre_cache_ = export_legendre_.legendre().data();
-                        legendre_cache_ = std::malloc( legendre_cachesize_ );
-                        ReadCache legendre( legendre_cache_ );
-                        legendre_sym_  = legendre.read<double>( size_sym );
-                        legendre_asym_ = legendre.read<double>( size_asym );
-                    } else {
-                        alloc_aligned( legendre_sym_, size_sym );
-                        alloc_aligned( legendre_asym_, size_asym );
-                    }
 
+              if( TransParameters(config).export_legendre() ) {
+                  ASSERT( not cache_.legendre() );
+                  export_legendre_ = LegendreCache( sizeof(double) * ( size_sym + size_asym ) );
+                  legendre_cachesize_ = export_legendre_.legendre().size();
+                  legendre_cache_ = export_legendre_.legendre().data();
+                  legendre_cache_ = std::malloc( legendre_cachesize_ );
+                  ReadCache legendre( legendre_cache_ );
+                  legendre_sym_  = legendre.read<double>( size_sym );
+                  legendre_asym_ = legendre.read<double>( size_asym );
+              } else {
+                  alloc_aligned( legendre_sym_, size_sym );
+                  alloc_aligned( legendre_asym_, size_asym );
+              }
+
+              ATLAS_TRACE_SCOPE( "Legendre precomputations (structured)" ) {
                     compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_,
                                                       legendre_asym_, legendre_sym_begin_.data(),
                                                       legendre_asym_begin_.data() );
@@ -454,11 +454,11 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
                 if ( file_path.size() ) {
                     ATLAS_TRACE( "Write LegendreCache to file" );
                     Log::debug() << "Writing Legendre cache file ..." << std::endl;
-                    Log::debug() << "    path      = " << file_path << std::endl;
+                    Log::debug() << "    path: " << file_path << std::endl;
                     WriteCache legendre( file_path );
                     legendre.write( legendre_sym_, size_sym );
                     legendre.write( legendre_asym_, size_asym );
-                    Log::debug() << "Cache file size: " << eckit::Bytes( legendre.pos ) << std::endl;
+                    Log::debug() << "    size: " << eckit::Bytes( legendre.pos ) << std::endl;
                 }
             }
         }
@@ -1192,7 +1192,7 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields,
             // Computing u,v from U,V:
             {
                 if ( nb_vordiv_fields > 0 ) {
-                    ATLAS_TRACE( "opt3 u,v from U,V" );
+                    ATLAS_TRACE( "compute u,v from U,V" );
                     std::vector<double> coslats( nlats );
                     for ( size_t j = 0; j < nlats; ++j ) {
                         coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
@@ -1265,7 +1265,7 @@ void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spect
         std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
 
         {
-            ATLAS_TRACE( "opt3 extend vordiv" );
+            ATLAS_TRACE( "extend vordiv" );
             // increase truncation in vorticity_spectra and divergence_spectra:
             extend_truncationopt3( truncation_, nb_vordiv_fields, vorticity_spectra,
                                    vorticity_spectra_extended.data() );
@@ -1274,7 +1274,7 @@ void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spect
         }
 
         {
-            ATLAS_TRACE( "vordiv to UV opt3" );
+            ATLAS_TRACE( "vordiv to UV" );
             // call vd2uv to compute u and v in spectral space
             trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "Local" ) );
             vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc
index 1011719f0..e9bfb6baf 100644
--- a/src/tests/trans/test_trans_localcache.cc
+++ b/src/tests/trans/test_trans_localcache.cc
@@ -209,7 +209,6 @@ CASE( "test_regional_grids nested_in_global" ) {
 }
 
 CASE( "test_regional_grids not nested" ) {
-    auto cachefile = CacheFile("cache-regional.bin");
     auto truncation = 89;
     Cache cache;
 
@@ -217,6 +216,8 @@ CASE( "test_regional_grids not nested" ) {
 
     LegendreCacheCreator cache_creator( grid, truncation, option::type("local") );
     EXPECT( cache_creator.supported() );
+    auto cachefile = CacheFile( "leg_" + cache_creator.uid() + ".bin" );
+
     ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) )
         cache_creator.create( cachefile );
 
@@ -242,6 +243,9 @@ CASE( "test_regional_grids with projection" ) {
         Trans( grid, truncation, option::type("local") );
 
     // Note: caching not yet implemented for unstructured and projected grids
+    LegendreCacheCreator legendre_cache_creator( grid, truncation, option::type("local") );
+    ATLAS_DEBUG_VAR( legendre_cache_creator.uid() );
+    EXPECT( not legendre_cache_creator.supported() );
 }
 
 CASE( "test cache creator to file" ) {

From e56f34b34e95807928f5d592eed062459f7a02d0 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Wed, 9 May 2018 18:08:01 +0100
Subject: [PATCH 072/123] Improve uid for Legendre cache files

---
 .../trans/local/LegendreCacheCreatorLocal.cc  | 60 +++++++++----------
 1 file changed, 27 insertions(+), 33 deletions(-)

diff --git a/src/atlas/trans/local/LegendreCacheCreatorLocal.cc b/src/atlas/trans/local/LegendreCacheCreatorLocal.cc
index e4440e0d4..4e3488aed 100644
--- a/src/atlas/trans/local/LegendreCacheCreatorLocal.cc
+++ b/src/atlas/trans/local/LegendreCacheCreatorLocal.cc
@@ -63,41 +63,35 @@ std::string LegendreCacheCreatorLocal::uid() const {
     };
     stream << "local-T" << truncation_ << "-";
     grid::StructuredGrid structured ( grid_ );
-    if( grid_.domain().global() ) {
-      if( grid::GaussianGrid( grid_ ) ) {
-        // Same cache for any global Gaussian grid
-        stream << "GaussianN" << grid::GaussianGrid( grid_ ).N();
-      } else if( grid::RegularLonLatGrid( grid_ ) ) {
-        // Same cache for any global regular grid
-        auto g = grid::RegularLonLatGrid( grid_ );
-
-        const double dy_2 = 90. / double(g.ny());
-        bool shifted_lat = eckit::types::is_approximately_equal( g.y().front(), 90. - dy_2 ) &&
-                           eckit::types::is_approximately_equal( g.y().back(), -90. + dy_2 );
-        bool standard_lat = eckit::types::is_approximately_equal( g.y().front(), 90. ) &&
-                            eckit::types::is_approximately_equal( g.y().back(), -90. );
-
-        if( standard_lat ) {
-          stream << "L" << "-ny" << g.ny();
-        } else if( shifted_lat ) {
-          stream << "S" << "-ny" << g.ny();
-        } else { // I don't think we get here, but just in case, give up
-          give_up();
-        }
-      } else { // global but not gaussian or regularlonlat
-        give_up();
-      }
-    } else { // regional grid
-      if( grid::RegularGrid( grid_ ) && not grid_.projection() && structured.yspace().type() == "linear" ) {
-        RectangularDomain domain( grid_.domain() );
-        ASSERT( domain );
-        stream << "Regional";
-        stream << "-south" << domain.ymin();
-        stream << "-north" << domain.ymax();
-        stream << "-ny" << structured.ny();
-      } else { // It gets too complicated, so let's not be smart
+    if( grid::GaussianGrid( grid_ ) ) {
+      // Same cache for any global Gaussian grid
+      stream << "GaussianN" << grid::GaussianGrid( grid_ ).N();
+    } else if( grid::RegularLonLatGrid( grid_ ) ) {
+      // Same cache for any global regular grid
+      auto g = grid::RegularLonLatGrid( grid_ );
+
+      const double dy_2 = 90. / double(g.ny());
+      bool shifted_lat = eckit::types::is_approximately_equal( g.y().front(), 90. - dy_2 ) &&
+                         eckit::types::is_approximately_equal( g.y().back(), -90. + dy_2 );
+      bool standard_lat = eckit::types::is_approximately_equal( g.y().front(), 90. ) &&
+                          eckit::types::is_approximately_equal( g.y().back(), -90. );
+
+      if( standard_lat ) {
+        stream << "L" << "-ny" << g.ny();
+      } else if( shifted_lat ) {
+        stream << "S" << "-ny" << g.ny();
+      } else { // I don't think we get here, but just in case, give up
         give_up();
       }
+    } else if ( grid::RegularGrid( grid_ ) && not grid_.projection() && structured.yspace().type() == "linear" ) {
+      RectangularDomain domain( grid_.domain() );
+      ASSERT( domain );
+      stream << "Regional";
+      stream << "-south" << domain.ymin();
+      stream << "-north" << domain.ymax();
+      stream << "-ny" << structured.ny();
+    } else { // It gets too complicated, so let's not be smart
+      give_up();
     }
     stream << "-OPT" << hash( config_ );
     unique_identifier_ = stream.str();

From 3b0518b279501c694a31573e1c2e28044802777b Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Wed, 9 May 2018 18:53:34 +0100
Subject: [PATCH 073/123] Warning message for transforms to unstructured or
 projected grids

---
 src/atlas/option/TransOptions.cc         |  4 +++
 src/atlas/option/TransOptions.h          |  8 +++++
 src/atlas/trans/local/TransLocal.cc      | 40 +++++++++++++++++-------
 src/atlas/trans/local/TransLocal.h       |  1 +
 src/tests/trans/test_trans_localcache.cc |  7 ++++-
 5 files changed, 48 insertions(+), 12 deletions(-)

diff --git a/src/atlas/option/TransOptions.cc b/src/atlas/option/TransOptions.cc
index a80582a78..98e036acc 100644
--- a/src/atlas/option/TransOptions.cc
+++ b/src/atlas/option/TransOptions.cc
@@ -65,6 +65,10 @@ nproma::nproma( int nproma ) {
     set( "nproma", nproma );
 }
 
+warning::warning( int warning ) {
+    set( "warning", warning );
+}
+
 // ----------------------------------------------------------------------------
 
 }  // namespace option
diff --git a/src/atlas/option/TransOptions.h b/src/atlas/option/TransOptions.h
index d9c11593d..d87bcdab3 100644
--- a/src/atlas/option/TransOptions.h
+++ b/src/atlas/option/TransOptions.h
@@ -109,10 +109,18 @@ class read_fft : public util::Config {
 // ----------------------------------------------------------------------------
 
 class nproma : public util::Config {
+public:
     nproma( int );
 };
 
 // ----------------------------------------------------------------------------
 
+class warning : public util::Config {
+public:
+    warning( int );
+};
+
+// ----------------------------------------------------------------------------
+
 }  // namespace option
 }  // namespace atlas
diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc
index f47893e71..e7bb7ee4d 100644
--- a/src/atlas/trans/local/TransLocal.cc
+++ b/src/atlas/trans/local/TransLocal.cc
@@ -57,6 +57,8 @@ class TransParameters {
 
     bool global() const { return config_.getBool( "global", false ); }
 
+    int warning() const { return config_.getLong( "warning", 0 ); }
+
     int fft() const {
         static const std::map<std::string, int> string_to_FFT = {{"OFF", (int)option::FFT::OFF},
                                                                  {"FFTW", (int)option::FFT::FFTW}};
@@ -237,7 +239,8 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
     legendre_cachesize_( cache.legendre().size() ),
     fft_cache_( cache.fft().data() ),
     fft_cachesize_( cache.fft().size() ),
-    linalg_( linear_algebra_backend() )
+    linalg_( linear_algebra_backend() ),
+    warning_( TransParameters(config).warning() )
 {
     ATLAS_TRACE( "TransLocal constructor" );
     double fft_threshold = 0.0;  // fraction of latitudes of the full grid down to which FFT is used.
@@ -569,6 +572,11 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
         // unstructured grid
         if ( unstruct_precomp_ ) {
             ATLAS_TRACE( "Legendre precomputations (unstructured)" );
+
+            if( warning_ > 0 && grid_.size() > warning_ ) {
+              Log::warning() << "WARNING: Precomputations for spectral transforms could take a long time and consume a lot of memory (unstructured grid approach)!" << std::endl;
+            }
+
             std::vector<double> lats( grid_.size() );
             alloc_aligned( legendre_, legendre_size( truncation_ ) * grid_.size() );
             int j( 0 );
@@ -971,8 +979,9 @@ void TransLocal::invtrans_fourier_reducedopt3( const int nlats, const grid::Stru
 void TransLocal::invtrans_unstructured_precomp( const int truncation, const int nb_fields,
                                                     const int nb_vordiv_fields, const double scalar_spectra[],
                                                     double gp_fields[], const eckit::Configuration& config ) const {
-    ATLAS_TRACE( "invtrans_uv unstructured opt3" );
-    grid::UnstructuredGrid gu = grid_;
+
+    ATLAS_TRACE( "invtrans_uv unstructured" );
+
     const int nlats                 = grid_.size();
     const int size_fourier          = nb_fields * 2;
     double* legendre;
@@ -1000,9 +1009,8 @@ void TransLocal::invtrans_unstructured_precomp( const int truncation, const int
     // loop over all points:
     {
         ATLAS_TRACE( "Inverse Fourier Transform (NoFFT)" );
-
-        for ( int ip = 0; ip < grid_.size(); ip++ ) {
-            const PointLonLat p  = gu.lonlat( ip );
+        int ip = 0;
+        for( const PointLonLat p : grid_.lonlat() ) {
             const double lon = p.lon() * util::Constants::degreesToRadians();
             const double lat = p.lat() * util::Constants::degreesToRadians();
             {
@@ -1050,6 +1058,7 @@ void TransLocal::invtrans_unstructured_precomp( const int truncation, const int
                     }
                 }
             }
+            ++ip;
         }
     }
     free_aligned( scl_fourier );
@@ -1063,8 +1072,14 @@ void TransLocal::invtrans_unstructured_precomp( const int truncation, const int
 void TransLocal::invtrans_unstructured( const int truncation, const int nb_fields, const int nb_vordiv_fields,
                                             const double scalar_spectra[], double gp_fields[],
                                             const eckit::Configuration& config ) const {
-    ATLAS_TRACE( "invtrans_uv unstructured" );
-    grid::UnstructuredGrid gu = grid_;
+    ATLAS_TRACE( "invtrans_unstructured" );
+
+    int warning = warning_;
+    config.get("warning",warning);
+    if( warning > 0 && grid_.size() > warning ) {
+      Log::warning() << "WARNING: Spectral transforms could take a long time (unstructured grid approach)." << std::endl;
+    }
+
     double* zfn;
     alloc_aligned( zfn, ( truncation + 1 ) * ( truncation + 1 ) );
     compute_zfnopt3( truncation, zfn );
@@ -1080,9 +1095,10 @@ void TransLocal::invtrans_unstructured( const int truncation, const int nb_field
     alloc_aligned( fouriertp, 2 * ( truncation + 1 ) );
     alloc_aligned( gp_opt, nb_fields );
 
+
     // loop over all points:
-    for ( int ip = 0; ip < grid_.size(); ip++ ) {
-        const PointLonLat p  = gu.lonlat( ip );
+    int ip = 0;
+    for ( const PointLonLat p : grid_.lonlat() ) {
         const double lon = p.lon() * util::Constants::degreesToRadians();
         const double lat = p.lat() * util::Constants::degreesToRadians();
         compute_legendre_polynomials_latopt3( truncation, lat, legendre, zfn );
@@ -1140,6 +1156,7 @@ void TransLocal::invtrans_unstructured( const int truncation, const int nb_field
                 }
             }
         }
+        ++ip;
     }
     free_aligned( legendre );
     free_aligned( scl_fourier );
@@ -1170,7 +1187,8 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields,
         int nb_fields = nb_scalar_fields;
 
         // Transform
-        if ( grid::StructuredGrid g = grid_ ) {
+        if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) {
+            auto g = grid::StructuredGrid( grid_ );
             ATLAS_TRACE( "invtrans_uv structured" );
             int nlats            = g.ny();
             int nlons            = g.nxmax();
diff --git a/src/atlas/trans/local/TransLocal.h b/src/atlas/trans/local/TransLocal.h
index 8dd80359d..41d55162b 100644
--- a/src/atlas/trans/local/TransLocal.h
+++ b/src/atlas/trans/local/TransLocal.h
@@ -201,6 +201,7 @@ friend class LegendreCacheCreatorLocal;
     size_t fft_cachesize_{0};
 
     const eckit::linalg::LinearAlgebra& linalg_;
+    int warning_ = 0;
 };
 
 //-----------------------------------------------------------------------------
diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc
index e9bfb6baf..5c3ca945c 100644
--- a/src/tests/trans/test_trans_localcache.cc
+++ b/src/tests/trans/test_trans_localcache.cc
@@ -239,13 +239,18 @@ CASE( "test_regional_grids with projection" ) {
        ("north_pole", std::vector<double>{ 4., 54.} ) );
 
     StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ), projection );
+    Trans trans;
     ATLAS_TRACE_SCOPE("create without cache")
-        Trans( grid, truncation, option::type("local") );
+        trans = Trans( grid, truncation, option::type("local") | option::warning(1) );
 
     // Note: caching not yet implemented for unstructured and projected grids
     LegendreCacheCreator legendre_cache_creator( grid, truncation, option::type("local") );
     ATLAS_DEBUG_VAR( legendre_cache_creator.uid() );
     EXPECT( not legendre_cache_creator.supported() );
+
+    std::vector<double> rspecg( trans.spectralCoefficients(), 0. );
+    std::vector<double> rgp( trans.grid().size() );
+    trans.invtrans(1,rspecg.data(),rgp.data());
 }
 
 CASE( "test cache creator to file" ) {

From 4d2605b835b1db5ba341c79e03b32657718002a1 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 10 May 2018 08:39:31 +0100
Subject: [PATCH 074/123] cleanup

---
 src/atlas/trans/local/TransLocal.cc | 12 ++++++++----
 src/atlas/trans/local/TransLocal.h  |  2 ++
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc
index e7bb7ee4d..29b7b129f 100644
--- a/src/atlas/trans/local/TransLocal.cc
+++ b/src/atlas/trans/local/TransLocal.cc
@@ -229,6 +229,12 @@ const eckit::linalg::LinearAlgebra& linear_algebra_backend() {
     return eckit::linalg::LinearAlgebra::backend();
 }
 
+bool TransLocal::warning( const eckit::Configuration& config ) const {
+  int warning = warning_;
+  config.get("warning",warning);
+  return ( warning > 0 && grid_.size() >= warning );
+}
+
 TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& domain, const long truncation,
                         const eckit::Configuration& config ) :
     grid_( grid, domain ),
@@ -573,7 +579,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
         if ( unstruct_precomp_ ) {
             ATLAS_TRACE( "Legendre precomputations (unstructured)" );
 
-            if( warning_ > 0 && grid_.size() > warning_ ) {
+            if( warning() ) {
               Log::warning() << "WARNING: Precomputations for spectral transforms could take a long time and consume a lot of memory (unstructured grid approach)!" << std::endl;
             }
 
@@ -1074,9 +1080,7 @@ void TransLocal::invtrans_unstructured( const int truncation, const int nb_field
                                             const eckit::Configuration& config ) const {
     ATLAS_TRACE( "invtrans_unstructured" );
 
-    int warning = warning_;
-    config.get("warning",warning);
-    if( warning > 0 && grid_.size() > warning ) {
+    if( warning(config) ) {
       Log::warning() << "WARNING: Spectral transforms could take a long time (unstructured grid approach)." << std::endl;
     }
 
diff --git a/src/atlas/trans/local/TransLocal.h b/src/atlas/trans/local/TransLocal.h
index 41d55162b..1a9342cbc 100644
--- a/src/atlas/trans/local/TransLocal.h
+++ b/src/atlas/trans/local/TransLocal.h
@@ -156,6 +156,8 @@ class TransLocal : public trans::TransImpl {
                       const double scalar_spectra[], double gp_fields[],
                       const eckit::Configuration& = util::NoConfig() ) const;
 
+    bool warning( const eckit::Configuration& = util::NoConfig() ) const;
+
 friend class LegendreCacheCreatorLocal;
 
 private:

From 555639cd48887f577d8256e62e6e02d64cb8d974 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 10 May 2018 11:09:08 +0100
Subject: [PATCH 075/123] Configurable defaults for Trans

---
 src/atlas/trans/Cache.cc                 | 16 ++---
 src/atlas/trans/Cache.h                  |  8 ++-
 src/atlas/trans/LegendreCacheCreator.cc  | 15 ++---
 src/atlas/trans/Trans.cc                 | 86 +++++++++++++++++++-----
 src/atlas/trans/Trans.h                  | 16 +++++
 src/atlas/trans/local/TransLocal.cc      |  4 +-
 src/tests/trans/test_trans_localcache.cc | 56 ++++++---------
 7 files changed, 123 insertions(+), 78 deletions(-)

diff --git a/src/atlas/trans/Cache.cc b/src/atlas/trans/Cache.cc
index 14e3726d1..ce22fb2c4 100644
--- a/src/atlas/trans/Cache.cc
+++ b/src/atlas/trans/Cache.cc
@@ -68,10 +68,12 @@ LegendreCache::LegendreCache( const void* address, size_t size ) :
 }
 
 Cache::Cache(const std::shared_ptr<TransCacheEntry>& legendre) :
+  trans_( nullptr ),
   legendre_( legendre ),
   fft_( new EmptyCacheEntry() ) {}
 
 Cache::Cache(const std::shared_ptr<TransCacheEntry>& legendre, const std::shared_ptr<TransCacheEntry>& fft) :
+  trans_( nullptr ),
   legendre_( legendre ),
   fft_( fft ) {}
 
@@ -79,20 +81,17 @@ Cache::Cache( const TransImpl* trans ) :
   trans_( trans ),
   legendre_( new EmptyCacheEntry() ),
   fft_( new EmptyCacheEntry() ) {
-  if( trans_ )
-    trans_->attach();
 }
 
 Cache::Cache() :
+  trans_( nullptr ),
   legendre_( new EmptyCacheEntry() ),
   fft_( new EmptyCacheEntry() ) {}
 
-Cache::Cache(const Cache& other) :
+Cache::Cache( const Cache& other ) :
   trans_( other.trans_ ),
   legendre_( other.legendre_ ),
   fft_( other.fft_ ) {
-  if( trans_ )
-    trans_->attach();
 }
 
 Cache::operator bool() const {
@@ -102,13 +101,6 @@ Cache::operator bool() const {
 Cache::~Cache() {
   pthread_once( &once, init );
   eckit::AutoLock<eckit::Mutex> lock( local_mutex );
-  if( trans_ ) {
-    trans_->detach();
-    if( trans_->owners() == 0 ) {
-      delete trans_;
-    }
-    trans_ = nullptr;
-  }
 }
 
 TransCache::TransCache( const Trans& trans ) :
diff --git a/src/atlas/trans/Cache.h b/src/atlas/trans/Cache.h
index e362d6c34..3fca5edd9 100644
--- a/src/atlas/trans/Cache.h
+++ b/src/atlas/trans/Cache.h
@@ -14,6 +14,7 @@
 
 #include "eckit/filesystem/PathName.h"
 #include "eckit/io/Buffer.h"
+#include "eckit/memory/SharedPtr.h"
 
 //-----------------------------------------------------------------------------
 // Forward declarations
@@ -95,16 +96,17 @@ class Cache {
     Cache();
     Cache( const Cache& other );
     operator bool() const;
-    const TransImpl* trans() const { return trans_; }
+    const TransImpl* trans() const { return trans_.get(); }
     const TransCacheEntry& legendre() const { return *legendre_; }
     const TransCacheEntry& fft() const { return *fft_; }
-    ~Cache();
+    virtual ~Cache();
 protected:
     Cache( const std::shared_ptr<TransCacheEntry>& legendre );
     Cache( const std::shared_ptr<TransCacheEntry>& legendre, const std::shared_ptr<TransCacheEntry>& fft );
     Cache( const TransImpl* );
 private:
-    const TransImpl*                 trans_ = nullptr;
+    eckit::SharedPtr<const TransImpl> trans_;
+//    const TransImpl*                 trans_ = nullptr;
     std::shared_ptr<TransCacheEntry> legendre_;
     std::shared_ptr<TransCacheEntry> fft_;
 };
diff --git a/src/atlas/trans/LegendreCacheCreator.cc b/src/atlas/trans/LegendreCacheCreator.cc
index 8c6b402a3..a51165dd8 100644
--- a/src/atlas/trans/LegendreCacheCreator.cc
+++ b/src/atlas/trans/LegendreCacheCreator.cc
@@ -19,10 +19,7 @@
 
 // For factory registration only:
 #if ATLAS_HAVE_TRANS
-#define TRANS_DEFAULT "ifs"
 #include "atlas/trans/ifs/LegendreCacheCreatorIFS.h"
-#else
-#define TRANS_DEFAULT "local"
 #endif
 #include "atlas/trans/local/LegendreCacheCreatorLocal.h"
 
@@ -116,16 +113,14 @@ LegendreCacheCreator::Implementation* LegendreCacheCreatorFactory::build( const
 
     static force_link static_linking;
 
-    std::string name = config.getString( "type", TRANS_DEFAULT );
+    util::Config options = Trans::config();
+    options.set( config );
 
-    Log::debug() << "Looking for LegendreCacheCreatorFactory [" << name << "]" << std::endl;
+    std::string name = options.getString( "type" );
 
-    if ( not config.has( "type" ) and not has( name ) ) {
-        name = std::string( "local" );
-        Log::debug() << "Looking for LegendreCacheCreatorFactory [" << name << "]" << std::endl;
-    }
+    Log::debug() << "Looking for LegendreCacheCreatorFactory [" << name << "]" << std::endl;
 
-    return factory( name ).make( grid, truncation, config );
+    return factory( name ).make( grid, truncation, options );
 }
 
 LegendreCacheCreator::LegendreCacheCreator() {}
diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc
index 6f67b57bb..ce82ddcdf 100644
--- a/src/atlas/trans/Trans.cc
+++ b/src/atlas/trans/Trans.cc
@@ -23,15 +23,30 @@
 #if ATLAS_HAVE_TRANS
 #include "atlas/trans/ifs/TransIFSNodeColumns.h"
 #include "atlas/trans/ifs/TransIFSStructuredColumns.h"
-#define TRANS_DEFAULT "ifs"
-#else
-#define TRANS_DEFAULT "local"
 #endif
 #include "atlas/trans/local/TransLocal.h" // --> recommended "local"
 
+namespace {
+struct default_backend {
+#if ATLAS_HAVE_TRANS
+    std::string value = "ifs";
+#else
+    std::string value = "local";
+#endif
+    static default_backend instance() {
+        static default_backend x;
+        return x;
+    }
+private:
+    default_backend() = default;
+};
+}
+
 namespace atlas {
 namespace trans {
 
+util::Config TransFactory::default_options_ = util::Config( "type", default_backend::instance().value );
+
 TransImpl::~TransImpl() {}
 
 namespace {
@@ -79,7 +94,8 @@ TransFactory& factory( const std::string& name ) {
 
 }  // namespace
 
-TransFactory::TransFactory( const std::string& name ) : name_( name ) {
+TransFactory::TransFactory( const std::string& name ) :
+    name_( name ) {
     pthread_once( &once, init );
 
     eckit::AutoLock<eckit::Mutex> lock( local_mutex );
@@ -103,6 +119,28 @@ bool TransFactory::has( const std::string& name ) {
     return ( m->find( name ) != m->end() );
 }
 
+void TransFactory::backend( const std::string& backend ) {
+    pthread_once( &once, init );
+    eckit::AutoLock<eckit::Mutex> lock( local_mutex );
+    default_options_.set( "type", backend );
+}
+
+std::string TransFactory::backend() {
+    return default_options_.getString("type");
+}
+
+const eckit::Configuration& TransFactory::config() {
+    return default_options_;
+}
+
+void TransFactory::config( const eckit::Configuration& config ) {
+    std::string type = default_options_.getString( "type" );
+    default_options_ = config;
+    if( not config.has("type") ) {
+        default_options_.set( "type", type );
+    }
+}
+
 void TransFactory::list( std::ostream& out ) {
     pthread_once( &once, init );
 
@@ -135,17 +173,15 @@ Trans::Implementation* TransFactory::build( const Cache& cache, const FunctionSp
 
     static force_link static_linking;
 
+    util::Config options = default_options_;
+    options.set( config );
+
     std::string suffix( "(" + gp.type() + "," + sp.type() + ")" );
-    std::string name = config.getString( "type", TRANS_DEFAULT ) + suffix;
+    std::string name = options.getString( "type" ) + suffix;
 
     Log::debug() << "Looking for TransFactory [" << name << "]" << std::endl;
 
-    if ( not config.has( "type" ) and not has( name ) ) {
-        name = std::string( "local" ) + suffix;
-        Log::debug() << "Looking for TransFactory [" << name << "]" << std::endl;
-    }
-
-    return factory( name ).make( cache, gp, sp, config );
+    return factory( name ).make( cache, gp, sp, options );
 }
 
 Trans::Implementation* TransFactory::build( const Grid& grid, int truncation, const eckit::Configuration& config ) {
@@ -174,20 +210,36 @@ Trans::Implementation* TransFactory::build( const Cache& cache, const Grid& grid
 
     static force_link static_linking;
 
-    std::string name = config.getString( "type", TRANS_DEFAULT );
+    util::Config options = default_options_;
+    options.set( config );
+
+    std::string name = options.getString( "type" );
 
     Log::debug() << "Looking for TransFactory [" << name << "]" << std::endl;
 
-    if ( not config.has( "type" ) and not has( name ) ) {
-        name = std::string( "local" );
-        Log::debug() << "Looking for TransFactory [" << name << "]" << std::endl;
-    }
+    return factory( name ).make( cache, grid, domain, truncation, options );
+}
+
+bool Trans::hasBackend( const std::string& backend ) {
+    return TransFactory::has( backend );
+}
 
-    return factory( name ).make( cache, grid, domain, truncation, config );
+void Trans::backend( const std::string& backend ) {
+    ASSERT( hasBackend( backend ) );
+    TransFactory::backend( backend );
 }
 
+std::string Trans::backend() {
+    return TransFactory::backend();
+}
 
+const eckit::Configuration& Trans::config() {
+    return TransFactory::config();
+}
 
+void Trans::config( const eckit::Configuration& options )  {
+    TransFactory::config( options );
+}
 
 Trans::Trans() {}
 
diff --git a/src/atlas/trans/Trans.h b/src/atlas/trans/Trans.h
index 63a38817b..e4db195f8 100644
--- a/src/atlas/trans/Trans.h
+++ b/src/atlas/trans/Trans.h
@@ -149,8 +149,17 @@ class TransFactory {
 
     static bool has( const std::string& name );
 
+    static void backend( const std::string& );
+
+    static std::string backend();
+
+    static void config( const eckit::Configuration& );
+
+    static const eckit::Configuration& config();
+
 private:
     std::string name_;
+    static util::Config default_options_;
     virtual Trans_t* make( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& ) {
         return nullptr;
     }
@@ -203,6 +212,13 @@ class Trans {
     eckit::SharedPtr<Implementation> impl_;
 
 public:
+
+    static bool hasBackend( const std::string& );
+    static void backend( const std::string& );
+    static std::string backend();
+    static void config( const eckit::Configuration& );
+    static const eckit::Configuration& config();
+
     Trans();
     Trans( Implementation* );
     Trans( const Trans& );
diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc
index 29b7b129f..b05f216a8 100644
--- a/src/atlas/trans/local/TransLocal.cc
+++ b/src/atlas/trans/local/TransLocal.cc
@@ -580,7 +580,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
             ATLAS_TRACE( "Legendre precomputations (unstructured)" );
 
             if( warning() ) {
-              Log::warning() << "WARNING: Precomputations for spectral transforms could take a long time and consume a lot of memory (unstructured grid approach)!" << std::endl;
+              Log::warning() << "WARNING: Precomputations for spectral transforms could take a long time and consume a lot of memory (unstructured grid approach)! Results may contain aliasing errors." << std::endl;
             }
 
             std::vector<double> lats( grid_.size() );
@@ -1081,7 +1081,7 @@ void TransLocal::invtrans_unstructured( const int truncation, const int nb_field
     ATLAS_TRACE( "invtrans_unstructured" );
 
     if( warning(config) ) {
-      Log::warning() << "WARNING: Spectral transforms could take a long time (unstructured grid approach)." << std::endl;
+      Log::warning() << "WARNING: Spectral transforms could take a long time (unstructured grid approach). Results may contain aliasing errors." << std::endl;
     }
 
     double* zfn;
diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc
index 5c3ca945c..e505eba46 100644
--- a/src/tests/trans/test_trans_localcache.cc
+++ b/src/tests/trans/test_trans_localcache.cc
@@ -25,10 +25,6 @@
 
 #include "tests/AtlasTestEnvironment.h"
 
-#if ATLAS_HAVE_TRANS
-#include "transi/trans.h"
-#endif
-
 namespace atlas {
 namespace test {
 
@@ -36,16 +32,8 @@ namespace test {
 
 struct AtlasTransEnvironment : public AtlasTestEnvironment {
     AtlasTransEnvironment( int argc, char* argv[] ) : AtlasTestEnvironment( argc, argv ) {
-#if ATLAS_HAVE_TRANS
-        trans_use_mpi( mpi::comm().size() > 1 );
-        trans_init();
-#endif
-    }
-
-    ~AtlasTransEnvironment() {
-#if ATLAS_HAVE_TRANS
-        trans_finalize();
-#endif
+        trans::Trans::backend( "local" );
+        trans::Trans::config( option::warning(1) );
     }
 };
 
@@ -98,7 +86,7 @@ CASE( "test_global_grids" ) {
             std::make_pair(Slat(n),t),
         };
 
-        LegendreCacheCreator F_cache_creator( Grid(F(n)), t, option::type("local") );
+        LegendreCacheCreator F_cache_creator( Grid(F(n)), t );
         EXPECT( F_cache_creator.supported() );
         auto F_cachefile = CacheFile("leg_"+F_cache_creator.uid()+".bin");
         F_cache_creator.create( F_cachefile );
@@ -113,7 +101,7 @@ CASE( "test_global_grids" ) {
             ATLAS_TRACE("Case "+gridname+" T"+std::to_string(truncation));
             Grid grid(gridname);
 
-            LegendreCacheCreator cache_creator( grid, truncation, option::type("local") );
+            LegendreCacheCreator cache_creator( grid, truncation );
             EXPECT( cache_creator.supported() );
             auto cachefile = CacheFile("leg_"+cache_creator.uid()+".bin");
             cache_creator.create( cachefile );
@@ -122,13 +110,13 @@ CASE( "test_global_grids" ) {
             }
 
             ATLAS_TRACE_SCOPE("create without cache")
-                Trans( grid, truncation, option::type("local") );
+                Trans( grid, truncation );
 
             Cache cache;
             ATLAS_TRACE_SCOPE("read cache")
                 cache = LegendreCache( cachefile );
             ATLAS_TRACE_SCOPE("create with cache")
-                Trans( cache, grid, truncation, option::type("local") );
+                Trans( cache, grid, truncation );
 
         }
     }
@@ -160,7 +148,7 @@ CASE( "test_global_grids_with_subdomain" ) {
 
         Grid global_grid( gridname );
 
-        LegendreCacheCreator global_cache_creator( Grid(gridname), truncation, option::type("local") );
+        LegendreCacheCreator global_cache_creator( Grid(gridname), truncation );
         EXPECT( global_cache_creator.supported() );
         auto global_cachefile = CacheFile( "leg_" + global_cache_creator.uid() + ".bin" );
         ATLAS_TRACE_SCOPE( "Creating cache " + std::string( global_cachefile ) )
@@ -174,7 +162,7 @@ CASE( "test_global_grids_with_subdomain" ) {
         for( auto domain : domains ) {
             Grid grid( gridname, domain );
             ATLAS_TRACE_SCOPE("create with cache")
-                Trans( global_cache, global_grid, domain, truncation, option::type("local") );
+                Trans( global_cache, global_grid, domain, truncation );
         }
     }
 }
@@ -189,7 +177,7 @@ CASE( "test_regional_grids nested_in_global" ) {
     );
     EXPECT( grid_global.domain().global() );
 
-    LegendreCacheCreator global_cache_creator( grid_global, truncation, option::type("local") );
+    LegendreCacheCreator global_cache_creator( grid_global, truncation );
     EXPECT( global_cache_creator.supported() );
     auto global_cachefile = CacheFile( "leg_" + global_cache_creator.uid() + ".bin" );
     ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) )
@@ -201,11 +189,11 @@ CASE( "test_regional_grids nested_in_global" ) {
 
 
     ATLAS_TRACE_SCOPE("create without cache")
-        Trans( grid_global, regional.domain(), truncation, option::type("local") );
+        Trans( grid_global, regional.domain(), truncation );
     ATLAS_TRACE_SCOPE("read cache")
         cache = LegendreCache( global_cachefile );
     ATLAS_TRACE_SCOPE("create with cache")
-        Trans( cache, grid_global, regional.domain(), truncation, option::type("local") );
+        Trans( cache, grid_global, regional.domain(), truncation );
 }
 
 CASE( "test_regional_grids not nested" ) {
@@ -214,7 +202,7 @@ CASE( "test_regional_grids not nested" ) {
 
     StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) );
 
-    LegendreCacheCreator cache_creator( grid, truncation, option::type("local") );
+    LegendreCacheCreator cache_creator( grid, truncation );
     EXPECT( cache_creator.supported() );
     auto cachefile = CacheFile( "leg_" + cache_creator.uid() + ".bin" );
 
@@ -222,11 +210,11 @@ CASE( "test_regional_grids not nested" ) {
         cache_creator.create( cachefile );
 
     ATLAS_TRACE_SCOPE("create without cache")
-        Trans( grid, truncation, option::type("local") );
+        Trans( grid, truncation );
     ATLAS_TRACE_SCOPE("read cache")
         cache = LegendreCache( cachefile );
     ATLAS_TRACE_SCOPE("create with cache")
-        Trans( cache, grid, truncation, option::type("local") );
+        Trans( cache, grid, truncation );
 }
 
 CASE( "test_regional_grids with projection" ) {
@@ -241,10 +229,10 @@ CASE( "test_regional_grids with projection" ) {
     StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ), projection );
     Trans trans;
     ATLAS_TRACE_SCOPE("create without cache")
-        trans = Trans( grid, truncation, option::type("local") | option::warning(1) );
+        trans = Trans( grid, truncation );
 
     // Note: caching not yet implemented for unstructured and projected grids
-    LegendreCacheCreator legendre_cache_creator( grid, truncation, option::type("local") );
+    LegendreCacheCreator legendre_cache_creator( grid, truncation );
     ATLAS_DEBUG_VAR( legendre_cache_creator.uid() );
     EXPECT( not legendre_cache_creator.supported() );
 
@@ -261,14 +249,14 @@ CASE( "test cache creator to file" ) {
         LinearSpacing( { 90., -90.}, 181, true  )
     );
 
-    LegendreCacheCreator legendre_cache_creator( grid_global, truncation, option::type("local") );
+    LegendreCacheCreator legendre_cache_creator( grid_global, truncation );
     auto cachefile = CacheFile( legendre_cache_creator.uid() );
     ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) )
       legendre_cache_creator.create( cachefile );
 
     Cache c = legendre_cache_creator.create();
-    auto trans1 = Trans( c, grid_global, truncation, option::type("local") );
-    auto trans2 = Trans( c, grid_global, truncation, option::type("local") );
+    auto trans1 = Trans( c, grid_global, truncation );
+    auto trans2 = Trans( c, grid_global, truncation );
 }
 
 CASE( "test cache creator in memory" ) {
@@ -279,14 +267,14 @@ CASE( "test cache creator in memory" ) {
         LinearSpacing( { 90., -90.}, 181, true  )
     );
 
-    LegendreCacheCreator legendre_cache_creator( grid_global, truncation, option::type("local") );
+    LegendreCacheCreator legendre_cache_creator( grid_global, truncation );
 
     Cache cache;
     ATLAS_TRACE_SCOPE( "Creating cache in memory" )
       cache = legendre_cache_creator.create();
 
-    auto trans1 = Trans( cache, grid_global, truncation, option::type("local") );
-    auto trans2 = Trans( cache, grid_global, truncation, option::type("local") );
+    auto trans1 = Trans( cache, grid_global, truncation );
+    auto trans2 = Trans( cache, grid_global, truncation );
 }
 
 }  // namespace test

From 0bf1dee5de24efc29afd2e9dfcd34d6f9b49e5ea Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 10 May 2018 11:38:54 +0100
Subject: [PATCH 076/123] Default options used for invtrans/dirtrans as well

---
 src/atlas/trans/Trans.cc | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc
index ce82ddcdf..7a6a0e6ea 100644
--- a/src/atlas/trans/Trans.cc
+++ b/src/atlas/trans/Trans.cc
@@ -241,6 +241,14 @@ void Trans::config( const eckit::Configuration& options )  {
     TransFactory::config( options );
 }
 
+namespace {
+util::Config options( const eckit::Configuration& config ) {
+    util::Config opts = Trans::config();
+    opts.set(config);
+    return opts;
+}
+}
+
 Trans::Trans() {}
 
 Trans::Trans( Implementation* impl ) : impl_( impl ) {}
@@ -279,37 +287,37 @@ size_t Trans::spectralCoefficients() const {
 }
 
 void Trans::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const {
-    impl_->dirtrans( gpfield, spfield, config );
+    impl_->dirtrans( gpfield, spfield, options(config) );
 }
 
 void Trans::dirtrans( const FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config ) const {
-    impl_->dirtrans( gpfields, spfields, config );
+    impl_->dirtrans( gpfields, spfields, options(config) );
 }
 
 void Trans::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
                                   const eckit::Configuration& config ) const {
-    impl_->dirtrans_wind2vordiv( gpwind, spvor, spdiv, config );
+    impl_->dirtrans_wind2vordiv( gpwind, spvor, spdiv, options(config) );
 }
 
 void Trans::invtrans( const Field& spfield, Field& gpfield, const eckit::Configuration& config ) const {
-    impl_->invtrans( spfield, gpfield, config );
+    impl_->invtrans( spfield, gpfield, options(config) );
 }
 
 void Trans::invtrans( const FieldSet& spfields, FieldSet& gpfields, const eckit::Configuration& config ) const {
-    impl_->invtrans( spfields, gpfields, config );
+    impl_->invtrans( spfields, gpfields, options(config) );
 }
 
 void Trans::invtrans_grad( const Field& spfield, Field& gradfield, const eckit::Configuration& config ) const {
-    impl_->invtrans_grad( spfield, gradfield, config );
+    impl_->invtrans_grad( spfield, gradfield, options(config) );
 }
 
 void Trans::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields, const eckit::Configuration& config ) const {
-    impl_->invtrans_grad( spfields, gradfields, config );
+    impl_->invtrans_grad( spfields, gradfields, options(config) );
 }
 
 void Trans::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind,
                                   const eckit::Configuration& config ) const {
-    impl_->invtrans_vordiv2wind( spvor, spdiv, gpwind, config );
+    impl_->invtrans_vordiv2wind( spvor, spdiv, gpwind, options(config) );
 }
 
 // -- IFS type fields --
@@ -330,7 +338,7 @@ void Trans::invtrans( const int nb_scalar_fields, const double scalar_spectra[],
                       const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
                       const eckit::Configuration& config ) const {
     impl_->invtrans( nb_scalar_fields, scalar_spectra, nb_vordiv_fields, vorticity_spectra, divergence_spectra,
-                     gp_fields, config );
+                     gp_fields, options(config) );
 }
 
 /*!
@@ -341,7 +349,7 @@ void Trans::invtrans( const int nb_scalar_fields, const double scalar_spectra[],
  */
 void Trans::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[],
                       const eckit::Configuration& config ) const {
-    impl_->invtrans( nb_scalar_fields, scalar_spectra, gp_fields, config );
+    impl_->invtrans( nb_scalar_fields, scalar_spectra, gp_fields, options(config) );
 }
 
 /*!
@@ -350,7 +358,7 @@ void Trans::invtrans( const int nb_scalar_fields, const double scalar_spectra[],
  */
 void Trans::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], const double divergence_spectra[],
                       double gp_fields[], const eckit::Configuration& config ) const {
-    impl_->invtrans( nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
+    impl_->invtrans( nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, options(config) );
 }
 
 /*!
@@ -358,7 +366,7 @@ void Trans::invtrans( const int nb_vordiv_fields, const double vorticity_spectra
  */
 void Trans::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
                       const eckit::Configuration& config ) const {
-    impl_->dirtrans( nb_fields, scalar_fields, scalar_spectra, config );
+    impl_->dirtrans( nb_fields, scalar_fields, scalar_spectra, options(config) );
 }
 
 /*!
@@ -367,7 +375,7 @@ void Trans::dirtrans( const int nb_fields, const double scalar_fields[], double
  */
 void Trans::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
                       double divergence_spectra[], const eckit::Configuration& config ) const {
-    impl_->dirtrans( nb_fields, wind_fields, vorticity_spectra, divergence_spectra, config );
+    impl_->dirtrans( nb_fields, wind_fields, vorticity_spectra, divergence_spectra, options(config) );
 }
 
 }  // namespace trans

From 1824862c42dd5884ba55cec46fec5be6c4e6ed39 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 10 May 2018 11:40:49 +0100
Subject: [PATCH 077/123] Reduce runtime of atlas_test_transgeneral

---
 src/tests/trans/test_transgeneral.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index fb976bd25..e6ac605bf 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -973,7 +973,7 @@ CASE( "test_trans_domain" ) {
     Domain testdomain2 = RectangularDomain( {-1., 1.}, {-5., 40.} );
     // Grid: (Adjust the following line if the test takes too long!)
 
-    Grid global_grid( "O640" );
+    Grid global_grid( "O64" );
     Grid g1( global_grid, testdomain1 );
     //Grid g2( gridString, testdomain2 );
 
@@ -983,7 +983,7 @@ CASE( "test_trans_domain" ) {
     using LinearSpacing = grid::LinearSpacing;
     StructuredGrid g2( LinearSpacing( {0., 180.}, 181 ), LinearSpacing( {0., 45.}, 46 ) );
 
-    int trc = 640;
+    int trc = 63;
     //Log::info() << "rgp1:" << std::endl;
     if ( eckit::PathName( "legcache.bin" ).exists() ) eckit::PathName( "legcache.bin" ).unlink();
     Trace t1( Here(), "translocal1 construction" );

From 92bbedf49ba7d9696952d76e228cb4a0ba1f155d Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 10 May 2018 14:55:58 +0100
Subject: [PATCH 078/123] Warn about possible aliasing errors in Trans for
 regional grids

---
 src/atlas/grid/Grid.h                    | 2 ++
 src/atlas/grid/detail/grid/Structured.cc | 4 ++++
 src/atlas/grid/detail/grid/Structured.h  | 4 ++++
 src/atlas/trans/local/TransLocal.cc      | 4 +++-
 4 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/atlas/grid/Grid.h b/src/atlas/grid/Grid.h
index e8d497878..0ac6aa8dc 100644
--- a/src/atlas/grid/Grid.h
+++ b/src/atlas/grid/Grid.h
@@ -222,6 +222,8 @@ class StructuredGrid : public Grid {
 
     bool periodic() const { return grid_->periodic(); }
 
+    const XSpace& xspace() const { return grid_->xspace(); }
+
     const YSpace& yspace() const { return grid_->yspace(); }
 
 private:
diff --git a/src/atlas/grid/detail/grid/Structured.cc b/src/atlas/grid/detail/grid/Structured.cc
index a2c6f0dcb..2baa2223c 100644
--- a/src/atlas/grid/detail/grid/Structured.cc
+++ b/src/atlas/grid/detail/grid/Structured.cc
@@ -226,6 +226,10 @@ Structured::XSpace::Implementation::Implementation( const Spacing& spacing ) :
     nxmin_                                 = nx_[0];
 }
 
+std::string Structured::XSpace::Implementation::type() const {
+    return "linear";
+}
+
 Grid::Spec Structured::XSpace::Implementation::spec() const {
     Grid::Spec spec;
 
diff --git a/src/atlas/grid/detail/grid/Structured.h b/src/atlas/grid/detail/grid/Structured.h
index 13be697a4..d26891f20 100644
--- a/src/atlas/grid/detail/grid/Structured.h
+++ b/src/atlas/grid/detail/grid/Structured.h
@@ -229,6 +229,8 @@ class Structured : public Grid {
 
             Spec spec() const;
 
+            std::string type() const;
+
         private:
             void reserve( long ny );
 
@@ -277,6 +279,8 @@ class Structured : public Grid {
 
         Spec spec() const { return impl_->spec(); }
 
+        std::string type() const { return impl_->type(); }
+
     private:
         eckit::SharedPtr<Implementation> impl_;
     };
diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc
index b05f216a8..b4520f9a9 100644
--- a/src/atlas/trans/local/TransLocal.cc
+++ b/src/atlas/trans/local/TransLocal.cc
@@ -338,7 +338,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
         // reduce truncation towards the pole for reduced meshes:
         nlat0_.resize( truncation_ + 1 );
         if ( no_nest ) {
-            for ( int j = 0; j <= truncation_; j++ ) {
+          for ( int j = 0; j <= truncation_; j++ ) {
                 nlat0_[j] = 0;
             }
         }
@@ -542,6 +542,8 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
 #endif
         }
         if ( !useFFT_ ) {
+            Log::warning() << "WARNING: Spectral transform results may contain aliasing errors. This will be addressed soon." << std::endl;
+
             alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlonsMax );
 #if !TRANSLOCAL_DGEMM2
             {

From a218b02ee90e44ebdc03daa71a24eb0d799390ad Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 10 May 2018 15:56:32 +0100
Subject: [PATCH 079/123] cleanup

---
 src/atlas/trans/Cache.h                      |  1 -
 src/atlas/trans/local/LegendrePolynomials.cc | 16 ++---
 src/atlas/trans/local/LegendrePolynomials.h  | 20 +++---
 src/atlas/trans/local/TransLocal.cc          | 68 ++++++++++----------
 src/atlas/trans/local/TransLocal.h           | 16 ++---
 5 files changed, 60 insertions(+), 61 deletions(-)

diff --git a/src/atlas/trans/Cache.h b/src/atlas/trans/Cache.h
index 3fca5edd9..8cd6b8097 100644
--- a/src/atlas/trans/Cache.h
+++ b/src/atlas/trans/Cache.h
@@ -54,7 +54,6 @@ class EmptyCacheEntry final : public TransCacheEntry {
 };
 
 //-----------------------------------------------------------------------------
-
 class TransCacheFileEntry final : public TransCacheEntry {
 private:
   eckit::Buffer buffer_;
diff --git a/src/atlas/trans/local/LegendrePolynomials.cc b/src/atlas/trans/local/LegendrePolynomials.cc
index 4bd0d67a1..da29d9704 100644
--- a/src/atlas/trans/local/LegendrePolynomials.cc
+++ b/src/atlas/trans/local/LegendrePolynomials.cc
@@ -21,7 +21,7 @@ namespace trans {
 
 //-----------------------------------------------------------------------------
 
-void compute_zfnopt3( const size_t trc, double zfn[] ) {
+void compute_zfn( const size_t trc, double zfn[] ) {
     auto idxzfn = [&]( int jn, int jk ) { return jk + ( trc + 1 ) * jn; };
     int iodd    = 0;
     // Compute coefficients for Taylor series in Belousov (19) and (21)
@@ -45,7 +45,7 @@ void compute_zfnopt3( const size_t trc, double zfn[] ) {
 }
 
 
-void compute_legendre_polynomials_latopt3( const size_t trc,  // truncation (in)
+void compute_legendre_polynomials_lat( const size_t trc,  // truncation (in)
                                            const double lat,  // latitude in radians (in)
                                            double legpol[],   // legendre polynomials
                                            double zfn[] ) {
@@ -149,7 +149,7 @@ void compute_legendre_polynomials_latopt3( const size_t trc,  // truncation (in)
 }
 
 
-void compute_legendre_polynomialsopt3(
+void compute_legendre_polynomials(
     const size_t trc,          // truncation (in)
     const int nlats,           // number of latitudes
     const double lats[],       // latitudes in radians (in)
@@ -162,12 +162,12 @@ void compute_legendre_polynomialsopt3(
     std::vector<double> legpol( legendre_size( trc ) );
     std::vector<double> zfn( ( trc + 1 ) * ( trc + 1 ) );
     auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; };
-    compute_zfnopt3( trc, zfn.data() );
+    compute_zfn( trc, zfn.data() );
 
     // Loop over latitudes:
     for ( int jlat = 0; jlat < nlats; ++jlat ) {
         // compute legendre polynomials for current latitude:
-        compute_legendre_polynomials_latopt3( trc, lats[jlat], legpol.data(), zfn.data() );
+        compute_legendre_polynomials_lat( trc, lats[jlat], legpol.data(), zfn.data() );
 
         // split polynomials into symmetric and antisymmetric parts:
         {
@@ -204,7 +204,7 @@ void compute_legendre_polynomialsopt3(
     }
 }
 
-void compute_legendre_polynomials_allopt3( const size_t trc,     // truncation (in)
+void compute_legendre_polynomials_all( const size_t trc,     // truncation (in)
                                            const int nlats,      // number of latitudes
                                            const double lats[],  // latitudes in radians (in)
                                            double legendre[] )   // legendre polynomials for all latitudes
@@ -216,12 +216,12 @@ void compute_legendre_polynomials_allopt3( const size_t trc,     // truncation (
     auto idxmnl = [&]( int jm, int jn, int jlat ) {
         return ( 2 * trc + 3 - jm ) * jm / 2 * nlats + jlat * ( trc - jm + 1 ) + jn - jm;
     };
-    compute_zfnopt3( trc, zfn.data() );
+    compute_zfn( trc, zfn.data() );
 
     // Loop over latitudes:
     for ( int jlat = 0; jlat < nlats; ++jlat ) {
         // compute legendre polynomials for current latitude:
-        compute_legendre_polynomials_latopt3( trc, lats[jlat], legpol.data(), zfn.data() );
+        compute_legendre_polynomials_lat( trc, lats[jlat], legpol.data(), zfn.data() );
 
         for ( int jm = 0; jm <= trc; ++jm ) {
             for ( int jn = jm; jn <= trc; ++jn ) {
diff --git a/src/atlas/trans/local/LegendrePolynomials.h b/src/atlas/trans/local/LegendrePolynomials.h
index 93ebb49f7..43edbb221 100644
--- a/src/atlas/trans/local/LegendrePolynomials.h
+++ b/src/atlas/trans/local/LegendrePolynomials.h
@@ -32,14 +32,14 @@ namespace trans {
 // Ported to C++ by:
 // Andreas Mueller *ECMWF*
 //
-void compute_zfnopt3( const size_t trc, double zfn[] );
+void compute_zfn( const size_t trc, double zfn[] );
 
-void compute_legendre_polynomials_latopt3( const size_t trc,  // truncation (in)
-                                           const double lat,  // latitude in radians (in)
-                                           double legpol[],   // legendre polynomials
-                                           double zfn[] );
+void compute_legendre_polynomials_lat( const size_t trc,  // truncation (in)
+                                       const double lat,  // latitude in radians (in)
+                                       double legpol[],   // legendre polynomials
+                                       double zfn[] );
 
-void compute_legendre_polynomialsopt3(
+void compute_legendre_polynomials(
     const size_t trc,           // truncation (in)
     const int nlats,            // number of latitudes
     const double lats[],        // latitudes in radians (in)
@@ -48,10 +48,10 @@ void compute_legendre_polynomialsopt3(
     size_t leg_start_sym[],     // start indices for different zonal wave numbers, symmetric part
     size_t leg_start_asym[] );  // start indices for different zonal wave numbers, asymmetric part
 
-void compute_legendre_polynomials_allopt3( const size_t trc,     // truncation (in)
-                                           const int nlats,      // number of latitudes
-                                           const double lats[],  // latitudes in radians (in)
-                                           double legendre[] );  // legendre polynomials for all latitudes
+void compute_legendre_polynomials_all( const size_t trc,     // truncation (in)
+                                       const int nlats,      // number of latitudes
+                                       const double lats[],  // latitudes in radians (in)
+                                       double legendre[] );  // legendre polynomials for all latitudes
 
 // --------------------------------------------------------------------------------------------------------------------
 
diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc
index b4520f9a9..c34cc9f3f 100644
--- a/src/atlas/trans/local/TransLocal.cc
+++ b/src/atlas/trans/local/TransLocal.cc
@@ -455,9 +455,9 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
               }
 
               ATLAS_TRACE_SCOPE( "Legendre precomputations (structured)" ) {
-                    compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_,
-                                                      legendre_asym_, legendre_sym_begin_.data(),
-                                                      legendre_asym_begin_.data() );
+                    compute_legendre_polynomials( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_,
+                                                  legendre_asym_, legendre_sym_begin_.data(),
+                                                  legendre_asym_begin_.data() );
                 }
                 std::string file_path = TransParameters( config ).write_legendre();
                 if ( file_path.size() ) {
@@ -562,7 +562,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
             }
 #else
             {
-                ATLAS_TRACE( "opt3 precomp Fourier" );
+                ATLAS_TRACE( "precomp Fourier" );
                 int idx = 0;
                 for ( int jlon = 0; jlon < nlonsMax; jlon++ ) {
                     double factor = 1.;
@@ -591,7 +591,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
             for ( PointLonLat p : grid_.lonlat() ) {
                 lats[j++] = p.lat() * util::Constants::degreesToRadians();
             }
-            compute_legendre_polynomials_allopt3( truncation_, grid_.size(), lats.data(), legendre_ );
+            compute_legendre_polynomials_all( truncation_, grid_.size(), lats.data(), legendre_ );
         }
         if ( TransParameters( config ).write_legendre().size() ) {
             throw eckit::NotImplemented( "Caching for unstructured grids or structured grids with projections not yet implemented", Here() );
@@ -678,7 +678,7 @@ void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spect
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void gp_transposeopt3( const int nb_size, const int nb_fields, const double gp_tmp[], double gp_fields[] ) {
+void gp_transpose( const int nb_size, const int nb_fields, const double gp_tmp[], double gp_fields[] ) {
     for ( int jgp = 0; jgp < nb_size; jgp++ ) {
         for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
             gp_fields[jfld * nb_size + jgp] = gp_tmp[jgp * nb_fields + jfld];
@@ -688,9 +688,9 @@ void gp_transposeopt3( const int nb_size, const int nb_fields, const double gp_t
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocal::invtrans_legendreopt3( const int truncation, const int nlats, const int nb_fields,
-                                            const double scalar_spectra[], double scl_fourier[],
-                                            const eckit::Configuration& config ) const {
+void TransLocal::invtrans_legendre( const int truncation, const int nlats, const int nb_fields,
+                                    const double scalar_spectra[], double scl_fourier[],
+                                    const eckit::Configuration& config ) const {
     // Legendre transform:
     {
         Log::debug() << "Legendre dgemm: using " << nlatsLegReduced_ - nlat0_[0] << " latitudes out of "
@@ -715,14 +715,14 @@ void TransLocal::invtrans_legendreopt3( const int truncation, const int nlats, c
                 alloc_aligned( scl_fourier_sym, size_fourier );
                 alloc_aligned( scl_fourier_asym, size_fourier );
                 {
-                    //ATLAS_TRACE( "opt3 Legendre split" );
+                    //ATLAS_TRACE( "Legendre split" );
                     int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2;
                     // the choice between the following two code lines determines whether
                     // total wavenumbers are summed in an ascending or descending order.
                     // The trans library in IFS uses descending order because it should
                     // be more accurate (higher wavenumbers have smaller contributions).
                     // This also needs to be changed when splitting the spectral data in
-                    // compute_legendre_polynomialsopt3!
+                    // compute_legendre_polynomials!
                     //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) {
                     for ( int jn = truncation_ + 1; jn >= jm; jn-- ) {
                         for ( int imag = 0; imag < n_imag; imag++ ) {
@@ -772,7 +772,7 @@ void TransLocal::invtrans_legendreopt3( const int truncation, const int nlats, c
                     }
                 }
                 {
-                    //ATLAS_TRACE( "opt3 merge spheres" );
+                    //ATLAS_TRACE( "merge spheres" );
                     // northern hemisphere:
                     for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) {
                         if ( nlatsLegReduced_ - nlat0_[jm] - nlatsNH_ + jlat >= 0 ) {
@@ -841,7 +841,7 @@ void TransLocal::invtrans_legendreopt3( const int truncation, const int nlats, c
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocal::invtrans_fourier_regularopt3( const int nlats, const int nlons, const int nb_fields,
+void TransLocal::invtrans_fourier_regular( const int nlats, const int nlons, const int nb_fields,
                                                    double scl_fourier[], double gp_fields[],
                                                    const eckit::Configuration& config ) const {
     // Fourier transformation:
@@ -894,38 +894,38 @@ void TransLocal::invtrans_fourier_regularopt3( const int nlats, const int nlons,
         // dgemm-method 2
         // should be faster for small domains or large truncation
         // but have not found any significant speedup so far
-        double* gp_opt3;
-        alloc_aligned( gp_opt3, nb_fields * grid_.size() );
+        double* gp;
+        alloc_aligned( gp, nb_fields * grid_.size() );
         {
-            ATLAS_TRACE( "opt3 Fourier dgemm method 2" );
+            ATLAS_TRACE( "Fourier dgemm method 2" );
             eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 );
             eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlons );
-            eckit::linalg::Matrix C( gp_opt3, nb_fields * nlats, nlons );
+            eckit::linalg::Matrix C( gp, nb_fields * nlats, nlons );
             linalg_.gemm( A, B, C );
         }
 
         // Transposition in grid point space:
         {
-            ATLAS_TRACE( "opt3 transposition in gp-space" );
+            ATLAS_TRACE( "transposition in gp-space" );
             int idx = 0;
             for ( int jlon = 0; jlon < nlons; jlon++ ) {
                 for ( int jlat = 0; jlat < nlats; jlat++ ) {
                     for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                         int pos_tp = jlon + nlons * ( jlat + nlats * ( jfld ) );
                         //int pos  = jfld + nb_fields * ( jlat + nlats * ( jlon ) );
-                        gp_fields[pos_tp] = gp_opt3[idx++];  // = gp_opt3[pos]
+                        gp_fields[pos_tp] = gp[idx++];  // = gp[pos]
                     }
                 }
             }
         }
-        free_aligned( gp_opt3 );
+        free_aligned( gp );
 #endif
     }
 }
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocal::invtrans_fourier_reducedopt3( const int nlats, const grid::StructuredGrid g, const int nb_fields,
+void TransLocal::invtrans_fourier_reduced( const int nlats, const grid::StructuredGrid g, const int nb_fields,
                                                    double scl_fourier[], double gp_fields[],
                                                    const eckit::Configuration& config ) const {
     // Fourier transformation:
@@ -1059,7 +1059,7 @@ void TransLocal::invtrans_unstructured_precomp( const int truncation, const int
             // Computing u,v from U,V:
             {
                 if ( nb_vordiv_fields > 0 ) {
-                    //ATLAS_TRACE( "opt3 u,v from U,V" );
+                    //ATLAS_TRACE( " u,v from U,V" );
                     double coslat = std::cos( lat );
                     for ( int j = 0; j < nb_fields; j++ ) {
                         gp_fields[ip + j * grid_.size()] /= coslat;
@@ -1088,7 +1088,7 @@ void TransLocal::invtrans_unstructured( const int truncation, const int nb_field
 
     double* zfn;
     alloc_aligned( zfn, ( truncation + 1 ) * ( truncation + 1 ) );
-    compute_zfnopt3( truncation, zfn );
+    compute_zfn( truncation, zfn );
     int size_fourier = nb_fields * 2;
     double* legendre;
     double* scl_fourier;
@@ -1107,7 +1107,7 @@ void TransLocal::invtrans_unstructured( const int truncation, const int nb_field
     for ( const PointLonLat p : grid_.lonlat() ) {
         const double lon = p.lon() * util::Constants::degreesToRadians();
         const double lat = p.lat() * util::Constants::degreesToRadians();
-        compute_legendre_polynomials_latopt3( truncation, lat, legendre, zfn );
+        compute_legendre_polynomials_lat( truncation, lat, legendre, zfn );
         // Legendre transform:
         {
             //ATLAS_TRACE( "opt Legendre dgemm" );
@@ -1155,7 +1155,7 @@ void TransLocal::invtrans_unstructured( const int truncation, const int nb_field
         // Computing u,v from U,V:
         {
             if ( nb_vordiv_fields > 0 ) {
-                //ATLAS_TRACE( "opt3 u,v from U,V" );
+                //ATLAS_TRACE( "u,v from U,V" );
                 const double coslat = std::cos( lat );
                 for ( int j = 0; j < nb_fields; j++ ) {
                     gp_fields[ip + j * grid_.size()] /= coslat;
@@ -1203,14 +1203,14 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields,
             alloc_aligned( scl_fourier, size_fourier_max * ( truncation_ + 1 ) );
 
             // Legendre transformation:
-            invtrans_legendreopt3( truncation, nlats, nb_scalar_fields, scalar_spectra, scl_fourier, config );
+            invtrans_legendre( truncation, nlats, nb_scalar_fields, scalar_spectra, scl_fourier, config );
 
             // Fourier transformation:
             if ( grid::RegularGrid( gridGlobal_ ) ) {
-                invtrans_fourier_regularopt3( nlats, nlons, nb_fields, scl_fourier, gp_fields, config );
+                invtrans_fourier_regular( nlats, nlons, nb_fields, scl_fourier, gp_fields, config );
             }
             else {
-                invtrans_fourier_reducedopt3( nlats, g, nb_fields, scl_fourier, gp_fields, config );
+                invtrans_fourier_reduced( nlats, g, nb_fields, scl_fourier, gp_fields, config );
             }
 
             // Computing u,v from U,V:
@@ -1257,8 +1257,8 @@ void TransLocal::invtrans( const int nb_vordiv_fields, const double vorticity_sp
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void extend_truncationopt3( const int old_truncation, const int nb_fields, const double old_spectra[],
-                            double new_spectra[] ) {
+void extend_truncation( const int old_truncation, const int nb_fields, const double old_spectra[],
+                        double new_spectra[] ) {
     int k = 0, k_old = 0;
     for ( int m = 0; m <= old_truncation + 1; m++ ) {             // zonal wavenumber
         for ( int n = m; n <= old_truncation + 1; n++ ) {         // total wavenumber
@@ -1291,10 +1291,10 @@ void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spect
         {
             ATLAS_TRACE( "extend vordiv" );
             // increase truncation in vorticity_spectra and divergence_spectra:
-            extend_truncationopt3( truncation_, nb_vordiv_fields, vorticity_spectra,
-                                   vorticity_spectra_extended.data() );
-            extend_truncationopt3( truncation_, nb_vordiv_fields, divergence_spectra,
-                                   divergence_spectra_extended.data() );
+            extend_truncation( truncation_, nb_vordiv_fields, vorticity_spectra,
+                               vorticity_spectra_extended.data() );
+            extend_truncation( truncation_, nb_vordiv_fields, divergence_spectra,
+                               divergence_spectra_extended.data() );
         }
 
         {
diff --git a/src/atlas/trans/local/TransLocal.h b/src/atlas/trans/local/TransLocal.h
index 1a9342cbc..abbee7c10 100644
--- a/src/atlas/trans/local/TransLocal.h
+++ b/src/atlas/trans/local/TransLocal.h
@@ -133,16 +133,16 @@ class TransLocal : public trans::TransImpl {
 #endif
     };
 
-    void invtrans_legendreopt3( const int truncation, const int nlats, const int nb_fields,
-                                const double scalar_spectra[], double scl_fourier[],
-                                const eckit::Configuration& config ) const;
+    void invtrans_legendre( const int truncation, const int nlats, const int nb_fields,
+                            const double scalar_spectra[], double scl_fourier[],
+                            const eckit::Configuration& config ) const;
 
-    void invtrans_fourier_regularopt3( const int nlats, const int nlons, const int nb_fields, double scl_fourier[],
-                                       double gp_fields[], const eckit::Configuration& config ) const;
+    void invtrans_fourier_regular( const int nlats, const int nlons, const int nb_fields, double scl_fourier[],
+                                   double gp_fields[], const eckit::Configuration& config ) const;
 
-    void invtrans_fourier_reducedopt3( const int nlats, const grid::StructuredGrid g, const int nb_fields,
-                                       double scl_fourier[], double gp_fields[],
-                                       const eckit::Configuration& config ) const;
+    void invtrans_fourier_reduced( const int nlats, const grid::StructuredGrid g, const int nb_fields,
+                                   double scl_fourier[], double gp_fields[],
+                                   const eckit::Configuration& config ) const;
 
     void invtrans_unstructured_precomp( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
                                         const double scalar_spectra[], double gp_fields[],

From 4e0ff1abccfba811216e861484f9bb51d20bcac6 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 10 May 2018 15:57:46 +0100
Subject: [PATCH 080/123] ECKIT-326 Config::hash now implemented by
 eckit::Configuration::hash

---
 src/atlas/util/Config.cc   | 10 ----------
 src/atlas/util/Config.h    |  2 --
 src/atlas/util/Metadata.cc | 10 ----------
 src/atlas/util/Metadata.h  |  2 --
 4 files changed, 24 deletions(-)

diff --git a/src/atlas/util/Config.cc b/src/atlas/util/Config.cc
index a0802dac7..6284d7c81 100644
--- a/src/atlas/util/Config.cc
+++ b/src/atlas/util/Config.cc
@@ -90,16 +90,6 @@ bool Config::get( const std::string& name, std::vector<Config>& value ) const {
     return found;
 }
 
-void Config::hash( eckit::Hash& hsh ) const {
-    eckit::ValueMap map = get();
-    for ( eckit::ValueMap::const_iterator vit = map.begin(); vit != map.end(); ++vit ) {
-        hsh.add( vit->first.as<std::string>() );
-        /// @note below, we assume all Values translate to std::string, this needs
-        /// more verification
-        hsh.add( vit->second.as<std::string>() );
-    }
-}
-
 //==================================================================
 
 // ------------------------------------------------------------------
diff --git a/src/atlas/util/Config.h b/src/atlas/util/Config.h
index bf498698d..da77b78a1 100644
--- a/src/atlas/util/Config.h
+++ b/src/atlas/util/Config.h
@@ -67,8 +67,6 @@ class Config : public eckit::LocalConfiguration {
 
     using eckit::LocalConfiguration::get;
     bool get( const std::string& name, std::vector<Config>& value ) const;
-
-    void hash( eckit::Hash& ) const;
 };
 
 // ------------------------------------------------------------------
diff --git a/src/atlas/util/Metadata.cc b/src/atlas/util/Metadata.cc
index 829621164..c0831bb77 100644
--- a/src/atlas/util/Metadata.cc
+++ b/src/atlas/util/Metadata.cc
@@ -115,16 +115,6 @@ void Metadata::broadcast( Metadata& dest, const size_t root ) const {
     }
 }
 
-void Metadata::hash( eckit::Hash& hsh ) const {
-    eckit::ValueMap map = get();
-    for ( eckit::ValueMap::const_iterator vit = map.begin(); vit != map.end(); ++vit ) {
-        hsh.add( vit->first.as<std::string>() );
-        /// @note below, we assume all Values translate to std::string, this needs
-        /// more verification
-        hsh.add( vit->second.as<std::string>() );
-    }
-}
-
 Metadata::Metadata( const eckit::Value& value ) : eckit::LocalConfiguration( value ) {}
 
 // ------------------------------------------------------------------
diff --git a/src/atlas/util/Metadata.h b/src/atlas/util/Metadata.h
index 3c60a41e8..f8f10d7da 100644
--- a/src/atlas/util/Metadata.h
+++ b/src/atlas/util/Metadata.h
@@ -57,8 +57,6 @@ class Metadata : public eckit::LocalConfiguration {
 
     size_t footprint() const;
 
-    void hash( eckit::Hash& ) const;
-
 private:
     void throw_exception( const std::string& ) const;
 

From 6500aace294f66b7a40c2e7b79211e8303e3cbd7 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 10 May 2018 15:59:36 +0100
Subject: [PATCH 081/123] Require eckit 0.21.0

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 787c96898..917d6da60 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -31,7 +31,7 @@ ecbuild_declare_project()
 
 ### eckit
 
-ecbuild_use_package( PROJECT eckit VERSION 0.20.0 REQUIRED )
+ecbuild_use_package( PROJECT eckit VERSION 0.21.0 REQUIRED )
 ecbuild_debug( "   ECKIT_FEATURES : [${ECKIT_FEATURES}]" )
 
 # options & dependencies

From 61c60858bd539c5867d90e3dd3e34002417d0cbf Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 10 May 2018 16:09:22 +0100
Subject: [PATCH 082/123] Fix version suffix

---
 VERSION.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION.cmake b/VERSION.cmake
index 62025fc5d..7cd7d9260 100644
--- a/VERSION.cmake
+++ b/VERSION.cmake
@@ -6,5 +6,5 @@
 # granted to it by virtue of its status as an intergovernmental organisation nor
 # does it submit to any jurisdiction.
 
-set  ( ${PROJECT_NAME}_VERSION_STR  "0.14.0-opt-translocal" )
+set  ( ${PROJECT_NAME}_VERSION_STR  "0.14.0-develop" )
 

From 8ccb4901fe0164dcb94e425185aa50f2f8156848 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 10 May 2018 16:36:59 +0100
Subject: [PATCH 083/123] Script to apply clang-format

---
 tools/apply-clang-format.sh | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100755 tools/apply-clang-format.sh

diff --git a/tools/apply-clang-format.sh b/tools/apply-clang-format.sh
new file mode 100755
index 000000000..51f68ba34
--- /dev/null
+++ b/tools/apply-clang-format.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+cd $SCRIPTDIR/../src
+find . -iname *.h -o -iname *.cc | xargs clang-format -i -style=file
+

From 3ed0bc8e8081808962e72f69405536e4d651408a Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 10 May 2018 16:31:34 +0100
Subject: [PATCH 084/123] Apply clang-format

---
 src/atlas/array/SVector.h                     |  11 +-
 src/atlas/array/gridtools/GridToolsTraits.h   |   2 +-
 src/atlas/array/helpers/ArraySlicer.h         |   6 +-
 src/atlas/grid/Grid.cc                        |   2 +-
 src/atlas/grid/detail/grid/Grid.cc            |   9 +-
 src/atlas/grid/detail/grid/Structured.cc      |   6 +-
 .../interpolation/method/FiniteElement.cc     |   2 +-
 src/atlas/mesh/actions/BuildStatistics.cc     |   8 +-
 src/atlas/mesh/actions/ExtendNodesGlobal.cc   |  18 +-
 .../meshgenerator/StructuredMeshGenerator.cc  |  36 +--
 src/atlas/option/TransOptions.cc              |   3 +-
 src/atlas/option/TransOptions.h               |   4 -
 src/atlas/runtime/trace/CallStack.cc          |   2 +-
 src/atlas/runtime/trace/Nesting.cc            |   5 +-
 src/atlas/trans/Cache.cc                      | 110 ++++-----
 src/atlas/trans/Cache.h                       |  18 +-
 src/atlas/trans/LegendreCacheCreator.cc       |   6 +-
 src/atlas/trans/LegendreCacheCreator.h        |   6 +-
 src/atlas/trans/Trans.cc                      |  56 ++---
 src/atlas/trans/Trans.h                       |  18 +-
 .../trans/ifs/LegendreCacheCreatorIFS.cc      | 117 +++++----
 src/atlas/trans/ifs/LegendreCacheCreatorIFS.h |   4 +-
 src/atlas/trans/ifs/TransIFS.cc               |   7 +-
 src/atlas/trans/ifs/TransIFS.h                |   3 +-
 .../trans/local/LegendreCacheCreatorLocal.cc  | 154 ++++++------
 .../trans/local/LegendreCacheCreatorLocal.h   |   4 +-
 src/atlas/trans/local/LegendrePolynomials.cc  |  12 +-
 src/atlas/trans/local/TransLocal.cc           | 143 +++++------
 src/atlas/trans/local/TransLocal.h            |  27 +--
 src/atlas/trans/local/VorDivToUVLocal.cc      |   4 +-
 src/atlas/util/Earth.h                        |   2 +-
 src/atlas/util/Rotation.cc                    |   2 +-
 src/atlas/util/SphericalPolygon.cc            |   4 +-
 src/atlas/util/UnitSphere.h                   |   4 +-
 src/sandbox/CMakeLists.txt                    |   1 -
 src/sandbox/fortran_modinc/CMakeLists.txt     |  16 --
 src/sandbox/fortran_modinc/mod1.f             |   6 -
 src/sandbox/fortran_modinc/mod1.h             |   4 -
 src/sandbox/fortran_modinc/mod2.f             |   6 -
 src/sandbox/fortran_modinc/mod2.h             |   4 -
 src/sandbox/fortran_modinc/sb_modinc.F90      |  26 --
 src/tests/AtlasTestEnvironment.h              |   4 +-
 src/tests/array/test_array.cc                 |   5 +-
 src/tests/array/test_table.cc                 |   2 +-
 src/tests/functionspace/test_pointcloud.cc    |   2 +-
 src/tests/grid/test_field.cc                  |   2 +-
 src/tests/grid/test_grid_ptr.cc               |   4 +-
 src/tests/grid/test_state.cc                  |   2 +-
 .../test_interpolation_finite_element.cc      |   2 +-
 src/tests/io/test_gmsh.cc                     |   2 +-
 src/tests/io/test_pointcloud_io.cc            |   2 +-
 src/tests/mesh/test_accumulate_facets.cc      |   6 +-
 src/tests/mesh/test_connectivity.cc           |   2 +-
 src/tests/mesh/test_distmesh.cc               |   2 +-
 src/tests/mesh/test_elements.cc               |   8 +-
 src/tests/mesh/test_halo.cc                   |   2 +-
 src/tests/parallel/test_haloexchange.cc       |   4 +-
 src/tests/trans/test_trans_localcache.cc      | 223 ++++++++----------
 src/tests/trans/test_transgeneral.cc          |   2 +-
 src/tests/util/test_earth.cc                  |   4 +-
 src/tests/util/test_indexview.cc              |   2 +-
 61 files changed, 539 insertions(+), 621 deletions(-)
 delete mode 100644 src/sandbox/fortran_modinc/CMakeLists.txt
 delete mode 100644 src/sandbox/fortran_modinc/mod1.f
 delete mode 100644 src/sandbox/fortran_modinc/mod1.h
 delete mode 100644 src/sandbox/fortran_modinc/mod2.f
 delete mode 100644 src/sandbox/fortran_modinc/mod2.h
 delete mode 100644 src/sandbox/fortran_modinc/sb_modinc.F90

diff --git a/src/atlas/array/SVector.h b/src/atlas/array/SVector.h
index 3180ef4bb..cb74eebd1 100644
--- a/src/atlas/array/SVector.h
+++ b/src/atlas/array/SVector.h
@@ -28,7 +28,6 @@ namespace array {
 template <typename T>
 class SVector {
 public:
-
     ATLAS_HOST_DEVICE
     SVector() : data_( nullptr ), size_( 0 ), externally_allocated_( false ) {}
 
@@ -41,13 +40,13 @@ class SVector {
     ATLAS_HOST_DEVICE
     SVector( T* data, size_t size ) : data_( data ), size_( size ) {}
 
-    SVector( size_t N ) : data_( nullptr), size_( N ), externally_allocated_( false ) {
-        if( N != 0 ) {
+    SVector( size_t N ) : data_( nullptr ), size_( N ), externally_allocated_( false ) {
+        if ( N != 0 ) {
 #if ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA
-          cudaError_t err = cudaMallocManaged( &data_, N * sizeof( T ) );
-          if ( err != cudaSuccess ) throw eckit::AssertionFailed( "failed to allocate GPU memory" );
+            cudaError_t err = cudaMallocManaged( &data_, N * sizeof( T ) );
+            if ( err != cudaSuccess ) throw eckit::AssertionFailed( "failed to allocate GPU memory" );
 #else
-          data_ = (T*)malloc( N * sizeof( T ) );
+            data_ = (T*)malloc( N * sizeof( T ) );
 #endif
         }
     }
diff --git a/src/atlas/array/gridtools/GridToolsTraits.h b/src/atlas/array/gridtools/GridToolsTraits.h
index 4aeb071ff..2be09472c 100644
--- a/src/atlas/array/gridtools/GridToolsTraits.h
+++ b/src/atlas/array/gridtools/GridToolsTraits.h
@@ -1,7 +1,7 @@
 #pragma once
 
-#include "gridtools/common/generic_metafunctions/is_all_integrals.hpp"
 #include "gridtools/common/generic_metafunctions/accumulate.hpp"
+#include "gridtools/common/generic_metafunctions/is_all_integrals.hpp"
 #include "gridtools/storage/storage-facility.hpp"
 
 #include "atlas/array/ArrayViewDefs.h"
diff --git a/src/atlas/array/helpers/ArraySlicer.h b/src/atlas/array/helpers/ArraySlicer.h
index c12060851..b87c464bf 100644
--- a/src/atlas/array/helpers/ArraySlicer.h
+++ b/src/atlas/array/helpers/ArraySlicer.h
@@ -162,15 +162,13 @@ class ArraySlicer {
         return idx * view.stride( i_view++ );
     }
 
-    static int offset_part( View& view, int& i_view, Range range ) {
-        return range.start() * view.stride( i_view++ ); }
+    static int offset_part( View& view, int& i_view, Range range ) { return range.start() * view.stride( i_view++ ); }
 
     static int offset_part( View& view, int& i_view, RangeAll range ) {
         return range.start() * view.stride( i_view++ );
     }
 
-    static int offset_part( View& view, int& i_view, RangeTo range ) {
-        return range.start() * view.stride( i_view++ ); }
+    static int offset_part( View& view, int& i_view, RangeTo range ) { return range.start() * view.stride( i_view++ ); }
 
     static int offset_part( View& view, int& i_view, RangeFrom range ) {
         return range.start() * view.stride( i_view++ );
diff --git a/src/atlas/grid/Grid.cc b/src/atlas/grid/Grid.cc
index 1c6a03b22..c4e04d55c 100644
--- a/src/atlas/grid/Grid.cc
+++ b/src/atlas/grid/Grid.cc
@@ -94,7 +94,7 @@ StructuredGrid::StructuredGrid( const XSpace& xspace, const YSpace& yspace, cons
     Grid( new detail::grid::Structured( xspace, yspace, projection, domain ) ),
     grid_( structured_grid( get() ) ) {}
 
-StructuredGrid::StructuredGrid( const Grid& grid , const Grid::Domain& domain ) :
+StructuredGrid::StructuredGrid( const Grid& grid, const Grid::Domain& domain ) :
     Grid( grid, domain ),
     grid_( structured_grid( get() ) ) {}
 
diff --git a/src/atlas/grid/detail/grid/Grid.cc b/src/atlas/grid/detail/grid/Grid.cc
index 68e937678..1b887421c 100644
--- a/src/atlas/grid/detail/grid/Grid.cc
+++ b/src/atlas/grid/detail/grid/Grid.cc
@@ -76,11 +76,12 @@ const Grid* Grid::create( const std::string& name, const Grid::Config& config )
     //    return GridBuilder::createNamed(name);
 }
 
-const Grid* Grid::create( const Grid& grid, const Domain& domain) {
-    if( grid.type() == "structured" ) {
-        const Structured& g = dynamic_cast<const Structured&>(grid);
+const Grid* Grid::create( const Grid& grid, const Domain& domain ) {
+    if ( grid.type() == "structured" ) {
+        const Structured& g = dynamic_cast<const Structured&>( grid );
         return new Structured( g.name(), g.xspace(), g.yspace(), g.projection(), domain );
-    } else {
+    }
+    else {
         NOTIMP;
     }
 }
diff --git a/src/atlas/grid/detail/grid/Structured.cc b/src/atlas/grid/detail/grid/Structured.cc
index 2baa2223c..6cb838a1e 100644
--- a/src/atlas/grid/detail/grid/Structured.cc
+++ b/src/atlas/grid/detail/grid/Structured.cc
@@ -367,7 +367,7 @@ void Structured::crop( const Domain& dom ) {
                 jmax = std::max( j, jmax );
             }
         }
-        ASSERT(jmax >= jmin);
+        ASSERT( jmax >= jmin );
 
         size_t cropped_ny = jmax - jmin + 1;
         std::vector<double> cropped_y( y_.begin() + jmin, y_.begin() + jmin + cropped_ny );
@@ -445,10 +445,10 @@ void Structured::computeTruePeriodicity() {
             const PointLonLat Pllmax = projection().lonlat( PointXY( xmax_[j], y_[j] ) );
 
             Point3 Pxmin;
-            util::UnitSphere::convertSphericalToCartesian(Pllmin, Pxmin );
+            util::UnitSphere::convertSphericalToCartesian( Pllmin, Pxmin );
 
             Point3 Pxmax;
-            util::UnitSphere::convertSphericalToCartesian(Pllmax, Pxmax );
+            util::UnitSphere::convertSphericalToCartesian( Pllmax, Pxmax );
 
             periodic_x_ = points_equal( Pxmin, Pxmax );
         }
diff --git a/src/atlas/interpolation/method/FiniteElement.cc b/src/atlas/interpolation/method/FiniteElement.cc
index 166e9132c..b7b255ef4 100644
--- a/src/atlas/interpolation/method/FiniteElement.cc
+++ b/src/atlas/interpolation/method/FiniteElement.cc
@@ -165,7 +165,7 @@ void FiniteElement::setup( const FunctionSpace& source ) {
         for ( std::vector<size_t>::const_iterator i = failures.begin(); i != failures.end(); ++i ) {
             const PointXYZ p{( *ocoords_ )( *i, 0 ), ( *ocoords_ )( *i, 1 ), ( *ocoords_ )( *i, 2 )};  // lookup point
             PointLonLat pll;
-            util::Earth::convertCartesianToSpherical(p, pll);
+            util::Earth::convertCartesianToSpherical( p, pll );
             msg << "\t(lon,lat) = " << pll << "\n";
         }
 
diff --git a/src/atlas/mesh/actions/BuildStatistics.cc b/src/atlas/mesh/actions/BuildStatistics.cc
index 4ae572550..02e45ef32 100644
--- a/src/atlas/mesh/actions/BuildStatistics.cc
+++ b/src/atlas/mesh/actions/BuildStatistics.cc
@@ -65,10 +65,10 @@ void quad_quality( double& eta, double& rho, const PointLonLat& p1, const PointL
     // see http://geuz.org/gmsh/doc/preprints/gmsh_quad_preprint.pdf
 
     PointXYZ xyz[4];
-    util::UnitSphere::convertSphericalToCartesian(p1, xyz[0]);
-    util::UnitSphere::convertSphericalToCartesian(p2, xyz[1]);
-    util::UnitSphere::convertSphericalToCartesian(p3, xyz[2]);
-    util::UnitSphere::convertSphericalToCartesian(p4, xyz[3]);
+    util::UnitSphere::convertSphericalToCartesian( p1, xyz[0] );
+    util::UnitSphere::convertSphericalToCartesian( p2, xyz[1] );
+    util::UnitSphere::convertSphericalToCartesian( p3, xyz[2] );
+    util::UnitSphere::convertSphericalToCartesian( p4, xyz[3] );
 
     PointXYZ l2m1( PointXYZ::sub( xyz[1], xyz[0] ) );
     PointXYZ l3m2( PointXYZ::sub( xyz[2], xyz[1] ) );
diff --git a/src/atlas/mesh/actions/ExtendNodesGlobal.cc b/src/atlas/mesh/actions/ExtendNodesGlobal.cc
index d59a973e3..0d5d596de 100644
--- a/src/atlas/mesh/actions/ExtendNodesGlobal.cc
+++ b/src/atlas/mesh/actions/ExtendNodesGlobal.cc
@@ -10,13 +10,13 @@
 
 #include "atlas/mesh/actions/ExtendNodesGlobal.h"
 
-#include "eckit/exception/Exceptions.h"
 #include "atlas/field/Field.h"
 #include "atlas/grid/Grid.h"
 #include "atlas/mesh/Mesh.h"
 #include "atlas/mesh/Nodes.h"
 #include "atlas/util/CoordinateEnums.h"
 #include "atlas/util/Earth.h"
+#include "eckit/exception/Exceptions.h"
 
 namespace atlas {
 namespace mesh {
@@ -68,14 +68,14 @@ void ExtendNodesGlobal::operator()( const Grid& grid, Mesh& mesh ) const {
         PointXYZ pXYZ;
         util::Earth::convertSphericalToCartesian( pLL, pXYZ );
 
-        xyz( n, XX )          = pXYZ.x();
-        xyz( n, YY )          = pXYZ.y();
-        xyz( n, ZZ )          = pXYZ.z();
-        xy( n, XX )           = extended_pts[i].x();
-        xy( n, YY )           = extended_pts[i].y();
-        lonlat( n, LON )      = pLL.lon();
-        lonlat( n, LAT )      = pLL.lat();
-        gidx( n )             = n + 1;
+        xyz( n, XX )     = pXYZ.x();
+        xyz( n, YY )     = pXYZ.y();
+        xyz( n, ZZ )     = pXYZ.z();
+        xy( n, XX )      = extended_pts[i].x();
+        xy( n, YY )      = extended_pts[i].y();
+        lonlat( n, LON ) = pLL.lon();
+        lonlat( n, LAT ) = pLL.lat();
+        gidx( n )        = n + 1;
     }
 }
 
diff --git a/src/atlas/meshgenerator/StructuredMeshGenerator.cc b/src/atlas/meshgenerator/StructuredMeshGenerator.cc
index 6f5497e69..6ee5e8936 100644
--- a/src/atlas/meshgenerator/StructuredMeshGenerator.cc
+++ b/src/atlas/meshgenerator/StructuredMeshGenerator.cc
@@ -78,8 +78,10 @@ StructuredMeshGenerator::StructuredMeshGenerator( const eckit::Parametrisation&
     if ( p.get( "unique_pole", unique_pole ) ) options.set( "unique_pole", unique_pole );
 
     bool force_include_pole;
-    if ( p.get( "force_include_north_pole", force_include_pole ) ) options.set( "force_include_north_pole", force_include_pole );
-    if ( p.get( "force_include_south_pole", force_include_pole ) ) options.set( "force_include_south_pole", force_include_pole );
+    if ( p.get( "force_include_north_pole", force_include_pole ) )
+        options.set( "force_include_north_pole", force_include_pole );
+    if ( p.get( "force_include_south_pole", force_include_pole ) )
+        options.set( "force_include_south_pole", force_include_pole );
 
     bool three_dimensional;
     if ( p.get( "three_dimensional", three_dimensional ) || p.get( "3d", three_dimensional ) )
@@ -700,19 +702,23 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con
 
     bool has_point_at_north_pole = rg.y().front() == 90 && rg.nx().front() > 0;
     bool has_point_at_south_pole = rg.y().back() == -90 && rg.nx().back() > 0;
-    bool possible_north_pole = !has_point_at_north_pole && rg.domain().containsNorthPole() && ( mypart == 0 );
+    bool possible_north_pole     = !has_point_at_north_pole && rg.domain().containsNorthPole() && ( mypart == 0 );
     bool possible_south_pole = !has_point_at_south_pole && rg.domain().containsSouthPole() && ( mypart == nparts - 1 );
 
-    bool force_include_north_pole(options.has("force_include_north_pole") && options.get<bool>( "force_include_north_pole" ));
-    bool force_include_south_pole(options.has("force_include_south_pole") && options.get<bool>( "force_include_south_pole" ));
+    bool force_include_north_pole( options.has( "force_include_north_pole" ) &&
+                                   options.get<bool>( "force_include_north_pole" ) );
+    bool force_include_south_pole( options.has( "force_include_south_pole" ) &&
+                                   options.get<bool>( "force_include_south_pole" ) );
 
-    bool include_north_pole = (possible_north_pole && options.get<bool>( "include_pole" )) || force_include_north_pole;
-    bool include_south_pole = (possible_south_pole && options.get<bool>( "include_pole" )) || force_include_south_pole;
-    bool patch_north_pole   =  possible_north_pole && options.get<bool>( "patch_pole" ) && rg.nx( 1 ) > 0;
-    bool patch_south_pole   =  possible_south_pole && options.get<bool>( "patch_pole" ) && rg.nx( rg.ny() - 2 ) > 0;
+    bool include_north_pole =
+        ( possible_north_pole && options.get<bool>( "include_pole" ) ) || force_include_north_pole;
+    bool include_south_pole =
+        ( possible_south_pole && options.get<bool>( "include_pole" ) ) || force_include_south_pole;
+    bool patch_north_pole = possible_north_pole && options.get<bool>( "patch_pole" ) && rg.nx( 1 ) > 0;
+    bool patch_south_pole = possible_south_pole && options.get<bool>( "patch_pole" ) && rg.nx( rg.ny() - 2 ) > 0;
 
-    int nnewnodes = (!has_point_at_north_pole && include_north_pole ? 1 : 0)
-                  + (!has_point_at_south_pole && include_south_pole ? 1 : 0);
+    int nnewnodes = ( !has_point_at_north_pole && include_north_pole ? 1 : 0 ) +
+                    ( !has_point_at_south_pole && include_south_pole ? 1 : 0 );
 
     if ( three_dimensional && nparts != 1 )
         throw BadParameter( "Cannot generate three_dimensional mesh in parallel", Here() );
@@ -827,7 +833,7 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con
                 }
                 else if ( include_periodic_ghost_points )  // add periodic point
                 {
-//#warning TODO: use commented approach
+                    //#warning TODO: use commented approach
                     part( jnode ) = mypart;
                     // part(jnode)      = parts.at( offset_glb.at(jlat) );
                     ghost( jnode ) = 1;
@@ -922,7 +928,7 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con
                 lonlat( inode, LAT ) = crd[LAT];
 
                 glb_idx( inode ) = periodic_glb.at( jlat ) + 1;
-//#warning TODO: use commented approach
+                //#warning TODO: use commented approach
                 //        part(inode)      = parts.at( offset_glb.at(jlat) );
                 part( inode )  = mypart;  // The actual part will be fixed later
                 ghost( inode ) = 1;
@@ -983,8 +989,8 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con
         ++jnode;
     }
 
-    nodes.metadata().set<size_t>( "NbRealPts", size_t(nnodes - nnewnodes) );
-    nodes.metadata().set<size_t>( "NbVirtualPts", size_t(nnewnodes) );
+    nodes.metadata().set<size_t>( "NbRealPts", size_t( nnodes - nnewnodes ) );
+    nodes.metadata().set<size_t>( "NbVirtualPts", size_t( nnewnodes ) );
 
     nodes.global_index().metadata().set( "human_readable", true );
     nodes.global_index().metadata().set( "min", 1 );
diff --git a/src/atlas/option/TransOptions.cc b/src/atlas/option/TransOptions.cc
index 98e036acc..d9aa0565e 100644
--- a/src/atlas/option/TransOptions.cc
+++ b/src/atlas/option/TransOptions.cc
@@ -33,7 +33,8 @@ flt::flt( bool flt ) {
 }
 
 fft::fft( FFT fft ) {
-    static const std::map<FFT, std::string> FFT_to_string = { {FFT::OFF, "OFF"}, {FFT::FFT992, "FFT992"}, {FFT::FFTW, "FFTW"}};
+    static const std::map<FFT, std::string> FFT_to_string = {
+        {FFT::OFF, "OFF"}, {FFT::FFT992, "FFT992"}, {FFT::FFTW, "FFTW"}};
     set( "fft", FFT_to_string.at( fft ) );
 }
 
diff --git a/src/atlas/option/TransOptions.h b/src/atlas/option/TransOptions.h
index d87bcdab3..bb5c0636c 100644
--- a/src/atlas/option/TransOptions.h
+++ b/src/atlas/option/TransOptions.h
@@ -14,10 +14,6 @@
 
 // ----------------------------------------------------------------------------
 
-namespace atlas { class Grid; }
-
-// ----------------------------------------------------------------------------
-
 namespace atlas {
 namespace option {
 
diff --git a/src/atlas/runtime/trace/CallStack.cc b/src/atlas/runtime/trace/CallStack.cc
index 41b6366a9..d441c2930 100644
--- a/src/atlas/runtime/trace/CallStack.cc
+++ b/src/atlas/runtime/trace/CallStack.cc
@@ -10,7 +10,7 @@ namespace runtime {
 namespace trace {
 
 void CallStack::push_front( const eckit::CodeLocation& loc, const std::string& id ) {
-    stack_.push_front( std::hash<std::string>{}( loc.asString()+id ) );
+    stack_.push_front( std::hash<std::string>{}( loc.asString() + id ) );
 }
 
 void CallStack::pop_front() {
diff --git a/src/atlas/runtime/trace/Nesting.cc b/src/atlas/runtime/trace/Nesting.cc
index fe46dbc8c..c01d5f2d8 100644
--- a/src/atlas/runtime/trace/Nesting.cc
+++ b/src/atlas/runtime/trace/Nesting.cc
@@ -36,11 +36,10 @@ class NestingState {
     void pop() { stack_.pop_front(); }
 };
 
-Nesting::Nesting( const eckit::CodeLocation& loc, const std::string& id ) : 
+Nesting::Nesting( const eckit::CodeLocation& loc, const std::string& id ) :
     loc_( loc ),
     id_( id ),
-    stack_( NestingState::instance().push( loc, id ) ) {
-}
+    stack_( NestingState::instance().push( loc, id ) ) {}
 
 Nesting::~Nesting() {
     stop();
diff --git a/src/atlas/trans/Cache.cc b/src/atlas/trans/Cache.cc
index ce22fb2c4..fa6cf1d26 100644
--- a/src/atlas/trans/Cache.cc
+++ b/src/atlas/trans/Cache.cc
@@ -8,116 +8,96 @@
  * nor does it submit to any jurisdiction.
  */
 
-#include <cstdlib>
 #include "atlas/trans/Cache.h"
+#include <cstdlib>
 
-#include "eckit/io/DataHandle.h"
 #include "eckit/exception/Exceptions.h"
+#include "eckit/io/DataHandle.h"
 #include "eckit/thread/AutoLock.h"
 #include "eckit/thread/Mutex.h"
 
-#include "atlas/runtime/Trace.h"
 #include "atlas/runtime/Log.h"
+#include "atlas/runtime/Trace.h"
 #include "atlas/trans/Trans.h"
 
 namespace {
-static eckit::Mutex* local_mutex               = 0;
-static pthread_once_t once                     = PTHREAD_ONCE_INIT;
+static eckit::Mutex* local_mutex = 0;
+static pthread_once_t once       = PTHREAD_ONCE_INIT;
 static void init() {
     local_mutex = new eckit::Mutex();
 }
-}
+}  // namespace
 
 namespace atlas {
 namespace trans {
 
-TransCacheFileEntry::TransCacheFileEntry(const eckit::PathName& path) : buffer_( path.size() ) {
-  ATLAS_TRACE();
-  Log::debug() << "Loading cache from file " << path << std::endl;
-  std::unique_ptr<eckit::DataHandle> dh( path.fileHandle() );
-  dh->openForRead();
-  dh->read( buffer_.data(), buffer_.size() );
-  dh->close();
+TransCacheFileEntry::TransCacheFileEntry( const eckit::PathName& path ) : buffer_( path.size() ) {
+    ATLAS_TRACE();
+    Log::debug() << "Loading cache from file " << path << std::endl;
+    std::unique_ptr<eckit::DataHandle> dh( path.fileHandle() );
+    dh->openForRead();
+    dh->read( buffer_.data(), buffer_.size() );
+    dh->close();
 }
 
-TransCacheMemoryEntry::TransCacheMemoryEntry(const void* data, size_t size) : data_(data), size_(size) {
-  ASSERT(data_);
-  ASSERT(size_);
+TransCacheMemoryEntry::TransCacheMemoryEntry( const void* data, size_t size ) : data_( data ), size_( size ) {
+    ASSERT( data_ );
+    ASSERT( size_ );
 }
 
-LegendreFFTCache::LegendreFFTCache( const void* legendre_address, size_t legendre_size, const void* fft_address, size_t fft_size ) :
-  Cache( std::make_shared<TransCacheMemoryEntry>( legendre_address, legendre_size ),
-         std::make_shared<TransCacheMemoryEntry>( fft_address, fft_size ) ) {
-}
+LegendreFFTCache::LegendreFFTCache( const void* legendre_address, size_t legendre_size, const void* fft_address,
+                                    size_t fft_size ) :
+    Cache( std::make_shared<TransCacheMemoryEntry>( legendre_address, legendre_size ),
+           std::make_shared<TransCacheMemoryEntry>( fft_address, fft_size ) ) {}
 
 LegendreFFTCache::LegendreFFTCache( const eckit::PathName& legendre_path, const eckit::PathName& fft_path ) :
-  Cache( std::shared_ptr<TransCacheEntry>( new TransCacheFileEntry( legendre_path ) ),
-         std::shared_ptr<TransCacheEntry>( new TransCacheFileEntry( fft_path ) ) ) {
-}
+    Cache( std::shared_ptr<TransCacheEntry>( new TransCacheFileEntry( legendre_path ) ),
+           std::shared_ptr<TransCacheEntry>( new TransCacheFileEntry( fft_path ) ) ) {}
 
 LegendreCache::LegendreCache( const eckit::PathName& path ) :
-  Cache( std::shared_ptr<TransCacheEntry>( new TransCacheFileEntry( path ) ) ) {
-}
+    Cache( std::shared_ptr<TransCacheEntry>( new TransCacheFileEntry( path ) ) ) {}
 
-LegendreCache::LegendreCache( size_t size) :
-  Cache( std::make_shared<TransCacheOwnedMemoryEntry>( size ) ) {
-}
+LegendreCache::LegendreCache( size_t size ) : Cache( std::make_shared<TransCacheOwnedMemoryEntry>( size ) ) {}
 
 LegendreCache::LegendreCache( const void* address, size_t size ) :
-  Cache( std::make_shared<TransCacheMemoryEntry>( address, size ) ) {
-}
+    Cache( std::make_shared<TransCacheMemoryEntry>( address, size ) ) {}
 
-Cache::Cache(const std::shared_ptr<TransCacheEntry>& legendre) :
-  trans_( nullptr ),
-  legendre_( legendre ),
-  fft_( new EmptyCacheEntry() ) {}
+Cache::Cache( const std::shared_ptr<TransCacheEntry>& legendre ) :
+    trans_( nullptr ),
+    legendre_( legendre ),
+    fft_( new EmptyCacheEntry() ) {}
 
-Cache::Cache(const std::shared_ptr<TransCacheEntry>& legendre, const std::shared_ptr<TransCacheEntry>& fft) :
-  trans_( nullptr ),
-  legendre_( legendre ),
-  fft_( fft ) {}
+Cache::Cache( const std::shared_ptr<TransCacheEntry>& legendre, const std::shared_ptr<TransCacheEntry>& fft ) :
+    trans_( nullptr ),
+    legendre_( legendre ),
+    fft_( fft ) {}
 
 Cache::Cache( const TransImpl* trans ) :
-  trans_( trans ),
-  legendre_( new EmptyCacheEntry() ),
-  fft_( new EmptyCacheEntry() ) {
-}
+    trans_( trans ),
+    legendre_( new EmptyCacheEntry() ),
+    fft_( new EmptyCacheEntry() ) {}
 
-Cache::Cache() :
-  trans_( nullptr ),
-  legendre_( new EmptyCacheEntry() ),
-  fft_( new EmptyCacheEntry() ) {}
+Cache::Cache() : trans_( nullptr ), legendre_( new EmptyCacheEntry() ), fft_( new EmptyCacheEntry() ) {}
 
-Cache::Cache( const Cache& other ) :
-  trans_( other.trans_ ),
-  legendre_( other.legendre_ ),
-  fft_( other.fft_ ) {
-}
+Cache::Cache( const Cache& other ) : trans_( other.trans_ ), legendre_( other.legendre_ ), fft_( other.fft_ ) {}
 
 Cache::operator bool() const {
-  return trans_ || bool(legendre()) ;
+    return trans_ || bool( legendre() );
 }
 
 Cache::~Cache() {
-  pthread_once( &once, init );
-  eckit::AutoLock<eckit::Mutex> lock( local_mutex );
+    pthread_once( &once, init );
+    eckit::AutoLock<eckit::Mutex> lock( local_mutex );
 }
 
-TransCache::TransCache( const Trans& trans ) :
-  Cache( trans.get() ) {
-}
+TransCache::TransCache( const Trans& trans ) : Cache( trans.get() ) {}
 
-TransCacheOwnedMemoryEntry::TransCacheOwnedMemoryEntry(size_t size) :
-  size_(size) {
-  if( size_ ) {
-    data_ = std::malloc( size_ );
-  }
+TransCacheOwnedMemoryEntry::TransCacheOwnedMemoryEntry( size_t size ) : size_( size ) {
+    if ( size_ ) { data_ = std::malloc( size_ ); }
 }
 
 TransCacheOwnedMemoryEntry::~TransCacheOwnedMemoryEntry() {
-  if( size_ ) {
-    std::free( data_ );
-  }
+    if ( size_ ) { std::free( data_ ); }
 }
 
 }  // namespace trans
diff --git a/src/atlas/trans/Cache.h b/src/atlas/trans/Cache.h
index 8cd6b8097..318ba373c 100644
--- a/src/atlas/trans/Cache.h
+++ b/src/atlas/trans/Cache.h
@@ -56,7 +56,8 @@ class EmptyCacheEntry final : public TransCacheEntry {
 //-----------------------------------------------------------------------------
 class TransCacheFileEntry final : public TransCacheEntry {
 private:
-  eckit::Buffer buffer_;
+    eckit::Buffer buffer_;
+
 public:
     TransCacheFileEntry( const eckit::PathName& path );
     virtual size_t size() const override { return buffer_.size(); }
@@ -67,9 +68,10 @@ class TransCacheFileEntry final : public TransCacheEntry {
 
 class TransCacheMemoryEntry final : public TransCacheEntry {
 public:
-    TransCacheMemoryEntry(const void* data, size_t size);
+    TransCacheMemoryEntry( const void* data, size_t size );
     virtual const void* data() const override { return data_; }
     virtual size_t size() const override { return size_; }
+
 private:
     const void* data_;
     const size_t size_;
@@ -79,12 +81,13 @@ class TransCacheMemoryEntry final : public TransCacheEntry {
 
 class TransCacheOwnedMemoryEntry final : public TransCacheEntry {
 public:
-    TransCacheOwnedMemoryEntry(size_t size);
+    TransCacheOwnedMemoryEntry( size_t size );
     ~TransCacheOwnedMemoryEntry();
     virtual const void* data() const override { return data_; }
     virtual size_t size() const override { return size_; }
+
 private:
-    void* data_ = nullptr;
+    void* data_        = nullptr;
     const size_t size_ = 0;
 };
 
@@ -99,13 +102,15 @@ class Cache {
     const TransCacheEntry& legendre() const { return *legendre_; }
     const TransCacheEntry& fft() const { return *fft_; }
     virtual ~Cache();
+
 protected:
     Cache( const std::shared_ptr<TransCacheEntry>& legendre );
     Cache( const std::shared_ptr<TransCacheEntry>& legendre, const std::shared_ptr<TransCacheEntry>& fft );
     Cache( const TransImpl* );
+
 private:
     eckit::SharedPtr<const TransImpl> trans_;
-//    const TransImpl*                 trans_ = nullptr;
+    //    const TransImpl*                 trans_ = nullptr;
     std::shared_ptr<TransCacheEntry> legendre_;
     std::shared_ptr<TransCacheEntry> fft_;
 };
@@ -125,8 +130,7 @@ class LegendreCache : public Cache {
 
 class LegendreFFTCache : public Cache {
 public:
-    LegendreFFTCache( const void* legendre_address, size_t legendre_size,
-                      const void* fft_address, size_t fft_size );
+    LegendreFFTCache( const void* legendre_address, size_t legendre_size, const void* fft_address, size_t fft_size );
     LegendreFFTCache( const eckit::PathName& legendre_path, const eckit::PathName& fft_path );
 };
 
diff --git a/src/atlas/trans/LegendreCacheCreator.cc b/src/atlas/trans/LegendreCacheCreator.cc
index a51165dd8..f5411e6b6 100644
--- a/src/atlas/trans/LegendreCacheCreator.cc
+++ b/src/atlas/trans/LegendreCacheCreator.cc
@@ -30,9 +30,9 @@ LegendreCacheCreatorImpl::~LegendreCacheCreatorImpl() {}
 
 namespace {
 
-static eckit::Mutex* local_mutex               = 0;
+static eckit::Mutex* local_mutex                              = 0;
 static std::map<std::string, LegendreCacheCreatorFactory*>* m = 0;
-static pthread_once_t once                     = PTHREAD_ONCE_INIT;
+static pthread_once_t once                                    = PTHREAD_ONCE_INIT;
 
 static void init() {
     local_mutex = new eckit::Mutex();
@@ -106,7 +106,7 @@ void LegendreCacheCreatorFactory::list( std::ostream& out ) {
 }
 
 LegendreCacheCreator::Implementation* LegendreCacheCreatorFactory::build( const Grid& grid, int truncation,
-                                            const eckit::Configuration& config ) {
+                                                                          const eckit::Configuration& config ) {
     pthread_once( &once, init );
 
     eckit::AutoLock<eckit::Mutex> lock( local_mutex );
diff --git a/src/atlas/trans/LegendreCacheCreator.h b/src/atlas/trans/LegendreCacheCreator.h
index db0f71aa8..f6be334c4 100644
--- a/src/atlas/trans/LegendreCacheCreator.h
+++ b/src/atlas/trans/LegendreCacheCreator.h
@@ -16,8 +16,8 @@
 #include "eckit/memory/Owned.h"
 #include "eckit/memory/SharedPtr.h"
 
-#include "atlas/util/Config.h"
 #include "atlas/trans/Trans.h"
+#include "atlas/util/Config.h"
 
 //-----------------------------------------------------------------------------
 // Forward declarations
@@ -97,7 +97,9 @@ class LegendreCacheCreatorFactory {
 
 private:
     std::string name_;
-    virtual LegendreCacheCreatorImpl* make( const Grid& gp, int truncation, const eckit::Configuration& ) { return nullptr; }
+    virtual LegendreCacheCreatorImpl* make( const Grid& gp, int truncation, const eckit::Configuration& ) {
+        return nullptr;
+    }
 
 protected:
     LegendreCacheCreatorFactory( const std::string& );
diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc
index 7a6a0e6ea..a8e7f6f2a 100644
--- a/src/atlas/trans/Trans.cc
+++ b/src/atlas/trans/Trans.cc
@@ -24,7 +24,7 @@
 #include "atlas/trans/ifs/TransIFSNodeColumns.h"
 #include "atlas/trans/ifs/TransIFSStructuredColumns.h"
 #endif
-#include "atlas/trans/local/TransLocal.h" // --> recommended "local"
+#include "atlas/trans/local/TransLocal.h"  // --> recommended "local"
 
 namespace {
 struct default_backend {
@@ -37,10 +37,11 @@ struct default_backend {
         static default_backend x;
         return x;
     }
+
 private:
     default_backend() = default;
 };
-}
+}  // namespace
 
 namespace atlas {
 namespace trans {
@@ -94,8 +95,7 @@ TransFactory& factory( const std::string& name ) {
 
 }  // namespace
 
-TransFactory::TransFactory( const std::string& name ) :
-    name_( name ) {
+TransFactory::TransFactory( const std::string& name ) : name_( name ) {
     pthread_once( &once, init );
 
     eckit::AutoLock<eckit::Mutex> lock( local_mutex );
@@ -126,7 +126,7 @@ void TransFactory::backend( const std::string& backend ) {
 }
 
 std::string TransFactory::backend() {
-    return default_options_.getString("type");
+    return default_options_.getString( "type" );
 }
 
 const eckit::Configuration& TransFactory::config() {
@@ -136,9 +136,7 @@ const eckit::Configuration& TransFactory::config() {
 void TransFactory::config( const eckit::Configuration& config ) {
     std::string type = default_options_.getString( "type" );
     default_options_ = config;
-    if( not config.has("type") ) {
-        default_options_.set( "type", type );
-    }
+    if ( not config.has( "type" ) ) { default_options_.set( "type", type ); }
 }
 
 void TransFactory::list( std::ostream& out ) {
@@ -162,7 +160,7 @@ Trans::Implementation* TransFactory::build( const FunctionSpace& gp, const Funct
 
 Trans::Implementation* TransFactory::build( const Cache& cache, const FunctionSpace& gp, const FunctionSpace& sp,
                                             const eckit::Configuration& config ) {
-    if( cache.trans() ) {
+    if ( cache.trans() ) {
         Log::debug() << "Creating Trans from cache, ignoring any other arguments" << std::endl;
         return cache.trans();
     }
@@ -188,7 +186,8 @@ Trans::Implementation* TransFactory::build( const Grid& grid, int truncation, co
     return build( Cache(), grid, truncation, config );
 }
 
-Trans::Implementation* TransFactory::build( const Grid& grid, const Domain& domain, int truncation, const eckit::Configuration& config ) {
+Trans::Implementation* TransFactory::build( const Grid& grid, const Domain& domain, int truncation,
+                                            const eckit::Configuration& config ) {
     return build( Cache(), grid, domain, truncation, config );
 }
 
@@ -199,7 +198,7 @@ Trans::Implementation* TransFactory::build( const Cache& cache, const Grid& grid
 
 Trans::Implementation* TransFactory::build( const Cache& cache, const Grid& grid, const Domain& domain, int truncation,
                                             const eckit::Configuration& config ) {
-    if( cache.trans() ) {
+    if ( cache.trans() ) {
         Log::debug() << "Creating Trans from cache, ignoring any other arguments" << std::endl;
         return cache.trans();
     }
@@ -237,17 +236,17 @@ const eckit::Configuration& Trans::config() {
     return TransFactory::config();
 }
 
-void Trans::config( const eckit::Configuration& options )  {
+void Trans::config( const eckit::Configuration& options ) {
     TransFactory::config( options );
 }
 
 namespace {
 util::Config options( const eckit::Configuration& config ) {
     util::Config opts = Trans::config();
-    opts.set(config);
+    opts.set( config );
     return opts;
 }
-}
+}  // namespace
 
 Trans::Trans() {}
 
@@ -269,7 +268,8 @@ Trans::Trans( const Cache& cache, const FunctionSpace& gp, const FunctionSpace&
 Trans::Trans( const Cache& cache, const Grid& grid, int truncation, const eckit::Configuration& config ) :
     impl_( TransFactory::build( cache, grid, truncation, config ) ) {}
 
-Trans::Trans( const Cache& cache, const Grid& grid, const Domain& domain, int truncation, const eckit::Configuration& config ) :
+Trans::Trans( const Cache& cache, const Grid& grid, const Domain& domain, int truncation,
+              const eckit::Configuration& config ) :
     impl_( TransFactory::build( cache, grid, domain, truncation, config ) ) {}
 
 Trans::Trans( const Trans& trans ) : impl_( trans.impl_ ) {}
@@ -287,37 +287,37 @@ size_t Trans::spectralCoefficients() const {
 }
 
 void Trans::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const {
-    impl_->dirtrans( gpfield, spfield, options(config) );
+    impl_->dirtrans( gpfield, spfield, options( config ) );
 }
 
 void Trans::dirtrans( const FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config ) const {
-    impl_->dirtrans( gpfields, spfields, options(config) );
+    impl_->dirtrans( gpfields, spfields, options( config ) );
 }
 
 void Trans::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
                                   const eckit::Configuration& config ) const {
-    impl_->dirtrans_wind2vordiv( gpwind, spvor, spdiv, options(config) );
+    impl_->dirtrans_wind2vordiv( gpwind, spvor, spdiv, options( config ) );
 }
 
 void Trans::invtrans( const Field& spfield, Field& gpfield, const eckit::Configuration& config ) const {
-    impl_->invtrans( spfield, gpfield, options(config) );
+    impl_->invtrans( spfield, gpfield, options( config ) );
 }
 
 void Trans::invtrans( const FieldSet& spfields, FieldSet& gpfields, const eckit::Configuration& config ) const {
-    impl_->invtrans( spfields, gpfields, options(config) );
+    impl_->invtrans( spfields, gpfields, options( config ) );
 }
 
 void Trans::invtrans_grad( const Field& spfield, Field& gradfield, const eckit::Configuration& config ) const {
-    impl_->invtrans_grad( spfield, gradfield, options(config) );
+    impl_->invtrans_grad( spfield, gradfield, options( config ) );
 }
 
 void Trans::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields, const eckit::Configuration& config ) const {
-    impl_->invtrans_grad( spfields, gradfields, options(config) );
+    impl_->invtrans_grad( spfields, gradfields, options( config ) );
 }
 
 void Trans::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind,
                                   const eckit::Configuration& config ) const {
-    impl_->invtrans_vordiv2wind( spvor, spdiv, gpwind, options(config) );
+    impl_->invtrans_vordiv2wind( spvor, spdiv, gpwind, options( config ) );
 }
 
 // -- IFS type fields --
@@ -338,7 +338,7 @@ void Trans::invtrans( const int nb_scalar_fields, const double scalar_spectra[],
                       const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
                       const eckit::Configuration& config ) const {
     impl_->invtrans( nb_scalar_fields, scalar_spectra, nb_vordiv_fields, vorticity_spectra, divergence_spectra,
-                     gp_fields, options(config) );
+                     gp_fields, options( config ) );
 }
 
 /*!
@@ -349,7 +349,7 @@ void Trans::invtrans( const int nb_scalar_fields, const double scalar_spectra[],
  */
 void Trans::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[],
                       const eckit::Configuration& config ) const {
-    impl_->invtrans( nb_scalar_fields, scalar_spectra, gp_fields, options(config) );
+    impl_->invtrans( nb_scalar_fields, scalar_spectra, gp_fields, options( config ) );
 }
 
 /*!
@@ -358,7 +358,7 @@ void Trans::invtrans( const int nb_scalar_fields, const double scalar_spectra[],
  */
 void Trans::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], const double divergence_spectra[],
                       double gp_fields[], const eckit::Configuration& config ) const {
-    impl_->invtrans( nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, options(config) );
+    impl_->invtrans( nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, options( config ) );
 }
 
 /*!
@@ -366,7 +366,7 @@ void Trans::invtrans( const int nb_vordiv_fields, const double vorticity_spectra
  */
 void Trans::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
                       const eckit::Configuration& config ) const {
-    impl_->dirtrans( nb_fields, scalar_fields, scalar_spectra, options(config) );
+    impl_->dirtrans( nb_fields, scalar_fields, scalar_spectra, options( config ) );
 }
 
 /*!
@@ -375,7 +375,7 @@ void Trans::dirtrans( const int nb_fields, const double scalar_fields[], double
  */
 void Trans::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
                       double divergence_spectra[], const eckit::Configuration& config ) const {
-    impl_->dirtrans( nb_fields, wind_fields, vorticity_spectra, divergence_spectra, options(config) );
+    impl_->dirtrans( nb_fields, wind_fields, vorticity_spectra, divergence_spectra, options( config ) );
 }
 
 }  // namespace trans
diff --git a/src/atlas/trans/Trans.h b/src/atlas/trans/Trans.h
index e4db195f8..7a8f18a12 100644
--- a/src/atlas/trans/Trans.h
+++ b/src/atlas/trans/Trans.h
@@ -13,8 +13,8 @@
 #include "eckit/memory/Owned.h"
 #include "eckit/memory/SharedPtr.h"
 
-#include "atlas/util/Config.h"
 #include "atlas/trans/Cache.h"
+#include "atlas/util/Config.h"
 
 //-----------------------------------------------------------------------------
 // Forward declarations
@@ -124,23 +124,25 @@ class TransImpl : public eckit::Owned {
 class TransFactory {
 protected:
     using Trans_t = const TransImpl;
+
 public:
     /*!
    * \brief build Trans
    * \return TransImpl
    */
     static Trans_t* build( const FunctionSpace& gp, const FunctionSpace& sp,
-                             const eckit::Configuration& = util::Config() );
+                           const eckit::Configuration& = util::Config() );
     static Trans_t* build( const Grid&, int truncation, const eckit::Configuration& = util::Config() );
 
     static Trans_t* build( const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::Config() );
 
     static Trans_t* build( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp,
-                             const eckit::Configuration& = util::Config() );
+                           const eckit::Configuration& = util::Config() );
 
     static Trans_t* build( const Cache&, const Grid&, int truncation, const eckit::Configuration& = util::Config() );
 
-    static Trans_t* build( const Cache&, const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::Config() );
+    static Trans_t* build( const Cache&, const Grid&, const Domain&, int truncation,
+                           const eckit::Configuration& = util::Config() );
 
     /*!
    * \brief list all registered trans implementations
@@ -160,7 +162,8 @@ class TransFactory {
 private:
     std::string name_;
     static util::Config default_options_;
-    virtual Trans_t* make( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& ) {
+    virtual Trans_t* make( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp,
+                           const eckit::Configuration& ) {
         return nullptr;
     }
     virtual Trans_t* make( const Cache&, const Grid& gp, const Domain&, int truncation, const eckit::Configuration& ) {
@@ -177,7 +180,7 @@ class TransFactory {
 template <class T>
 class TransBuilderFunctionSpace : public TransFactory {
     virtual Trans_t* make( const Cache& cache, const FunctionSpace& gp, const FunctionSpace& sp,
-                             const eckit::Configuration& config ) {
+                           const eckit::Configuration& config ) {
         return new T( cache, gp, sp, config );
     }
     virtual Trans_t* make( const Cache&, const Grid&, const Domain&, int, const eckit::Configuration& ) {
@@ -191,7 +194,7 @@ class TransBuilderFunctionSpace : public TransFactory {
 template <class T>
 class TransBuilderGrid : public TransFactory {
     virtual Trans_t* make( const Cache& cache, const Grid& grid, const Domain& domain, int truncation,
-                             const eckit::Configuration& config ) {
+                           const eckit::Configuration& config ) {
         return new T( cache, grid, domain, truncation, config );
     }
     virtual Trans_t* make( const Cache&, const FunctionSpace&, const FunctionSpace&, const eckit::Configuration& ) {
@@ -212,7 +215,6 @@ class Trans {
     eckit::SharedPtr<Implementation> impl_;
 
 public:
-
     static bool hasBackend( const std::string& );
     static void backend( const std::string& );
     static std::string backend();
diff --git a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc
index 7ac8e74b0..54b276d64 100644
--- a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc
+++ b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc
@@ -9,12 +9,12 @@
  */
 
 #include "atlas/trans/ifs/LegendreCacheCreatorIFS.h"
-#include <string>
 #include <sstream>
-#include "eckit/utils/MD5.h"
+#include <string>
 #include "atlas/grid.h"
 #include "atlas/option.h"
 #include "atlas/trans/Trans.h"
+#include "eckit/utils/MD5.h"
 
 namespace atlas {
 namespace trans {
@@ -26,92 +26,91 @@ static LegendreCacheCreatorBuilder<LegendreCacheCreatorIFS> builder( "ifs" );
 namespace {
 
 std::string truncate( const std::string& str ) {
-  const int trunc = std::min(10ul,str.size());
-  return str.substr( 0, trunc );
+    const int trunc = std::min( 10ul, str.size() );
+    return str.substr( 0, trunc );
 }
 
 std::string hash( const Grid& grid ) {
-  eckit::MD5 h;
-  if( grid::StructuredGrid( grid ) && not grid.projection() ) {
-    auto g = grid::StructuredGrid( grid );
-    h.add( g.y().data(), g.y().size() * sizeof(double) );
-  } else {
-    grid.hash( h );
-  }
-  return truncate( h.digest() );
+    eckit::MD5 h;
+    if ( grid::StructuredGrid( grid ) && not grid.projection() ) {
+        auto g = grid::StructuredGrid( grid );
+        h.add( g.y().data(), g.y().size() * sizeof( double ) );
+    }
+    else {
+        grid.hash( h );
+    }
+    return truncate( h.digest() );
 }
 
 std::string hash( const eckit::Configuration& config ) {
-  eckit::MD5 h;
+    eckit::MD5 h;
 
-  // Add options and other unique keys
-  h << "flt" << config.getBool( "flt", false );
+    // Add options and other unique keys
+    h << "flt" << config.getBool( "flt", false );
 
-  return truncate( h.digest() );
+    return truncate( h.digest() );
 }
 
-}
+}  // namespace
 
 std::string LegendreCacheCreatorIFS::uid() const {
-  if( unique_identifier_.empty() ) {
-    std::ostringstream stream;
-    stream << "ifs-T" << truncation_ << "-";
-    if( grid::GaussianGrid( grid_ ) ) {
-      if( grid::RegularGaussianGrid( grid_ ) ) {
-        stream << "RegularGaussianN" << grid::GaussianGrid( grid_ ).N();
-      } else {
-        stream << "ReducedGaussianN" << grid::GaussianGrid( grid_ ).N() << "-PL";
-        stream << hash( grid_ );
-      }
-    } else if( grid::RegularLonLatGrid( grid_ ) ) {
-      auto g = grid::RegularLonLatGrid( grid_ );
-      if( g.standard() || g.shifted() ) {
-          stream << ( g.standard() ? "L" : "S" ) << g.nx() << "x" << g.ny();
-      } else {
-        // We cannot make more assumptions on reusability for different grids
-        stream << "grid-" << hash( grid_ );
-      }
-    } else {
-      // We cannot make more assumptions on reusability for different grids
-      stream << "grid-" << hash( grid_ );
+    if ( unique_identifier_.empty() ) {
+        std::ostringstream stream;
+        stream << "ifs-T" << truncation_ << "-";
+        if ( grid::GaussianGrid( grid_ ) ) {
+            if ( grid::RegularGaussianGrid( grid_ ) ) {
+                stream << "RegularGaussianN" << grid::GaussianGrid( grid_ ).N();
+            }
+            else {
+                stream << "ReducedGaussianN" << grid::GaussianGrid( grid_ ).N() << "-PL";
+                stream << hash( grid_ );
+            }
+        }
+        else if ( grid::RegularLonLatGrid( grid_ ) ) {
+            auto g = grid::RegularLonLatGrid( grid_ );
+            if ( g.standard() || g.shifted() ) { stream << ( g.standard() ? "L" : "S" ) << g.nx() << "x" << g.ny(); }
+            else {
+                // We cannot make more assumptions on reusability for different grids
+                stream << "grid-" << hash( grid_ );
+            }
+        }
+        else {
+            // We cannot make more assumptions on reusability for different grids
+            stream << "grid-" << hash( grid_ );
+        }
+        stream << "-OPT" << hash( config_ );
+        unique_identifier_ = stream.str();
     }
-    stream << "-OPT" << hash( config_ );
-    unique_identifier_ = stream.str();
-  }
-  return unique_identifier_;
+    return unique_identifier_;
 }
 
 LegendreCacheCreatorIFS::~LegendreCacheCreatorIFS() {}
 
 bool LegendreCacheCreatorIFS::supported() const {
-  if( grid::GaussianGrid( grid_ ) ) {
-    return true;
-  } else if( grid::RegularLonLatGrid( grid_ ) ) {
-    auto g = grid::RegularLonLatGrid( grid_ );
-    if( g.standard() || g.shifted() ) {
-        return true;
+    if ( grid::GaussianGrid( grid_ ) ) { return true; }
+    else if ( grid::RegularLonLatGrid( grid_ ) ) {
+        auto g = grid::RegularLonLatGrid( grid_ );
+        if ( g.standard() || g.shifted() ) { return true; }
     }
-  }
-  return false;
+    return false;
 }
 
-LegendreCacheCreatorIFS::LegendreCacheCreatorIFS( const Grid& grid, int truncation, const eckit::Configuration& config ) :
-  grid_(grid),
-  truncation_(truncation),
-  config_(config) {
-}
+LegendreCacheCreatorIFS::LegendreCacheCreatorIFS( const Grid& grid, int truncation,
+                                                  const eckit::Configuration& config ) :
+    grid_( grid ),
+    truncation_( truncation ),
+    config_( config ) {}
 
 void LegendreCacheCreatorIFS::create( const std::string& path ) const {
-  Trans( grid_, truncation_, config_ | option::type("ifs") | option::write_legendre( path ) );
+    Trans( grid_, truncation_, config_ | option::type( "ifs" ) | option::write_legendre( path ) );
 }
 
 Cache LegendreCacheCreatorIFS::create() const {
-  return TransCache(
-      Trans( grid_, truncation_, config_ | option::type("ifs") ) );
+    return TransCache( Trans( grid_, truncation_, config_ | option::type( "ifs" ) ) );
 }
 
 size_t LegendreCacheCreatorIFS::estimate() const {
-    return size_t(truncation_ * truncation_ * truncation_) / 2 * sizeof(double);
+    return size_t( truncation_ * truncation_ * truncation_ ) / 2 * sizeof( double );
 }
 
 
diff --git a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h
index e6629ebea..947f72599 100644
--- a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h
+++ b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h
@@ -10,8 +10,8 @@
 
 #pragma once
 
-#include "atlas/trans/LegendreCacheCreator.h"
 #include "atlas/grid/Grid.h"
+#include "atlas/trans/LegendreCacheCreator.h"
 #include "atlas/util/Config.h"
 
 //-----------------------------------------------------------------------------
@@ -31,7 +31,7 @@ class LegendreCacheCreatorIFS : public trans::LegendreCacheCreatorImpl {
 
     virtual std::string uid() const override;
 
-    virtual void create(const std::string &path) const override;
+    virtual void create( const std::string& path ) const override;
 
     virtual Cache create() const override;
 
diff --git a/src/atlas/trans/ifs/TransIFS.cc b/src/atlas/trans/ifs/TransIFS.cc
index 7532f237d..62a02568c 100644
--- a/src/atlas/trans/ifs/TransIFS.cc
+++ b/src/atlas/trans/ifs/TransIFS.cc
@@ -638,13 +638,14 @@ TransIFS::TransIFS( const Grid& grid, const eckit::Configuration& config ) :
     TransIFS( grid, /*grid-only*/ -1, config ) {}
 
 
-
-TransIFS::TransIFS( const Grid& grid, const Domain& domain, const long truncation, const eckit::Configuration& config ) :
+TransIFS::TransIFS( const Grid& grid, const Domain& domain, const long truncation,
+                    const eckit::Configuration& config ) :
     TransIFS( Cache(), grid, truncation, config ) {
     ASSERT( domain.global() );
 }
 
-TransIFS::TransIFS( const Cache& cache, const Grid& grid, const Domain& domain, const long truncation, const eckit::Configuration& config ) :
+TransIFS::TransIFS( const Cache& cache, const Grid& grid, const Domain& domain, const long truncation,
+                    const eckit::Configuration& config ) :
     TransIFS( cache, grid, truncation, config ) {
     ASSERT( domain.global() );
 }
diff --git a/src/atlas/trans/ifs/TransIFS.h b/src/atlas/trans/ifs/TransIFS.h
index ee6bf8909..1ae224e2e 100644
--- a/src/atlas/trans/ifs/TransIFS.h
+++ b/src/atlas/trans/ifs/TransIFS.h
@@ -78,7 +78,8 @@ class TransIFS : public trans::TransImpl {
     TransIFS( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() );
     TransIFS( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() );
     TransIFS( const Cache&, const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() );
-    TransIFS( const Cache&, const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() );
+    TransIFS( const Cache&, const Grid&, const Domain&, const long truncation,
+              const eckit::Configuration& = util::NoConfig() );
 
     virtual ~TransIFS();
     operator ::Trans_t*() const { return trans(); }
diff --git a/src/atlas/trans/local/LegendreCacheCreatorLocal.cc b/src/atlas/trans/local/LegendreCacheCreatorLocal.cc
index 4e3488aed..54e58060c 100644
--- a/src/atlas/trans/local/LegendreCacheCreatorLocal.cc
+++ b/src/atlas/trans/local/LegendreCacheCreatorLocal.cc
@@ -9,14 +9,14 @@
  */
 
 #include "atlas/trans/local/LegendreCacheCreatorLocal.h"
-#include <string>
 #include <sstream>
-#include "eckit/utils/MD5.h"
-#include "eckit/types/FloatCompare.h"
+#include <string>
 #include "atlas/grid.h"
 #include "atlas/option.h"
 #include "atlas/trans/Trans.h"
 #include "atlas/trans/local/TransLocal.h"
+#include "eckit/types/FloatCompare.h"
+#include "eckit/utils/MD5.h"
 
 namespace atlas {
 namespace trans {
@@ -28,104 +28,112 @@ static LegendreCacheCreatorBuilder<LegendreCacheCreatorLocal> builder( "local" )
 namespace {
 
 std::string truncate( const std::string& str ) {
-  const int trunc = std::min(10ul,str.size());
-  return str.substr( 0, trunc );
+    const int trunc = std::min( 10ul, str.size() );
+    return str.substr( 0, trunc );
 }
 
 std::string hash( const Grid& grid ) {
-  eckit::MD5 h;
-  if( grid::StructuredGrid( grid ) && not grid.projection() ) {
-    auto g = grid::StructuredGrid( grid );
-    h.add( g.y().data(), g.y().size() * sizeof(double) );
-  } else {
-    grid.hash( h );
-  }
-  return truncate( h.digest() );
+    eckit::MD5 h;
+    if ( grid::StructuredGrid( grid ) && not grid.projection() ) {
+        auto g = grid::StructuredGrid( grid );
+        h.add( g.y().data(), g.y().size() * sizeof( double ) );
+    }
+    else {
+        grid.hash( h );
+    }
+    return truncate( h.digest() );
 }
 
 std::string hash( const eckit::Configuration& config ) {
-  eckit::MD5 h;
+    eckit::MD5 h;
 
-  // Add options and other unique keys
-  h << "flt" << config.getBool( "flt", false );
+    // Add options and other unique keys
+    h << "flt" << config.getBool( "flt", false );
 
-  return truncate( h.digest() );
+    return truncate( h.digest() );
 }
 
-}
+}  // namespace
 
 std::string LegendreCacheCreatorLocal::uid() const {
-  if( unique_identifier_.empty() ) {
-    std::ostringstream stream;
-    auto give_up = [&]() {
-      // We cannot make more assumptions on reusability for different grids
-      stream << "grid-" << hash( grid_ );
-    };
-    stream << "local-T" << truncation_ << "-";
-    grid::StructuredGrid structured ( grid_ );
-    if( grid::GaussianGrid( grid_ ) ) {
-      // Same cache for any global Gaussian grid
-      stream << "GaussianN" << grid::GaussianGrid( grid_ ).N();
-    } else if( grid::RegularLonLatGrid( grid_ ) ) {
-      // Same cache for any global regular grid
-      auto g = grid::RegularLonLatGrid( grid_ );
-
-      const double dy_2 = 90. / double(g.ny());
-      bool shifted_lat = eckit::types::is_approximately_equal( g.y().front(), 90. - dy_2 ) &&
-                         eckit::types::is_approximately_equal( g.y().back(), -90. + dy_2 );
-      bool standard_lat = eckit::types::is_approximately_equal( g.y().front(), 90. ) &&
-                          eckit::types::is_approximately_equal( g.y().back(), -90. );
-
-      if( standard_lat ) {
-        stream << "L" << "-ny" << g.ny();
-      } else if( shifted_lat ) {
-        stream << "S" << "-ny" << g.ny();
-      } else { // I don't think we get here, but just in case, give up
-        give_up();
-      }
-    } else if ( grid::RegularGrid( grid_ ) && not grid_.projection() && structured.yspace().type() == "linear" ) {
-      RectangularDomain domain( grid_.domain() );
-      ASSERT( domain );
-      stream << "Regional";
-      stream << "-south" << domain.ymin();
-      stream << "-north" << domain.ymax();
-      stream << "-ny" << structured.ny();
-    } else { // It gets too complicated, so let's not be smart
-      give_up();
+    if ( unique_identifier_.empty() ) {
+        std::ostringstream stream;
+        auto give_up = [&]() {
+            // We cannot make more assumptions on reusability for different grids
+            stream << "grid-" << hash( grid_ );
+        };
+        stream << "local-T" << truncation_ << "-";
+        grid::StructuredGrid structured( grid_ );
+        if ( grid::GaussianGrid( grid_ ) ) {
+            // Same cache for any global Gaussian grid
+            stream << "GaussianN" << grid::GaussianGrid( grid_ ).N();
+        }
+        else if ( grid::RegularLonLatGrid( grid_ ) ) {
+            // Same cache for any global regular grid
+            auto g = grid::RegularLonLatGrid( grid_ );
+
+            const double dy_2 = 90. / double( g.ny() );
+            bool shifted_lat  = eckit::types::is_approximately_equal( g.y().front(), 90. - dy_2 ) &&
+                               eckit::types::is_approximately_equal( g.y().back(), -90. + dy_2 );
+            bool standard_lat = eckit::types::is_approximately_equal( g.y().front(), 90. ) &&
+                                eckit::types::is_approximately_equal( g.y().back(), -90. );
+
+            if ( standard_lat ) {
+                stream << "L"
+                       << "-ny" << g.ny();
+            }
+            else if ( shifted_lat ) {
+                stream << "S"
+                       << "-ny" << g.ny();
+            }
+            else {  // I don't think we get here, but just in case, give up
+                give_up();
+            }
+        }
+        else if ( grid::RegularGrid( grid_ ) && not grid_.projection() && structured.yspace().type() == "linear" ) {
+            RectangularDomain domain( grid_.domain() );
+            ASSERT( domain );
+            stream << "Regional";
+            stream << "-south" << domain.ymin();
+            stream << "-north" << domain.ymax();
+            stream << "-ny" << structured.ny();
+        }
+        else {  // It gets too complicated, so let's not be smart
+            give_up();
+        }
+        stream << "-OPT" << hash( config_ );
+        unique_identifier_ = stream.str();
     }
-    stream << "-OPT" << hash( config_ );
-    unique_identifier_ = stream.str();
-  }
-  return unique_identifier_;
+    return unique_identifier_;
 }
 
 LegendreCacheCreatorLocal::~LegendreCacheCreatorLocal() {}
 
-LegendreCacheCreatorLocal::LegendreCacheCreatorLocal( const Grid& grid, int truncation, const eckit::Configuration& config ) :
-  grid_(grid),
-  truncation_(truncation),
-  config_(config) {
-}
+LegendreCacheCreatorLocal::LegendreCacheCreatorLocal( const Grid& grid, int truncation,
+                                                      const eckit::Configuration& config ) :
+    grid_( grid ),
+    truncation_( truncation ),
+    config_( config ) {}
 
 bool LegendreCacheCreatorLocal::supported() const {
-  if( not grid::StructuredGrid( grid_ ) ) return false;
-  if( grid_.projection() ) return false;
-  return true;
+    if ( not grid::StructuredGrid( grid_ ) ) return false;
+    if ( grid_.projection() ) return false;
+    return true;
 }
 
 void LegendreCacheCreatorLocal::create( const std::string& path ) const {
-  Trans tmp( grid_, truncation_, config_ | option::type("local") | option::write_legendre( path ) );
+    Trans tmp( grid_, truncation_, config_ | option::type( "local" ) | option::write_legendre( path ) );
 }
 
 Cache LegendreCacheCreatorLocal::create() const {
-  util::Config export_legendre("export_legendre",true);
-  Trans tmp( grid_, truncation_, config_ | option::type("local") | export_legendre );
-  auto impl = dynamic_cast<const TransLocal*>( tmp.get() );
-  return impl->export_legendre_;
+    util::Config export_legendre( "export_legendre", true );
+    Trans tmp( grid_, truncation_, config_ | option::type( "local" ) | export_legendre );
+    auto impl = dynamic_cast<const TransLocal*>( tmp.get() );
+    return impl->export_legendre_;
 }
 
 size_t LegendreCacheCreatorLocal::estimate() const {
-    return size_t(truncation_ * truncation_ * truncation_) / 2 * sizeof(double);
+    return size_t( truncation_ * truncation_ * truncation_ ) / 2 * sizeof( double );
 }
 
 
diff --git a/src/atlas/trans/local/LegendreCacheCreatorLocal.h b/src/atlas/trans/local/LegendreCacheCreatorLocal.h
index b21237e5b..ff3d14752 100644
--- a/src/atlas/trans/local/LegendreCacheCreatorLocal.h
+++ b/src/atlas/trans/local/LegendreCacheCreatorLocal.h
@@ -10,8 +10,8 @@
 
 #pragma once
 
-#include "atlas/trans/LegendreCacheCreator.h"
 #include "atlas/grid/Grid.h"
+#include "atlas/trans/LegendreCacheCreator.h"
 #include "atlas/util/Config.h"
 
 //-----------------------------------------------------------------------------
@@ -31,7 +31,7 @@ class LegendreCacheCreatorLocal : public trans::LegendreCacheCreatorImpl {
 
     virtual std::string uid() const override;
 
-    virtual void create(const std::string &path) const override;
+    virtual void create( const std::string& path ) const override;
 
     virtual Cache create() const override;
 
diff --git a/src/atlas/trans/local/LegendrePolynomials.cc b/src/atlas/trans/local/LegendrePolynomials.cc
index da29d9704..38084bdd5 100644
--- a/src/atlas/trans/local/LegendrePolynomials.cc
+++ b/src/atlas/trans/local/LegendrePolynomials.cc
@@ -46,9 +46,9 @@ void compute_zfn( const size_t trc, double zfn[] ) {
 
 
 void compute_legendre_polynomials_lat( const size_t trc,  // truncation (in)
-                                           const double lat,  // latitude in radians (in)
-                                           double legpol[],   // legendre polynomials
-                                           double zfn[] ) {
+                                       const double lat,  // latitude in radians (in)
+                                       double legpol[],   // legendre polynomials
+                                       double zfn[] ) {
     auto idxmn  = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; };
     auto idxzfn = [&]( int jn, int jk ) { return jk + ( trc + 1 ) * jn; };
     {  //ATLAS_TRACE( "compute Legendre polynomials" );
@@ -205,9 +205,9 @@ void compute_legendre_polynomials(
 }
 
 void compute_legendre_polynomials_all( const size_t trc,     // truncation (in)
-                                           const int nlats,      // number of latitudes
-                                           const double lats[],  // latitudes in radians (in)
-                                           double legendre[] )   // legendre polynomials for all latitudes
+                                       const int nlats,      // number of latitudes
+                                       const double lats[],  // latitudes in radians (in)
+                                       double legendre[] )   // legendre polynomials for all latitudes
 {
     auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; };
     std::vector<double> legpol( legendre_size( trc ) );
diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc
index c34cc9f3f..6dc294c04 100644
--- a/src/atlas/trans/local/TransLocal.cc
+++ b/src/atlas/trans/local/TransLocal.cc
@@ -9,8 +9,8 @@
  */
 
 #include "atlas/trans/local/TransLocal.h"
-#include <cstdlib>
 #include <cmath>
+#include <cstdlib>
 #include "atlas/array.h"
 #include "atlas/option.h"
 #include "atlas/parallel/mpi/mpi.h"
@@ -173,7 +173,7 @@ int num_n( const int truncation, const int m, const bool symmetric ) {
 
 void alloc_aligned( double*& ptr, size_t n ) {
     const size_t alignment = 64 * sizeof( double );
-    ptr = (double*) aligned_alloc( alignment, sizeof( double ) * n );
+    ptr                    = (double*)aligned_alloc( alignment, sizeof( double ) * n );
 }
 
 void free_aligned( double*& ptr ) {
@@ -222,17 +222,17 @@ int fourier_truncation( const int truncation,    // truncation
 // --------------------------------------------------------------------------------------------------------------------
 
 const eckit::linalg::LinearAlgebra& linear_algebra_backend() {
-    if( eckit::linalg::LinearAlgebra::hasBackend("mkl") ) {
-        return eckit::linalg::LinearAlgebra::getBackend("mkl");
+    if ( eckit::linalg::LinearAlgebra::hasBackend( "mkl" ) ) {
+        return eckit::linalg::LinearAlgebra::getBackend( "mkl" );
     }
     // Default backend
     return eckit::linalg::LinearAlgebra::backend();
 }
 
 bool TransLocal::warning( const eckit::Configuration& config ) const {
-  int warning = warning_;
-  config.get("warning",warning);
-  return ( warning > 0 && grid_.size() >= warning );
+    int warning = warning_;
+    config.get( "warning", warning );
+    return ( warning > 0 && grid_.size() >= warning );
 }
 
 TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& domain, const long truncation,
@@ -246,8 +246,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
     fft_cache_( cache.fft().data() ),
     fft_cachesize_( cache.fft().size() ),
     linalg_( linear_algebra_backend() ),
-    warning_( TransParameters(config).warning() )
-{
+    warning_( TransParameters( config ).warning() ) {
     ATLAS_TRACE( "TransLocal constructor" );
     double fft_threshold = 0.0;  // fraction of latitudes of the full grid down to which FFT is used.
     // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine
@@ -289,7 +288,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
         }
 
         gridGlobal_ = grid;
-        if( not gridGlobal_.domain().global() ) {
+        if ( not gridGlobal_.domain().global() ) {
             if ( grid::RegularGrid( grid_ ) ) {
                 // non-nested regular grid
                 no_nest         = true;
@@ -300,7 +299,8 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
                 nlatsLegDomain_ = nlatsNH_;
                 gridGlobal_     = grid_;
                 useGlobalLeg    = false;
-            } else {
+            }
+            else {
                 NOTIMP;
                 // non-nested reduced grids are not supported
             }
@@ -338,7 +338,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
         // reduce truncation towards the pole for reduced meshes:
         nlat0_.resize( truncation_ + 1 );
         if ( no_nest ) {
-          for ( int j = 0; j <= truncation_; j++ ) {
+            for ( int j = 0; j <= truncation_; j++ ) {
                 nlat0_[j] = 0;
             }
         }
@@ -439,22 +439,22 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
                 // TODO: check this is all aligned...
             }
             else {
+                if ( TransParameters( config ).export_legendre() ) {
+                    ASSERT( not cache_.legendre() );
+                    export_legendre_    = LegendreCache( sizeof( double ) * ( size_sym + size_asym ) );
+                    legendre_cachesize_ = export_legendre_.legendre().size();
+                    legendre_cache_     = export_legendre_.legendre().data();
+                    legendre_cache_     = std::malloc( legendre_cachesize_ );
+                    ReadCache legendre( legendre_cache_ );
+                    legendre_sym_  = legendre.read<double>( size_sym );
+                    legendre_asym_ = legendre.read<double>( size_asym );
+                }
+                else {
+                    alloc_aligned( legendre_sym_, size_sym );
+                    alloc_aligned( legendre_asym_, size_asym );
+                }
 
-              if( TransParameters(config).export_legendre() ) {
-                  ASSERT( not cache_.legendre() );
-                  export_legendre_ = LegendreCache( sizeof(double) * ( size_sym + size_asym ) );
-                  legendre_cachesize_ = export_legendre_.legendre().size();
-                  legendre_cache_ = export_legendre_.legendre().data();
-                  legendre_cache_ = std::malloc( legendre_cachesize_ );
-                  ReadCache legendre( legendre_cache_ );
-                  legendre_sym_  = legendre.read<double>( size_sym );
-                  legendre_asym_ = legendre.read<double>( size_asym );
-              } else {
-                  alloc_aligned( legendre_sym_, size_sym );
-                  alloc_aligned( legendre_asym_, size_asym );
-              }
-
-              ATLAS_TRACE_SCOPE( "Legendre precomputations (structured)" ) {
+                ATLAS_TRACE_SCOPE( "Legendre precomputations (structured)" ) {
                     compute_legendre_polynomials( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_,
                                                   legendre_asym_, legendre_sym_begin_.data(),
                                                   legendre_asym_begin_.data() );
@@ -531,7 +531,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
             }
                 // other FFT implementations should be added with #elif statements
 #else
-            useFFT_ = false;                             // no FFT implemented => default to dgemm
+            useFFT_               = false;  // no FFT implemented => default to dgemm
             std::string file_path = TransParameters( config ).write_fft();
             if ( file_path.size() ) {
                 std::ofstream write( file_path );
@@ -542,7 +542,9 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
 #endif
         }
         if ( !useFFT_ ) {
-            Log::warning() << "WARNING: Spectral transform results may contain aliasing errors. This will be addressed soon." << std::endl;
+            Log::warning()
+                << "WARNING: Spectral transform results may contain aliasing errors. This will be addressed soon."
+                << std::endl;
 
             alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlonsMax );
 #if !TRANSLOCAL_DGEMM2
@@ -581,8 +583,10 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
         if ( unstruct_precomp_ ) {
             ATLAS_TRACE( "Legendre precomputations (unstructured)" );
 
-            if( warning() ) {
-              Log::warning() << "WARNING: Precomputations for spectral transforms could take a long time and consume a lot of memory (unstructured grid approach)! Results may contain aliasing errors." << std::endl;
+            if ( warning() ) {
+                Log::warning() << "WARNING: Precomputations for spectral transforms could take a long time and consume "
+                                  "a lot of memory (unstructured grid approach)! Results may contain aliasing errors."
+                               << std::endl;
             }
 
             std::vector<double> lats( grid_.size() );
@@ -594,7 +598,8 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
             compute_legendre_polynomials_all( truncation_, grid_.size(), lats.data(), legendre_ );
         }
         if ( TransParameters( config ).write_legendre().size() ) {
-            throw eckit::NotImplemented( "Caching for unstructured grids or structured grids with projections not yet implemented", Here() );
+            throw eckit::NotImplemented(
+                "Caching for unstructured grids or structured grids with projections not yet implemented", Here() );
         }
     }
 }  // namespace trans
@@ -604,10 +609,12 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
 TransLocal::TransLocal( const Grid& grid, const long truncation, const eckit::Configuration& config ) :
     TransLocal( Cache(), grid, grid.domain(), truncation, config ) {}
 
-TransLocal::TransLocal( const Grid& grid, const Domain& domain, const long truncation, const eckit::Configuration& config ) :
+TransLocal::TransLocal( const Grid& grid, const Domain& domain, const long truncation,
+                        const eckit::Configuration& config ) :
     TransLocal( Cache(), grid, domain, truncation, config ) {}
 
-TransLocal::TransLocal( const Cache& cache, const Grid& grid, const long truncation, const eckit::Configuration& config ) :
+TransLocal::TransLocal( const Cache& cache, const Grid& grid, const long truncation,
+                        const eckit::Configuration& config ) :
     TransLocal( cache, grid, grid.domain(), truncation, config ) {}
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -644,8 +651,7 @@ void TransLocal::invtrans( const Field& spfield, Field& gpfield, const eckit::Co
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocal::invtrans( const FieldSet& spfields, FieldSet& gpfields,
-                               const eckit::Configuration& config ) const {
+void TransLocal::invtrans( const FieldSet& spfields, FieldSet& gpfields, const eckit::Configuration& config ) const {
     NOTIMP;
 }
 
@@ -658,21 +664,21 @@ void TransLocal::invtrans_grad( const Field& spfield, Field& gradfield, const ec
 // --------------------------------------------------------------------------------------------------------------------
 
 void TransLocal::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields,
-                                    const eckit::Configuration& config ) const {
+                                const eckit::Configuration& config ) const {
     NOTIMP;
 }
 
 // --------------------------------------------------------------------------------------------------------------------
 
 void TransLocal::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind,
-                                           const eckit::Configuration& config ) const {
+                                       const eckit::Configuration& config ) const {
     NOTIMP;
 }
 
 // --------------------------------------------------------------------------------------------------------------------
 
 void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[],
-                               const eckit::Configuration& config ) const {
+                           const eckit::Configuration& config ) const {
     invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields, config );
 }
 
@@ -841,9 +847,8 @@ void TransLocal::invtrans_legendre( const int truncation, const int nlats, const
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocal::invtrans_fourier_regular( const int nlats, const int nlons, const int nb_fields,
-                                                   double scl_fourier[], double gp_fields[],
-                                                   const eckit::Configuration& config ) const {
+void TransLocal::invtrans_fourier_regular( const int nlats, const int nlons, const int nb_fields, double scl_fourier[],
+                                           double gp_fields[], const eckit::Configuration& config ) const {
     // Fourier transformation:
     if ( useFFT_ ) {
 #if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2
@@ -926,8 +931,8 @@ void TransLocal::invtrans_fourier_regular( const int nlats, const int nlons, con
 // --------------------------------------------------------------------------------------------------------------------
 
 void TransLocal::invtrans_fourier_reduced( const int nlats, const grid::StructuredGrid g, const int nb_fields,
-                                                   double scl_fourier[], double gp_fields[],
-                                                   const eckit::Configuration& config ) const {
+                                           double scl_fourier[], double gp_fields[],
+                                           const eckit::Configuration& config ) const {
     // Fourier transformation:
     int nlonsMax = g.nxmax();
     if ( useFFT_ ) {
@@ -984,14 +989,13 @@ void TransLocal::invtrans_fourier_reduced( const int nlats, const grid::Structur
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocal::invtrans_unstructured_precomp( const int truncation, const int nb_fields,
-                                                    const int nb_vordiv_fields, const double scalar_spectra[],
-                                                    double gp_fields[], const eckit::Configuration& config ) const {
-
+void TransLocal::invtrans_unstructured_precomp( const int truncation, const int nb_fields, const int nb_vordiv_fields,
+                                                const double scalar_spectra[], double gp_fields[],
+                                                const eckit::Configuration& config ) const {
     ATLAS_TRACE( "invtrans_uv unstructured" );
 
-    const int nlats                 = grid_.size();
-    const int size_fourier          = nb_fields * 2;
+    const int nlats        = grid_.size();
+    const int size_fourier = nb_fields * 2;
     double* legendre;
     double* scl_fourier;
     double* scl_fourier_tp;
@@ -1018,7 +1022,7 @@ void TransLocal::invtrans_unstructured_precomp( const int truncation, const int
     {
         ATLAS_TRACE( "Inverse Fourier Transform (NoFFT)" );
         int ip = 0;
-        for( const PointLonLat p : grid_.lonlat() ) {
+        for ( const PointLonLat p : grid_.lonlat() ) {
             const double lon = p.lon() * util::Constants::degreesToRadians();
             const double lat = p.lat() * util::Constants::degreesToRadians();
             {
@@ -1078,12 +1082,14 @@ void TransLocal::invtrans_unstructured_precomp( const int truncation, const int
 // --------------------------------------------------------------------------------------------------------------------
 
 void TransLocal::invtrans_unstructured( const int truncation, const int nb_fields, const int nb_vordiv_fields,
-                                            const double scalar_spectra[], double gp_fields[],
-                                            const eckit::Configuration& config ) const {
+                                        const double scalar_spectra[], double gp_fields[],
+                                        const eckit::Configuration& config ) const {
     ATLAS_TRACE( "invtrans_unstructured" );
 
-    if( warning(config) ) {
-      Log::warning() << "WARNING: Spectral transforms could take a long time (unstructured grid approach). Results may contain aliasing errors." << std::endl;
+    if ( warning( config ) ) {
+        Log::warning() << "WARNING: Spectral transforms could take a long time (unstructured grid approach). Results "
+                          "may contain aliasing errors."
+                       << std::endl;
     }
 
     double* zfn;
@@ -1187,8 +1193,8 @@ void TransLocal::invtrans_unstructured( const int truncation, const int nb_field
 // Andreas Mueller *ECMWF*
 //
 void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
-                                  const double scalar_spectra[], double gp_fields[],
-                                  const eckit::Configuration& config ) const {
+                              const double scalar_spectra[], double gp_fields[],
+                              const eckit::Configuration& config ) const {
     if ( nb_scalar_fields > 0 ) {
         int nb_fields = nb_scalar_fields;
 
@@ -1250,8 +1256,8 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields,
 // --------------------------------------------------------------------------------------------------------------------
 
 void TransLocal::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[],
-                               const double divergence_spectra[], double gp_fields[],
-                               const eckit::Configuration& config ) const {
+                           const double divergence_spectra[], double gp_fields[],
+                           const eckit::Configuration& config ) const {
     invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config );
 }
 
@@ -1277,8 +1283,8 @@ void extend_truncation( const int old_truncation, const int nb_fields, const dou
 // --------------------------------------------------------------------------------------------------------------------
 
 void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
-                               const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
-                               const eckit::Configuration& config ) const {
+                           const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
+                           const eckit::Configuration& config ) const {
     ATLAS_TRACE( "TransLocal::invtrans" );
     int nb_gp              = grid_.size();
     int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
@@ -1291,10 +1297,8 @@ void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spect
         {
             ATLAS_TRACE( "extend vordiv" );
             // increase truncation in vorticity_spectra and divergence_spectra:
-            extend_truncation( truncation_, nb_vordiv_fields, vorticity_spectra,
-                               vorticity_spectra_extended.data() );
-            extend_truncation( truncation_, nb_vordiv_fields, divergence_spectra,
-                               divergence_spectra_extended.data() );
+            extend_truncation( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() );
+            extend_truncation( truncation_, nb_vordiv_fields, divergence_spectra, divergence_spectra_extended.data() );
         }
 
         {
@@ -1326,8 +1330,7 @@ void TransLocal::dirtrans( const Field& gpfield, Field& spfield, const eckit::Co
 
 // --------------------------------------------------------------------------------------------------------------------
 
-void TransLocal::dirtrans( const FieldSet& gpfields, FieldSet& spfields,
-                               const eckit::Configuration& config ) const {
+void TransLocal::dirtrans( const FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config ) const {
     NOTIMP;
     // Not implemented and not planned.
     // Use the TransIFS implementation instead.
@@ -1336,7 +1339,7 @@ void TransLocal::dirtrans( const FieldSet& gpfields, FieldSet& spfields,
 // --------------------------------------------------------------------------------------------------------------------
 
 void TransLocal::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
-                                           const eckit::Configuration& config ) const {
+                                       const eckit::Configuration& config ) const {
     NOTIMP;
     // Not implemented and not planned.
     // Use the TransIFS implementation instead.
@@ -1345,7 +1348,7 @@ void TransLocal::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field&
 // --------------------------------------------------------------------------------------------------------------------
 
 void TransLocal::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
-                               const eckit::Configuration& ) const {
+                           const eckit::Configuration& ) const {
     NOTIMP;
     // Not implemented and not planned.
     // Use the TransIFS implementation instead.
@@ -1354,7 +1357,7 @@ void TransLocal::dirtrans( const int nb_fields, const double scalar_fields[], do
 // --------------------------------------------------------------------------------------------------------------------
 
 void TransLocal::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
-                               double divergence_spectra[], const eckit::Configuration& ) const {
+                           double divergence_spectra[], const eckit::Configuration& ) const {
     NOTIMP;
     // Not implemented and not planned.
     // Use the TransIFS implementation instead.
diff --git a/src/atlas/trans/local/TransLocal.h b/src/atlas/trans/local/TransLocal.h
index abbee7c10..29005083f 100644
--- a/src/atlas/trans/local/TransLocal.h
+++ b/src/atlas/trans/local/TransLocal.h
@@ -41,12 +41,12 @@ namespace atlas {
 namespace trans {
 
 class LegendreCacheCreatorLocal;
-int fourier_truncation( const int truncation,    // truncation
-                        const int nx,            // number of longitudes
-                        const int nxmax,         // maximum nx
-                        const int ndgl,          // number of latitudes
-                        const double lat,        // latitude in radian
-                        const bool regular );    // regular grid
+int fourier_truncation( const int truncation,  // truncation
+                        const int nx,          // number of longitudes
+                        const int nxmax,       // maximum nx
+                        const int ndgl,        // number of latitudes
+                        const double lat,      // latitude in radian
+                        const bool regular );  // regular grid
 
 //-----------------------------------------------------------------------------
 
@@ -66,10 +66,9 @@ class TransLocal : public trans::TransImpl {
 public:
     TransLocal( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() );
     TransLocal( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() );
-    TransLocal( const Cache&, const Grid&, const long truncation,
-                    const eckit::Configuration& = util::NoConfig() );
+    TransLocal( const Cache&, const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() );
     TransLocal( const Cache&, const Grid&, const Domain&, const long truncation,
-                    const eckit::Configuration& = util::NoConfig() );
+                const eckit::Configuration& = util::NoConfig() );
 
     virtual ~TransLocal();
 
@@ -133,16 +132,14 @@ class TransLocal : public trans::TransImpl {
 #endif
     };
 
-    void invtrans_legendre( const int truncation, const int nlats, const int nb_fields,
-                            const double scalar_spectra[], double scl_fourier[],
-                            const eckit::Configuration& config ) const;
+    void invtrans_legendre( const int truncation, const int nlats, const int nb_fields, const double scalar_spectra[],
+                            double scl_fourier[], const eckit::Configuration& config ) const;
 
     void invtrans_fourier_regular( const int nlats, const int nlons, const int nb_fields, double scl_fourier[],
                                    double gp_fields[], const eckit::Configuration& config ) const;
 
     void invtrans_fourier_reduced( const int nlats, const grid::StructuredGrid g, const int nb_fields,
-                                   double scl_fourier[], double gp_fields[],
-                                   const eckit::Configuration& config ) const;
+                                   double scl_fourier[], double gp_fields[], const eckit::Configuration& config ) const;
 
     void invtrans_unstructured_precomp( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields,
                                         const double scalar_spectra[], double gp_fields[],
@@ -158,7 +155,7 @@ class TransLocal : public trans::TransImpl {
 
     bool warning( const eckit::Configuration& = util::NoConfig() ) const;
 
-friend class LegendreCacheCreatorLocal;
+    friend class LegendreCacheCreatorLocal;
 
 private:
     Grid grid_;
diff --git a/src/atlas/trans/local/VorDivToUVLocal.cc b/src/atlas/trans/local/VorDivToUVLocal.cc
index 9d23a9db7..647fe6382 100644
--- a/src/atlas/trans/local/VorDivToUVLocal.cc
+++ b/src/atlas/trans/local/VorDivToUVLocal.cc
@@ -165,8 +165,8 @@ void vd2uvopt3( const int truncation,               // truncation
 }
 
 void VorDivToUVLocal::execute( const int nb_coeff, const int nb_fields, const double vorticity[],
-                                   const double divergence[], double U[], double V[],
-                                   const eckit::Configuration& config ) const {
+                               const double divergence[], double U[], double V[],
+                               const eckit::Configuration& config ) const {
     for ( int jm = 0; jm <= truncation_; ++jm ) {
         vd2uvopt3( truncation_, jm, nb_fields, vorticity, divergence, U, V, config );
     }
diff --git a/src/atlas/util/Earth.h b/src/atlas/util/Earth.h
index 43b69c5b9..9e73a08b0 100644
--- a/src/atlas/util/Earth.h
+++ b/src/atlas/util/Earth.h
@@ -34,7 +34,7 @@ struct DatumWGS84SemiMajorAxis {
 
 //------------------------------------------------------------------------------------------------------
 
-typedef eckit::geometry::SphereT< DatumIFS > Earth;
+typedef eckit::geometry::SphereT<DatumIFS> Earth;
 
 //------------------------------------------------------------------------------------------------------
 
diff --git a/src/atlas/util/Rotation.cc b/src/atlas/util/Rotation.cc
index 4fe6fad65..1ea3ed83c 100644
--- a/src/atlas/util/Rotation.cc
+++ b/src/atlas/util/Rotation.cc
@@ -13,10 +13,10 @@
 #include <cmath>
 #include <iostream>
 
-#include "eckit/config/Parametrisation.h"
 #include "atlas/util/Constants.h"
 #include "atlas/util/CoordinateEnums.h"
 #include "atlas/util/UnitSphere.h"
+#include "eckit/config/Parametrisation.h"
 
 // Temporary option to activate implementation by RMI during ESCAPE
 #define OLD_IMPLEMENTATION 0
diff --git a/src/atlas/util/SphericalPolygon.cc b/src/atlas/util/SphericalPolygon.cc
index 20cf0abdb..2b1c036e7 100644
--- a/src/atlas/util/SphericalPolygon.cc
+++ b/src/atlas/util/SphericalPolygon.cc
@@ -50,9 +50,7 @@ bool SphericalPolygon::contains( const PointLonLat& P ) const {
             const double lat = util::Earth::greatCircleLatitudeGivenLongitude( A, B, P.lon() );
 
             ASSERT( !std::isnan( lat ) );
-            if ( eckit::types::is_approximately_equal( P.lat(), lat ) ) {
-                return true;
-            }
+            if ( eckit::types::is_approximately_equal( P.lat(), lat ) ) { return true; }
 
             wn += ( P.lat() > lat ? -1 : 1 ) * ( APB ? -1 : 1 );
         }
diff --git a/src/atlas/util/UnitSphere.h b/src/atlas/util/UnitSphere.h
index 3a49f230d..de3889222 100644
--- a/src/atlas/util/UnitSphere.h
+++ b/src/atlas/util/UnitSphere.h
@@ -23,5 +23,5 @@ using eckit::geometry::UnitSphere;
 
 //------------------------------------------------------------------------------------------------------
 
-} // namespace util
-} // namespace atlas
+}  // namespace util
+}  // namespace atlas
diff --git a/src/sandbox/CMakeLists.txt b/src/sandbox/CMakeLists.txt
index f4714d0e4..bc5e1748c 100644
--- a/src/sandbox/CMakeLists.txt
+++ b/src/sandbox/CMakeLists.txt
@@ -7,7 +7,6 @@
 # does it submit to any jurisdiction.
 
 add_subdirectory( fortran_submodule )
-add_subdirectory( fortran_modinc )
 add_subdirectory( fortran_object )
 add_subdirectory( example_fortran )
 add_subdirectory( fortran_acc_fields )
diff --git a/src/sandbox/fortran_modinc/CMakeLists.txt b/src/sandbox/fortran_modinc/CMakeLists.txt
deleted file mode 100644
index 7384f276c..000000000
--- a/src/sandbox/fortran_modinc/CMakeLists.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-
-if( CMAKE_Fortran_COMPILER_LOADED )
-
-add_custom_target( atlas_sandbox_fortran_modinc_includes SOURCES
-       mod1.h
-       mod1.f
-       mod2.h
-       mod2.f
-)
-
-ecbuild_add_library( TARGET atlas_sandbox_fortran_modinc
-  CONDITION ON
-  SOURCES sb_modinc.F90
-)
-
-endif()
diff --git a/src/sandbox/fortran_modinc/mod1.f b/src/sandbox/fortran_modinc/mod1.f
deleted file mode 100644
index 9c5b402e3..000000000
--- a/src/sandbox/fortran_modinc/mod1.f
+++ /dev/null
@@ -1,6 +0,0 @@
-! (C) Copyright 2013-2015 ECMWF.
-
-subroutine do_something_with_T2(v2)
-    type(T2) :: v2
-    v2%cpp_object_ptr = 2
-end subroutine
diff --git a/src/sandbox/fortran_modinc/mod1.h b/src/sandbox/fortran_modinc/mod1.h
deleted file mode 100644
index d6e3d6e3a..000000000
--- a/src/sandbox/fortran_modinc/mod1.h
+++ /dev/null
@@ -1,4 +0,0 @@
-! (C) Copyright 2013 ECMWF.
-
-type, extends( fckit_object ), public :: T1
-end type
diff --git a/src/sandbox/fortran_modinc/mod2.f b/src/sandbox/fortran_modinc/mod2.f
deleted file mode 100644
index 20b0a4587..000000000
--- a/src/sandbox/fortran_modinc/mod2.f
+++ /dev/null
@@ -1,6 +0,0 @@
-! (C) Copyright 2013-2015 ECMWF.
-
-subroutine do_something_with_T1(v1)
-    type(T1) :: v1
-    v1%cpp_object_ptr = 1
-end subroutine
diff --git a/src/sandbox/fortran_modinc/mod2.h b/src/sandbox/fortran_modinc/mod2.h
deleted file mode 100644
index be0fca4a2..000000000
--- a/src/sandbox/fortran_modinc/mod2.h
+++ /dev/null
@@ -1,4 +0,0 @@
-! (C) Copyright 2013 ECMWF.
-
-type, extends( fckit_object ), public :: T2
-end type
diff --git a/src/sandbox/fortran_modinc/sb_modinc.F90 b/src/sandbox/fortran_modinc/sb_modinc.F90
deleted file mode 100644
index 28ae9f7e7..000000000
--- a/src/sandbox/fortran_modinc/sb_modinc.F90
+++ /dev/null
@@ -1,26 +0,0 @@
-! (C) Copyright 2013-2015 ECMWF.
-
-#include "atlas/atlas_f.h"
-
-module sb_mod
-
-type, public :: fckit_object
-  integer,public :: cpp_object_ptr
-end type
-
-#include "mod1.h"
-#include "mod2.h"
-contains
-#include "mod1.f"
-#include "mod2.f"
-end module sb_mod
-
-
-program sb_program
-use sb_mod
-type(T1) :: v1
-type(T2) :: v2
-integer :: res
-res = v1%cpp_object_ptr + v2%cpp_object_ptr
-end program sb_program
-
diff --git a/src/tests/AtlasTestEnvironment.h b/src/tests/AtlasTestEnvironment.h
index 9d529329d..9da6d7e3a 100644
--- a/src/tests/AtlasTestEnvironment.h
+++ b/src/tests/AtlasTestEnvironment.h
@@ -50,8 +50,8 @@ namespace test {
         if ( atlas::test::barrier_timeout( atlas::test::ATLAS_MPI_BARRIER_TIMEOUT() ) ) {                            \
             atlas::Log::warning() << "\nWARNING: Test \"" << description                                             \
                                   << "\" failed with MPI deadlock.  (${ATLAS_MPI_BARRIER_TIMEOUT}="                  \
-                                  << atlas::test::ATLAS_MPI_BARRIER_TIMEOUT()                                        \
-                                  << ").\nCalling MPI_Abort..." << std::endl;                                        \
+                                  << atlas::test::ATLAS_MPI_BARRIER_TIMEOUT() << ").\nCalling MPI_Abort..."          \
+                                  << std::endl;                                                                      \
             eckit::mpi::comm().abort();                                                                              \
         }                                                                                                            \
     }                                                                                                                \
diff --git a/src/tests/array/test_array.cc b/src/tests/array/test_array.cc
index a093e56c2..4b406762e 100644
--- a/src/tests/array/test_array.cc
+++ b/src/tests/array/test_array.cc
@@ -570,10 +570,11 @@ CASE( "test_wrap" ) {
 
 CASE( "test_acc_map" ) {
     Array* ds = Array::create<double>( 2, 3, 4 );
-    if( ATLAS_HAVE_ACC ) {
+    if ( ATLAS_HAVE_ACC ) {
         EXPECT( ds->accMap() == true );
         EXPECT( ds->accMap() == true );
-    } else {
+    }
+    else {
         EXPECT( ds->accMap() == false );
     }
 }
diff --git a/src/tests/array/test_table.cc b/src/tests/array/test_table.cc
index 4f5b81f16..6e94710a9 100644
--- a/src/tests/array/test_table.cc
+++ b/src/tests/array/test_table.cc
@@ -9,8 +9,8 @@
  */
 
 #include "atlas/array/Table.h"
-#include "atlas/runtime/Log.h"
 #include "atlas/library/defines.h"
+#include "atlas/runtime/Log.h"
 #include "tests/AtlasTestEnvironment.h"
 
 using namespace atlas::array;
diff --git a/src/tests/functionspace/test_pointcloud.cc b/src/tests/functionspace/test_pointcloud.cc
index 636a01257..c01fc6d99 100644
--- a/src/tests/functionspace/test_pointcloud.cc
+++ b/src/tests/functionspace/test_pointcloud.cc
@@ -8,8 +8,8 @@
  * nor does it submit to any jurisdiction.
  */
 
-#include "atlas/functionspace/PointCloud.h"
 #include "atlas/array.h"
+#include "atlas/functionspace/PointCloud.h"
 
 #include "tests/AtlasTestEnvironment.h"
 
diff --git a/src/tests/grid/test_field.cc b/src/tests/grid/test_field.cc
index 0f52d9760..30bc597c2 100644
--- a/src/tests/grid/test_field.cc
+++ b/src/tests/grid/test_field.cc
@@ -12,7 +12,6 @@
 #include "eckit/runtime/Tool.h"
 #include "eckit/value/CompositeParams.h"
 
-#include "atlas/runtime/Log.h"
 #include "atlas/array/DataType.h"
 #include "atlas/array/MakeView.h"
 #include "atlas/field/FieldSet.h"
@@ -24,6 +23,7 @@
 #include "atlas/mesh/Nodes.h"
 #include "atlas/meshgenerator/DelaunayMeshGenerator.h"
 #include "atlas/parallel/mpi/mpi.h"
+#include "atlas/runtime/Log.h"
 
 #include "tests/AtlasTestEnvironment.h"
 
diff --git a/src/tests/grid/test_grid_ptr.cc b/src/tests/grid/test_grid_ptr.cc
index 224c3a9fd..4cf73042d 100644
--- a/src/tests/grid/test_grid_ptr.cc
+++ b/src/tests/grid/test_grid_ptr.cc
@@ -13,11 +13,11 @@
 #include <sstream>
 
 #include "atlas/grid/Grid.h"
-#include "atlas/runtime/Log.h"
-#include "atlas/util/Config.h"
 #include "atlas/mesh/Mesh.h"
 #include "atlas/meshgenerator/StructuredMeshGenerator.h"
 #include "atlas/output/Gmsh.h"
+#include "atlas/runtime/Log.h"
+#include "atlas/util/Config.h"
 
 #include "tests/AtlasTestEnvironment.h"
 
diff --git a/src/tests/grid/test_state.cc b/src/tests/grid/test_state.cc
index e828014d6..9a61d5c2a 100644
--- a/src/tests/grid/test_state.cc
+++ b/src/tests/grid/test_state.cc
@@ -16,7 +16,6 @@
 #include "eckit/parser/JSON.h"
 #include "eckit/parser/JSONParser.h"
 
-#include "atlas/library/config.h"
 #include "atlas/array/ArrayView.h"
 #include "atlas/array/DataType.h"
 #include "atlas/array/MakeView.h"
@@ -24,6 +23,7 @@
 #include "atlas/field/State.h"
 #include "atlas/grid/Grid.h"
 #include "atlas/library/Library.h"
+#include "atlas/library/config.h"
 #include "atlas/mesh/Mesh.h"
 #include "atlas/runtime/Log.h"
 
diff --git a/src/tests/interpolation/test_interpolation_finite_element.cc b/src/tests/interpolation/test_interpolation_finite_element.cc
index dd7718829..2a3c614a0 100644
--- a/src/tests/interpolation/test_interpolation_finite_element.cc
+++ b/src/tests/interpolation/test_interpolation_finite_element.cc
@@ -12,9 +12,9 @@
 
 #include "eckit/types/FloatCompare.h"
 
-#include "atlas/functionspace/PointCloud.h"
 #include "atlas/array.h"
 #include "atlas/functionspace.h"
+#include "atlas/functionspace/PointCloud.h"
 #include "atlas/grid.h"
 #include "atlas/interpolation.h"
 #include "atlas/mesh.h"
diff --git a/src/tests/io/test_gmsh.cc b/src/tests/io/test_gmsh.cc
index bf96399ff..016b96cc5 100644
--- a/src/tests/io/test_gmsh.cc
+++ b/src/tests/io/test_gmsh.cc
@@ -12,8 +12,8 @@
 #include "atlas/output/Gmsh.h"
 #include "atlas/output/Output.h"
 
-#include "tests/TestMeshes.h"
 #include "tests/AtlasTestEnvironment.h"
+#include "tests/TestMeshes.h"
 
 namespace atlas {
 namespace test {
diff --git a/src/tests/io/test_pointcloud_io.cc b/src/tests/io/test_pointcloud_io.cc
index 990b3d6ed..56c86dace 100644
--- a/src/tests/io/test_pointcloud_io.cc
+++ b/src/tests/io/test_pointcloud_io.cc
@@ -15,7 +15,6 @@
 #include "eckit/memory/ScopedPtr.h"
 #include "eckit/types/FloatCompare.h"
 
-#include "atlas/library/config.h"
 #include "atlas/array/MakeView.h"
 #include "atlas/field/Field.h"
 #include "atlas/field/FieldSet.h"
@@ -23,6 +22,7 @@
 #include "atlas/functionspace/NodeColumns.h"
 #include "atlas/grid/Grid.h"
 #include "atlas/grid/detail/grid/Unstructured.h"
+#include "atlas/library/config.h"
 #include "atlas/mesh/Mesh.h"
 #include "atlas/mesh/Nodes.h"
 #include "atlas/output/detail/PointCloudIO.h"
diff --git a/src/tests/mesh/test_accumulate_facets.cc b/src/tests/mesh/test_accumulate_facets.cc
index ea80c4bc5..d02eecce6 100644
--- a/src/tests/mesh/test_accumulate_facets.cc
+++ b/src/tests/mesh/test_accumulate_facets.cc
@@ -8,13 +8,13 @@
  * nor does it submit to any jurisdiction.
  */
 
-#include "atlas/library/config.h"
-#include "atlas/library/Library.h"
-#include "atlas/mesh/detail/AccumulateFacets.h"
 #include "atlas/grid/Grid.h"
+#include "atlas/library/Library.h"
+#include "atlas/library/config.h"
 #include "atlas/mesh/HybridElements.h"
 #include "atlas/mesh/Mesh.h"
 #include "atlas/mesh/actions/BuildEdges.h"
+#include "atlas/mesh/detail/AccumulateFacets.h"
 #include "atlas/meshgenerator/StructuredMeshGenerator.h"
 #include "atlas/util/Unique.h"
 
diff --git a/src/tests/mesh/test_connectivity.cc b/src/tests/mesh/test_connectivity.cc
index ce887a8be..81356dd94 100644
--- a/src/tests/mesh/test_connectivity.cc
+++ b/src/tests/mesh/test_connectivity.cc
@@ -8,10 +8,10 @@
  * nor does it submit to any jurisdiction.
  */
 
+#include "atlas/library/defines.h"
 #include "atlas/mesh/Connectivity.h"
 #include "atlas/runtime/Log.h"
 #include "atlas/runtime/Trace.h"
-#include "atlas/library/defines.h"
 
 #include "tests/AtlasTestEnvironment.h"
 
diff --git a/src/tests/mesh/test_distmesh.cc b/src/tests/mesh/test_distmesh.cc
index 92c1e81e6..e1bf2bed3 100644
--- a/src/tests/mesh/test_distmesh.cc
+++ b/src/tests/mesh/test_distmesh.cc
@@ -31,8 +31,8 @@
 #include "atlas/runtime/Log.h"
 #include "atlas/util/CoordinateEnums.h"
 
-#include "tests/TestMeshes.h"
 #include "tests/AtlasTestEnvironment.h"
+#include "tests/TestMeshes.h"
 
 using namespace atlas;
 using namespace atlas::output;
diff --git a/src/tests/mesh/test_elements.cc b/src/tests/mesh/test_elements.cc
index 8d6c296e0..ef66b3ef1 100644
--- a/src/tests/mesh/test_elements.cc
+++ b/src/tests/mesh/test_elements.cc
@@ -15,17 +15,17 @@
 #include "eckit/exception/Exceptions.h"
 #include "eckit/memory/ScopedPtr.h"
 
-#include "atlas/library/config.h"
 #include "atlas/field/Field.h"
+#include "atlas/grid/Grid.h"
 #include "atlas/library/Library.h"
+#include "atlas/library/config.h"
 #include "atlas/mesh/Connectivity.h"
 #include "atlas/mesh/ElementType.h"
 #include "atlas/mesh/Elements.h"
-#include "atlas/mesh/Nodes.h"
-#include "atlas/runtime/Log.h"
-#include "atlas/grid/Grid.h"
 #include "atlas/mesh/Mesh.h"
+#include "atlas/mesh/Nodes.h"
 #include "atlas/meshgenerator/StructuredMeshGenerator.h"
+#include "atlas/runtime/Log.h"
 
 #include "tests/AtlasTestEnvironment.h"
 
diff --git a/src/tests/mesh/test_halo.cc b/src/tests/mesh/test_halo.cc
index 8a5731158..c08c9f3dc 100644
--- a/src/tests/mesh/test_halo.cc
+++ b/src/tests/mesh/test_halo.cc
@@ -32,8 +32,8 @@
 #include "atlas/util/MicroDeg.h"
 #include "atlas/util/Unique.h"
 
-#include "tests/TestMeshes.h"
 #include "tests/AtlasTestEnvironment.h"
+#include "tests/TestMeshes.h"
 
 using namespace atlas::output;
 using namespace atlas::util;
diff --git a/src/tests/parallel/test_haloexchange.cc b/src/tests/parallel/test_haloexchange.cc
index de2feb678..cd7d1b7c8 100644
--- a/src/tests/parallel/test_haloexchange.cc
+++ b/src/tests/parallel/test_haloexchange.cc
@@ -43,12 +43,12 @@ size_t eval_idx( size_t pos, std::array<size_t, Rank>& strides, FirstDim first )
 
 template <int Rank, typename FirstDim, typename SecondDim>
 size_t eval_idx( size_t pos, std::array<size_t, Rank>& strides, FirstDim first, SecondDim second ) {
-    return first * strides[pos] + eval_idx<Rank>( pos+1, strides, second );
+    return first * strides[pos] + eval_idx<Rank>( pos + 1, strides, second );
 }
 
 template <int Rank, typename FirstDim, typename SecondDim, typename ThirdDim>
 size_t eval_idx( size_t pos, std::array<size_t, Rank>& strides, FirstDim first, SecondDim second, ThirdDim third ) {
-    return first * strides[pos] + eval_idx<Rank>( pos+1, strides, second, third );
+    return first * strides[pos] + eval_idx<Rank>( pos + 1, strides, second, third );
 }
 
 template <typename DATA_TYPE, int Rank, int Dim>
diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc
index e505eba46..6220706ec 100644
--- a/src/tests/trans/test_trans_localcache.cc
+++ b/src/tests/trans/test_trans_localcache.cc
@@ -14,13 +14,13 @@
 #include "eckit/utils/MD5.h"
 
 #include "atlas/grid.h"
-#include "atlas/option.h"
 #include "atlas/library/Library.h"
 #include "atlas/meshgenerator/StructuredMeshGenerator.h"
+#include "atlas/option.h"
 #include "atlas/parallel/mpi/mpi.h"
 #include "atlas/runtime/Trace.h"
-#include "atlas/trans/Trans.h"
 #include "atlas/trans/LegendreCacheCreator.h"
+#include "atlas/trans/Trans.h"
 #include "atlas/util/Constants.h"
 
 #include "tests/AtlasTestEnvironment.h"
@@ -33,23 +33,23 @@ namespace test {
 struct AtlasTransEnvironment : public AtlasTestEnvironment {
     AtlasTransEnvironment( int argc, char* argv[] ) : AtlasTestEnvironment( argc, argv ) {
         trans::Trans::backend( "local" );
-        trans::Trans::config( option::warning(1) );
+        trans::Trans::config( option::warning( 1 ) );
     }
 };
 
-using trans::Trans;
+using grid::GaussianGrid;
+using grid::StructuredGrid;
+using trans::Cache;
 using trans::LegendreCache;
 using trans::LegendreCacheCreator;
-using trans::Cache;
-using grid::StructuredGrid;
-using grid::GaussianGrid;
-using XSpace = StructuredGrid::XSpace;
-using YSpace = StructuredGrid::YSpace;
+using trans::Trans;
+using XSpace        = StructuredGrid::XSpace;
+using YSpace        = StructuredGrid::YSpace;
 using LinearSpacing = grid::LinearSpacing;
 
-eckit::PathName CacheFile(const std::string& path) {
-    eckit::PathName cachefile(path);
-    if( cachefile.exists() ) cachefile.unlink();
+eckit::PathName CacheFile( const std::string& path ) {
+    eckit::PathName cachefile( path );
+    if ( cachefile.exists() ) cachefile.unlink();
     return cachefile;
 }
 
@@ -58,178 +58,171 @@ std::string hash( const trans::Cache& c ) {
 }
 
 std::string hash( const eckit::PathName& f ) {
-    return hash( LegendreCache(f) );
+    return hash( LegendreCache( f ) );
 }
 
-std::string F(int n)    { return "F"   +std::to_string(n); }
-std::string O(int n)    { return "O"   +std::to_string(n); }
-std::string N(int n)    { return "N"   +std::to_string(n); }
-std::string L(int n)    { return "L"   +std::to_string(n); }
-std::string S(int n)    { return "S"   +std::to_string(n); }
-std::string Slon(int n) { return "Slon"+std::to_string(n); }
-std::string Slat(int n) { return "Slat"+std::to_string(n); }
+std::string F( int n ) {
+    return "F" + std::to_string( n );
+}
+std::string O( int n ) {
+    return "O" + std::to_string( n );
+}
+std::string N( int n ) {
+    return "N" + std::to_string( n );
+}
+std::string L( int n ) {
+    return "L" + std::to_string( n );
+}
+std::string S( int n ) {
+    return "S" + std::to_string( n );
+}
+std::string Slon( int n ) {
+    return "Slon" + std::to_string( n );
+}
+std::string Slat( int n ) {
+    return "Slat" + std::to_string( n );
+}
 
 //-----------------------------------------------------------------------------
 
 CASE( "test_global_grids" ) {
     // auto resolutions = { 32, 64, 160, 320, 640 };
-    auto resolutions = { 32, 64 };
-    for( int n : resolutions ) {
-        int t = n-1;
+    auto resolutions = {32, 64};
+    for ( int n : resolutions ) {
+        int t      = n - 1;
         auto cases = {
-            std::make_pair(F(n),t),
-            std::make_pair(O(n),t),
-            std::make_pair(N(n),t),
-            std::make_pair(L(n),t),
-            std::make_pair(S(n),t),
-            std::make_pair(Slon(n),t),
-            std::make_pair(Slat(n),t),
+            std::make_pair( F( n ), t ),    std::make_pair( O( n ), t ), std::make_pair( N( n ), t ),
+            std::make_pair( L( n ), t ),    std::make_pair( S( n ), t ), std::make_pair( Slon( n ), t ),
+            std::make_pair( Slat( n ), t ),
         };
 
-        LegendreCacheCreator F_cache_creator( Grid(F(n)), t );
+        LegendreCacheCreator F_cache_creator( Grid( F( n ) ), t );
         EXPECT( F_cache_creator.supported() );
-        auto F_cachefile = CacheFile("leg_"+F_cache_creator.uid()+".bin");
+        auto F_cachefile = CacheFile( "leg_" + F_cache_creator.uid() + ".bin" );
         F_cache_creator.create( F_cachefile );
-        Cache F_cache = LegendreCache( F_cachefile );
-        auto F_cache_hash = hash(F_cache);
+        Cache F_cache     = LegendreCache( F_cachefile );
+        auto F_cache_hash = hash( F_cache );
 
-        for( auto _case : cases )
-        {
+        for ( auto _case : cases ) {
             auto gridname   = _case.first;
             auto truncation = _case.second;
-            Log::info() << "Case "+gridname+" T"+std::to_string(truncation) << std::endl;
-            ATLAS_TRACE("Case "+gridname+" T"+std::to_string(truncation));
-            Grid grid(gridname);
+            Log::info() << "Case " + gridname + " T" + std::to_string( truncation ) << std::endl;
+            ATLAS_TRACE( "Case " + gridname + " T" + std::to_string( truncation ) );
+            Grid grid( gridname );
 
             LegendreCacheCreator cache_creator( grid, truncation );
             EXPECT( cache_creator.supported() );
-            auto cachefile = CacheFile("leg_"+cache_creator.uid()+".bin");
+            auto cachefile = CacheFile( "leg_" + cache_creator.uid() + ".bin" );
             cache_creator.create( cachefile );
-            if( GaussianGrid(grid) ) {
-                EXPECT( hash(cachefile) == F_cache_hash );
-            }
+            if ( GaussianGrid( grid ) ) { EXPECT( hash( cachefile ) == F_cache_hash ); }
 
-            ATLAS_TRACE_SCOPE("create without cache")
-                Trans( grid, truncation );
+            ATLAS_TRACE_SCOPE( "create without cache" )
+            Trans( grid, truncation );
 
             Cache cache;
-            ATLAS_TRACE_SCOPE("read cache")
-                cache = LegendreCache( cachefile );
-            ATLAS_TRACE_SCOPE("create with cache")
-                Trans( cache, grid, truncation );
-
+            ATLAS_TRACE_SCOPE( "read cache" )
+            cache = LegendreCache( cachefile );
+            ATLAS_TRACE_SCOPE( "create with cache" )
+            Trans( cache, grid, truncation );
         }
     }
 }
 
 CASE( "test_global_grids_with_subdomain" ) {
-    int n = 64;
-    int t = n-1;
-    auto cases = {
-        std::make_pair(F(n),t),
-        std::make_pair(O(n),t),
-        std::make_pair(N(n),t),
-        std::make_pair(L(n),t),
-        std::make_pair(S(n),t),
-        std::make_pair(Slon(n),t),
-        std::make_pair(Slat(n),t)
-    };
+    int n        = 64;
+    int t        = n - 1;
+    auto cases   = {std::make_pair( F( n ), t ),   std::make_pair( O( n ), t ), std::make_pair( N( n ), t ),
+                  std::make_pair( L( n ), t ),   std::make_pair( S( n ), t ), std::make_pair( Slon( n ), t ),
+                  std::make_pair( Slat( n ), t )};
     auto domains = std::vector<Domain>{
-        ZonalBandDomain  ( {-10., 5.} ),
+        ZonalBandDomain( {-10., 5.} ),
         RectangularDomain( {-1., 1.}, {50., 55.} ),
         RectangularDomain( {-1., 1.}, {-5., 40.} ),
     };
-    for( auto _case : cases )
-    {
+    for ( auto _case : cases ) {
         auto gridname   = _case.first;
         auto truncation = _case.second;
 
-        ATLAS_TRACE("Case "+gridname+" T"+std::to_string(truncation));
+        ATLAS_TRACE( "Case " + gridname + " T" + std::to_string( truncation ) );
 
         Grid global_grid( gridname );
 
-        LegendreCacheCreator global_cache_creator( Grid(gridname), truncation );
+        LegendreCacheCreator global_cache_creator( Grid( gridname ), truncation );
         EXPECT( global_cache_creator.supported() );
         auto global_cachefile = CacheFile( "leg_" + global_cache_creator.uid() + ".bin" );
         ATLAS_TRACE_SCOPE( "Creating cache " + std::string( global_cachefile ) )
-            global_cache_creator.create( global_cachefile );
+        global_cache_creator.create( global_cachefile );
 
         Cache global_cache;
-        ATLAS_TRACE_SCOPE("read cache")
-            global_cache = LegendreCache( global_cachefile );
-        auto global_hash = hash(global_cache);
+        ATLAS_TRACE_SCOPE( "read cache" )
+        global_cache     = LegendreCache( global_cachefile );
+        auto global_hash = hash( global_cache );
 
-        for( auto domain : domains ) {
+        for ( auto domain : domains ) {
             Grid grid( gridname, domain );
-            ATLAS_TRACE_SCOPE("create with cache")
-                Trans( global_cache, global_grid, domain, truncation );
+            ATLAS_TRACE_SCOPE( "create with cache" )
+            Trans( global_cache, global_grid, domain, truncation );
         }
     }
 }
 
 CASE( "test_regional_grids nested_in_global" ) {
-    auto cachefile = CacheFile("regional_lonlat.bin");
+    auto cachefile  = CacheFile( "regional_lonlat.bin" );
     auto truncation = 89;
     Cache cache;
-    StructuredGrid grid_global(
-        LinearSpacing( {  0., 360.}, 360, false ),
-        LinearSpacing( { 90., -90.}, 181, true  )
-    );
+    StructuredGrid grid_global( LinearSpacing( {0., 360.}, 360, false ), LinearSpacing( {90., -90.}, 181, true ) );
     EXPECT( grid_global.domain().global() );
 
     LegendreCacheCreator global_cache_creator( grid_global, truncation );
     EXPECT( global_cache_creator.supported() );
     auto global_cachefile = CacheFile( "leg_" + global_cache_creator.uid() + ".bin" );
-    ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) )
-        global_cache_creator.create( global_cachefile );
+    ATLAS_TRACE_SCOPE( "Creating cache " + std::string( cachefile ) )
+    global_cache_creator.create( global_cachefile );
 
 
+    StructuredGrid regional( LinearSpacing( {0., 180.}, 181 ), LinearSpacing( {0., 45.}, 46 ) );
 
-    StructuredGrid regional( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) );
 
-
-    ATLAS_TRACE_SCOPE("create without cache")
-        Trans( grid_global, regional.domain(), truncation );
-    ATLAS_TRACE_SCOPE("read cache")
-        cache = LegendreCache( global_cachefile );
-    ATLAS_TRACE_SCOPE("create with cache")
-        Trans( cache, grid_global, regional.domain(), truncation );
+    ATLAS_TRACE_SCOPE( "create without cache" )
+    Trans( grid_global, regional.domain(), truncation );
+    ATLAS_TRACE_SCOPE( "read cache" )
+    cache = LegendreCache( global_cachefile );
+    ATLAS_TRACE_SCOPE( "create with cache" )
+    Trans( cache, grid_global, regional.domain(), truncation );
 }
 
 CASE( "test_regional_grids not nested" ) {
     auto truncation = 89;
     Cache cache;
 
-    StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) );
+    StructuredGrid grid( LinearSpacing( {0., 180.}, 181 ), LinearSpacing( {0., 45.}, 46 ) );
 
     LegendreCacheCreator cache_creator( grid, truncation );
     EXPECT( cache_creator.supported() );
     auto cachefile = CacheFile( "leg_" + cache_creator.uid() + ".bin" );
 
-    ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) )
-        cache_creator.create( cachefile );
+    ATLAS_TRACE_SCOPE( "Creating cache " + std::string( cachefile ) )
+    cache_creator.create( cachefile );
 
-    ATLAS_TRACE_SCOPE("create without cache")
-        Trans( grid, truncation );
-    ATLAS_TRACE_SCOPE("read cache")
-        cache = LegendreCache( cachefile );
-    ATLAS_TRACE_SCOPE("create with cache")
-        Trans( cache, grid, truncation );
+    ATLAS_TRACE_SCOPE( "create without cache" )
+    Trans( grid, truncation );
+    ATLAS_TRACE_SCOPE( "read cache" )
+    cache = LegendreCache( cachefile );
+    ATLAS_TRACE_SCOPE( "create with cache" )
+    Trans( cache, grid, truncation );
 }
 
 CASE( "test_regional_grids with projection" ) {
-    auto cachefile = CacheFile("cache-regional.bin");
+    auto cachefile  = CacheFile( "cache-regional.bin" );
     auto truncation = 89;
     Cache cache;
 
-    Projection projection( util::Config
-       ( "type",      "rotated_lonlat")
-       ("north_pole", std::vector<double>{ 4., 54.} ) );
+    Projection projection( util::Config( "type", "rotated_lonlat" )( "north_pole", std::vector<double>{4., 54.} ) );
 
-    StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ), projection );
+    StructuredGrid grid( LinearSpacing( {0., 180.}, 181 ), LinearSpacing( {0., 45.}, 46 ), projection );
     Trans trans;
-    ATLAS_TRACE_SCOPE("create without cache")
-        trans = Trans( grid, truncation );
+    ATLAS_TRACE_SCOPE( "create without cache" )
+    trans = Trans( grid, truncation );
 
     // Note: caching not yet implemented for unstructured and projected grids
     LegendreCacheCreator legendre_cache_creator( grid, truncation );
@@ -238,40 +231,32 @@ CASE( "test_regional_grids with projection" ) {
 
     std::vector<double> rspecg( trans.spectralCoefficients(), 0. );
     std::vector<double> rgp( trans.grid().size() );
-    trans.invtrans(1,rspecg.data(),rgp.data());
+    trans.invtrans( 1, rspecg.data(), rgp.data() );
 }
 
 CASE( "test cache creator to file" ) {
-
     auto truncation = 89;
-    StructuredGrid grid_global(
-        LinearSpacing( {  0., 360.}, 360, false ),
-        LinearSpacing( { 90., -90.}, 181, true  )
-    );
+    StructuredGrid grid_global( LinearSpacing( {0., 360.}, 360, false ), LinearSpacing( {90., -90.}, 181, true ) );
 
     LegendreCacheCreator legendre_cache_creator( grid_global, truncation );
     auto cachefile = CacheFile( legendre_cache_creator.uid() );
-    ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) )
-      legendre_cache_creator.create( cachefile );
+    ATLAS_TRACE_SCOPE( "Creating cache " + std::string( cachefile ) )
+    legendre_cache_creator.create( cachefile );
 
-    Cache c = legendre_cache_creator.create();
+    Cache c     = legendre_cache_creator.create();
     auto trans1 = Trans( c, grid_global, truncation );
     auto trans2 = Trans( c, grid_global, truncation );
 }
 
 CASE( "test cache creator in memory" ) {
-
     auto truncation = 89;
-    StructuredGrid grid_global(
-        LinearSpacing( {  0., 360.}, 360, false ),
-        LinearSpacing( { 90., -90.}, 181, true  )
-    );
+    StructuredGrid grid_global( LinearSpacing( {0., 360.}, 360, false ), LinearSpacing( {90., -90.}, 181, true ) );
 
     LegendreCacheCreator legendre_cache_creator( grid_global, truncation );
 
     Cache cache;
     ATLAS_TRACE_SCOPE( "Creating cache in memory" )
-      cache = legendre_cache_creator.create();
+    cache = legendre_cache_creator.create();
 
     auto trans1 = Trans( cache, grid_global, truncation );
     auto trans2 = Trans( cache, grid_global, truncation );
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index e6ac605bf..c775fc9f0 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -29,9 +29,9 @@
 #include "atlas/parallel/mpi/mpi.h"
 #include "atlas/runtime/Trace.h"
 #include "atlas/trans/Trans.h"
+#include "atlas/trans/local/TransLocal.h"
 #include "atlas/util/Constants.h"
 #include "atlas/util/Earth.h"
-#include "atlas/trans/local/TransLocal.h"
 
 #include "tests/AtlasTestEnvironment.h"
 
diff --git a/src/tests/util/test_earth.cc b/src/tests/util/test_earth.cc
index 3013aaa27..7ca1f7474 100644
--- a/src/tests/util/test_earth.cc
+++ b/src/tests/util/test_earth.cc
@@ -135,8 +135,8 @@ CASE( "test_earth_lon_135" ) {
 CASE( "test_earth_lon_225" ) {
     const PointLonLat p1[2] = {{225., 0.}, {-135., 0.}};
     PointXYZ p2[2];
-    Earth::convertSphericalToCartesian( p1[0], p2[0]);
-    Earth::convertSphericalToCartesian( p1[1], p2[1]);
+    Earth::convertSphericalToCartesian( p1[0], p2[0] );
+    Earth::convertSphericalToCartesian( p1[1], p2[1] );
 
     EXPECT( eckit::types::is_approximately_equal( p2[0].x(), -L ) );
     EXPECT( eckit::types::is_approximately_equal( p2[0].y(), -L ) );
diff --git a/src/tests/util/test_indexview.cc b/src/tests/util/test_indexview.cc
index a204e2f00..c62cfa00c 100644
--- a/src/tests/util/test_indexview.cc
+++ b/src/tests/util/test_indexview.cc
@@ -12,8 +12,8 @@
 #include "atlas/array/ArrayView.h"
 #include "atlas/array/IndexView.h"
 #include "atlas/array/MakeView.h"
-#include "atlas/parallel/mpi/mpi.h"
 #include "atlas/library/defines.h"
+#include "atlas/parallel/mpi/mpi.h"
 
 #include "tests/AtlasTestEnvironment.h"
 

From 51fd87f52b5aa067137548cdfc70e792f2f6ac1f Mon Sep 17 00:00:00 2001
From: Tiago Quintino <tiago.quintino@ecmwf.int>
Date: Thu, 10 May 2018 17:19:20 +0100
Subject: [PATCH 085/123] Fix Bamboo Intel compiler cmake requirements

---
 bamboo/INTEL-env.sh | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/bamboo/INTEL-env.sh b/bamboo/INTEL-env.sh
index b9fb01587..6715f9e8c 100644
--- a/bamboo/INTEL-env.sh
+++ b/bamboo/INTEL-env.sh
@@ -1,11 +1,16 @@
-# Initialise module environment if it is not
+#!/bin/bash
+
+# initialise module environment if it is not
 if [[ ! $(command -v module > /dev/null 2>&1) ]]; then
   . /usr/local/apps/module/init/bash
 fi
-# unload modules not available for intel
+
+module unload grib_api
 module unload eccodes
 module unload emos
 module unload fftw
 module unload libemos
 
-module switch gnu intel/16.0.3
+module load cmake/3.10.2
+
+module switch gnu intel/16.0.3
\ No newline at end of file

From 7b9ccddc555d40ac17294917f63fb5a67438dd21 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 10 May 2018 17:25:46 +0100
Subject: [PATCH 086/123] Add code coverage with travis and codecov

---
 .travis.yml | 15 ++++++++++++++-
 README.md   |  1 +
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 0247644e7..ec4ac30bf 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -54,7 +54,7 @@ matrix:
         - CACHE_NAME=linux-gcc7-mpich
         - CXX_COMPILER='g++-7' C_COMPILER='gcc-7' Fortran_COMPILER='gfortran-7'
         - MPI='mpich'
-        - ATLAS_CMAKE_OPTIONS="-DCMAKE_BUILD_TYPE=DEBUG"
+        - ATLAS_CMAKE_OPTIONS="-DCMAKE_BUILD_TYPE=DEBUG -DENABLE_GPROF=ON"
       addons:
         apt:
           sources:  ['ubuntu-toolchain-r-test']
@@ -233,6 +233,19 @@ script:
   #################################################################
   - ctest
 
+after_success:
+
+  - |
+    if [[ "${TRAVIS_OS_NAME}" == "linux" ]]; then
+      # Creating report
+      cd ${ATLAS_BUILD_DIR}
+      lcov --directory . --capture --output-file coverage.info # capture coverage info
+      lcov --remove coverage.info '/usr/*' --output-file coverage.info # filter out system
+      lcov --list coverage.info #debug info
+      # Uploading report to CodeCov
+      bash <(curl -s https://codecov.io/bash) -t 9c489980-d292-499c-8615-af02df3b20d1 || echo "Codecov did not collect coverage reports"
+    fi
+
 after_failure:
 
   - cd ${ATLAS_BUILD_DIR}
diff --git a/README.md b/README.md
index f0a4014ea..e93a28e98 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,7 @@ Atlas
 
 [![travis master](https://img.shields.io/travis/ecmwf/atlas/master.svg?label=master&logo=travis)](http://travis-ci.org/ecmwf/atlas "master")
 [![travis develop](https://img.shields.io/travis/ecmwf/atlas/develop.svg?label=develop&logo=travis)](http://travis-ci.org/ecmwf/atlas "develop")
+[![codecov](https://codecov.io/gh/ecmwf/atlas/branch/develop/graph/badge.svg)](https://codecov.io/gh/ecmwf/atlas)
 
 Project home: https://software.ecmwf.int/wiki/display/ATLAS  
 Contact: Willem Deconinck (willem.deconinck@ecmwf.int)  

From 63b7adba61fdd5c49b57dfcde1796fbf25d2a017 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 10 May 2018 17:52:02 +0100
Subject: [PATCH 087/123] aligned_alloc not supported on MacOSX

---
 src/atlas/trans/local/TransLocal.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc
index 6dc294c04..fbd2dbc65 100644
--- a/src/atlas/trans/local/TransLocal.cc
+++ b/src/atlas/trans/local/TransLocal.cc
@@ -173,7 +173,7 @@ int num_n( const int truncation, const int m, const bool symmetric ) {
 
 void alloc_aligned( double*& ptr, size_t n ) {
     const size_t alignment = 64 * sizeof( double );
-    ptr                    = (double*)aligned_alloc( alignment, sizeof( double ) * n );
+    posix_memalign( (void**)&ptr, alignment, sizeof( double ) * n );
 }
 
 void free_aligned( double*& ptr ) {

From a46dd171e240ad2b01b7e09bfbf90f3407e2985b Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 10 May 2018 18:07:46 +0100
Subject: [PATCH 088/123] Disable atlas_test_transgeneral if FFTW is not found

---
 src/tests/trans/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/tests/trans/CMakeLists.txt b/src/tests/trans/CMakeLists.txt
index e9cbad59f..8d0f283b8 100644
--- a/src/tests/trans/CMakeLists.txt
+++ b/src/tests/trans/CMakeLists.txt
@@ -49,6 +49,7 @@ ecbuild_add_test( TARGET atlas_test_transgeneral
   SOURCES   test_transgeneral.cc
   LIBS      atlas
   ENVIRONMENT ATLAS_TRACE_REPORT=1
+  CONDITION ATLAS_HAVE_FFTW
 )
 
 ecbuild_add_test( TARGET atlas_test_trans_localcache

From 2bf70d596a699997eefe4b00b8bedfbb56342f21 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Fri, 11 May 2018 09:48:50 +0100
Subject: [PATCH 089/123] travis: install fftw

---
 .travis.yml | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index ec4ac30bf..d9389979c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -46,7 +46,7 @@ matrix:
       addons:
         apt:
           sources:  ['ubuntu-toolchain-r-test']
-          packages: ['g++-5', 'gcc-5', 'gfortran-5']
+          packages: ['g++-5', 'gcc-5', 'gfortran-5', 'libfftw3-dev']
 
     - os: linux
       compiler: gcc
@@ -55,10 +55,11 @@ matrix:
         - CXX_COMPILER='g++-7' C_COMPILER='gcc-7' Fortran_COMPILER='gfortran-7'
         - MPI='mpich'
         - ATLAS_CMAKE_OPTIONS="-DCMAKE_BUILD_TYPE=DEBUG -DENABLE_GPROF=ON"
+        - COVERAGE=ON
       addons:
         apt:
           sources:  ['ubuntu-toolchain-r-test']
-          packages: ['g++-7', 'gcc-7', 'gfortran-7']
+          packages: ['g++-7', 'gcc-7', 'gfortran-7', 'libfftw3-dev', 'lcov']
 
     - os: linux
       compiler: gcc
@@ -143,16 +144,6 @@ install:
       source ${DEPS_DIR}/pgi/env.sh
     fi
 
-
-  #################################################################
-  # Install CGAL
-  #################################################################
-  - |
-    ### Install CGAL
-    if [[ "${TRAVIS_OS_NAME}" == "osx" ]]; then
-      brew upgrade cgal || brew install cgal
-    fi
-
   #################################################################
   # Install MPI
   #################################################################
@@ -179,6 +170,24 @@ install:
     fi
     cmake --version
 
+  #################################################################
+  # Install FFTW
+  #################################################################
+  - |
+    ### Install FFTW
+    if [[ "${TRAVIS_OS_NAME}" == "osx" ]]; then
+      brew upgrade fftw || brew install fftw
+    fi
+
+  #################################################################
+  # Install CGAL
+  #################################################################
+  - |
+    ### Install CGAL
+    if [[ "${TRAVIS_OS_NAME}" == "osx" ]]; then
+      brew upgrade cgal || brew install cgal
+    fi
+
   #################################################################
   # Install ecbuild
   #################################################################
@@ -236,7 +245,7 @@ script:
 after_success:
 
   - |
-    if [[ "${TRAVIS_OS_NAME}" == "linux" ]]; then
+    if [[ "${COVERAGE}" == "ON" ]]; then
       # Creating report
       cd ${ATLAS_BUILD_DIR}
       lcov --directory . --capture --output-file coverage.info # capture coverage info

From 045c539a117bd14b9f2c12615e367023aac51b5c Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Fri, 11 May 2018 15:05:41 +0100
Subject: [PATCH 090/123] travis: always verbose ctest (for now)

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index d9389979c..f02f88aa1 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -240,7 +240,7 @@ script:
   #################################################################
   # Test Atlas
   #################################################################
-  - ctest
+  - ctest -VV
 
 after_success:
 

From 8e708e33aa3c9bbc0c8d688b02af4386e78a6988 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Fri, 11 May 2018 18:22:18 +0100
Subject: [PATCH 091/123] ATLAS-158 Possible bug exposed for
 TransLocal::invtrans to regional grid

---
 src/atlas/trans/local/TransLocal.cc | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc
index fbd2dbc65..bbb417962 100644
--- a/src/atlas/trans/local/TransLocal.cc
+++ b/src/atlas/trans/local/TransLocal.cc
@@ -893,7 +893,27 @@ void TransLocal::invtrans_fourier_regular( const int nlats, const int nlons, con
             eckit::linalg::Matrix A( fourier_, nlons, ( truncation_ + 1 ) * 2 );
             eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats );
             eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats );
+
+// BUG ATLAS-159: valgrind warns here, saying that B(1,:) is uninitialised
+//                if workaround above labeled ATLAS-159 is not applied.
+//
+//                        for( int i=0; i<A.rows(); ++i ) {
+//                          for ( int j=0; j<A.cols(); ++j ) {
+//                            if( A(i,j) == 999.999 ) {
+//                              ASSERT(false);
+//                            }
+//                          }
+//                        }
+//                        for ( int i=0; i<B.rows(); ++i ) {
+//                          for( int j=0; j<B.cols(); ++j ) {
+//                            if( B(i,j) == 999.999 ) {
+//                              ASSERT(false);
+//                            }
+//                          }
+//                        }
+
             linalg_.gemm( A, B, C );
+
         }
 #else
         // dgemm-method 2
@@ -1208,6 +1228,12 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields,
             double* scl_fourier;
             alloc_aligned( scl_fourier, size_fourier_max * ( truncation_ + 1 ) );
 
+// ATLAS-159 workaround begin
+            for( int i=0; i<size_fourier_max*(truncation_+1); ++i ) {
+              scl_fourier[i] = 0.;
+            }
+// ATLAS-159 workaround end
+
             // Legendre transformation:
             invtrans_legendre( truncation, nlats, nb_scalar_fields, scalar_spectra, scl_fourier, config );
 

From 8b54e972e5f3f0d98383d05e9de1524a7da74ad8 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Fri, 11 May 2018 18:23:34 +0100
Subject: [PATCH 092/123] grid::RegularGaussian(N,domain) constructor

---
 src/atlas/grid/Grid.cc | 4 ++++
 src/atlas/grid/Grid.h  | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/atlas/grid/Grid.cc b/src/atlas/grid/Grid.cc
index c4e04d55c..a0c18922e 100644
--- a/src/atlas/grid/Grid.cc
+++ b/src/atlas/grid/Grid.cc
@@ -12,6 +12,7 @@
 
 #include <limits>
 #include <vector>
+#include <string>
 
 #include "eckit/config/Parametrisation.h"
 #include "eckit/exception/Exceptions.h"
@@ -104,5 +105,8 @@ ReducedGaussianGrid::ReducedGaussianGrid( const std::vector<long>& nx, const Dom
 ReducedGaussianGrid::ReducedGaussianGrid( const std::initializer_list<long>& nx ) :
     ReducedGaussianGrid( std::vector<long>( nx ) ) {}
 
+RegularGaussianGrid::RegularGaussianGrid( int N, const Grid::Domain& domain ) :
+    RegularGaussianGrid::grid_t( "F" + std::to_string( N ), domain ) {}
+
 }  // namespace grid
 }  // namespace atlas
diff --git a/src/atlas/grid/Grid.h b/src/atlas/grid/Grid.h
index 0ac6aa8dc..9f45b14b6 100644
--- a/src/atlas/grid/Grid.h
+++ b/src/atlas/grid/Grid.h
@@ -302,7 +302,7 @@ class ReducedGaussianGrid : public Gaussian<ReducedGrid> {
 public:
     using grid_t::grid_t;
     ReducedGaussianGrid( const std::initializer_list<long>& pl );
-    ReducedGaussianGrid( const std::vector<long>& pl, const Domain& domain = Domain() );
+    ReducedGaussianGrid( const std::vector<long>& pl, const Domain& = Domain() );
 
     operator bool() const { return valid(); }
 
@@ -316,6 +316,7 @@ class RegularGaussianGrid : public Gaussian<RegularGrid> {
 
 public:
     using grid_t::grid_t;
+    RegularGaussianGrid( int N , const Domain& = Domain() );
 
     inline double lon( size_t i ) const { return x( i ); }
 

From d4a7a104cd46bf4867cbf37ebf57bca2f2a31a10 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Wed, 23 May 2018 17:43:04 +0000
Subject: [PATCH 093/123] FFTW include must be public

---
 src/atlas/CMakeLists.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt
index 9bf883d18..b31ed29cb 100644
--- a/src/atlas/CMakeLists.txt
+++ b/src/atlas/CMakeLists.txt
@@ -559,8 +559,9 @@ ecbuild_add_library( TARGET atlas
   PRIVATE_INCLUDES
     "${CGAL_INCLUDE_DIRS}"
     "${TRANSI_INCLUDE_DIRS}"
-    "${MPI_CXX_INCLUDE_DIRS}"
-    "${FFTW_INCLUDES}"
+
+  PUBLIC_INCLUDES "${FFTW_INCLUDES}"
+
   LIBS
     eckit_geometry
     eckit_linalg

From 30c9d88f044c1912add061d5cabc91346ac28934 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Wed, 30 May 2018 12:09:27 +0100
Subject: [PATCH 094/123] ATLAS-160 Create atlas_atest_mgrids executable to
 chase problems

---
 src/atlas/grid/Partitioner.h               |  2 +-
 src/tests/CMakeLists.txt                   |  1 +
 src/tests/acceptance_tests/CMakeLists.txt  |  4 +
 src/tests/acceptance_tests/atest_mgrids.cc | 99 ++++++++++++++++++++++
 4 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100644 src/tests/acceptance_tests/CMakeLists.txt
 create mode 100644 src/tests/acceptance_tests/atest_mgrids.cc

diff --git a/src/atlas/grid/Partitioner.h b/src/atlas/grid/Partitioner.h
index 388bbb914..6ba0b0683 100644
--- a/src/atlas/grid/Partitioner.h
+++ b/src/atlas/grid/Partitioner.h
@@ -63,7 +63,7 @@ class MatchingMeshPartitioner : public Partitioner {
 
 public:
     MatchingMeshPartitioner();
-    MatchingMeshPartitioner( const Mesh& mesh, const Config& config );
+    MatchingMeshPartitioner( const Mesh& mesh, const Config& config = util::NoConfig() );
 };
 
 // ------------------------------------------------------------------
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 851fd3f6b..8596b6405 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -80,3 +80,4 @@ add_subdirectory( numerics )
 add_subdirectory( trans )
 add_subdirectory( interpolation )
 
+add_subdirectory( acceptance_tests )
diff --git a/src/tests/acceptance_tests/CMakeLists.txt b/src/tests/acceptance_tests/CMakeLists.txt
new file mode 100644
index 000000000..149843ed4
--- /dev/null
+++ b/src/tests/acceptance_tests/CMakeLists.txt
@@ -0,0 +1,4 @@
+ecbuild_add_executable( TARGET atlas_atest_mgrids 
+                  SOURCES atest_mgrids.cc
+                  LIBS atlas
+                )
diff --git a/src/tests/acceptance_tests/atest_mgrids.cc b/src/tests/acceptance_tests/atest_mgrids.cc
new file mode 100644
index 000000000..948a4a9e7
--- /dev/null
+++ b/src/tests/acceptance_tests/atest_mgrids.cc
@@ -0,0 +1,99 @@
+/*
+ * (C) Copyright 2013 ECMWF.
+ *
+ * This software is licensed under the terms of the Apache Licence Version 2.0
+ * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ * In applying this licence, ECMWF does not waive the privileges and immunities
+ * granted to it by virtue of its status as an intergovernmental organisation
+ * nor does it submit to any jurisdiction.
+ */
+
+#include <cmath>
+#include <fstream>
+#include <iostream>
+#include <limits>
+#include <memory>
+#include <sstream>
+#include <vector>
+
+#include "atlas/grid.h"
+#include "atlas/mesh.h"
+#include "atlas/functionspace.h"
+#include "atlas/field.h"
+#include "atlas/meshgenerator.h"
+#include "atlas/option.h"
+#include "atlas/parallel/mpi/mpi.h"
+#include "atlas/runtime/AtlasTool.h"
+#include "atlas/runtime/Log.h"
+#include "atlas/util/Config.h"
+#include "atlas/output/Gmsh.h"
+#include "atlas/numerics/fvm/Method.h"
+#include "atlas/interpolation/Interpolation.h"
+
+#include "atlas/mesh/actions/BuildHalo.h"
+
+using namespace atlas;
+
+//------------------------------------------------------------------------------
+
+class Program : public AtlasTool {
+    virtual void execute( const Args& args );
+public:
+    Program( int argc, char** argv );
+  };
+
+//-----------------------------------------------------------------------------
+
+Program::Program( int argc, char** argv ) : AtlasTool( argc, argv ) {
+    add_option( new SimpleOption<std::string>( "gridA", "grid A" ) );
+    add_option( new SimpleOption<std::string>( "gridB", "grid B" ) );
+    add_option( new SimpleOption<bool>( "ghost", "Output ghost elements" ) );
+    add_option( new SimpleOption<long>( "haloA", "Halo size" ) );
+    add_option( new SimpleOption<long>( "haloB", "Halo size" ) );
+}
+
+//-----------------------------------------------------------------------------
+
+void Program::execute( const Args& args ) {
+  
+  auto ghost = util::Config("ghost",args.getBool("ghost",false));
+  auto haloA = option::halo( args.getLong("haloA",1) );
+  auto haloB = option::halo( args.getLong("haloB",1) );
+  
+  auto gridA = Grid( args.getString("gridA") );
+  auto gridB = Grid( args.getString("gridB") );
+  
+  auto meshgenerator = MeshGenerator( "structured" );
+
+  auto distA = grid::Distribution( gridA, grid::Partitioner( "trans" ) );
+
+  auto meshA = meshgenerator.generate( gridA, distA );
+
+  numerics::fvm::Method fvmA(meshA,haloA);
+  auto gmshA = output::Gmsh( "meshA.msh", ghost );
+  gmshA.write(meshA);
+
+
+  auto distB = grid::Distribution( gridB, grid::MatchingMeshPartitioner( meshA ) );
+
+  auto meshB = meshgenerator.generate( gridB, distB );
+
+  numerics::fvm::Method fvmB(meshB,haloB);
+
+  // Field fieldB = fvmB.node_columns().createField<double>();
+
+  output::Gmsh gmshB( "meshB.msh", ghost );
+  gmshB.write(meshB);
+  // gmshB.write(fieldB);
+
+  Interpolation AtoB( option::type("finite-element"), fvmA.node_columns(), fvmB.node_columns() );
+  Interpolation BtoA( option::type("finite-element"), fvmB.node_columns(), fvmA.node_columns() );
+
+}
+
+//------------------------------------------------------------------------------
+
+int main( int argc, char** argv ) {
+    Program tool( argc, argv );
+    return tool.start();
+}

From abbe5a6a55a817642b7691de8e3a48f1c1c43150 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Wed, 30 May 2018 14:11:10 +0100
Subject: [PATCH 095/123] ATLAS-160 Relax too strict sanity check

---
 src/atlas/mesh/actions/BuildParallelFields.cc | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/atlas/mesh/actions/BuildParallelFields.cc b/src/atlas/mesh/actions/BuildParallelFields.cc
index cca09905b..20d7cb732 100644
--- a/src/atlas/mesh/actions/BuildParallelFields.cc
+++ b/src/atlas/mesh/actions/BuildParallelFields.cc
@@ -503,15 +503,18 @@ Field& build_edges_partition( Mesh& mesh ) {
         bool edge_partition_is_same_as_one_of_nodes = ( p == pn1 || p == pn2 );
         if ( edge_is_partition_boundary ) {
             if ( not edge_partition_is_same_as_one_of_nodes ) {
-                if ( elem1 != edge_to_elem.missing_value() ) {
-                    Log::error() << EDGE( jedge ) << " [p" << p << "] is not correct elem1[p" << elem_part( elem1 )
-                                 << "]" << std::endl;
+                // If this is a ghost edge, we could trust it.
+                if( edge_part(jedge) == mypart ) {
+                    if ( elem1 != edge_to_elem.missing_value() ) {
+                        Log::error() << "[" << mypart << "] " << EDGE( jedge ) << " [p" << p << "] is not correct elem1[p" << elem_part( elem1 )
+                                     << "]" << std::endl;
+                    }
+                    else {
+                        Log::error() << "[" << mypart << "] " << EDGE( jedge ) << " [p" << p << "] is not correct elem2[p" << elem_part( elem2 )
+                                     << "]" << std::endl;
+                    }
+                    insane = 1;
                 }
-                else {
-                    Log::error() << EDGE( jedge ) << " [p" << p << "] is not correct elem2[p" << elem_part( elem2 )
-                                 << "]" << std::endl;
-                }
-                insane = 1;
             }
         }
         else {

From deab74c0c808a812ac92b6f2e565fd358ba0da01 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 31 May 2018 09:33:10 +0100
Subject: [PATCH 096/123] ATLAS-160 Gmsh writer was converting gidx_t to int

---
 src/atlas/output/detail/GmshIO.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/atlas/output/detail/GmshIO.cc b/src/atlas/output/detail/GmshIO.cc
index f3f4cb25e..c8c2a60e4 100644
--- a/src/atlas/output/detail/GmshIO.cc
+++ b/src/atlas/output/detail/GmshIO.cc
@@ -753,13 +753,13 @@ void GmshIO::write( const Mesh& mesh, const PathName& file_path ) const {
     file << nb_nodes << "\n";
     double xyz[3] = {0., 0., 0.};
     for ( size_t n = 0; n < nb_nodes; ++n ) {
-        int g = glb_idx( n );
+        gidx_t g = glb_idx( n );
 
         for ( size_t d = 0; d < surfdim; ++d )
             xyz[d] = coords( n, d );
 
         if ( binary ) {
-            file.write( reinterpret_cast<const char*>( &g ), sizeof( int ) );
+            file.write( reinterpret_cast<const char*>( &g ), sizeof( gidx_t ) );
             file.write( reinterpret_cast<const char*>( &xyz ), sizeof( double ) * 3 );
         }
         else {

From aff3f98ad36f864f669ba31f0cd14e2003a9d416 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 31 May 2018 09:45:27 +0100
Subject: [PATCH 097/123] ATLAS-160 Fix edge partition numbers for certain
 boundary edges

---
 src/atlas/mesh/actions/BuildParallelFields.cc | 35 ++++++++-----------
 1 file changed, 15 insertions(+), 20 deletions(-)

diff --git a/src/atlas/mesh/actions/BuildParallelFields.cc b/src/atlas/mesh/actions/BuildParallelFields.cc
index 20d7cb732..120a4f86a 100644
--- a/src/atlas/mesh/actions/BuildParallelFields.cc
+++ b/src/atlas/mesh/actions/BuildParallelFields.cc
@@ -358,6 +358,7 @@ Field& build_edges_partition( Mesh& mesh ) {
     array::ArrayView<gidx_t, 1> node_gidx = array::make_view<gidx_t, 1>( nodes.global_index() );
 
     array::ArrayView<int, 1> elem_part = array::make_view<int, 1>( mesh.cells().partition() );
+    array::ArrayView<int, 1> elem_halo = array::make_view<int, 1>( mesh.cells().halo() );
 
     auto check_flags = [&]( idx_t jedge, int flag ) {
         idx_t ip1 = edge_nodes( jedge, 0 );
@@ -412,6 +413,9 @@ Field& build_edges_partition( Mesh& mesh ) {
             // if( not domain_bdry(jedge) ) {
             bdry_edges.push_back( edge_glb_idx( jedge ) );
             p = elem_part( elem1 );
+            if( pn1 != p && pn2 == pn1 && elem_halo( elem1 ) > 0 ) {
+                p = pn1;
+            }
             // }
         }
         else if ( p != elem_part( elem1 ) && p != elem_part( elem2 ) ) {
@@ -503,18 +507,15 @@ Field& build_edges_partition( Mesh& mesh ) {
         bool edge_partition_is_same_as_one_of_nodes = ( p == pn1 || p == pn2 );
         if ( edge_is_partition_boundary ) {
             if ( not edge_partition_is_same_as_one_of_nodes ) {
-                // If this is a ghost edge, we could trust it.
-                if( edge_part(jedge) == mypart ) {
-                    if ( elem1 != edge_to_elem.missing_value() ) {
-                        Log::error() << "[" << mypart << "] " << EDGE( jedge ) << " [p" << p << "] is not correct elem1[p" << elem_part( elem1 )
-                                     << "]" << std::endl;
-                    }
-                    else {
-                        Log::error() << "[" << mypart << "] " << EDGE( jedge ) << " [p" << p << "] is not correct elem2[p" << elem_part( elem2 )
-                                     << "]" << std::endl;
-                    }
-                    insane = 1;
+                if ( elem1 != edge_to_elem.missing_value() ) {
+                    Log::error() << "[" << mypart << "] " << EDGE( jedge ) << " [p" << p << "] is not correct elem1[p" << elem_part( elem1 )
+                                 << "]" << std::endl;
+                }
+                else {
+                    Log::error() << "[" << mypart << "] " << EDGE( jedge ) << " [p" << p << "] is not correct elem2[p" << elem_part( elem2 )
+                                 << "]" << std::endl;
                 }
+                insane = 1;
             }
         }
         else {
@@ -541,12 +542,6 @@ Field& build_edges_partition( Mesh& mesh ) {
     //      DEBUG_VAR( "           the part is " << edge_part(jedge) );
     //#endif
     //  }
-    //  /// TODO: Make sure that the edge-partition is at least one of the
-    //  partition numbers of the
-    //  /// neighbouring elements.
-    //  /// Because of this problem, the size of the halo should be set to 2
-    //  instead of 1!!!
-    //  /// This will be addressed with JIRA issue  ATLAS-12
 
     return edges.partition();
 }
@@ -670,13 +665,13 @@ Field& build_edges_remote_idx( Mesh& mesh ) {
             else {
                 std::stringstream msg;
 #ifdef DEBUGGING_PARFIELDS
-                msg << "Edge(" << recv_edge( jedge, 2 ) << "[p" << recv_edge( jedge, 4 ) << "] "
-                    << recv_edge( jedge, 3 ) << "[p" << recv_edge( jedge, 5 ) << "])";
+                msg << "Edge(" << recv_edge[ jedge * varsize + 2 ] << "[p" << recv_edge[ jedge * varsize + 4 ] << "] "
+                    << recv_edge[ jedge *varsize + 3 ] << "[p" << recv_edge[ jedge * varsize + 5 ] << "])";
 #else
                 msg << "Edge with uid " << recv_uid;
 #endif
                 msg << " requested by rank [" << jpart << "]";
-                msg << " that should be owned is not found. This could be because no "
+                msg << " that should be owned by " << mpi::comm().rank() << " is not found. This could be because no "
                        "halo was built.";
                 // throw eckit::SeriousBug(msg.str(),Here());
                 Log::warning() << msg.str() << " @ " << Here() << std::endl;

From fba1f3f6c6c54098b7370adfd9c28e41e24df52b Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 31 May 2018 09:56:28 +0100
Subject: [PATCH 098/123] ATLAS-160 Fix wrong global index calculations in
 periodic region

---
 src/atlas/mesh/HybridElements.cc    |  2 +
 src/atlas/mesh/HybridElements.h     |  3 ++
 src/atlas/mesh/actions/BuildHalo.cc | 64 +++++++++++++++++++++--------
 src/atlas/mesh/actions/BuildHalo.h  |  4 +-
 4 files changed, 54 insertions(+), 19 deletions(-)

diff --git a/src/atlas/mesh/HybridElements.cc b/src/atlas/mesh/HybridElements.cc
index ad898f6f0..91c345438 100644
--- a/src/atlas/mesh/HybridElements.cc
+++ b/src/atlas/mesh/HybridElements.cc
@@ -49,6 +49,7 @@ static void set_uninitialized_fields_to_zero( HybridElements& elems, size_t begi
     IndexView<int, 1> remote_index    = make_indexview<int, 1>( elems.remote_index() );
     ArrayView<int, 1> partition       = make_view<int, 1>( elems.partition() );
     ArrayView<int, 1> halo            = make_view<int, 1>( elems.halo() );
+    ArrayView<int, 1> flags           = make_view<int, 1>( elems.flags() );
     ArrayView<int, 1> patch           = make_view<int, 1>( elems.field( "patch" ) );
 
     for ( size_t j = begin; j < elems.size(); ++j ) {
@@ -68,6 +69,7 @@ HybridElements::HybridElements() : size_( 0 ), elements_size_(), elements_begin_
     add( Field( "remote_idx", make_datatype<int>(), make_shape( size() ) ) );
     add( Field( "partition", make_datatype<int>(), make_shape( size() ) ) );
     add( Field( "halo", make_datatype<int>(), make_shape( size() ) ) );
+    add( Field( "flags", make_datatype<int>(), make_shape( size() ) ) );
     add( Field( "patch", make_datatype<int>(), make_shape( size() ) ) );
     set_uninitialized_fields_to_zero( *this, 0 );
 
diff --git a/src/atlas/mesh/HybridElements.h b/src/atlas/mesh/HybridElements.h
index 59369dbea..3e5f9b5f9 100644
--- a/src/atlas/mesh/HybridElements.h
+++ b/src/atlas/mesh/HybridElements.h
@@ -119,6 +119,9 @@ class HybridElements : public eckit::Owned {
     const Field& halo() const { return field( "halo" ); }
     Field& halo() { return field( "halo" ); }
 
+    const Field& flags() const { return field( "flags" ); }
+    Field& flags() { return field( "flags" ); }
+
     // -- Modifiers
 
     /// @brief Add a new element type with given number of elements
diff --git a/src/atlas/mesh/actions/BuildHalo.cc b/src/atlas/mesh/actions/BuildHalo.cc
index 776901bad..acb5fdf6d 100644
--- a/src/atlas/mesh/actions/BuildHalo.cc
+++ b/src/atlas/mesh/actions/BuildHalo.cc
@@ -194,14 +194,25 @@ void make_cells_global_index_human_readable( const mesh::actions::BuildHalo& bui
 
     if ( do_all ) {
         cells_to_edit.resize( cells_glb_idx.size() );
-        for ( size_t i = 0; i < cells_glb_idx.size(); ++i )
+        for ( size_t i = 0; i < cells_glb_idx.size(); ++i ) {
             cells_to_edit[i] = i;
+        }
     }
     else {
+        size_t nb_cells_to_edit(0);
+        for( const auto& new_cells : build_halo.periodic_cells_local_index_ ) {
+            nb_cells_to_edit += new_cells.size();
+        }
+        cells_to_edit.resize( nb_cells_to_edit );
+        int c{ 0 };
+        int i{ 0 };
+        for ( int t = 0; t < cells.nb_types(); ++t ) {
+            for ( idx_t p : build_halo.periodic_cells_local_index_[t] ) {
+               cells_to_edit[i++] = c + p;
+            }
+            c += cells.elements( t ).size();
+        }
         glb_idx_max = cells.global_index().metadata().getLong( "max", 0 );
-        cells_to_edit.resize( build_halo.periodic_cells_local_index_.size() );
-        for ( size_t i = 0; i < cells_to_edit.size(); ++i )
-            cells_to_edit[i] = build_halo.periodic_cells_local_index_[i];
     }
 
     std::vector<gidx_t> glb_idx( cells_to_edit.size() );
@@ -497,6 +508,8 @@ class BuildHaloHelper {
 
         std::vector<std::vector<int>> elem_part;
 
+        std::vector<std::vector<int>> elem_flags;
+
         std::vector<std::vector<int>> elem_type;
 
         Buffers( Mesh& mesh ) {
@@ -511,6 +524,7 @@ class BuildHaloHelper {
             elem_nodes_id.resize( mpi_size );
             elem_nodes_displs.resize( mpi_size );
             elem_part.resize( mpi_size );
+            elem_flags.resize( mpi_size );
             elem_type.resize( mpi_size );
         }
 
@@ -556,6 +570,7 @@ class BuildHaloHelper {
             comm.allToAll( send.elem_nodes_id, recv.elem_nodes_id );
             comm.allToAll( send.elem_part, recv.elem_part );
             comm.allToAll( send.elem_type, recv.elem_type );
+            comm.allToAll( send.elem_flags, recv.elem_flags );
             comm.allToAll( send.elem_nodes_displs, recv.elem_nodes_displs );
         }
     }
@@ -577,6 +592,7 @@ class BuildHaloHelper {
     array::ArrayView<int, 1> ghost;
     mesh::HybridElements::Connectivity* elem_nodes;
     array::ArrayView<int, 1> elem_part;
+    array::ArrayView<int, 1> elem_flags;
     array::ArrayView<gidx_t, 1> elem_glb_idx;
 
     std::vector<int> bdry_nodes;
@@ -598,6 +614,7 @@ class BuildHaloHelper {
         ghost( array::make_view<int, 1>( mesh.nodes().ghost() ) ),
         elem_nodes( &mesh.cells().node_connectivity() ),
         elem_part( array::make_view<int, 1>( mesh.cells().partition() ) ),
+        elem_flags( array::make_view<int, 1>( mesh.cells().flags() ) ),
         elem_glb_idx( array::make_view<gidx_t, 1>( mesh.cells().global_index() ) ),
         compute_uid( mesh ) {
         halo = 0;
@@ -618,6 +635,7 @@ class BuildHaloHelper {
 
         elem_nodes   = &mesh.cells().node_connectivity();
         elem_part    = array::make_view<int, 1>( mesh.cells().partition() );
+        elem_flags   = array::make_view<int, 1>( mesh.cells().flags() );
         elem_glb_idx = array::make_view<gidx_t, 1>( mesh.cells().global_index() );
     }
 
@@ -665,6 +683,7 @@ class BuildHaloHelper {
 
         buf.elem_glb_idx[p].resize( nb_elems );
         buf.elem_part[p].resize( nb_elems );
+        buf.elem_flags[p].resize( nb_elems, Topology::NONE );
         buf.elem_type[p].resize( nb_elems );
         buf.elem_nodes_id[p].resize( nb_elem_nodes );
         buf.elem_nodes_displs[p].resize( nb_elems );
@@ -675,6 +694,7 @@ class BuildHaloHelper {
 
             buf.elem_glb_idx[p][jelem] = elem_glb_idx( ielem );
             buf.elem_part[p][jelem]    = elem_part( ielem );
+            Topology::set( buf.elem_flags[p][jelem], elem_flags( ielem ) );
             buf.elem_type[p][jelem]    = mesh.cells().type_idx( ielem );
             for ( size_t jnode = 0; jnode < elem_nodes->cols( ielem ); ++jnode )
                 buf.elem_nodes_id[p][jelemnode++] = compute_uid( ( *elem_nodes )( ielem, jnode ) );
@@ -728,6 +748,7 @@ class BuildHaloHelper {
 
         buf.elem_glb_idx[p].resize( nb_elems );
         buf.elem_part[p].resize( nb_elems );
+        buf.elem_flags[p].resize( nb_elems, Topology::NONE );
         buf.elem_type[p].resize( nb_elems );
         buf.elem_nodes_id[p].resize( nb_elem_nodes );
         buf.elem_nodes_displs[p].resize( nb_elems );
@@ -736,6 +757,7 @@ class BuildHaloHelper {
             buf.elem_nodes_displs[p][jelem] = jelemnode;
             size_t ielem                    = elems[jelem];
             buf.elem_part[p][jelem]         = elem_part( ielem );
+            Topology::set( buf.elem_flags[p][jelem], elem_flags( ielem ) | newflags );
             buf.elem_type[p][jelem]         = mesh.cells().type_idx( ielem );
             std::vector<double> crds( elem_nodes->cols( ielem ) * 2 );
             for ( size_t jnode = 0; jnode < elem_nodes->cols( ielem ); ++jnode ) {
@@ -751,7 +773,7 @@ class BuildHaloHelper {
         }
     }
 
-    void add_nodes( Buffers& buf, bool periodic ) {
+    void add_nodes( Buffers& buf ) {
         ATLAS_TRACE();
 
         const size_t mpi_size = mpi::comm().size();
@@ -827,7 +849,10 @@ class BuildHaloHelper {
                 lonlat( loc_idx, XX ) = pll.lon();
                 lonlat( loc_idx, YY ) = pll.lat();
 
-                if ( periodic ) status.new_periodic_ghost_points.push_back( loc_idx );
+                if ( Topology::check( flags( loc_idx ), Topology::PERIODIC ) and not
+                     Topology::check( flags( loc_idx ), Topology::BC ) ) {
+                    status.new_periodic_ghost_points.push_back( loc_idx );
+                }
 
                 // make sure new node was not already there
                 {
@@ -849,7 +874,7 @@ class BuildHaloHelper {
         }
     }
 
-    void add_elements( Buffers& buf, bool periodic ) {
+    void add_elements( Buffers& buf ) {
         ATLAS_TRACE();
 
         const size_t mpi_size = mpi::comm().size();
@@ -924,6 +949,7 @@ class BuildHaloHelper {
             auto elem_type_glb_idx = elements.view<gidx_t, 1>( mesh.cells().global_index() );
             auto elem_type_part    = elements.view<int, 1>( mesh.cells().partition() );
             auto elem_type_halo    = elements.view<int, 1>( mesh.cells().halo() );
+            auto elem_type_flags   = elements.view<int, 1>( mesh.cells().flags() );
             auto elem_type_patch   = elements.view<int, 1>( mesh.cells().field( "patch" ) );
 
             // Copy information in new elements
@@ -936,21 +962,24 @@ class BuildHaloHelper {
                     elem_type_part( loc_idx )    = buf.elem_part[jpart][jelem];
                     elem_type_halo( loc_idx )    = halo + 1;
                     elem_type_patch( loc_idx )   = 0;
-                    for ( size_t n = 0; n < node_connectivity.cols(); ++n )
+                    elem_type_flags( loc_idx )   = buf.elem_flags[jpart][jelem];
+                    for ( size_t n = 0; n < node_connectivity.cols(); ++n ) {
                         node_connectivity.set(
                             loc_idx, n, uid2node[buf.elem_nodes_id[jpart][buf.elem_nodes_displs[jpart][jelem] + n]] );
+                    }
 
-                    if ( periodic ) { status.new_periodic_ghost_cells[t].push_back( old_size + new_elem ); }
-
+                    if( Topology::check( elem_type_flags( loc_idx ), Topology::PERIODIC ) ) {
+                        status.new_periodic_ghost_cells[t].push_back( old_size + new_elem );
+                    }
                     ++new_elem;
                 }
             }
         }
     }
 
-    void add_buffers( Buffers& buf, bool periodic = false ) {
-        add_nodes( buf, periodic );
-        add_elements( buf, periodic );
+    void add_buffers( Buffers& buf ) {
+        add_nodes( buf );
+        add_elements( buf );
         update();
     }
 };
@@ -1196,9 +1225,11 @@ void increase_halo_periodic( BuildHaloHelper& helper, const PeriodicPoints& peri
 #ifdef DEBUG_OUTPUT
     Log::debug() << "recv: \n" << recvmesh << std::endl;
 #endif
-    helper.add_buffers( recvmesh, /* periodic = */ true );
+    helper.add_buffers( recvmesh );
 }
 
+BuildHalo::BuildHalo(Mesh& mesh) : mesh_( mesh ), periodic_cells_local_index_( mesh.cells().nb_types() ) {}
+
 void BuildHalo::operator()( int nb_elems ) {
     ATLAS_TRACE( "BuildHalo" );
 
@@ -1240,12 +1271,10 @@ void BuildHalo::operator()( int nb_elems ) {
         for ( idx_t p : helper.status.new_periodic_ghost_points ) {
             periodic_points_local_index_.push_back( p );
         }
-        int c( 0 );
         for ( int t = 0; t < mesh_.cells().nb_types(); ++t ) {
             for ( idx_t p : helper.status.new_periodic_ghost_cells[t] ) {
-                periodic_cells_local_index_.push_back( c + p );
+               periodic_cells_local_index_[t].push_back( p );
             }
-            c += mesh_.cells().elements( t ).size();
         }
 
         std::stringstream ss;
@@ -1268,6 +1297,7 @@ void BuildHalo::operator()( int nb_elems ) {
 
     make_nodes_global_index_human_readable( *this, mesh_.nodes(),
                                             /*do_all*/ false );
+
     make_cells_global_index_human_readable( *this, mesh_.cells(),
                                             /*do_all*/ false );
     //  renumber_nodes_glb_idx (mesh_.nodes());
diff --git a/src/atlas/mesh/actions/BuildHalo.h b/src/atlas/mesh/actions/BuildHalo.h
index 99839b60f..542af321f 100644
--- a/src/atlas/mesh/actions/BuildHalo.h
+++ b/src/atlas/mesh/actions/BuildHalo.h
@@ -23,12 +23,12 @@ namespace actions {
 
 class BuildHalo {
 public:
-    BuildHalo( Mesh& mesh ) : mesh_( mesh ) {}
+    BuildHalo( Mesh& mesh );
     void operator()( int nb_elems );
 
 public:
     std::vector<idx_t> periodic_points_local_index_;
-    std::vector<idx_t> periodic_cells_local_index_;
+    std::vector<std::vector<idx_t>> periodic_cells_local_index_;
 
 private:
     Mesh& mesh_;

From 206f676e55e88401b7c6f7876714a5b031f07790 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 31 May 2018 10:50:31 +0100
Subject: [PATCH 099/123] ATLAS-160 Remove cells.field("patch") in favour of
 cells.field("flags")

---
 src/atlas/mesh/Elements.h                          |  3 +++
 src/atlas/mesh/HybridElements.cc                   |  4 +---
 src/atlas/mesh/Nodes.h                             |  3 ++-
 src/atlas/mesh/PartitionPolygon.cc                 | 11 ++++++++---
 src/atlas/mesh/actions/BuildDualMesh.cc            |  7 ++++++-
 src/atlas/mesh/actions/BuildEdges.cc               | 11 +++++++----
 src/atlas/mesh/actions/BuildHalo.cc                |  8 +++++---
 src/atlas/mesh/detail/AccumulateFacets.cc          |  7 ++++++-
 src/atlas/meshgenerator/StructuredMeshGenerator.cc | 11 ++++-------
 9 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/src/atlas/mesh/Elements.h b/src/atlas/mesh/Elements.h
index 12024510f..b765f00ea 100644
--- a/src/atlas/mesh/Elements.h
+++ b/src/atlas/mesh/Elements.h
@@ -116,6 +116,9 @@ class Elements : public eckit::Owned {
     const Field& halo() const { return hybrid_elements_->halo(); }
     Field& halo() { return hybrid_elements_->halo(); }
 
+    const Field& flags() const { return hybrid_elements_->flags(); }
+    Field& flags() { return hybrid_elements_->flags(); }
+
     template <typename DATATYPE, int RANK>
     array::LocalView<DATATYPE, RANK, array::Intent::ReadOnly> view( const Field& ) const;
 
diff --git a/src/atlas/mesh/HybridElements.cc b/src/atlas/mesh/HybridElements.cc
index 91c345438..d663321d2 100644
--- a/src/atlas/mesh/HybridElements.cc
+++ b/src/atlas/mesh/HybridElements.cc
@@ -50,14 +50,13 @@ static void set_uninitialized_fields_to_zero( HybridElements& elems, size_t begi
     ArrayView<int, 1> partition       = make_view<int, 1>( elems.partition() );
     ArrayView<int, 1> halo            = make_view<int, 1>( elems.halo() );
     ArrayView<int, 1> flags           = make_view<int, 1>( elems.flags() );
-    ArrayView<int, 1> patch           = make_view<int, 1>( elems.field( "patch" ) );
 
     for ( size_t j = begin; j < elems.size(); ++j ) {
         global_index( j ) = 0;
         remote_index( j ) = 0;
         partition( j )    = 0;
         halo( j )         = 0;
-        patch( j )        = 0;
+        flags( j )        = 0;
     }
 }
 }  // namespace
@@ -70,7 +69,6 @@ HybridElements::HybridElements() : size_( 0 ), elements_size_(), elements_begin_
     add( Field( "partition", make_datatype<int>(), make_shape( size() ) ) );
     add( Field( "halo", make_datatype<int>(), make_shape( size() ) ) );
     add( Field( "flags", make_datatype<int>(), make_shape( size() ) ) );
-    add( Field( "patch", make_datatype<int>(), make_shape( size() ) ) );
     set_uninitialized_fields_to_zero( *this, 0 );
 
     node_connectivity_ = &add( new Connectivity( "node" ) );
diff --git a/src/atlas/mesh/Nodes.h b/src/atlas/mesh/Nodes.h
index 4c4713fb9..46e0d4abb 100644
--- a/src/atlas/mesh/Nodes.h
+++ b/src/atlas/mesh/Nodes.h
@@ -47,7 +47,8 @@ class Nodes : public eckit::Owned {
             WEST     = ( 1 << 4 ),
             EAST     = ( 1 << 5 ),
             NORTH    = ( 1 << 6 ),
-            SOUTH    = ( 1 << 7 )
+            SOUTH    = ( 1 << 7 ),
+            PATCH    = ( 1 << 8 )
         };
     };
 
diff --git a/src/atlas/mesh/PartitionPolygon.cc b/src/atlas/mesh/PartitionPolygon.cc
index aadf2e8b3..c18c3ebe7 100644
--- a/src/atlas/mesh/PartitionPolygon.cc
+++ b/src/atlas/mesh/PartitionPolygon.cc
@@ -27,13 +27,18 @@ util::Polygon::edge_set_t compute_edges( const detail::MeshImpl& mesh, size_t ha
         const Elements& elements = mesh.cells().elements( t );
 
         const BlockConnectivity& conn = elements.node_connectivity();
-        auto field_patch              = elements.view<int, 1>( elements.field( "patch" ) );
-        auto field_halo               = elements.view<int, 1>( elements.field( "halo" ) );
+        auto field_flags              = elements.view<int, 1>( elements.flags() );
+        auto field_halo               = elements.view<int, 1>( elements.halo() );
+
+        auto patch = [&field_flags]( size_t e ) {
+            using Topology = atlas::mesh::Nodes::Topology;
+            return Topology::check( field_flags( e ), Topology::PATCH );
+        };
 
         const size_t nb_nodes = elements.nb_nodes();
 
         for ( size_t j = 0; j < elements.size(); ++j ) {
-            if ( field_patch( j ) == 0 && field_halo( j ) <= halo ) {
+            if ( patch( j ) == 0 && field_halo( j ) <= halo ) {
                 for ( size_t k = 0; k < nb_nodes; ++k ) {
                     util::Polygon::edge_t edge( conn( j, k ), conn( j, ( k + 1 ) % nb_nodes ) );
                     if ( !edges.erase( edge.reverse() ) ) { edges.insert( edge ); }
diff --git a/src/atlas/mesh/actions/BuildDualMesh.cc b/src/atlas/mesh/actions/BuildDualMesh.cc
index 52ed5edb3..2fe9fb693 100644
--- a/src/atlas/mesh/actions/BuildDualMesh.cc
+++ b/src/atlas/mesh/actions/BuildDualMesh.cc
@@ -167,7 +167,12 @@ void add_median_dual_volume_contribution_cells( const mesh::HybridElements& cell
     const array::ArrayView<double, 2> edge_centroids = array::make_view<double, 2>( edges.field( "centroids_xy" ) );
     const mesh::HybridElements::Connectivity& cell_edge_connectivity = cells.edge_connectivity();
     const mesh::HybridElements::Connectivity& edge_node_connectivity = edges.node_connectivity();
-    auto patch = array::make_view<int, 1>( cells.field( "patch" ) );
+    auto field_flags = array::make_view<int, 1>( cells.flags() );
+
+    auto patch = [&field_flags]( size_t e ) {
+        using Topology = atlas::mesh::Nodes::Topology;
+        return Topology::check( field_flags( e ), Topology::PATCH );
+    };
 
     // special ordering for bit-identical results
     size_t nb_cells = cells.size();
diff --git a/src/atlas/mesh/actions/BuildEdges.cc b/src/atlas/mesh/actions/BuildEdges.cc
index e4ea06ddc..d06296268 100644
--- a/src/atlas/mesh/actions/BuildEdges.cc
+++ b/src/atlas/mesh/actions/BuildEdges.cc
@@ -113,13 +113,16 @@ void build_element_to_edge_connectivity( Mesh& mesh ) {
         }
     }
 
+
     // Verify that all edges have been found
+    auto field_flags = array::make_view<int, 1>( mesh.cells().flags() );
+    auto patch = [&field_flags]( size_t e ) {
+        using Topology = atlas::mesh::Nodes::Topology;
+        return Topology::check( field_flags( e ), Topology::PATCH );
+    };
+
     for ( size_t jcell = 0; jcell < mesh.cells().size(); ++jcell ) {
-        // If this is a patched element (over the pole), there were no edges
-        // created, so skip the check.
-        auto patch = array::make_view<int, 1>( mesh.cells().field( "patch" ) );
         if ( patch( jcell ) ) continue;
-
         for ( size_t jcol = 0; jcol < cell_edge_connectivity.cols( jcell ); ++jcol ) {
             if ( cell_edge_connectivity( jcell, jcol ) == cell_edge_connectivity.missing_value() ) {
                 const array::ArrayView<gidx_t, 1> gidx = array::make_view<gidx_t, 1>( mesh.nodes().global_index() );
diff --git a/src/atlas/mesh/actions/BuildHalo.cc b/src/atlas/mesh/actions/BuildHalo.cc
index acb5fdf6d..3fb385ceb 100644
--- a/src/atlas/mesh/actions/BuildHalo.cc
+++ b/src/atlas/mesh/actions/BuildHalo.cc
@@ -307,7 +307,11 @@ void build_lookup_node2elem( const Mesh& mesh, Node2Elem& node2elem ) {
     }
 
     const mesh::HybridElements::Connectivity& elem_nodes = mesh.cells().node_connectivity();
-    auto patched                                         = array::make_view<int, 1>( mesh.cells().field( "patch" ) );
+    auto field_flags                                     = array::make_view<int, 1>( mesh.cells().flags() );
+    auto patched = [&field_flags]( size_t e ) {
+        using Topology = atlas::mesh::Nodes::Topology;
+        return Topology::check( field_flags( e ), Topology::PATCH );
+    };
 
     size_t nb_elems = mesh.cells().size();
     for ( size_t elem = 0; elem < nb_elems; ++elem ) {
@@ -950,7 +954,6 @@ class BuildHaloHelper {
             auto elem_type_part    = elements.view<int, 1>( mesh.cells().partition() );
             auto elem_type_halo    = elements.view<int, 1>( mesh.cells().halo() );
             auto elem_type_flags   = elements.view<int, 1>( mesh.cells().flags() );
-            auto elem_type_patch   = elements.view<int, 1>( mesh.cells().field( "patch" ) );
 
             // Copy information in new elements
             size_t new_elem( 0 );
@@ -961,7 +964,6 @@ class BuildHaloHelper {
                     elem_type_glb_idx( loc_idx ) = std::abs( buf.elem_glb_idx[jpart][jelem] );
                     elem_type_part( loc_idx )    = buf.elem_part[jpart][jelem];
                     elem_type_halo( loc_idx )    = halo + 1;
-                    elem_type_patch( loc_idx )   = 0;
                     elem_type_flags( loc_idx )   = buf.elem_flags[jpart][jelem];
                     for ( size_t n = 0; n < node_connectivity.cols(); ++n ) {
                         node_connectivity.set(
diff --git a/src/atlas/mesh/detail/AccumulateFacets.cc b/src/atlas/mesh/detail/AccumulateFacets.cc
index 827a1e3ed..5262377ca 100644
--- a/src/atlas/mesh/detail/AccumulateFacets.cc
+++ b/src/atlas/mesh/detail/AccumulateFacets.cc
@@ -37,7 +37,12 @@ void accumulate_facets( const mesh::HybridElements& cells, const mesh::Nodes& no
     for ( size_t t = 0; t < cells.nb_types(); ++t ) {
         const mesh::Elements& elements            = cells.elements( t );
         const mesh::BlockConnectivity& elem_nodes = elements.node_connectivity();
-        auto patch                                = elements.view<int, 1>( elements.field( "patch" ) );
+        auto elem_flags                           = elements.view<int, 1>( elements.flags() );
+
+        auto patch = [&elem_flags]( size_t e ) {
+            using Topology = atlas::mesh::Nodes::Topology;
+            return Topology::check( elem_flags( e ), Topology::PATCH );
+        };
 
         size_t nb_elems          = elements.size();
         size_t nb_nodes_in_facet = 2;
diff --git a/src/atlas/meshgenerator/StructuredMeshGenerator.cc b/src/atlas/meshgenerator/StructuredMeshGenerator.cc
index 6ee5e8936..028e90a1e 100644
--- a/src/atlas/meshgenerator/StructuredMeshGenerator.cc
+++ b/src/atlas/meshgenerator/StructuredMeshGenerator.cc
@@ -1005,12 +1005,12 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con
     mesh::HybridElements::Connectivity& node_connectivity = mesh.cells().node_connectivity();
     array::ArrayView<gidx_t, 1> cells_glb_idx             = array::make_view<gidx_t, 1>( mesh.cells().global_index() );
     array::ArrayView<int, 1> cells_part                   = array::make_view<int, 1>( mesh.cells().partition() );
-    array::ArrayView<int, 1> cells_patch                  = array::make_view<int, 1>( mesh.cells().field( "patch" ) );
+    array::ArrayView<int, 1> cells_flags                  = array::make_view<int, 1>( mesh.cells().flags() );
 
     /*
      * label all patch cells a non-patch
      */
-    cells_patch.assign( 0 );
+    cells_flags.assign( 0 );
 
     /*
      * Fill in connectivity tables with global node indices first
@@ -1050,7 +1050,6 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con
                 node_connectivity.set( jcell, quad_nodes );
                 cells_glb_idx( jcell ) = jcell + 1;
                 cells_part( jcell )    = mypart;
-                cells_patch( jcell )   = 0;
             }
             else  // This is a triag
             {
@@ -1088,7 +1087,6 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con
                 node_connectivity.set( jcell, triag_nodes );
                 cells_glb_idx( jcell ) = jcell + 1;
                 cells_part( jcell )    = mypart;
-                cells_patch( jcell )   = 0;
             }
         }
     }
@@ -1107,7 +1105,6 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con
             node_connectivity.set( jcell, triag_nodes );
             cells_glb_idx( jcell ) = jcell + 1;
             cells_part( jcell )    = mypart;
-            cells_patch( jcell )   = 0;
         }
     }
     else if ( patch_north_pole ) {
@@ -1140,7 +1137,7 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con
 
             cells_glb_idx( jcell ) = jcell + 1;
             cells_part( jcell )    = mypart;
-            cells_patch( jcell )   = 1;  // mark cell as "patch"
+            Topology::set( cells_flags( jcell ), Topology::PATCH );
 
             if ( jbackward == jforward + 2 ) break;
 
@@ -1203,7 +1200,7 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con
 
             cells_glb_idx( jcell ) = jcell + 1;
             cells_part( jcell )    = mypart;
-            cells_patch( jcell )   = 1;  // mark cell as "patch"
+            Topology::set( cells_flags( jcell ), Topology::PATCH );
 
             if ( jbackward == jforward + 2 ) break;
 

From af1d543a95a37b0cec353776718f4e3dd9611d13 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 31 May 2018 10:51:08 +0100
Subject: [PATCH 100/123] Reenable writing of field

---
 src/tests/acceptance_tests/atest_mgrids.cc | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/tests/acceptance_tests/atest_mgrids.cc b/src/tests/acceptance_tests/atest_mgrids.cc
index 948a4a9e7..70287a218 100644
--- a/src/tests/acceptance_tests/atest_mgrids.cc
+++ b/src/tests/acceptance_tests/atest_mgrids.cc
@@ -55,14 +55,14 @@ Program::Program( int argc, char** argv ) : AtlasTool( argc, argv ) {
 //-----------------------------------------------------------------------------
 
 void Program::execute( const Args& args ) {
-  
+
   auto ghost = util::Config("ghost",args.getBool("ghost",false));
   auto haloA = option::halo( args.getLong("haloA",1) );
   auto haloB = option::halo( args.getLong("haloB",1) );
-  
+
   auto gridA = Grid( args.getString("gridA") );
   auto gridB = Grid( args.getString("gridB") );
-  
+
   auto meshgenerator = MeshGenerator( "structured" );
 
   auto distA = grid::Distribution( gridA, grid::Partitioner( "trans" ) );
@@ -73,18 +73,17 @@ void Program::execute( const Args& args ) {
   auto gmshA = output::Gmsh( "meshA.msh", ghost );
   gmshA.write(meshA);
 
-
   auto distB = grid::Distribution( gridB, grid::MatchingMeshPartitioner( meshA ) );
 
   auto meshB = meshgenerator.generate( gridB, distB );
 
   numerics::fvm::Method fvmB(meshB,haloB);
 
-  // Field fieldB = fvmB.node_columns().createField<double>();
+  Field fieldB = fvmB.node_columns().createField<double>();
 
   output::Gmsh gmshB( "meshB.msh", ghost );
   gmshB.write(meshB);
-  // gmshB.write(fieldB);
+  gmshB.write(fieldB);
 
   Interpolation AtoB( option::type("finite-element"), fvmA.node_columns(), fvmB.node_columns() );
   Interpolation BtoA( option::type("finite-element"), fvmB.node_columns(), fvmA.node_columns() );

From 0e910aa24b9a96191e3e6979fb044862757409cf Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 31 May 2018 09:33:45 +0100
Subject: [PATCH 101/123] Move some output to trace channel

---
 .../detail/partitioner/MatchingMeshPartitionerBruteForce.cc     | 2 +-
 .../detail/partitioner/MatchingMeshPartitionerLonLatPolygon.cc  | 2 +-
 .../partitioner/MatchingMeshPartitionerSphericalPolygon.cc      | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerBruteForce.cc b/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerBruteForce.cc
index efe07b5c9..594405720 100644
--- a/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerBruteForce.cc
+++ b/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerBruteForce.cc
@@ -100,7 +100,7 @@ void MatchingMeshPartitionerBruteForce::partition( const Grid& grid, int partiti
     }
 
     {
-        eckit::ProgressTimer timer( "Partitioning target", grid.size(), "point", double( 10 ), atlas::Log::info() );
+        eckit::ProgressTimer timer( "Partitioning target", grid.size(), "point", double( 10 ), atlas::Log::trace() );
         for ( size_t i = 0; i < grid.size(); ++i, ++timer ) {
             partitioning[i] = -1;
             const PointLonLat& P( coordinates[i] );
diff --git a/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerLonLatPolygon.cc b/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerLonLatPolygon.cc
index af30c791e..0fc38d583 100644
--- a/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerLonLatPolygon.cc
+++ b/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerLonLatPolygon.cc
@@ -47,7 +47,7 @@ void MatchingMeshPartitionerLonLatPolygon::partition( const Grid& grid, int part
     const util::LonLatPolygon poly( prePartitionedMesh_.polygon( 0 ), prePartitionedMesh_.nodes().lonlat() );
 
     {
-        eckit::ProgressTimer timer( "Partitioning", grid.size(), "point", double( 10 ), atlas::Log::info() );
+        eckit::ProgressTimer timer( "Partitioning", grid.size(), "point", double( 10 ), atlas::Log::trace() );
         size_t i = 0;
 
         for ( const PointXY Pxy : grid.xy() ) {
diff --git a/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerSphericalPolygon.cc b/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerSphericalPolygon.cc
index a4e42abaa..39d52178f 100644
--- a/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerSphericalPolygon.cc
+++ b/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerSphericalPolygon.cc
@@ -46,7 +46,7 @@ void MatchingMeshPartitionerSphericalPolygon::partition( const Grid& grid, int p
     const util::SphericalPolygon poly( prePartitionedMesh_.polygon( 0 ), prePartitionedMesh_.nodes().lonlat() );
 
     {
-        eckit::ProgressTimer timer( "Partitioning", grid.size(), "point", double( 10 ), atlas::Log::info() );
+        eckit::ProgressTimer timer( "Partitioning", grid.size(), "point", double( 10 ), atlas::Log::trace() );
         size_t i = 0;
 
         for ( const PointXY Pxy : grid.xy() ) {

From cca0973a1bdc619d2458cf1a2bc50912d9669975 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 31 May 2018 17:52:33 +0100
Subject: [PATCH 102/123] ATLAS-161 Fix parallel FiniteElement interpolation

---
 .../interpolation/method/FiniteElement.cc     |  7 ++-
 src/atlas/mesh/actions/BuildCellCentres.cc    | 60 +++++++++++++------
 src/atlas/mesh/actions/BuildCellCentres.h     |  7 ++-
 3 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/src/atlas/interpolation/method/FiniteElement.cc b/src/atlas/interpolation/method/FiniteElement.cc
index b7b255ef4..9a70c4a15 100644
--- a/src/atlas/interpolation/method/FiniteElement.cc
+++ b/src/atlas/interpolation/method/FiniteElement.cc
@@ -88,7 +88,10 @@ void FiniteElement::setup( const FunctionSpace& source ) {
     Field source_xyz = mesh::actions::BuildXYZField( "xyz" )( meshSource );
 
     // generate barycenters of each triangle & insert them on a kd-tree
-    Field cell_centres = mesh::actions::BuildCellCentres( "centre" )( meshSource );
+    util::Config config;
+    config.set("name", "centre ");
+    config.set("flatten_virtual_elements", false );
+    Field cell_centres = mesh::actions::BuildCellCentres( config )( meshSource );
 
     eckit::ScopedPtr<ElemIndex3> eTree( create_element_kdtree( cell_centres ) );
 
@@ -150,6 +153,7 @@ void FiniteElement::setup( const FunctionSpace& source ) {
                                 "---------------------\n";
                 PointLonLat pll;
                 util::Earth::convertCartesianToSpherical( p, pll );
+                if( pll.lon() < 0 ) pll.lon() += 360.;
                 Log::debug() << "Failed to project point (lon,lat)=" << pll << '\n';
                 Log::debug() << failures_log.str();
             }
@@ -166,6 +170,7 @@ void FiniteElement::setup( const FunctionSpace& source ) {
             const PointXYZ p{( *ocoords_ )( *i, 0 ), ( *ocoords_ )( *i, 1 ), ( *ocoords_ )( *i, 2 )};  // lookup point
             PointLonLat pll;
             util::Earth::convertCartesianToSpherical( p, pll );
+            if( pll.lon() < 0 ) pll.lon() += 360.;
             msg << "\t(lon,lat) = " << pll << "\n";
         }
 
diff --git a/src/atlas/mesh/actions/BuildCellCentres.cc b/src/atlas/mesh/actions/BuildCellCentres.cc
index 71ff4dd80..2c2e1f773 100644
--- a/src/atlas/mesh/actions/BuildCellCentres.cc
+++ b/src/atlas/mesh/actions/BuildCellCentres.cc
@@ -26,10 +26,25 @@ namespace actions {
 
 //----------------------------------------------------------------------------------------------------------------------
 
-BuildCellCentres::BuildCellCentres( const std::string& field_name ) : field_name_( field_name ) {}
+BuildCellCentres::BuildCellCentres( const std::string& field_name, bool force_recompute ) :
+    field_name_( field_name ),
+    force_recompute_( force_recompute ),
+    flatten_virtual_elements_( true ) {
+}
+
+BuildCellCentres::BuildCellCentres( eckit::Configuration& config ) :
+    field_name_( config.getString( "name", "centre" ) ),
+    force_recompute_( config.getBool( "force_recompute", false ) ),
+    flatten_virtual_elements_( config.getBool( "flatten_virtual_elements", true) ) {
+}
 
 Field& BuildCellCentres::operator()( Mesh& mesh ) const {
+    bool recompute = force_recompute_;
     if ( !mesh.cells().has_field( field_name_ ) ) {
+        mesh.cells().add( Field( field_name_, array::make_datatype<double>(), array::make_shape( mesh.cells().size(), 3 ) ) );
+        recompute = true;
+    }
+    if ( recompute ) {
         mesh::Nodes& nodes                 = mesh.nodes();
         array::ArrayView<double, 2> coords = array::make_view<double, 2>( nodes.field( "xyz" ) );
 
@@ -37,8 +52,7 @@ Field& BuildCellCentres::operator()( Mesh& mesh ) const {
         if ( nodes.metadata().has( "NbRealPts" ) ) { firstVirtualPoint = nodes.metadata().get<size_t>( "NbRealPts" ); }
 
         size_t nb_cells                       = mesh.cells().size();
-        array::ArrayView<double, 2> centroids = array::make_view<double, 2>( mesh.cells().add(
-            Field( field_name_, array::make_datatype<double>(), array::make_shape( nb_cells, 3 ) ) ) );
+        auto centroids = array::make_view<double, 2>( mesh.cells().field( field_name_ ) );
         const mesh::HybridElements::Connectivity& cell_node_connectivity = mesh.cells().node_connectivity();
 
         for ( size_t e = 0; e < nb_cells; ++e ) {
@@ -68,23 +82,33 @@ Field& BuildCellCentres::operator()( Mesh& mesh ) const {
             int nb_unique_nodes = int( nb_cell_nodes ) - nb_equal_nodes;
             if ( nb_unique_nodes < 3 ) { continue; }
 
-            // calculate centroid by averaging coordinates (uses only "real" nodes)
-            size_t nb_real_nodes = 0;
-            for ( size_t n = 0; n < nb_cell_nodes; ++n ) {
-                const size_t i = size_t( cell_node_connectivity( e, n ) );
-                if ( i < firstVirtualPoint ) {
-                    ++nb_real_nodes;
-                    centroids( e, XX ) += coords( i, XX );
-                    centroids( e, YY ) += coords( i, YY );
-                    centroids( e, ZZ ) += coords( i, ZZ );
+            if( flatten_virtual_elements_ ) {
+                // calculate centroid by averaging coordinates (uses only "real" nodes)
+                size_t nb_real_nodes = 0;
+                for ( size_t n = 0; n < nb_cell_nodes; ++n ) {
+                    const size_t i = size_t( cell_node_connectivity( e, n ) );
+                    if ( i < firstVirtualPoint ) {
+                        ++nb_real_nodes;
+                        centroids( e, XX ) += coords( i, XX );
+                        centroids( e, YY ) += coords( i, YY );
+                        centroids( e, ZZ ) += coords( i, ZZ );
+                    }
                 }
-            }
 
-            if ( nb_real_nodes > 1 ) {
-                const double average_coefficient = 1. / static_cast<double>( nb_real_nodes );
-                centroids( e, XX ) *= average_coefficient;
-                centroids( e, YY ) *= average_coefficient;
-                centroids( e, ZZ ) *= average_coefficient;
+                if ( nb_real_nodes > 1 ) {
+                    const double average_coefficient = 1. / static_cast<double>( nb_real_nodes );
+                    centroids( e, XX ) *= average_coefficient;
+                    centroids( e, YY ) *= average_coefficient;
+                    centroids( e, ZZ ) *= average_coefficient;
+                }
+            } else {
+                const double average_coefficient = 1./ static_cast<double>( nb_cell_nodes );
+                for ( size_t n = 0; n < nb_cell_nodes; ++n ) {
+                    const size_t i = size_t( cell_node_connectivity( e, n ) );
+                    for ( size_t d=0; d<3; ++d ) {
+                        centroids( e, d ) += coords( i, d ) * average_coefficient;
+                    }
+                }
             }
         }
     }
diff --git a/src/atlas/mesh/actions/BuildCellCentres.h b/src/atlas/mesh/actions/BuildCellCentres.h
index 0df60152b..6f4182cd3 100644
--- a/src/atlas/mesh/actions/BuildCellCentres.h
+++ b/src/atlas/mesh/actions/BuildCellCentres.h
@@ -12,6 +12,8 @@
 
 #include <string>
 
+#include "atlas/util/Config.h"
+
 namespace atlas {
 
 class Mesh;
@@ -23,13 +25,16 @@ namespace actions {
 /// Generates the cell centres on each cell
 class BuildCellCentres {
 public:
-    BuildCellCentres( const std::string& field_name = "centre" );
+    BuildCellCentres( const std::string& field_name = "centre", bool force_recompute = false );
+    BuildCellCentres( eckit::Configuration& );
 
     /// @note Correct only for Linear Triangles and Quadrilaterals
     Field& operator()( Mesh& ) const;
 
 private:
     std::string field_name_;
+    bool force_recompute_;
+    bool flatten_virtual_elements_;
 };
 
 }  // namespace actions

From 90648c8a8bdd6dfe0cd8b43f6c12d8406c189a5f Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Thu, 31 May 2018 17:01:13 +0000
Subject: [PATCH 103/123] ATLAS-161 Add more options to atlas_atest_mgrids

---
 src/tests/acceptance_tests/atest_mgrids.cc | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/tests/acceptance_tests/atest_mgrids.cc b/src/tests/acceptance_tests/atest_mgrids.cc
index 70287a218..c71df090f 100644
--- a/src/tests/acceptance_tests/atest_mgrids.cc
+++ b/src/tests/acceptance_tests/atest_mgrids.cc
@@ -50,6 +50,8 @@ Program::Program( int argc, char** argv ) : AtlasTool( argc, argv ) {
     add_option( new SimpleOption<bool>( "ghost", "Output ghost elements" ) );
     add_option( new SimpleOption<long>( "haloA", "Halo size" ) );
     add_option( new SimpleOption<long>( "haloB", "Halo size" ) );
+    add_option( new SimpleOption<bool>( "no-forward",  "no forward interpolation" ) );
+    add_option( new SimpleOption<bool>( "no-backward", "no backward interpolation" ) );
 }
 
 //-----------------------------------------------------------------------------
@@ -85,9 +87,12 @@ void Program::execute( const Args& args ) {
   gmshB.write(meshB);
   gmshB.write(fieldB);
 
-  Interpolation AtoB( option::type("finite-element"), fvmA.node_columns(), fvmB.node_columns() );
-  Interpolation BtoA( option::type("finite-element"), fvmB.node_columns(), fvmA.node_columns() );
-
+  if( not args.getBool("no-forward",false) ) {
+    Interpolation AtoB( option::type("finite-element"), fvmA.node_columns(), fvmB.node_columns() );
+  }
+  if( not args.getBool("no-backward",false) ) {  
+    Interpolation BtoA( option::type("finite-element"), fvmB.node_columns(), fvmA.node_columns() );
+  }
 }
 
 //------------------------------------------------------------------------------

From fc3b8c43970da00fe9743c57a5815c9f5ccbdce4 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Wed, 30 May 2018 18:56:47 +0100
Subject: [PATCH 104/123] some cleanup

---
 src/atlas/trans/local/TransLocal.cc      | 47 ++++++++++++------------
 src/atlas/trans/local/VorDivToUVLocal.cc | 32 ++++++++--------
 src/tests/trans/test_transgeneral.cc     | 13 ++++---
 3 files changed, 47 insertions(+), 45 deletions(-)

diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc
index bbb417962..d8d945dc7 100644
--- a/src/atlas/trans/local/TransLocal.cc
+++ b/src/atlas/trans/local/TransLocal.cc
@@ -529,7 +529,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
                 //                    write.close();
                 //                }
             }
-                // other FFT implementations should be added with #elif statements
+            // other FFT implementations should be added with #elif statements
 #else
             useFFT_               = false;  // no FFT implemented => default to dgemm
             std::string file_path = TransParameters( config ).write_fft();
@@ -894,26 +894,25 @@ void TransLocal::invtrans_fourier_regular( const int nlats, const int nlons, con
             eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats );
             eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats );
 
-// BUG ATLAS-159: valgrind warns here, saying that B(1,:) is uninitialised
-//                if workaround above labeled ATLAS-159 is not applied.
-//
-//                        for( int i=0; i<A.rows(); ++i ) {
-//                          for ( int j=0; j<A.cols(); ++j ) {
-//                            if( A(i,j) == 999.999 ) {
-//                              ASSERT(false);
-//                            }
-//                          }
-//                        }
-//                        for ( int i=0; i<B.rows(); ++i ) {
-//                          for( int j=0; j<B.cols(); ++j ) {
-//                            if( B(i,j) == 999.999 ) {
-//                              ASSERT(false);
-//                            }
-//                          }
-//                        }
+            // BUG ATLAS-159: valgrind warns here, saying that B(1,:) is uninitialised
+            //                if workaround above labeled ATLAS-159 is not applied.
+            //
+            //                        for( int i=0; i<A.rows(); ++i ) {
+            //                          for ( int j=0; j<A.cols(); ++j ) {
+            //                            if( A(i,j) == 999.999 ) {
+            //                              ASSERT(false);
+            //                            }
+            //                          }
+            //                        }
+            //                        for ( int i=0; i<B.rows(); ++i ) {
+            //                          for( int j=0; j<B.cols(); ++j ) {
+            //                            if( B(i,j) == 999.999 ) {
+            //                              ASSERT(false);
+            //                            }
+            //                          }
+            //                        }
 
             linalg_.gemm( A, B, C );
-
         }
 #else
         // dgemm-method 2
@@ -1228,11 +1227,11 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields,
             double* scl_fourier;
             alloc_aligned( scl_fourier, size_fourier_max * ( truncation_ + 1 ) );
 
-// ATLAS-159 workaround begin
-            for( int i=0; i<size_fourier_max*(truncation_+1); ++i ) {
-              scl_fourier[i] = 0.;
+            // ATLAS-159 workaround begin
+            for ( int i = 0; i < size_fourier_max * ( truncation_ + 1 ); ++i ) {
+                scl_fourier[i] = 0.;
             }
-// ATLAS-159 workaround end
+            // ATLAS-159 workaround end
 
             // Legendre transformation:
             invtrans_legendre( truncation, nlats, nb_scalar_fields, scalar_spectra, scl_fourier, config );
@@ -1330,7 +1329,7 @@ void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spect
         {
             ATLAS_TRACE( "vordiv to UV" );
             // call vd2uv to compute u and v in spectral space
-            trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "Local" ) );
+            trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "local" ) );
             vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
                                       divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
         }
diff --git a/src/atlas/trans/local/VorDivToUVLocal.cc b/src/atlas/trans/local/VorDivToUVLocal.cc
index 647fe6382..dd171b718 100644
--- a/src/atlas/trans/local/VorDivToUVLocal.cc
+++ b/src/atlas/trans/local/VorDivToUVLocal.cc
@@ -27,11 +27,11 @@ static VorDivToUVBuilder<VorDivToUVLocal> builder( "local" );
 // --------------------------------------------------------------------------------------------------------------------
 // Routine to copy spectral data into internal storage form of IFS trans
 // Ported to C++ by: Andreas Mueller *ECMWF*
-void prfi1bopt3( const int truncation,
-                 const int km,          // zonal wavenumber
-                 const int nb_fields,   // number of fields
-                 const double rspec[],  // spectral data
-                 double pia[] )         // spectral components in data layout of trans library
+void prfi1b( const int truncation,
+             const int km,          // zonal wavenumber
+             const int nb_fields,   // number of fields
+             const double rspec[],  // spectral data
+             double pia[] )         // spectral components in data layout of trans library
 {
     int ilcm = truncation + 1 - km, ioff = ( 2 * truncation - km + 3 ) * km,
         nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2;
@@ -58,14 +58,14 @@ void prfi1bopt3( const int truncation,
 //        ECMWF Research Department documentation of the IFS
 //        Temperton, 1991, MWR 119 p1303
 // Ported to C++ by: Andreas Mueller *ECMWF*
-void vd2uvopt3( const int truncation,               // truncation
-                const int km,                       // zonal wavenumber
-                const int nb_vordiv_fields,         // number of vorticity and divergence fields
-                const double vorticity_spectra[],   // spectral data of vorticity
-                const double divergence_spectra[],  // spectral data of divergence
-                double U[],                         // spectral data of U
-                double V[],                         // spectral data of V
-                const eckit::Configuration& config ) {
+void vd2uv( const int truncation,               // truncation
+            const int km,                       // zonal wavenumber
+            const int nb_vordiv_fields,         // number of vorticity and divergence fields
+            const double vorticity_spectra[],   // spectral data of vorticity
+            const double divergence_spectra[],  // spectral data of divergence
+            double U[],                         // spectral data of U
+            double V[],                         // spectral data of V
+            const eckit::Configuration& config ) {
     int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2;
 
     // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991]
@@ -112,8 +112,8 @@ void vd2uvopt3( const int truncation,               // truncation
     std::vector<double> rdiv( 2 * nb_vordiv_fields * nlei1 );
     std::vector<double> ru( 2 * nb_vordiv_fields * nlei1 );
     std::vector<double> rv( 2 * nb_vordiv_fields * nlei1 );
-    prfi1bopt3( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() );
-    prfi1bopt3( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() );
+    prfi1b( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() );
+    prfi1b( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() );
 
     // compute eq.(2.12) and (2.13) in [Temperton 1991]:
     if ( km == 0 ) {
@@ -168,7 +168,7 @@ void VorDivToUVLocal::execute( const int nb_coeff, const int nb_fields, const do
                                const double divergence[], double U[], double V[],
                                const eckit::Configuration& config ) const {
     for ( int jm = 0; jm <= truncation_; ++jm ) {
-        vd2uvopt3( truncation_, jm, nb_fields, vorticity, divergence, U, V, config );
+        vd2uv( truncation_, jm, nb_fields, vorticity, divergence, U, V, config );
     }
 }
 
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index c775fc9f0..2b6ccc1a9 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -970,18 +970,18 @@ CASE( "test_trans_domain" ) {
     //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} );
     Domain testdomain1 = ZonalBandDomain( {-10., 5.} );
     //Domain testdomain1 = RectangularDomain( {-1., 1.}, {50., 55.} );
-    Domain testdomain2 = RectangularDomain( {-1., 1.}, {-5., 40.} );
+    Domain testdomain2 = RectangularDomain( {-10., 10.}, {-5., 40.} );
     // Grid: (Adjust the following line if the test takes too long!)
 
     Grid global_grid( "O64" );
     Grid g1( global_grid, testdomain1 );
-    //Grid g2( gridString, testdomain2 );
+    Grid g2( global_grid, testdomain2 );
 
     bool fourierTrc1 = true;
     bool fourierTrc2 = false;
     using grid::StructuredGrid;
     using LinearSpacing = grid::LinearSpacing;
-    StructuredGrid g2( LinearSpacing( {0., 180.}, 181 ), LinearSpacing( {0., 45.}, 46 ) );
+    //StructuredGrid g2( LinearSpacing( {0., 180.}, 181 ), LinearSpacing( {0., 45.}, 46 ) );
 
     int trc = 63;
     //Log::info() << "rgp1:" << std::endl;
@@ -994,9 +994,12 @@ CASE( "test_trans_domain" ) {
     trans::Cache cache;
     ATLAS_TRACE_SCOPE( "Read cache" ) cache = trans::LegendreCache( "legcache.bin" );
     Trace t2( Here(), "translocal2 construction" );
+    trans::Trans transLocal2( cache, global_grid, g2.domain(), trc,
+                              option::type( "local" ) | option::write_legendre( "legcache2.bin" ) );
+    //trans::Trans transLocal2( cache, g2, trc, option::type( "local" ) );
     //trans::Trans transLocal2( cache, g2, trc,
     //                          option::type( "local" ) | option::no_fft() );
-    trans::Trans transLocal2( g2, trc, option::type( "local" ) );
+    //trans::Trans transLocal2( g2, trc, option::type( "local" ) );
     t2.stop();
 
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
@@ -1293,7 +1296,7 @@ CASE( "test_trans_invtrans" ) {
 }
 #endif
 
-    //-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 
 #if 0
 CASE( "test_trans_fourier_truncation" ) {

From 00568966a14448cde9c9c00f529a6ee3fc0dc683 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Thu, 31 May 2018 19:08:25 +0100
Subject: [PATCH 105/123] fixed vd2uv for reduced grids and improved its
 performance by moving loop over wavenumbers inside vd2uv

---
 src/atlas/trans/local/TransLocal.cc      |   2 +-
 src/atlas/trans/local/VorDivToUVLocal.cc | 141 ++++++++++++-----------
 src/tests/trans/test_transgeneral.cc     |   8 +-
 3 files changed, 77 insertions(+), 74 deletions(-)

diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc
index d8d945dc7..604114dec 100644
--- a/src/atlas/trans/local/TransLocal.cc
+++ b/src/atlas/trans/local/TransLocal.cc
@@ -1255,7 +1255,7 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields,
                     int idx = 0;
                     for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                         for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
-                            for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) {
+                            for ( int jlon = 0; jlon < g.nx( jlat ); jlon++ ) {
                                 gp_fields[idx] /= coslats[jlat];
                                 idx++;
                             }
diff --git a/src/atlas/trans/local/VorDivToUVLocal.cc b/src/atlas/trans/local/VorDivToUVLocal.cc
index dd171b718..798f28f5e 100644
--- a/src/atlas/trans/local/VorDivToUVLocal.cc
+++ b/src/atlas/trans/local/VorDivToUVLocal.cc
@@ -12,6 +12,7 @@
 #include <cmath>  // for std::sqrt
 #include "atlas/functionspace/Spectral.h"
 #include "atlas/runtime/Log.h"
+#include "atlas/runtime/Trace.h"
 #include "atlas/util/Earth.h"
 
 using atlas::FunctionSpace;
@@ -59,18 +60,20 @@ void prfi1b( const int truncation,
 //        Temperton, 1991, MWR 119 p1303
 // Ported to C++ by: Andreas Mueller *ECMWF*
 void vd2uv( const int truncation,               // truncation
-            const int km,                       // zonal wavenumber
             const int nb_vordiv_fields,         // number of vorticity and divergence fields
             const double vorticity_spectra[],   // spectral data of vorticity
             const double divergence_spectra[],  // spectral data of divergence
             double U[],                         // spectral data of U
             double V[],                         // spectral data of V
             const eckit::Configuration& config ) {
+    ATLAS_TRACE( "vd2uv" );
+    std::vector<double> repsnm( ( truncation + 1 ) * ( truncation + 6 ) / 2 );
+    int idx   = 0;
     int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2;
+    double ra = util::Earth::radius();
+    std::vector<double> rlapin( truncation + 3 );
 
     // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991]
-    std::vector<double> repsnm( ( truncation + 1 ) * ( truncation + 6 ) / 2 );
-    int idx = 0;
     for ( int jm = 0; jm <= truncation; ++jm ) {
         for ( int jn = jm; jn <= truncation + 2; ++jn, ++idx ) {
             repsnm[idx] = std::sqrt( ( jn * jn - jm * jm ) / ( 4. * jn * jn - 1. ) );
@@ -79,8 +82,6 @@ void vd2uv( const int truncation,               // truncation
     repsnm[0] = 0.;
 
     // rlapin: constant factor from eq.(2.2) and (2.3) in [Temperton 1991]
-    double ra = util::Earth::radius();
-    std::vector<double> rlapin( truncation + 3 );
     for ( int jn = 1; jn <= truncation + 2; ++jn ) {
         rlapin[jn] = -ra * ra / ( jn * ( jn + 1. ) );
     }
@@ -90,76 +91,78 @@ void vd2uv( const int truncation,               // truncation
     std::vector<double> zepsnm( truncation + 6 );
     std::vector<double> zlapin( truncation + 6 );
     std::vector<double> zn( truncation + 6 );
-    for ( int jn = km - 1; jn <= truncation + 2; ++jn ) {
-        int ij = truncation + 3 - jn;
-        if ( jn >= 0 ) {
-            zlapin[ij] = rlapin[jn];
-            if ( jn < km ) { zepsnm[ij] = 0.; }
+    for ( int km = 0; km <= truncation; ++km ) {
+        for ( int jn = km - 1; jn <= truncation + 2; ++jn ) {
+            int ij = truncation + 3 - jn;
+            if ( jn >= 0 ) {
+                zlapin[ij] = rlapin[jn];
+                if ( jn < km ) { zepsnm[ij] = 0.; }
+                else {
+                    zepsnm[ij] = repsnm[jn + ( 2 * truncation - km + 5 ) * km / 2];
+                }
+            }
             else {
-                zepsnm[ij] = repsnm[jn + ( 2 * truncation - km + 5 ) * km / 2];
+                zlapin[ij] = 0.;
+                zepsnm[ij] = 0.;
             }
+            zn[ij] = jn;
         }
-        else {
-            zlapin[ij] = 0.;
-            zepsnm[ij] = 0.;
-        }
-        zn[ij] = jn;
-    }
-    zn[0] = truncation + 3;
-
-    // copy spectral data into internal trans storage:
-    std::vector<double> rvor( 2 * nb_vordiv_fields * nlei1 );
-    std::vector<double> rdiv( 2 * nb_vordiv_fields * nlei1 );
-    std::vector<double> ru( 2 * nb_vordiv_fields * nlei1 );
-    std::vector<double> rv( 2 * nb_vordiv_fields * nlei1 );
-    prfi1b( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() );
-    prfi1b( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() );
-
-    // compute eq.(2.12) and (2.13) in [Temperton 1991]:
-    if ( km == 0 ) {
-        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-            int ir = 2 * jfld * nlei1 - 1;
-            for ( int ji = 2; ji < truncation + 4 - km; ++ji ) {
-                double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1];
-                double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1];
-                ru[ir + ji]  = +psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1];
-                rv[ir + ji]  = -psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1];
+        zn[0] = truncation + 3;
+
+        // copy spectral data into internal trans storage:
+        std::vector<double> rvor( 2 * nb_vordiv_fields * nlei1 );
+        std::vector<double> rdiv( 2 * nb_vordiv_fields * nlei1 );
+        std::vector<double> ru( 2 * nb_vordiv_fields * nlei1 );
+        std::vector<double> rv( 2 * nb_vordiv_fields * nlei1 );
+        prfi1b( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() );
+        prfi1b( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() );
+
+        // compute eq.(2.12) and (2.13) in [Temperton 1991]:
+        if ( km == 0 ) {
+            for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+                int ir = 2 * jfld * nlei1 - 1;
+                for ( int ji = 2; ji < truncation + 4 - km; ++ji ) {
+                    double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1];
+                    double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1];
+                    ru[ir + ji]  = +psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1];
+                    rv[ir + ji]  = -psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1];
+                }
             }
         }
-    }
-    else {
-        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-            int ir = 2 * jfld * nlei1 - 1, ii = ir + nlei1;
-            for ( int ji = 2; ji < truncation + 4 - km; ++ji ) {
-                double chiIm = km * zlapin[ji];
-                double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1];
-                double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1];
-                ru[ir + ji]  = -chiIm * rdiv[ii + ji] + psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1];
-                ru[ii + ji]  = +chiIm * rdiv[ir + ji] + psiM1 * rvor[ii + ji + 1] - psiP1 * rvor[ii + ji - 1];
-                rv[ir + ji]  = -chiIm * rvor[ii + ji] - psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1];
-                rv[ii + ji]  = +chiIm * rvor[ir + ji] - psiM1 * rdiv[ii + ji + 1] + psiP1 * rdiv[ii + ji - 1];
+        else {
+            for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+                int ir = 2 * jfld * nlei1 - 1, ii = ir + nlei1;
+                for ( int ji = 2; ji < truncation + 4 - km; ++ji ) {
+                    double chiIm = km * zlapin[ji];
+                    double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1];
+                    double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1];
+                    ru[ir + ji]  = -chiIm * rdiv[ii + ji] + psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1];
+                    ru[ii + ji]  = +chiIm * rdiv[ir + ji] + psiM1 * rvor[ii + ji + 1] - psiP1 * rvor[ii + ji - 1];
+                    rv[ir + ji]  = -chiIm * rvor[ii + ji] - psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1];
+                    rv[ii + ji]  = +chiIm * rvor[ir + ji] - psiM1 * rdiv[ii + ji + 1] + psiP1 * rdiv[ii + ji - 1];
+                }
             }
         }
-    }
 
-    // copy data from internal storage back to external spectral data:
-    int ilcm = truncation - km;
-    int ioff = ( 2 * truncation - km + 3 ) * km;
-    // ioff: start index of zonal wavenumber km in spectral data
-    double za_r = 1. / util::Earth::radius();
-    for ( int j = 0; j <= ilcm; ++j ) {
-        // ilcm-j = total wavenumber
-        int inm = ioff + ( ilcm - j ) * 2;
-        for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-            int ir = 2 * jfld * nlei1, ii = ir + nlei1;
-            int idx = inm * nb_vordiv_fields + jfld;
-            // real part:
-            U[idx] = ru[ir + j + 2] * za_r;
-            V[idx] = rv[ir + j + 2] * za_r;
-            idx += nb_vordiv_fields;
-            // imaginary part:
-            U[idx] = ru[ii + j + 2] * za_r;
-            V[idx] = rv[ii + j + 2] * za_r;
+        // copy data from internal storage back to external spectral data:
+        int ilcm = truncation - km;
+        int ioff = ( 2 * truncation - km + 3 ) * km;
+        // ioff: start index of zonal wavenumber km in spectral data
+        double za_r = 1. / util::Earth::radius();
+        for ( int j = 0; j <= ilcm; ++j ) {
+            // ilcm-j = total wavenumber
+            int inm = ioff + ( ilcm - j ) * 2;
+            for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+                int ir = 2 * jfld * nlei1, ii = ir + nlei1;
+                int idx = inm * nb_vordiv_fields + jfld;
+                // real part:
+                U[idx] = ru[ir + j + 2] * za_r;
+                V[idx] = rv[ir + j + 2] * za_r;
+                idx += nb_vordiv_fields;
+                // imaginary part:
+                U[idx] = ru[ii + j + 2] * za_r;
+                V[idx] = rv[ii + j + 2] * za_r;
+            }
         }
     }
 }
@@ -167,9 +170,7 @@ void vd2uv( const int truncation,               // truncation
 void VorDivToUVLocal::execute( const int nb_coeff, const int nb_fields, const double vorticity[],
                                const double divergence[], double U[], double V[],
                                const eckit::Configuration& config ) const {
-    for ( int jm = 0; jm <= truncation_; ++jm ) {
-        vd2uv( truncation_, jm, nb_fields, vorticity, divergence, U, V, config );
-    }
+    vd2uv( truncation_, nb_fields, vorticity, divergence, U, V, config );
 }
 
 VorDivToUVLocal::VorDivToUVLocal( const int truncation, const eckit::Configuration& config ) :
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 2b6ccc1a9..c9c413ff5 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -976,6 +976,8 @@ CASE( "test_trans_domain" ) {
     Grid global_grid( "O64" );
     Grid g1( global_grid, testdomain1 );
     Grid g2( global_grid, testdomain2 );
+    //Grid g1( global_grid );
+    //Grid g2( global_grid );
 
     bool fourierTrc1 = true;
     bool fourierTrc2 = false;
@@ -1006,7 +1008,7 @@ CASE( "test_trans_domain" ) {
 
     functionspace::Spectral spectral( trc );
 
-    int nb_scalar = 1, nb_vordiv = 0;
+    int nb_scalar = 1, nb_vordiv = 1;
     int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
     std::vector<double> sp( 2 * N * nb_scalar );
     std::vector<double> vor( 2 * N * nb_vordiv );
@@ -1018,8 +1020,8 @@ CASE( "test_trans_domain" ) {
     std::vector<double> rgp2_analytic( g2.size() );
 
     int icase = 0;
-    for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) {         // vorticity, divergence, scalar
-        for ( int ivar_out = 2; ivar_out < 3; ivar_out++ ) {  // u, v, scalar
+    for ( int ivar_in = 0; ivar_in < 3; ivar_in++ ) {         // vorticity, divergence, scalar
+        for ( int ivar_out = 0; ivar_out < 3; ivar_out++ ) {  // u, v, scalar
             int nb_fld = 1;
             if ( ivar_out == 2 ) {
                 tolerance = 1.e-13;

From 98cfd1e55dea2ed4a3011a3816533b90f9cba05a Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Tue, 5 Jun 2018 17:06:37 +0100
Subject: [PATCH 106/123] some clean-up. Unstructured grids are currently not
 working (ATLAS-162)

---
 src/atlas/trans/local/TransLocal.cc      |   2 +-
 src/atlas/trans/local/VorDivToUVLocal.cc | 144 +++++----
 src/tests/trans/test_transgeneral.cc     | 371 +----------------------
 3 files changed, 87 insertions(+), 430 deletions(-)

diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc
index 604114dec..cdd20ad80 100644
--- a/src/atlas/trans/local/TransLocal.cc
+++ b/src/atlas/trans/local/TransLocal.cc
@@ -1253,7 +1253,7 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields,
                         coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
                     }
                     int idx = 0;
-                    for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
+                    for ( int jfld = 0; jfld < nb_vordiv_fields; jfld++ ) {
                         for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
                             for ( int jlon = 0; jlon < g.nx( jlat ); jlon++ ) {
                                 gp_fields[idx] /= coslats[jlat];
diff --git a/src/atlas/trans/local/VorDivToUVLocal.cc b/src/atlas/trans/local/VorDivToUVLocal.cc
index 798f28f5e..56bf78c99 100644
--- a/src/atlas/trans/local/VorDivToUVLocal.cc
+++ b/src/atlas/trans/local/VorDivToUVLocal.cc
@@ -66,102 +66,116 @@ void vd2uv( const int truncation,               // truncation
             double U[],                         // spectral data of U
             double V[],                         // spectral data of V
             const eckit::Configuration& config ) {
-    ATLAS_TRACE( "vd2uv" );
     std::vector<double> repsnm( ( truncation + 1 ) * ( truncation + 6 ) / 2 );
     int idx   = 0;
     int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2;
     double ra = util::Earth::radius();
     std::vector<double> rlapin( truncation + 3 );
 
-    // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991]
-    for ( int jm = 0; jm <= truncation; ++jm ) {
-        for ( int jn = jm; jn <= truncation + 2; ++jn, ++idx ) {
-            repsnm[idx] = std::sqrt( ( jn * jn - jm * jm ) / ( 4. * jn * jn - 1. ) );
+    {
+        //ATLAS_TRACE( "general setup" );
+        // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991]
+        for ( int jm = 0; jm <= truncation; ++jm ) {
+            for ( int jn = jm; jn <= truncation + 2; ++jn, ++idx ) {
+                repsnm[idx] = std::sqrt( ( jn * jn - jm * jm ) / ( 4. * jn * jn - 1. ) );
+            }
         }
-    }
-    repsnm[0] = 0.;
+        repsnm[0] = 0.;
 
-    // rlapin: constant factor from eq.(2.2) and (2.3) in [Temperton 1991]
-    for ( int jn = 1; jn <= truncation + 2; ++jn ) {
-        rlapin[jn] = -ra * ra / ( jn * ( jn + 1. ) );
+        // rlapin: constant factor from eq.(2.2) and (2.3) in [Temperton 1991]
+        for ( int jn = 1; jn <= truncation + 2; ++jn ) {
+            rlapin[jn] = -ra * ra / ( jn * ( jn + 1. ) );
+        }
+        rlapin[0] = 0.;
     }
-    rlapin[0] = 0.;
 
     // inverse the order of repsnm and rlapin for improved accuracy
     std::vector<double> zepsnm( truncation + 6 );
     std::vector<double> zlapin( truncation + 6 );
     std::vector<double> zn( truncation + 6 );
     for ( int km = 0; km <= truncation; ++km ) {
-        for ( int jn = km - 1; jn <= truncation + 2; ++jn ) {
-            int ij = truncation + 3 - jn;
-            if ( jn >= 0 ) {
-                zlapin[ij] = rlapin[jn];
-                if ( jn < km ) { zepsnm[ij] = 0.; }
+        {
+            //ATLAS_TRACE( "current wavenumber setup" );
+            for ( int jn = km - 1; jn <= truncation + 2; ++jn ) {
+                int ij = truncation + 3 - jn;
+                if ( jn >= 0 ) {
+                    zlapin[ij] = rlapin[jn];
+                    if ( jn < km ) { zepsnm[ij] = 0.; }
+                    else {
+                        zepsnm[ij] = repsnm[jn + ( 2 * truncation - km + 5 ) * km / 2];
+                    }
+                }
                 else {
-                    zepsnm[ij] = repsnm[jn + ( 2 * truncation - km + 5 ) * km / 2];
+                    zlapin[ij] = 0.;
+                    zepsnm[ij] = 0.;
                 }
+                zn[ij] = jn;
             }
-            else {
-                zlapin[ij] = 0.;
-                zepsnm[ij] = 0.;
-            }
-            zn[ij] = jn;
+            zn[0] = truncation + 3;
         }
-        zn[0] = truncation + 3;
 
         // copy spectral data into internal trans storage:
         std::vector<double> rvor( 2 * nb_vordiv_fields * nlei1 );
         std::vector<double> rdiv( 2 * nb_vordiv_fields * nlei1 );
         std::vector<double> ru( 2 * nb_vordiv_fields * nlei1 );
         std::vector<double> rv( 2 * nb_vordiv_fields * nlei1 );
-        prfi1b( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() );
-        prfi1b( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() );
-
-        // compute eq.(2.12) and (2.13) in [Temperton 1991]:
-        if ( km == 0 ) {
-            for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-                int ir = 2 * jfld * nlei1 - 1;
-                for ( int ji = 2; ji < truncation + 4 - km; ++ji ) {
-                    double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1];
-                    double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1];
-                    ru[ir + ji]  = +psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1];
-                    rv[ir + ji]  = -psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1];
+        {
+            //ATLAS_TRACE( "copy data to internal storage" );
+            prfi1b( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() );
+            prfi1b( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() );
+        }
+
+        {
+            //ATLAS_TRACE( "actual computation" );
+            // compute eq.(2.12) and (2.13) in [Temperton 1991]:
+            if ( km == 0 ) {
+                for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+                    int ir = 2 * jfld * nlei1 - 1;
+                    for ( int ji = 2; ji < truncation + 4 - km; ++ji ) {
+                        double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1];
+                        double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1];
+                        ru[ir + ji]  = +psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1];
+                        rv[ir + ji]  = -psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1];
+                    }
                 }
             }
-        }
-        else {
-            for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-                int ir = 2 * jfld * nlei1 - 1, ii = ir + nlei1;
-                for ( int ji = 2; ji < truncation + 4 - km; ++ji ) {
-                    double chiIm = km * zlapin[ji];
-                    double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1];
-                    double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1];
-                    ru[ir + ji]  = -chiIm * rdiv[ii + ji] + psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1];
-                    ru[ii + ji]  = +chiIm * rdiv[ir + ji] + psiM1 * rvor[ii + ji + 1] - psiP1 * rvor[ii + ji - 1];
-                    rv[ir + ji]  = -chiIm * rvor[ii + ji] - psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1];
-                    rv[ii + ji]  = +chiIm * rvor[ir + ji] - psiM1 * rdiv[ii + ji + 1] + psiP1 * rdiv[ii + ji - 1];
+            else {
+                for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+                    int ir = 2 * jfld * nlei1 - 1, ii = ir + nlei1;
+                    for ( int ji = 2; ji < truncation + 4 - km; ++ji ) {
+                        double chiIm = km * zlapin[ji];
+                        double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1];
+                        double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1];
+                        ru[ir + ji]  = -chiIm * rdiv[ii + ji] + psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1];
+                        ru[ii + ji]  = +chiIm * rdiv[ir + ji] + psiM1 * rvor[ii + ji + 1] - psiP1 * rvor[ii + ji - 1];
+                        rv[ir + ji]  = -chiIm * rvor[ii + ji] - psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1];
+                        rv[ii + ji]  = +chiIm * rvor[ir + ji] - psiM1 * rdiv[ii + ji + 1] + psiP1 * rdiv[ii + ji - 1];
+                    }
                 }
             }
         }
 
-        // copy data from internal storage back to external spectral data:
-        int ilcm = truncation - km;
-        int ioff = ( 2 * truncation - km + 3 ) * km;
-        // ioff: start index of zonal wavenumber km in spectral data
-        double za_r = 1. / util::Earth::radius();
-        for ( int j = 0; j <= ilcm; ++j ) {
-            // ilcm-j = total wavenumber
-            int inm = ioff + ( ilcm - j ) * 2;
-            for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
-                int ir = 2 * jfld * nlei1, ii = ir + nlei1;
-                int idx = inm * nb_vordiv_fields + jfld;
-                // real part:
-                U[idx] = ru[ir + j + 2] * za_r;
-                V[idx] = rv[ir + j + 2] * za_r;
-                idx += nb_vordiv_fields;
-                // imaginary part:
-                U[idx] = ru[ii + j + 2] * za_r;
-                V[idx] = rv[ii + j + 2] * za_r;
+        {
+            //ATLAS_TRACE( "copy data back to external storage" );
+            // copy data from internal storage back to external spectral data:
+            int ilcm = truncation - km;
+            int ioff = ( 2 * truncation - km + 3 ) * km;
+            // ioff: start index of zonal wavenumber km in spectral data
+            double za_r = 1. / util::Earth::radius();
+            for ( int j = 0; j <= ilcm; ++j ) {
+                // ilcm-j = total wavenumber
+                int inm = ioff + ( ilcm - j ) * 2;
+                for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) {
+                    int ir = 2 * jfld * nlei1, ii = ir + nlei1;
+                    int idx = inm * nb_vordiv_fields + jfld;
+                    // real part:
+                    U[idx] = ru[ir + j + 2] * za_r;
+                    V[idx] = rv[ir + j + 2] * za_r;
+                    idx += nb_vordiv_fields;
+                    // imaginary part:
+                    U[idx] = ru[ii + j + 2] * za_r;
+                    V[idx] = rv[ii + j + 2] * za_r;
+                }
             }
         }
     }
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index c9c413ff5..da74a9980 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -65,157 +65,6 @@ struct AtlasTransEnvironment : public AtlasTestEnvironment {
     }
 };
 
-//-----------------------------------------------------------------------------
-
-#if 0
-void compute_legendre( const size_t trc,                      // truncation (in)
-                       const double& lat,                     // latitude in radians (in)
-                       array::ArrayView<double, 1>& zlfpol )  // values of
-                                                              // associated
-                                                              // Legendre
-                                                              // functions, size
-                                                              // (trc+1)*trc/2
-                                                              // (out)
-{
-    trans::compute_legendre_polynomials( trc, lat, zlfpol.data() );
-}
-
-//-----------------------------------------------------------------------------
-
-void legendre_transform( const size_t trc,                           // truncation (in)
-                         const size_t trcFT,                         // truncation for Fourier transformation (in)
-                         array::ArrayView<double, 1>& rlegReal,      // values of associated Legendre
-                                                                     // functions, size (trc+1)*trc/2
-                                                                     // (out)
-                         array::ArrayView<double, 1>& rlegImag,      // values of associated Legendre
-                                                                     // functions, size (trc+1)*trc/2
-                                                                     // (out)
-                         const array::ArrayView<double, 1>& zlfpol,  // values of associated Legendre
-                                                                     // functions, size (trc+1)*trc/2
-                                                                     // (in)
-                         const double rspecg[] )                     // spectral data, size (trc+1)*trc (in)
-{
-    trans::invtrans_legendre( trc, trcFT, trc, zlfpol.data(), 1, rspecg, rlegReal.data(), rlegImag.data() );
-}
-
-//-----------------------------------------------------------------------------
-
-double fourier_transform( const size_t trcFT, array::ArrayView<double, 1>& rlegReal,  // values of associated Legendre
-                                                                                      // functions, size (trc+1)*trc/2
-                                                                                      // (out)
-                          array::ArrayView<double, 1>& rlegImag,                      // values of associated Legendre
-                                                                                      // functions, size (trc+1)*trc/2
-                                                                                      // (out)
-                          const double lon )                                          // radians
-{
-    double gp[1];
-    trans::invtrans_fourier( trcFT, lon, 1, rlegReal.data(), rlegImag.data(), gp );
-    return gp[0];
-}
-
-//-----------------------------------------------------------------------------
-// Routine to compute the spectral transform by using a local Fourier
-// transformation
-// for a single point
-//
-// Author:
-// Andreas Mueller *ECMWF*
-//
-double spectral_transform_point( const size_t trc,        // truncation (in)
-                                 const size_t trcFT,      // truncation for Fourier transformation (in)
-                                 const double lon,        // longitude in radians (in)
-                                 const double lat,        // latitude in radians (in)
-                                 const double rspecg[] )  // spectral data, size (trc+1)*trc (in)
-{
-    int N = ( trc + 2 ) * ( trc + 1 ) / 2;
-    ATLAS_TRACE();
-    atlas::array::ArrayT<double> zlfpol_( N );
-    atlas::array::ArrayView<double, 1> zlfpol = make_view<double, 1>( zlfpol_ );
-
-    atlas::array::ArrayT<double> rlegReal_( trcFT + 1 );
-    atlas::array::ArrayView<double, 1> rlegReal = make_view<double, 1>( rlegReal_ );
-
-    atlas::array::ArrayT<double> rlegImag_( trcFT + 1 );
-    atlas::array::ArrayView<double, 1> rlegImag = make_view<double, 1>( rlegImag_ );
-
-    // Legendre transform:
-    compute_legendre( trc, lat, zlfpol );
-    legendre_transform( trc, trcFT, rlegReal, rlegImag, zlfpol, rspecg );
-
-    // Fourier transform:
-    return fourier_transform( trcFT, rlegReal, rlegImag, lon );
-}
-
-//-----------------------------------------------------------------------------
-// Routine to compute the spectral transform by using a local Fourier
-// transformation
-// for a grid (same latitude for all longitudes, allows to compute Legendre
-// functions
-// once for all longitudes)
-//
-// Author:
-// Andreas Mueller *ECMWF*
-//
-void spectral_transform_grid( const size_t trc,       // truncation (in)
-                              const size_t trcFT,     // truncation for Fourier transformation (in)
-                              const Grid grid,        // call with something like Grid("O32")
-                              const double rspecg[],  // spectral data, size (trc+1)*trc (in)
-                              double rgp[],           // resulting grid point data (out)
-                              const bool pointwise )  // use point function for unstructured mesh for
-                                                      // testing purposes
-{
-    std::ostream& out = Log::info();  // just for debugging
-    int N             = ( trc + 2 ) * ( trc + 1 ) / 2;
-    ATLAS_TRACE();
-    atlas::array::ArrayT<double> zlfpol_( N );
-    atlas::array::ArrayView<double, 1> zlfpol = make_view<double, 1>( zlfpol_ );
-
-    atlas::array::ArrayT<double> rlegReal_( trcFT + 1 );
-    atlas::array::ArrayView<double, 1> rlegReal = make_view<double, 1>( rlegReal_ );
-
-    atlas::array::ArrayT<double> rlegImag_( trcFT + 1 );
-    atlas::array::ArrayView<double, 1> rlegImag = make_view<double, 1>( rlegImag_ );
-
-    int idx = 0;
-
-    if ( grid::StructuredGrid( grid ) ) {
-        grid::StructuredGrid g( grid );
-        for ( size_t j = 0; j < g.ny(); ++j ) {
-            double lat = g.y( j ) * util::Constants::degreesToRadians();
-
-            // Legendre transform:
-            compute_legendre( trc, lat, zlfpol );
-            legendre_transform( trc, trcFT, rlegReal, rlegImag, zlfpol, rspecg );
-
-            for ( size_t i = 0; i < g.nx( j ); ++i ) {
-                double lon = g.x( i, j ) * util::Constants::degreesToRadians();
-                // Fourier transform:
-                rgp[idx++] = fourier_transform( trcFT, rlegReal, rlegImag, lon );
-            }
-        }
-    }
-    else {
-        for ( PointXY p : grid.xy() ) {
-            double lon = p.x() * util::Constants::degreesToRadians();
-            double lat = p.y() * util::Constants::degreesToRadians();
-            if ( pointwise ) {
-                // alternative for testing: use spectral_transform_point function:
-                rgp[idx++] = spectral_transform_point( trc, trcFT, lon, lat, rspecg );
-            }
-            else {
-                // Legendre transform:
-                compute_legendre( trc, lat, zlfpol );
-                legendre_transform( trc, trcFT, rlegReal, rlegImag, zlfpol, rspecg );
-
-                // Fourier transform:
-                rgp[idx++] = fourier_transform( trcFT, rlegReal, rlegImag, lon );
-            }
-        }
-    }
-
-    EXPECT( idx == grid.size() );
-}
-#endif
 //-----------------------------------------------------------------------------
 // Routine to compute the spherical harmonics analytically at one point
 // (up to wave number 3)
@@ -527,203 +376,8 @@ double compute_rms( const size_t N,    // length of the arrays
     return rms;
 }
 
-#if 0
-//-----------------------------------------------------------------------------
-// Routine to test the spectral transform by comparing it with the analytically
-// derived spherical harmonics
-//
-// Author:
-// Andreas Mueller *ECMWF*
-//
-double spectral_transform_test( double trc,       // truncation
-                                double n,         // total wave number (implemented so far for n<4
-                                double m,         // zonal wave number (implemented so far for m<4, m<n
-                                int imag,         // 0: test real part, 1: test imaginary part
-                                Grid g,           // call with something like Grid("O32")
-                                bool pointwise )  // use point function for unstructured mesh for testing
-                                                  // purposes
-{
-    std::ostream& out  = Log::info();
-    int N              = ( trc + 2 ) * ( trc + 1 ) / 2;
-    auto* rspecg       = new double[2 * N];
-    auto* rgp          = new double[g.size()];
-    auto* rgp_analytic = new double[g.size()];
-
-    // compute analytic solution (this also initializes rspecg and needs to be
-    // done before the actual transform):
-    spectral_transform_grid_analytic( trc, true, n, m, imag, g, rspecg, rgp_analytic, 2, 2 );
-    // perform spectral transform:
-
-    spectral_transform_grid( trc, trc, g, rspecg, rgp, pointwise );
-
-    // for( int i=0; i<g.size(); ++i ) rgp[i] = 0.;
-
-    double rms = compute_rms( g.size(), rgp, rgp_analytic );
-
-    delete[] rspecg;
-    delete[] rgp;
-    delete[] rgp_analytic;
-
-    return rms;
-}
-#endif
-
-//-----------------------------------------------------------------------------
-#if 0
-CASE( "test_transgeneral_legendrepolynomials" )
-{
-  std::ostream& out = Log::info(); // just for debugging
-  out << "test_transgeneral_legendrepolynomials" << std::endl;
-/*
-  Grid g( "O10" );
-  trans::Trans trans(g,1279);
-*/
-  int trc = 1280; // truncation + 1
-  int N = (trc+2)*(trc+1)/2;
-  atlas::array::ArrayT<double> zlfpol_(N);
-  atlas::array::ArrayView<double,1> zlfpol = make_view<double,1>(zlfpol_);
-
-  double lat = std::acos(0.99312859918509488);
-  compute_legendre(trc, lat, zlfpol);
-}
-#endif
-//-----------------------------------------------------------------------------
-#if 0
-#if 1
-CASE( "test_transgeneral_point" ) {
-    std::ostream& out = Log::info();
-    Log::info() << "test_transgeneral_point" << std::endl;
-    double tolerance = 2.e-15;
-    // test spectral transform up to wave number 3 by comparing
-    // the result with the analytically computed spherical harmonics
-
-    Grid g = grid::UnstructuredGrid( {{50., 20.}, {30., -20.}, {179., -89.}, {-101., 70.}} );
-
-    int trc = 47;  // truncation
-
-    double rms = 0.;
-    for ( int m = 0; m <= 3; m++ ) {                   // zonal wavenumber
-        for ( int n = m; n <= 3; n++ ) {               // total wavenumber
-            for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
-                rms = spectral_transform_test( trc, n, m, imag, g, true );
-                EXPECT( rms < tolerance );
-            }
-        }
-    }
-}
-#endif
 //-----------------------------------------------------------------------------
 #if 1
-CASE( "test_transgeneral_unstructured" ) {
-    std::ostream& out = Log::info();
-    Log::info() << "test_transgeneral_unstructured" << std::endl;
-    double tolerance = 2.e-15;
-    // test spectral transform up to wave number 3 by comparing
-    // the result with the analytically computed spherical harmonics
-
-    Grid g = grid::UnstructuredGrid( new std::vector<PointXY>{{50., 20.}, {30., -20.}, {179., -89.}, {-101., 70.}} );
-
-    int trc = 47;  // truncation
-
-    double rms = 0.;
-    for ( int m = 0; m <= 3; m++ ) {                   // zonal wavenumber
-        for ( int n = m; n <= 3; n++ ) {               // total wavenumber
-            for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
-                rms = spectral_transform_test( trc, n, m, imag, g, false );
-                EXPECT( rms < tolerance );
-            }
-        }
-    }
-}
-
-//-----------------------------------------------------------------------------
-
-CASE( "test_transgeneral_structured" ) {
-    std::ostream& out = Log::info();
-    Log::info() << "test_transgeneral_structured" << std::endl;
-    double tolerance = 2.e-15;
-    // test spectral transform up to wave number 3 by comparing
-    // the result with the analytically computed spherical harmonics
-
-    std::string grid_uid( "O10" );
-    grid::StructuredGrid g( grid_uid );
-
-    int trc = 47;  // truncation
-
-    double rms = 0.;
-    for ( int m = 0; m <= 3; m++ ) {                   // zonal wavenumber
-        for ( int n = m; n <= 3; n++ ) {               // total wavenumber
-            for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
-                rms = spectral_transform_test( trc, n, m, imag, g, false );
-                EXPECT( rms < tolerance );
-            }
-        }
-    }
-}
-
-//-----------------------------------------------------------------------------
-
-CASE( "test_transgeneral_with_translib" ) {
-    Log::info() << "test_transgeneral_with_translib" << std::endl;
-    // test transgeneral by comparing its result with the trans library
-    // this test is based on the test_nomesh case in test_trans.cc
-
-    std::ostream& out = Log::info();
-    double tolerance  = 1.e-13;
-    Grid g( "F24" );
-    grid::StructuredGrid gs( g );
-    int trc = 47;
-#if ATLAS_HAVE_TRANS
-    trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) );
-#endif
-    functionspace::StructuredColumns gridpoints( g );
-    functionspace::Spectral spectral( trc );
-    Field spf = spectral.createField<double>( option::name( "spf" ) );
-    Field gpf = gridpoints.createField<double>( option::name( "gpf" ) );
-
-    int N = ( trc + 2 ) * ( trc + 1 ) / 2;
-    std::vector<double> rspecg( 2 * N );
-    std::vector<double> rgp( g.size() );
-    std::vector<double> rgp_analytic( g.size() );
-
-    int k = 0;
-    for ( int m = 0; m <= trc; m++ ) {                 // zonal wavenumber
-        for ( int n = m; n <= trc; n++ ) {             // total wavenumber
-            for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
-
-                if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., 2, 2 ) == 0. ) {
-                    array::ArrayView<double, 1> sp = array::make_view<double, 1>( spf );
-                    sp.assign( 0. );
-                    sp( k ) = 1.;
-                    spectral_transform_grid_analytic( trc, trc, n, m, imag, g,  //
-                                                      rspecg.data(), rgp_analytic.data(), 2, 2 );
-
-                    // compute spectral transform with the general transform:
-                    spectral_transform_grid( trc, trc, g, sp.data(), rgp.data(), false );
-                    array::ArrayView<double, 1> gp = array::make_view<double, 1>( gpf );
-
-                    double rms_gen = compute_rms( g.size(), rgp.data(), rgp_analytic.data() );
-
-                    if ( rms_gen >= tolerance ) {
-                        ATLAS_DEBUG_VAR( rms_gen );
-                        ATLAS_DEBUG_VAR( tolerance );
-                    }
-                    EXPECT( rms_gen < tolerance );
-#if ATLAS_HAVE_TRANS
-                    EXPECT_NO_THROW( transIFS.invtrans( spf, gpf ) );
-                    double rms_trans = compute_rms( g.size(), gp.data(), rgp.data() );
-                    EXPECT( rms_trans < tolerance );
-#endif
-                }
-                k++;
-            }
-        }
-    }
-}
-#endif
-#endif
-//-----------------------------------------------------------------------------
-#if 0
 CASE( "test_trans_vordiv_with_translib" ) {
     Log::info() << "test_trans_vordiv_with_translib" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -743,8 +397,8 @@ CASE( "test_trans_vordiv_with_translib" ) {
     trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) );
     double rav = 0.;  // compute average rms error of trans library in rav
 #endif
-    trans::Trans transLocal1( g, trc, util::Config( "type", "localopt2" ) );
-    trans::Trans transLocal2( g, trc, util::Config( "type", "Local" ) );
+    trans::Trans transLocal1( g, trc, util::Config( "type", "local" ) );
+    trans::Trans transLocal2( g, trc, util::Config( "type", "local" ) );
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
 
     functionspace::Spectral spectral( trc );
@@ -1150,7 +804,9 @@ CASE( "test_trans_unstructured" ) {
     //Domain testdomain = RectangularDomain( {20., 25.}, {40., 60.} );
     Domain testdomain = RectangularDomain( {0., 90.}, {0., 90.} );
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F120", testdomain );
+    Grid grid_global( "F120" );
+    Grid g( grid_global, testdomain );
+    int trc = 120;
     grid::StructuredGrid gs( g );
     std::vector<PointXY> pts( g.size() );
     int idx( 0 );
@@ -1166,7 +822,6 @@ CASE( "test_trans_unstructured" ) {
     }
     Grid gu = grid::UnstructuredGrid( new std::vector<PointXY>( &pts[0], &pts[idx] ) );
     Log::info() << "gu: size=" << gu.size() << std::endl;
-    int trc     = 120;
     double rav1 = 0., rav2 = 0.;  // compute average rms errors of transLocal1 and transLocal2
 
     int nb_scalar = 1, nb_vordiv = 1;
@@ -1181,8 +836,8 @@ CASE( "test_trans_unstructured" ) {
     std::vector<double> rgp_analytic1( g.size() );
     std::vector<double> rgp_analytic2( gu.size() );
 
-    trans::Trans transLocal1( g, trc, util::Config( "type", "Local" ) );
-    trans::Trans transLocal2( gu, trc, util::Config( "type", "Local" ) );
+    trans::Trans transLocal1( grid_global, testdomain, trc, option::type( "local" ) );
+    trans::Trans transLocal2( gu, trc, util::Config( "type", "local" ) );
 
     int icase = 0;
     for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) {         // vorticity, divergence, scalar
@@ -1285,18 +940,6 @@ CASE( "test_trans_unstructured" ) {
     Log::info() << "average RMS error of transLocal2: " << rav2 << std::endl;
 }
 #endif
-#if 0
-CASE( "test_trans_invtrans" ) {
-    trans::Trans trans( Grid( "O64" ), 63, util::Config( "type", "local" ) );
-
-    std::vector<double> rspec( trans.spectralCoefficients() );
-    std::vector<double> rgp( trans.grid().size() );
-
-    // TODO: rspec needs proper initial data
-
-    trans.invtrans( 1, rspec.data(), rgp.data() );
-}
-#endif
 
 //-----------------------------------------------------------------------------
 

From 63660bf1d9e0c499ac62bef53f0dd21ceb1f3a5e Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Wed, 6 Jun 2018 11:43:35 +0100
Subject: [PATCH 107/123] Avoid inclusion of fftw3.h in TransLocal.h

---
 src/atlas/trans/local/TransLocal.cc      | 65 +++++++++++++++---------
 src/atlas/trans/local/TransLocal.h       | 16 +++---
 src/atlas_f/trans/atlas_Trans_module.F90 |  2 +
 3 files changed, 51 insertions(+), 32 deletions(-)

diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc
index cdd20ad80..cdcceaa19 100644
--- a/src/atlas/trans/local/TransLocal.cc
+++ b/src/atlas/trans/local/TransLocal.cc
@@ -26,6 +26,11 @@
 #include "eckit/log/Bytes.h"
 #include "eckit/parser/JSON.h"
 
+#include "atlas/library/defines.h"
+#if ATLAS_HAVE_FFTW
+#include <fftw3.h>
+#endif
+
 namespace atlas {
 namespace trans {
 
@@ -217,6 +222,17 @@ int fourier_truncation( const int truncation,    // truncation
     return trc;
 }
 
+namespace detail {
+struct FFTW_Data {
+#if ATLAS_HAVE_FFTW
+    fftw_complex* in;
+    double* out;
+    std::vector<fftw_plan> plans;
+#endif
+};
+}
+
+
 // --------------------------------------------------------------------------------------------------------------------
 // Class TransLocal
 // --------------------------------------------------------------------------------------------------------------------
@@ -245,6 +261,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
     legendre_cachesize_( cache.legendre().size() ),
     fft_cache_( cache.fft().data() ),
     fft_cachesize_( cache.fft().size() ),
+    fftw_( new detail::FFTW_Data ),
     linalg_( linear_algebra_backend() ),
     warning_( TransParameters( config ).warning() ) {
     ATLAS_TRACE( "TransLocal constructor" );
@@ -478,8 +495,8 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
             {
                 ATLAS_TRACE( "Fourier precomputations (FFTW)" );
                 int num_complex = ( nlonsMaxGlobal_ / 2 ) + 1;
-                fft_in_         = fftw_alloc_complex( nlats * num_complex );
-                fft_out_        = fftw_alloc_real( nlats * nlonsMaxGlobal_ );
+                fftw_->in         = fftw_alloc_complex( nlats * num_complex );
+                fftw_->out        = fftw_alloc_real( nlats * nlonsMaxGlobal_ );
 
                 if ( fft_cache_ ) {
                     Log::debug() << "Import FFTW wisdom from cache" << std::endl;
@@ -498,16 +515,16 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
                 //                read.close();
                 //                if ( wisdomString.length() > 0 ) { fftw_import_wisdom_from_string( &wisdomString[0u] ); }
                 if ( grid::RegularGrid( gridGlobal_ ) ) {
-                    plans_.resize( 1 );
-                    plans_[0] = fftw_plan_many_dft_c2r( 1, &nlonsMaxGlobal_, nlats, fft_in_, NULL, 1, num_complex,
-                                                        fft_out_, NULL, 1, nlonsMaxGlobal_, FFTW_ESTIMATE );
+                    fftw_->plans.resize( 1 );
+                    fftw_->plans[0] = fftw_plan_many_dft_c2r( 1, &nlonsMaxGlobal_, nlats, fftw_->in, NULL, 1, num_complex,
+                                                              fftw_->out, NULL, 1, nlonsMaxGlobal_, FFTW_ESTIMATE );
                 }
                 else {
-                    plans_.resize( nlatsLegDomain_ );
+                    fftw_->plans.resize( nlatsLegDomain_ );
                     for ( int j = 0; j < nlatsLegDomain_; j++ ) {
                         int nlonsGlobalj = gs_global.nx( jlatMinLeg_ + j );
                         //ASSERT( nlonsGlobalj > 0 && nlonsGlobalj <= nlonsMaxGlobal_ );
-                        plans_[j] = fftw_plan_dft_c2r_1d( nlonsGlobalj, fft_in_, fft_out_, FFTW_ESTIMATE );
+                        fftw_->plans[j] = fftw_plan_dft_c2r_1d( nlonsGlobalj, fftw_->in, fftw_->out, FFTW_ESTIMATE );
                     }
                 }
                 std::string file_path = TransParameters( config ).write_fft();
@@ -627,11 +644,11 @@ TransLocal::~TransLocal() {
         }
         if ( useFFT_ ) {
 #if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2
-            for ( int j = 0; j < plans_.size(); j++ ) {
-                fftw_destroy_plan( plans_[j] );
+            for ( int j = 0; j < fftw_->plans.size(); j++ ) {
+                fftw_destroy_plan( fftw_->plans[j] );
             }
-            fftw_free( fft_in_ );
-            fftw_free( fft_out_ );
+            fftw_free( fftw_->in );
+            fftw_free( fftw_->out );
 #endif
         }
         else {
@@ -859,25 +876,25 @@ void TransLocal::invtrans_fourier_regular( const int nlats, const int nlons, con
                 for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
                     int idx = 0;
                     for ( int jlat = 0; jlat < nlats; jlat++ ) {
-                        fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0, nb_fields, nlats )];
+                        fftw_->in[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0, nb_fields, nlats )];
                         for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
                             for ( int imag = 0; imag < 2; imag++ ) {
                                 if ( jm <= truncation_ ) {
-                                    fft_in_[idx][imag] =
+                                    fftw_->in[idx][imag] =
                                         scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )];
                                 }
                                 else {
-                                    fft_in_[idx][imag] = 0.;
+                                    fftw_->in[idx][imag] = 0.;
                                 }
                             }
                         }
                     }
-                    fftw_execute_dft_c2r( plans_[0], fft_in_, fft_out_ );
+                    fftw_execute_dft_c2r( fftw_->plans[0], fftw_->in, fftw_->out );
                     for ( int jlat = 0; jlat < nlats; jlat++ ) {
                         for ( int jlon = 0; jlon < nlons; jlon++ ) {
                             int j = jlon + jlonMin_[0];
                             if ( j >= nlonsMaxGlobal_ ) { j -= nlonsMaxGlobal_; }
-                            gp_fields[jlon + nlons * ( jlat + nlats * jfld )] = fft_out_[j + nlonsMaxGlobal_ * jlat];
+                            gp_fields[jlon + nlons * ( jlat + nlats * jfld )] = fftw_->out[j + nlonsMaxGlobal_ * jlat];
                         }
                     }
                 }
@@ -965,18 +982,18 @@ void TransLocal::invtrans_fourier_reduced( const int nlats, const grid::Structur
                         int idx = 0;
                         //Log::info() << jlat << "in:" << std::endl;
                         int num_complex   = ( nlonsGlobal_[jlat] / 2 ) + 1;
-                        fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0, nb_fields, nlats )];
-                        //Log::info() << fft_in_[0][0] << " ";
+                        fftw_->in[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0, nb_fields, nlats )];
+                        //Log::info() << fftw_->in[0][0] << " ";
                         for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
                             for ( int imag = 0; imag < 2; imag++ ) {
                                 if ( jm <= truncation_ ) {
-                                    fft_in_[idx][imag] =
+                                    fftw_->in[idx][imag] =
                                         scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )];
                                 }
                                 else {
-                                    fft_in_[idx][imag] = 0.;
+                                    fftw_->in[idx][imag] = 0.;
                                 }
-                                //Log::info() << fft_in_[idx][imag] << " ";
+                                //Log::info() << fftw_->in[idx][imag] << " ";
                             }
                         }
                         //Log::info() << std::endl;
@@ -984,13 +1001,13 @@ void TransLocal::invtrans_fourier_reduced( const int nlats, const grid::Structur
                         int jplan = nlatsLegDomain_ - nlatsNH_ + jlat;
                         if ( jplan >= nlatsLegDomain_ ) { jplan = nlats - 1 + nlatsLegDomain_ - nlatsSH_ - jlat; };
                         //ASSERT( jplan < nlatsLeg_ && jplan >= 0 );
-                        fftw_execute_dft_c2r( plans_[jplan], fft_in_, fft_out_ );
+                        fftw_execute_dft_c2r( fftw_->plans[jplan], fftw_->in, fftw_->out );
                         for ( int jlon = 0; jlon < g.nx( jlat ); jlon++ ) {
                             int j = jlon + jlonMin_[jlat];
                             if ( j >= nlonsGlobal_[jlat] ) { j -= nlonsGlobal_[jlat]; }
-                            //Log::info() << fft_out_[j] << " ";
+                            //Log::info() << fftw_->out[j] << " ";
                             ASSERT( j < nlonsMaxGlobal_ );
-                            gp_fields[jgp++] = fft_out_[j];
+                            gp_fields[jgp++] = fftw_->out[j];
                         }
                         //Log::info() << std::endl;
                     }
diff --git a/src/atlas/trans/local/TransLocal.h b/src/atlas/trans/local/TransLocal.h
index 29005083f..014045ca2 100644
--- a/src/atlas/trans/local/TransLocal.h
+++ b/src/atlas/trans/local/TransLocal.h
@@ -11,13 +11,11 @@
 #pragma once
 
 #include <vector>
+#include <memory>
 
 #include "atlas/array.h"
 #include "atlas/grid/Grid.h"
 #include "atlas/trans/Trans.h"
-#if ATLAS_HAVE_FFTW
-#include <fftw3.h>
-#endif
 
 #define TRANSLOCAL_DGEMM2 0
 
@@ -40,6 +38,10 @@ class FieldSet;
 namespace atlas {
 namespace trans {
 
+namespace detail {
+struct FFTW_Data;
+}
+
 class LegendreCacheCreatorLocal;
 int fourier_truncation( const int truncation,  // truncation
                         const int nx,          // number of longitudes
@@ -186,11 +188,9 @@ class TransLocal : public trans::TransImpl {
     std::vector<size_t> legendre_begin_;
     std::vector<size_t> legendre_sym_begin_;
     std::vector<size_t> legendre_asym_begin_;
-#if ATLAS_HAVE_FFTW
-    fftw_complex* fft_in_;
-    double* fft_out_;
-    std::vector<fftw_plan> plans_;
-#endif
+
+
+    std::unique_ptr<detail::FFTW_Data> fftw_;
 
     Cache cache_;
     Cache export_legendre_;
diff --git a/src/atlas_f/trans/atlas_Trans_module.F90 b/src/atlas_f/trans/atlas_Trans_module.F90
index 25ee8ee34..549d473ac 100644
--- a/src/atlas_f/trans/atlas_Trans_module.F90
+++ b/src/atlas_f/trans/atlas_Trans_module.F90
@@ -207,6 +207,7 @@ function nb_gridpoints_global( this )
 end function
 
 function grid( this )
+  use, intrinsic :: iso_c_binding, only: c_null_ptr
   use atlas_trans_c_binding
   class(atlas_Trans) :: this
   type(atlas_Grid) :: grid
@@ -216,6 +217,7 @@ function grid( this )
 #else
   THROW_ERROR
   FCKIT_SUPPRESS_UNUSED( this )
+  grid = atlas_Grid( c_null_ptr )
   FCKIT_SUPPRESS_UNUSED( grid )
 #endif
 end function

From 7aaf2dcf18030b35892f9ac979b3b19f68a1b1ed Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Wed, 6 Jun 2018 14:27:52 +0100
Subject: [PATCH 108/123] work in progress: merging all spectral data into one
 array with vdoption

---
 src/atlas/trans/local/TransLocal.cc  | 93 +++++++++++++++++++---------
 src/tests/trans/test_transgeneral.cc | 20 +++---
 2 files changed, 76 insertions(+), 37 deletions(-)

diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc
index cdcceaa19..bd9b923f1 100644
--- a/src/atlas/trans/local/TransLocal.cc
+++ b/src/atlas/trans/local/TransLocal.cc
@@ -1327,38 +1327,75 @@ void extend_truncation( const int old_truncation, const int nb_fields, const dou
 void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
                            const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
                            const eckit::Configuration& config ) const {
-    ATLAS_TRACE( "TransLocal::invtrans" );
-    int nb_gp              = grid_.size();
-    int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
-    if ( nb_vordiv_fields > 0 ) {
-        std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
-        std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
-        std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
-        std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
-
+    if ( config.getBool( "vdoption", false ) ) {
+        // collect all spectral data into one array "all_spectra":
+        ATLAS_TRACE( "TransLocal::invtrans" );
+        int nb_all_fields = 2 * nb_vordiv_fields + nb_scalar_fields;
+        int nb_all_size   = 2 * legendre_size( truncation_ ) * nb_all_fields;
+        std::vector<double> all_spectra( nb_all_size );
+        int k = 0, i = 0, j = 0, l = 0;
         {
-            ATLAS_TRACE( "extend vordiv" );
-            // increase truncation in vorticity_spectra and divergence_spectra:
-            extend_truncation( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() );
-            extend_truncation( truncation_, nb_vordiv_fields, divergence_spectra, divergence_spectra_extended.data() );
+            ATLAS_TRACE( "merge all spectra" );
+            for ( int m = 0; m <= truncation_; m++ ) {                           // zonal wavenumber
+                for ( int n = m; n <= truncation_; n++ ) {                       // total wavenumber
+                    for ( int imag = 0; imag < 2; imag++ ) {                     // imaginary/real part
+                        for ( int jfld = 0; jfld < nb_vordiv_fields; jfld++ ) {  // vorticity fields
+                            all_spectra[k++] = vorticity_spectra[i++];
+                        }
+                        for ( int jfld = 0; jfld < nb_vordiv_fields; jfld++ ) {  // divergence fields
+                            all_spectra[k++] = divergence_spectra[j++];
+                        }
+                        for ( int jfld = 0; jfld < nb_scalar_fields; jfld++ ) {  // scalar fields
+                            all_spectra[k++] = scalar_spectra[l++];
+                        }
+                    }
+                }
+            }
         }
+        int nb_vordiv_size = 2 * legendre_size( truncation_ ) * nb_vordiv_fields;
+        int nb_scalar_size = 2 * legendre_size( truncation_ ) * nb_scalar_fields;
+        ASSERT( k == nb_all_size );
+        ASSERT( i == nb_vordiv_size );
+        ASSERT( j == nb_vordiv_size );
+        ASSERT( l == nb_scalar_size );
+        invtrans_uv( truncation_, nb_all_fields, 0, all_spectra.data(), gp_fields, config );
+    }
+    else {
+        ATLAS_TRACE( "TransLocal::invtrans" );
+        int nb_gp              = grid_.size();
+        int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
+        if ( nb_vordiv_fields > 0 ) {
+            std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
+            std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
+            std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
+            std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
 
-        {
-            ATLAS_TRACE( "vordiv to UV" );
-            // call vd2uv to compute u and v in spectral space
-            trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "local" ) );
-            vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
-                                      divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
-        }
+            {
+                ATLAS_TRACE( "extend vordiv" );
+                // increase truncation in vorticity_spectra and divergence_spectra:
+                extend_truncation( truncation_, nb_vordiv_fields, vorticity_spectra,
+                                   vorticity_spectra_extended.data() );
+                extend_truncation( truncation_, nb_vordiv_fields, divergence_spectra,
+                                   divergence_spectra_extended.data() );
+            }
 
-        // perform spectral transform to compute all fields in grid point space
-        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
-        invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
-                     gp_fields + nb_gp * nb_vordiv_fields, config );
-    }
-    if ( nb_scalar_fields > 0 ) {
-        invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields,
-                     config );
+            {
+                ATLAS_TRACE( "vordiv to UV" );
+                // call vd2uv to compute u and v in spectral space
+                trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "local" ) );
+                vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
+                                          divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
+            }
+
+            // perform spectral transform to compute all fields in grid point space
+            invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
+            invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
+                         gp_fields + nb_gp * nb_vordiv_fields, config );
+        }
+        if ( nb_scalar_fields > 0 ) {
+            invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields,
+                         config );
+        }
     }
 }
 
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index da74a9980..85bc1bb53 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -377,7 +377,7 @@ double compute_rms( const size_t N,    // length of the arrays
 }
 
 //-----------------------------------------------------------------------------
-#if 1
+#if 0
 CASE( "test_trans_vordiv_with_translib" ) {
     Log::info() << "test_trans_vordiv_with_translib" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -619,27 +619,29 @@ CASE( "test_trans_domain" ) {
     std::ostream& out = Log::info();
     double tolerance  = 1.e-13;
 
+    util::Config vd2uvoption( "vdoption", true );
     //Domain testdomain = ZonalBandDomain( {-90., 90.} );
     //Domain testdomain = ZonalBandDomain( {-.5, .5} );
     //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} );
-    Domain testdomain1 = ZonalBandDomain( {-10., 5.} );
+    //Domain testdomain1 = ZonalBandDomain( {-10., 5.} );
+    Domain testdomain1 = RectangularDomain( {-1., 1.}, {-.5, 0.} );
     //Domain testdomain1 = RectangularDomain( {-1., 1.}, {50., 55.} );
-    Domain testdomain2 = RectangularDomain( {-10., 10.}, {-5., 40.} );
+    Domain testdomain2 = RectangularDomain( {-1., 1.}, {-.5, 0.} );
     // Grid: (Adjust the following line if the test takes too long!)
 
-    Grid global_grid( "O64" );
+    Grid global_grid( "O512" );
     Grid g1( global_grid, testdomain1 );
     Grid g2( global_grid, testdomain2 );
     //Grid g1( global_grid );
     //Grid g2( global_grid );
 
     bool fourierTrc1 = true;
-    bool fourierTrc2 = false;
+    bool fourierTrc2 = true;
     using grid::StructuredGrid;
     using LinearSpacing = grid::LinearSpacing;
     //StructuredGrid g2( LinearSpacing( {0., 180.}, 181 ), LinearSpacing( {0., 45.}, 46 ) );
 
-    int trc = 63;
+    int trc = 511;
     //Log::info() << "rgp1:" << std::endl;
     if ( eckit::PathName( "legcache.bin" ).exists() ) eckit::PathName( "legcache.bin" ).unlink();
     Trace t1( Here(), "translocal1 construction" );
@@ -692,7 +694,7 @@ CASE( "test_trans_domain" ) {
                         for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
 
                             if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. &&
-                                 icase < 1000 ) {
+                                 icase < 10 ) {
                                 auto start = std::chrono::system_clock::now();
                                 for ( int j = 0; j < 2 * N * nb_scalar; j++ ) {
                                     sp[j] = 0.;
@@ -727,7 +729,7 @@ CASE( "test_trans_domain" ) {
                                 //Log::info() << std::endl << "rgp1:";
                                 ATLAS_TRACE_SCOPE( "translocal1" )
                                 EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
-                                                                       div.data(), rgp1.data() ) );
+                                                                       div.data(), rgp1.data(), vd2uvoption ) );
 
                                 //Log::info() << std::endl << "rgp2:";
                                 ATLAS_TRACE_SCOPE( "translocal2" )
@@ -737,7 +739,7 @@ CASE( "test_trans_domain" ) {
                                 int pos = ( ivar_out * nb_vordiv + jfld );
 
                                 double rms_gen1 =
-                                    compute_rms( g1.size(), rgp1.data() + pos * g1.size(), rgp1_analytic.data() );
+                                    0.;  //compute_rms( g1.size(), rgp1.data() + pos * g1.size(), rgp1_analytic.data() );
 
                                 double rms_gen2 =
                                     compute_rms( g2.size(), rgp2.data() + pos * g2.size(), rgp2_analytic.data() );

From 0bfbd5d7bbfb1947625dde9901228619c36956b0 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Thu, 7 Jun 2018 16:55:58 +0100
Subject: [PATCH 109/123] added option to merge U,V,scalars before transform
 with old vd2uv; this should be faster because the size of the dgemm calls is
 bigger

---
 src/atlas/trans/local/TransLocal.cc  | 68 +++++++++++++++++++++-------
 src/atlas/trans/local/TransLocal.h   |  5 +-
 src/tests/trans/test_transgeneral.cc | 12 ++---
 3 files changed, 60 insertions(+), 25 deletions(-)

diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc
index bd9b923f1..38a89fdd3 100644
--- a/src/atlas/trans/local/TransLocal.cc
+++ b/src/atlas/trans/local/TransLocal.cc
@@ -712,7 +712,7 @@ void gp_transpose( const int nb_size, const int nb_fields, const double gp_tmp[]
 // --------------------------------------------------------------------------------------------------------------------
 
 void TransLocal::invtrans_legendre( const int truncation, const int nlats, const int nb_fields,
-                                    const double scalar_spectra[], double scl_fourier[],
+                                    const int nb_vordiv_fields, const double scalar_spectra[], double scl_fourier[],
                                     const eckit::Configuration& config ) const {
     // Legendre transform:
     {
@@ -858,8 +858,8 @@ void TransLocal::invtrans_legendre( const int truncation, const int nlats, const
                     }
                 }
             }
-        }
-    }
+        }  // namespace trans
+    }      // namespace atlas
 }
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -1101,7 +1101,7 @@ void TransLocal::invtrans_unstructured_precomp( const int truncation, const int
                 if ( nb_vordiv_fields > 0 ) {
                     //ATLAS_TRACE( " u,v from U,V" );
                     double coslat = std::cos( lat );
-                    for ( int j = 0; j < nb_fields; j++ ) {
+                    for ( int j = 0; j < 2 * nb_vordiv_fields && j < nb_fields; j++ ) {
                         gp_fields[ip + j * grid_.size()] /= coslat;
                     }
                 }
@@ -1199,7 +1199,7 @@ void TransLocal::invtrans_unstructured( const int truncation, const int nb_field
             if ( nb_vordiv_fields > 0 ) {
                 //ATLAS_TRACE( "u,v from U,V" );
                 const double coslat = std::cos( lat );
-                for ( int j = 0; j < nb_fields; j++ ) {
+                for ( int j = 0; j < 2 * nb_vordiv_fields && j < nb_fields; j++ ) {
                     gp_fields[ip + j * grid_.size()] /= coslat;
                 }
             }
@@ -1251,7 +1251,8 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields,
             // ATLAS-159 workaround end
 
             // Legendre transformation:
-            invtrans_legendre( truncation, nlats, nb_scalar_fields, scalar_spectra, scl_fourier, config );
+            invtrans_legendre( truncation, nlats, nb_scalar_fields, nb_vordiv_fields, scalar_spectra, scl_fourier,
+                               config );
 
             // Fourier transformation:
             if ( grid::RegularGrid( gridGlobal_ ) ) {
@@ -1270,7 +1271,7 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields,
                         coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() );
                     }
                     int idx = 0;
-                    for ( int jfld = 0; jfld < nb_vordiv_fields; jfld++ ) {
+                    for ( int jfld = 0; jfld < 2 * nb_vordiv_fields && jfld < nb_fields; jfld++ ) {
                         for ( int jlat = 0; jlat < g.ny(); jlat++ ) {
                             for ( int jlon = 0; jlon < g.nx( jlat ); jlon++ ) {
                                 gp_fields[idx] /= coslats[jlat];
@@ -1327,38 +1328,71 @@ void extend_truncation( const int old_truncation, const int nb_fields, const dou
 void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
                            const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
                            const eckit::Configuration& config ) const {
-    if ( config.getBool( "vdoption", false ) ) {
+    if ( config.getBool( "mergeBeforeTransform", false ) ) {
         // collect all spectral data into one array "all_spectra":
         ATLAS_TRACE( "TransLocal::invtrans" );
+        int nb_gp              = grid_.size();
+        int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
+        std::vector<double> U_ext;
+        std::vector<double> V_ext;
+        std::vector<double> scalar_ext;
+        if ( nb_vordiv_fields > 0 ) {
+            std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext );
+            std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext );
+            U_ext.resize( nb_vordiv_spec_ext );
+            V_ext.resize( nb_vordiv_spec_ext );
+
+            {
+                ATLAS_TRACE( "extend vordiv" );
+                // increase truncation in vorticity_spectra and divergence_spectra:
+                extend_truncation( truncation_, nb_vordiv_fields, vorticity_spectra,
+                                   vorticity_spectra_extended.data() );
+                extend_truncation( truncation_, nb_vordiv_fields, divergence_spectra,
+                                   divergence_spectra_extended.data() );
+            }
+
+            {
+                ATLAS_TRACE( "vordiv to UV" );
+                // call vd2uv to compute u and v in spectral space
+                trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "local" ) );
+                vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
+                                          divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
+            }
+        }
+        if ( nb_scalar_fields > 0 ) {
+            int nb_scalar_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
+            scalar_ext.resize( nb_scalar_ext );
+            extend_truncation( truncation_, nb_scalar_fields, scalar_spectra, scalar_ext.data() );
+        }
         int nb_all_fields = 2 * nb_vordiv_fields + nb_scalar_fields;
-        int nb_all_size   = 2 * legendre_size( truncation_ ) * nb_all_fields;
+        int nb_all_size   = 2 * legendre_size( truncation_ + 1 ) * nb_all_fields;
         std::vector<double> all_spectra( nb_all_size );
         int k = 0, i = 0, j = 0, l = 0;
         {
             ATLAS_TRACE( "merge all spectra" );
-            for ( int m = 0; m <= truncation_; m++ ) {                           // zonal wavenumber
-                for ( int n = m; n <= truncation_; n++ ) {                       // total wavenumber
+            for ( int m = 0; m <= truncation_ + 1; m++ ) {                       // zonal wavenumber
+                for ( int n = m; n <= truncation_ + 1; n++ ) {                   // total wavenumber
                     for ( int imag = 0; imag < 2; imag++ ) {                     // imaginary/real part
                         for ( int jfld = 0; jfld < nb_vordiv_fields; jfld++ ) {  // vorticity fields
-                            all_spectra[k++] = vorticity_spectra[i++];
+                            all_spectra[k++] = U_ext[i++];
                         }
                         for ( int jfld = 0; jfld < nb_vordiv_fields; jfld++ ) {  // divergence fields
-                            all_spectra[k++] = divergence_spectra[j++];
+                            all_spectra[k++] = V_ext[j++];
                         }
                         for ( int jfld = 0; jfld < nb_scalar_fields; jfld++ ) {  // scalar fields
-                            all_spectra[k++] = scalar_spectra[l++];
+                            all_spectra[k++] = scalar_ext[l++];
                         }
                     }
                 }
             }
         }
-        int nb_vordiv_size = 2 * legendre_size( truncation_ ) * nb_vordiv_fields;
-        int nb_scalar_size = 2 * legendre_size( truncation_ ) * nb_scalar_fields;
+        int nb_vordiv_size = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
+        int nb_scalar_size = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields;
         ASSERT( k == nb_all_size );
         ASSERT( i == nb_vordiv_size );
         ASSERT( j == nb_vordiv_size );
         ASSERT( l == nb_scalar_size );
-        invtrans_uv( truncation_, nb_all_fields, 0, all_spectra.data(), gp_fields, config );
+        invtrans_uv( truncation_ + 1, nb_all_fields, nb_vordiv_fields, all_spectra.data(), gp_fields, config );
     }
     else {
         ATLAS_TRACE( "TransLocal::invtrans" );
diff --git a/src/atlas/trans/local/TransLocal.h b/src/atlas/trans/local/TransLocal.h
index 014045ca2..9d3d69140 100644
--- a/src/atlas/trans/local/TransLocal.h
+++ b/src/atlas/trans/local/TransLocal.h
@@ -134,8 +134,9 @@ class TransLocal : public trans::TransImpl {
 #endif
     };
 
-    void invtrans_legendre( const int truncation, const int nlats, const int nb_fields, const double scalar_spectra[],
-                            double scl_fourier[], const eckit::Configuration& config ) const;
+    void invtrans_legendre( const int truncation, const int nlats, const int nb_fields, const int nb_vordiv_fields,
+                            const double scalar_spectra[], double scl_fourier[],
+                            const eckit::Configuration& config ) const;
 
     void invtrans_fourier_regular( const int nlats, const int nlons, const int nb_fields, double scl_fourier[],
                                    double gp_fields[], const eckit::Configuration& config ) const;
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 85bc1bb53..678d223b2 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -619,7 +619,6 @@ CASE( "test_trans_domain" ) {
     std::ostream& out = Log::info();
     double tolerance  = 1.e-13;
 
-    util::Config vd2uvoption( "vdoption", true );
     //Domain testdomain = ZonalBandDomain( {-90., 90.} );
     //Domain testdomain = ZonalBandDomain( {-.5, .5} );
     //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} );
@@ -664,7 +663,7 @@ CASE( "test_trans_domain" ) {
 
     functionspace::Spectral spectral( trc );
 
-    int nb_scalar = 1, nb_vordiv = 1;
+    int nb_scalar = 0, nb_vordiv = 1;
     int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
     std::vector<double> sp( 2 * N * nb_scalar );
     std::vector<double> vor( 2 * N * nb_vordiv );
@@ -676,8 +675,8 @@ CASE( "test_trans_domain" ) {
     std::vector<double> rgp2_analytic( g2.size() );
 
     int icase = 0;
-    for ( int ivar_in = 0; ivar_in < 3; ivar_in++ ) {         // vorticity, divergence, scalar
-        for ( int ivar_out = 0; ivar_out < 3; ivar_out++ ) {  // u, v, scalar
+    for ( int ivar_in = 0; ivar_in < 2; ivar_in++ ) {         // vorticity, divergence, scalar
+        for ( int ivar_out = 0; ivar_out < 2; ivar_out++ ) {  // u, v, scalar
             int nb_fld = 1;
             if ( ivar_out == 2 ) {
                 tolerance = 1.e-13;
@@ -729,7 +728,8 @@ CASE( "test_trans_domain" ) {
                                 //Log::info() << std::endl << "rgp1:";
                                 ATLAS_TRACE_SCOPE( "translocal1" )
                                 EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
-                                                                       div.data(), rgp1.data(), vd2uvoption ) );
+                                                                       div.data(), rgp1.data(),
+                                                                       util::Config( "mergeBeforeTransform", true ) ) );
 
                                 //Log::info() << std::endl << "rgp2:";
                                 ATLAS_TRACE_SCOPE( "translocal2" )
@@ -739,7 +739,7 @@ CASE( "test_trans_domain" ) {
                                 int pos = ( ivar_out * nb_vordiv + jfld );
 
                                 double rms_gen1 =
-                                    0.;  //compute_rms( g1.size(), rgp1.data() + pos * g1.size(), rgp1_analytic.data() );
+                                    compute_rms( g1.size(), rgp1.data() + pos * g1.size(), rgp1_analytic.data() );
 
                                 double rms_gen2 =
                                     compute_rms( g2.size(), rgp2.data() + pos * g2.size(), rgp2_analytic.data() );

From 2c6ce9bd9fa23ac0b7116125b8511c3135f04ca5 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@ecmwf.int>
Date: Thu, 7 Jun 2018 17:22:05 +0100
Subject: [PATCH 110/123] merging spectral U and V before the transform is now
 the default

---
 src/atlas/trans/local/TransLocal.cc  | 35 ++--------------------------
 src/tests/trans/test_transgeneral.cc | 27 +++++++++++----------
 2 files changed, 15 insertions(+), 47 deletions(-)

diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc
index 38a89fdd3..1e3520557 100644
--- a/src/atlas/trans/local/TransLocal.cc
+++ b/src/atlas/trans/local/TransLocal.cc
@@ -1328,10 +1328,10 @@ void extend_truncation( const int old_truncation, const int nb_fields, const dou
 void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields,
                            const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[],
                            const eckit::Configuration& config ) const {
-    if ( config.getBool( "mergeBeforeTransform", false ) ) {
+    int nb_gp = grid_.size();
+    if ( nb_vordiv_fields > 0 ) {
         // collect all spectral data into one array "all_spectra":
         ATLAS_TRACE( "TransLocal::invtrans" );
-        int nb_gp              = grid_.size();
         int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
         std::vector<double> U_ext;
         std::vector<double> V_ext;
@@ -1395,37 +1395,6 @@ void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spect
         invtrans_uv( truncation_ + 1, nb_all_fields, nb_vordiv_fields, all_spectra.data(), gp_fields, config );
     }
     else {
-        ATLAS_TRACE( "TransLocal::invtrans" );
-        int nb_gp              = grid_.size();
-        int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields;
-        if ( nb_vordiv_fields > 0 ) {
-            std::vector<double> vorticity_spectra_extended( nb_vordiv_spec_ext, 0. );
-            std::vector<double> divergence_spectra_extended( nb_vordiv_spec_ext, 0. );
-            std::vector<double> U_ext( nb_vordiv_spec_ext, 0. );
-            std::vector<double> V_ext( nb_vordiv_spec_ext, 0. );
-
-            {
-                ATLAS_TRACE( "extend vordiv" );
-                // increase truncation in vorticity_spectra and divergence_spectra:
-                extend_truncation( truncation_, nb_vordiv_fields, vorticity_spectra,
-                                   vorticity_spectra_extended.data() );
-                extend_truncation( truncation_, nb_vordiv_fields, divergence_spectra,
-                                   divergence_spectra_extended.data() );
-            }
-
-            {
-                ATLAS_TRACE( "vordiv to UV" );
-                // call vd2uv to compute u and v in spectral space
-                trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "local" ) );
-                vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(),
-                                          divergence_spectra_extended.data(), U_ext.data(), V_ext.data() );
-            }
-
-            // perform spectral transform to compute all fields in grid point space
-            invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config );
-            invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(),
-                         gp_fields + nb_gp * nb_vordiv_fields, config );
-        }
         if ( nb_scalar_fields > 0 ) {
             invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields,
                          config );
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index 678d223b2..f0ed2f679 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -377,7 +377,7 @@ double compute_rms( const size_t N,    // length of the arrays
 }
 
 //-----------------------------------------------------------------------------
-#if 0
+#if 1
 CASE( "test_trans_vordiv_with_translib" ) {
     Log::info() << "test_trans_vordiv_with_translib" << std::endl;
     // test transgeneral by comparing its result with the trans library
@@ -387,7 +387,7 @@ CASE( "test_trans_vordiv_with_translib" ) {
     double tolerance  = 1.e-13;
 
     // Grid: (Adjust the following line if the test takes too long!)
-    Grid g( "F120" );
+    Grid g( "F64" );
 
     grid::StructuredGrid gs( g );
     int ndgl = gs.ny();
@@ -623,15 +623,15 @@ CASE( "test_trans_domain" ) {
     //Domain testdomain = ZonalBandDomain( {-.5, .5} );
     //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} );
     //Domain testdomain1 = ZonalBandDomain( {-10., 5.} );
-    Domain testdomain1 = RectangularDomain( {-1., 1.}, {-.5, 0.} );
+    Domain testdomain1 = RectangularDomain( {-5., 5.}, {-2.5, 0.} );
     //Domain testdomain1 = RectangularDomain( {-1., 1.}, {50., 55.} );
-    Domain testdomain2 = RectangularDomain( {-1., 1.}, {-.5, 0.} );
+    Domain testdomain2 = RectangularDomain( {-5., 5.}, {-2.5, 0.} );
     // Grid: (Adjust the following line if the test takes too long!)
 
-    Grid global_grid( "O512" );
-    Grid g1( global_grid, testdomain1 );
+    Grid global_grid( "O64" );
+    //Grid g1( global_grid, testdomain1 );
     Grid g2( global_grid, testdomain2 );
-    //Grid g1( global_grid );
+    Grid g1( global_grid );
     //Grid g2( global_grid );
 
     bool fourierTrc1 = true;
@@ -640,7 +640,7 @@ CASE( "test_trans_domain" ) {
     using LinearSpacing = grid::LinearSpacing;
     //StructuredGrid g2( LinearSpacing( {0., 180.}, 181 ), LinearSpacing( {0., 45.}, 46 ) );
 
-    int trc = 511;
+    int trc = 63;
     //Log::info() << "rgp1:" << std::endl;
     if ( eckit::PathName( "legcache.bin" ).exists() ) eckit::PathName( "legcache.bin" ).unlink();
     Trace t1( Here(), "translocal1 construction" );
@@ -663,7 +663,7 @@ CASE( "test_trans_domain" ) {
 
     functionspace::Spectral spectral( trc );
 
-    int nb_scalar = 0, nb_vordiv = 1;
+    int nb_scalar = 1, nb_vordiv = 1;
     int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv;
     std::vector<double> sp( 2 * N * nb_scalar );
     std::vector<double> vor( 2 * N * nb_vordiv );
@@ -675,8 +675,8 @@ CASE( "test_trans_domain" ) {
     std::vector<double> rgp2_analytic( g2.size() );
 
     int icase = 0;
-    for ( int ivar_in = 0; ivar_in < 2; ivar_in++ ) {         // vorticity, divergence, scalar
-        for ( int ivar_out = 0; ivar_out < 2; ivar_out++ ) {  // u, v, scalar
+    for ( int ivar_in = 0; ivar_in < 3; ivar_in++ ) {         // vorticity, divergence, scalar
+        for ( int ivar_out = 0; ivar_out < 3; ivar_out++ ) {  // u, v, scalar
             int nb_fld = 1;
             if ( ivar_out == 2 ) {
                 tolerance = 1.e-13;
@@ -693,7 +693,7 @@ CASE( "test_trans_domain" ) {
                         for ( int imag = 0; imag <= 1; imag++ ) {  // real and imaginary part
 
                             if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. &&
-                                 icase < 10 ) {
+                                 icase < 1000 ) {
                                 auto start = std::chrono::system_clock::now();
                                 for ( int j = 0; j < 2 * N * nb_scalar; j++ ) {
                                     sp[j] = 0.;
@@ -728,8 +728,7 @@ CASE( "test_trans_domain" ) {
                                 //Log::info() << std::endl << "rgp1:";
                                 ATLAS_TRACE_SCOPE( "translocal1" )
                                 EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(),
-                                                                       div.data(), rgp1.data(),
-                                                                       util::Config( "mergeBeforeTransform", true ) ) );
+                                                                       div.data(), rgp1.data() ) );
 
                                 //Log::info() << std::endl << "rgp2:";
                                 ATLAS_TRACE_SCOPE( "translocal2" )

From 72384e79bf75c668cab77147bf61d199af9c431b Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Fri, 8 Jun 2018 13:04:49 +0100
Subject: [PATCH 111/123] ATLAS-163 Introduce bit reproducibility for parallel
 interpolation

---
 src/atlas/interpolation/Interpolation.h       |   1 +
 .../interpolation/method/FiniteElement.cc     | 176 +++++++++++++++++-
 .../interpolation/method/FiniteElement.h      |   6 +
 .../interpolation/method/KNearestNeighbours.h |   2 +
 src/atlas/interpolation/method/Method.cc      |   2 +
 src/atlas/interpolation/method/Method.h       |  10 +-
 .../interpolation/method/NearestNeighbour.h   |   2 +
 .../atlas-parallel-interpolation.cc           |   8 +
 8 files changed, 197 insertions(+), 10 deletions(-)

diff --git a/src/atlas/interpolation/Interpolation.h b/src/atlas/interpolation/Interpolation.h
index 0ff0e6aa4..8575159d7 100644
--- a/src/atlas/interpolation/Interpolation.h
+++ b/src/atlas/interpolation/Interpolation.h
@@ -39,6 +39,7 @@ class Interpolation {
 
     operator bool() const { return implementation_; }
 
+    void print( std::ostream& out ) const { implementation_->print(out); }
 private:
     eckit::SharedPtr<const Implementation> implementation_;
 };
diff --git a/src/atlas/interpolation/method/FiniteElement.cc b/src/atlas/interpolation/method/FiniteElement.cc
index 9a70c4a15..e5f433b5e 100644
--- a/src/atlas/interpolation/method/FiniteElement.cc
+++ b/src/atlas/interpolation/method/FiniteElement.cc
@@ -17,6 +17,7 @@
 #include "eckit/log/ProgressTimer.h"
 #include "eckit/log/Seconds.h"
 #include "eckit/mpi/Comm.h"
+#include "eckit/exception/Exceptions.h"
 
 #include "atlas/functionspace/NodeColumns.h"
 #include "atlas/functionspace/PointCloud.h"
@@ -32,6 +33,9 @@
 #include "atlas/util/CoordinateEnums.h"
 #include "atlas/util/Earth.h"
 #include "atlas/util/Point.h"
+#include "atlas/parallel/mpi/Buffer.h"
+#include "atlas/parallel/GatherScatter.h"
+
 
 namespace atlas {
 namespace interpolation {
@@ -49,6 +53,9 @@ static const double parametricEpsilon = 1e-15;
 void FiniteElement::setup( const FunctionSpace& source, const FunctionSpace& target ) {
     ATLAS_TRACE( "atlas::interpolation::method::FiniteElement::setup()" );
 
+    source_ = source;
+    target_ = target;
+
     if ( functionspace::NodeColumns tgt = target ) {
         Mesh meshTarget = tgt.mesh();
 
@@ -78,6 +85,89 @@ void FiniteElement::setup( const FunctionSpace& source, const FunctionSpace& tar
     setup( source );
 }
 
+struct Stencil {
+    enum { max_stencil_size = 4 };
+    Stencil() {
+        g = -1;
+        size = 0;
+    }
+    void add( gidx_t tgt, gidx_t src, double weight ) {
+        if( g >= 0 ) {
+            ASSERT( tgt == g );
+        }
+        g = tgt;
+        size_t i=size;
+        source[i] = src;
+        weights[i] = weight;
+        ++size;
+    }
+    gidx_t g;
+    std::array<gidx_t,max_stencil_size> source;
+    std::array<double,max_stencil_size> weights;
+    size_t size;
+};
+
+void FiniteElement::print(std::ostream& out) const
+{
+    functionspace::NodeColumns src (source_);
+    functionspace::NodeColumns tgt (target_);
+    if( not tgt ) NOTIMP;
+    auto gidx_src = array::make_view<gidx_t,1>( src.nodes().global_index() );
+
+    ASSERT( tgt.nodes().size() == matrix_.rows() );
+
+
+    auto field_stencil_points_loc = tgt.createField<gidx_t>( option::variables(Stencil::max_stencil_size) );
+    auto field_stencil_weigths_loc = tgt.createField<double>( option::variables(Stencil::max_stencil_size) );
+    auto field_stencil_size_loc = tgt.createField<int>();
+
+    auto stencil_points_loc = array::make_view<gidx_t,2>( field_stencil_points_loc );
+    auto stencil_weights_loc = array::make_view<double,2>( field_stencil_weigths_loc );
+    auto stencil_size_loc   = array::make_view<int,1>( field_stencil_size_loc );
+    stencil_size_loc.assign(0);
+
+    for( Matrix::const_iterator it = matrix_.begin(); it!=matrix_.end(); ++it ) {
+        int p = it.row();
+        int& i = stencil_size_loc( p );
+        stencil_points_loc( p, i ) = gidx_src( it.col() );
+        stencil_weights_loc( p, i ) = *it;
+        ++i;
+    }
+
+
+    size_t global_size = tgt.gather().glb_dof();
+
+    auto field_stencil_points_glb = tgt.createField<gidx_t>( option::variables(Stencil::max_stencil_size) | option::global(0) );
+    auto field_stencil_weights_glb = tgt.createField<double>( option::variables(Stencil::max_stencil_size) | option::global(0) );
+    auto field_stencil_size_glb = tgt.createField<int>( option::global(0) );
+
+
+
+    auto stencil_points_glb = array::make_view<gidx_t,2>( field_stencil_points_glb );
+    auto stencil_weights_glb = array::make_view<double,2>( field_stencil_weights_glb );
+    auto stencil_size_glb   = array::make_view<int,1>( field_stencil_size_glb );
+
+    tgt.gather().gather( stencil_size_loc, stencil_size_glb );
+    tgt.gather().gather( stencil_points_loc, stencil_points_glb );
+    tgt.gather().gather( stencil_weights_loc, stencil_weights_glb );
+
+    if( mpi::comm().rank() == 0 ) {
+        for( idx_t i=0; i<global_size; ++i ) {
+            out << std::setw(10) << i+1 << " : ";
+            for( idx_t j=0; j<stencil_size_glb(i); ++j ) {
+                out << std::setw(10) << stencil_points_glb(i,j);
+            }
+            for( idx_t j=stencil_size_glb(i); j<Stencil::max_stencil_size; ++j ) {
+                out << "          ";
+            }
+            for( idx_t j=0; j<stencil_size_glb(i); ++j ) {
+                out << std::setw(12) << std::left <<  stencil_weights_glb(i,j);
+            }
+            out << std::endl;
+        }
+    }
+}
+
 void FiniteElement::setup( const FunctionSpace& source ) {
     const functionspace::NodeColumns src = source;
     ASSERT( src );
@@ -99,6 +189,7 @@ void FiniteElement::setup( const FunctionSpace& source ) {
 
     icoords_.reset( new array::ArrayView<double, 2>( array::make_view<double, 2>( source_xyz ) ) );
     ocoords_.reset( new array::ArrayView<double, 2>( array::make_view<double, 2>( target_xyz_ ) ) );
+    igidx_.reset( new array::ArrayView<gidx_t, 1>( array::make_view<gidx_t, 1>( src.nodes().global_index() ) ) );
 
     connectivity_ = &meshSource.cells().node_connectivity();
 
@@ -183,17 +274,26 @@ void FiniteElement::setup( const FunctionSpace& source ) {
     matrix_.swap( A );
 }
 
+struct ElementEdge {
+    std::array<idx_t,2> idx;
+    void swap() {
+        idx_t tmp = idx[0];
+        idx[0] = idx[1];
+        idx[1] = tmp;
+    }
+};
+
 Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemIndex3::NodeList& elems,
                                                         std::ostream& failures_log ) const {
     ASSERT( elems.begin() != elems.end() );
 
     const size_t inp_points = icoords_->shape( 0 );
-    size_t idx[4];
-    double w[4];
+    std::array<size_t,4> idx;
+    std::array<double,4> w;
 
     Triplets triplets;
     Ray ray( PointXYZ{( *ocoords_ )( ip, 0 ), ( *ocoords_ )( ip, 1 ), ( *ocoords_ )( ip, 2 )} );
-
+    ElementEdge edge;
     for ( ElemIndex3::NodeList::const_iterator itc = elems.begin(); itc != elems.end(); ++itc ) {
         const size_t elem_id = ( *itc ).value().payload();
         ASSERT( elem_id < connectivity_->rows() );
@@ -206,6 +306,48 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd
             ASSERT( idx[i] < inp_points );
         }
 
+        auto on_triag_edge = [&](ElementEdge& edge) {
+            if( w[0] < 1.e-15 ) {
+                edge.idx[0] = 1;
+                edge.idx[1] = 2;
+                return true;
+            }
+            if( w[1] < 1.e-15 ) {
+                edge.idx[0] = 0;
+                edge.idx[1] = 2;
+                return true;
+            }
+            if( w[2] < 1.e-15 ) {
+                edge.idx[0] = 0;
+                edge.idx[1] = 1;
+                return true;
+            }
+            return false;
+        };
+
+        auto on_quad_edge = [&](ElementEdge& edge) {
+            if( w[0] < 1.e-15 && w[1] < 1.e-15 ) {
+                edge.idx[0] = 2;
+                edge.idx[1] = 3;
+                return true;
+            }
+            if( w[1] < 1.e-15 && w[2] < 1.e-15 ) {
+                edge.idx[0] = 0;
+                edge.idx[1] = 3;
+                return true;
+            }
+            if( w[2] < 1.e-15 && w[3] < 1.e-15 ) {
+                edge.idx[0] = 0;
+                edge.idx[1] = 1;
+                return true;
+            }
+            if( w[3] < 1.e-15 && w[0] < 1.e-15 ) {
+                edge.idx[0] = 1;
+                edge.idx[1] = 2;
+                return true;
+            }
+            return false;
+        };
         if ( nb_cols == 3 ) {
             /* triangle */
             element::Triag3D triag(
@@ -227,8 +369,18 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd
                 w[1] = is.u;
                 w[2] = is.v;
 
-                for ( size_t i = 0; i < 3; ++i ) {
-                    triplets.push_back( Triplet( ip, idx[i], w[i] ) );
+                if( on_triag_edge( edge) ) {
+                    if( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) {
+                        edge.swap();
+                    }
+                    for( size_t i = 0; i < 2; ++i ) {
+                        triplets.push_back( Triplet( ip, idx[edge.idx[i]], w[edge.idx[i]] ) );
+                    }
+                }
+                else {
+                    for ( size_t i = 0; i < 3; ++i ) {
+                        triplets.push_back( Triplet( ip, idx[i], w[i] ) );
+                    }
                 }
 
                 break;  // stop looking for elements
@@ -256,8 +408,18 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd
                 w[2] = is.u * is.v;
                 w[3] = ( 1. - is.u ) * is.v;
 
-                for ( size_t i = 0; i < 4; ++i ) {
-                    triplets.push_back( Triplet( ip, idx[i], w[i] ) );
+                if( on_quad_edge( edge ) ) {
+                    if( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) {
+                        edge.swap();
+                    }
+                    for( size_t i = 0; i < 2; ++i ) {
+                        triplets.push_back( Triplet( ip, idx[edge.idx[i]], w[edge.idx[i]] ) );
+                    }
+                }
+                else {
+                    for ( size_t i = 0; i < 4; ++i ) {
+                        triplets.push_back( Triplet( ip, idx[i], w[i] ) );
+                    }
                 }
                 break;  // stop looking for elements
             }
diff --git a/src/atlas/interpolation/method/FiniteElement.h b/src/atlas/interpolation/method/FiniteElement.h
index bbadf0289..7d0c8b4b2 100644
--- a/src/atlas/interpolation/method/FiniteElement.h
+++ b/src/atlas/interpolation/method/FiniteElement.h
@@ -33,6 +33,8 @@ class FiniteElement : public Method {
 
     virtual void setup( const FunctionSpace& source, const FunctionSpace& target ) override;
 
+    virtual void print( std::ostream& ) const override;
+
 protected:
     /**
    * @brief Create an interpolant sparse matrix relating two (pre-partitioned)
@@ -59,9 +61,13 @@ class FiniteElement : public Method {
     mesh::MultiBlockConnectivity* connectivity_;
     std::unique_ptr<array::ArrayView<double, 2>> icoords_;
     std::unique_ptr<array::ArrayView<double, 2>> ocoords_;
+    std::unique_ptr<array::ArrayView<gidx_t, 1>> igidx_;
 
     Field target_xyz_;
     Field target_ghost_;
+
+    FunctionSpace source_;
+    FunctionSpace target_;
 };
 
 }  // namespace method
diff --git a/src/atlas/interpolation/method/KNearestNeighbours.h b/src/atlas/interpolation/method/KNearestNeighbours.h
index 9f412512e..ab20d5779 100644
--- a/src/atlas/interpolation/method/KNearestNeighbours.h
+++ b/src/atlas/interpolation/method/KNearestNeighbours.h
@@ -30,6 +30,8 @@ class KNearestNeighbours : public KNearestNeighboursBase {
    */
     virtual void setup( const FunctionSpace& source, const FunctionSpace& target ) override;
 
+    virtual void print( std::ostream& ) const override {}
+
 protected:
     size_t k_;
 };
diff --git a/src/atlas/interpolation/method/Method.cc b/src/atlas/interpolation/method/Method.cc
index 05365cc52..c4d46dd93 100644
--- a/src/atlas/interpolation/method/Method.cc
+++ b/src/atlas/interpolation/method/Method.cc
@@ -22,8 +22,10 @@
 
 #include "atlas/field/Field.h"
 #include "atlas/field/FieldSet.h"
+#include "atlas/functionspace/NodeColumns.h"
 #include "atlas/runtime/Log.h"
 #include "atlas/runtime/Trace.h"
+#include "atlas/mesh/Nodes.h"
 
 // for static linking
 #include "FiniteElement.h"
diff --git a/src/atlas/interpolation/method/Method.h b/src/atlas/interpolation/method/Method.h
index 3cee4e597..34d62965a 100644
--- a/src/atlas/interpolation/method/Method.h
+++ b/src/atlas/interpolation/method/Method.h
@@ -12,6 +12,7 @@
 
 #include <string>
 #include <vector>
+#include <iosfwd>
 
 #include "eckit/config/Configuration.h"
 #include "eckit/linalg/SparseMatrix.h"
@@ -44,10 +45,12 @@ class Method : public eckit::Owned {
     virtual void execute( const FieldSet& source, FieldSet& target ) const;
     virtual void execute( const Field& source, Field& target ) const;
 
+    virtual void print( std::ostream& ) const = 0;
+
 protected:
-    typedef eckit::linalg::Triplet Triplet;
-    typedef std::vector<Triplet> Triplets;
-    typedef eckit::linalg::SparseMatrix Matrix;
+    using Triplet  = eckit::linalg::Triplet;
+    using Triplets = std::vector<Triplet>;
+    using Matrix   = eckit::linalg::SparseMatrix;
 
     static void normalise( Triplets& triplets );
 
@@ -58,6 +61,7 @@ class Method : public eckit::Owned {
     //        so do not expose here, even though only linear operators are now
     //        implemented.
     Matrix matrix_;
+
 };
 
 struct MethodFactory {
diff --git a/src/atlas/interpolation/method/NearestNeighbour.h b/src/atlas/interpolation/method/NearestNeighbour.h
index 9b311c499..829e8647a 100644
--- a/src/atlas/interpolation/method/NearestNeighbour.h
+++ b/src/atlas/interpolation/method/NearestNeighbour.h
@@ -21,6 +21,8 @@ class NearestNeighbour : public KNearestNeighboursBase {
     NearestNeighbour( const Config& config ) : KNearestNeighboursBase( config ) {}
     virtual ~NearestNeighbour() {}
 
+    virtual void print( std::ostream& ) const override {}
+
 protected:
     /**
    * @brief Create an interpolant sparse matrix relating two (pre-partitioned)
diff --git a/src/sandbox/interpolation/atlas-parallel-interpolation.cc b/src/sandbox/interpolation/atlas-parallel-interpolation.cc
index 67a5d3dfe..6a4efe4ef 100644
--- a/src/sandbox/interpolation/atlas-parallel-interpolation.cc
+++ b/src/sandbox/interpolation/atlas-parallel-interpolation.cc
@@ -70,6 +70,10 @@ class AtlasParallelInterpolation : public AtlasTool {
         add_option( new SimpleOption<double>( "target-mesh-generator-angle",
                                               "target mesh generator angle option (default 0.)" ) );
         add_option( new SimpleOption<size_t>( "target-mesh-halo", "target mesh halo size (default 1)" ) );
+        add_option( new SimpleOption<bool>( "forward-interpolator-output",
+                                            "Output forward interpolator's points and weights" ) );
+        add_option( new SimpleOption<bool>( "backward-interpolator-output",
+                                            "Output backward interpolator's points and weights" ) );
     }
 };
 
@@ -148,6 +152,10 @@ void AtlasParallelInterpolation::execute( const AtlasTool::Args& args ) {
             Interpolation( option::type( interpolation_method ), tgt_functionspace, src_functionspace );
     }
 
+    if( args.getBool( "forward-interpolator-output", false ) ) {
+        interpolator_forward.print( Log::info() );
+    }
+
     // Create source FunctionSpace and fields
 
     FieldSet src_fields;

From bd0c9725d7169b36ec547c3f286d9d1b8f055a46 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Fri, 8 Jun 2018 14:43:27 +0100
Subject: [PATCH 112/123] ATLAS-163 Introduce bit reproducibility for parallel
 interpolation for coinciding meshes

---
 .../interpolation/method/FiniteElement.cc     | 48 ++++++++++++++-----
 1 file changed, 36 insertions(+), 12 deletions(-)

diff --git a/src/atlas/interpolation/method/FiniteElement.cc b/src/atlas/interpolation/method/FiniteElement.cc
index e5f433b5e..610ea53ba 100644
--- a/src/atlas/interpolation/method/FiniteElement.cc
+++ b/src/atlas/interpolation/method/FiniteElement.cc
@@ -294,6 +294,7 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd
     Triplets triplets;
     Ray ray( PointXYZ{( *ocoords_ )( ip, 0 ), ( *ocoords_ )( ip, 1 ), ( *ocoords_ )( ip, 2 )} );
     ElementEdge edge;
+    idx_t single_point;
     for ( ElemIndex3::NodeList::const_iterator itc = elems.begin(); itc != elems.end(); ++itc ) {
         const size_t elem_id = ( *itc ).value().payload();
         ASSERT( elem_id < connectivity_->rows() );
@@ -306,7 +307,7 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd
             ASSERT( idx[i] < inp_points );
         }
 
-        auto on_triag_edge = [&](ElementEdge& edge) {
+        auto on_triag_edge = [&]() {
             if( w[0] < 1.e-15 ) {
                 edge.idx[0] = 1;
                 edge.idx[1] = 2;
@@ -325,7 +326,7 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd
             return false;
         };
 
-        auto on_quad_edge = [&](ElementEdge& edge) {
+        auto on_quad_edge = [&]() {
             if( w[0] < 1.e-15 && w[1] < 1.e-15 ) {
                 edge.idx[0] = 2;
                 edge.idx[1] = 3;
@@ -348,6 +349,19 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd
             }
             return false;
         };
+
+        auto on_single_point = [&]() {
+            if( w[edge.idx[0]] < 1.e-15 ) {
+                single_point = edge.idx[1];
+                return true;
+            }
+            if( w[edge.idx[1]] < 1.e-15 ) {
+                single_point = edge.idx[0];
+                return true;
+            }
+            return false;
+        };
+
         if ( nb_cols == 3 ) {
             /* triangle */
             element::Triag3D triag(
@@ -369,12 +383,17 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd
                 w[1] = is.u;
                 w[2] = is.v;
 
-                if( on_triag_edge( edge) ) {
-                    if( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) {
-                        edge.swap();
+                if( on_triag_edge() ) {
+                    if( on_single_point() ) {
+                        triplets.push_back( Triplet( ip, idx[single_point], w[single_point] ) );
                     }
-                    for( size_t i = 0; i < 2; ++i ) {
-                        triplets.push_back( Triplet( ip, idx[edge.idx[i]], w[edge.idx[i]] ) );
+                    else {
+                        if( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) {
+                            edge.swap();
+                        }
+                        for( size_t i = 0; i < 2; ++i ) {
+                            triplets.push_back( Triplet( ip, idx[edge.idx[i]], w[edge.idx[i]] ) );
+                        }
                     }
                 }
                 else {
@@ -408,12 +427,17 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd
                 w[2] = is.u * is.v;
                 w[3] = ( 1. - is.u ) * is.v;
 
-                if( on_quad_edge( edge ) ) {
-                    if( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) {
-                        edge.swap();
+                if( on_quad_edge() ) {
+                    if( on_single_point() ) {
+                        triplets.push_back( Triplet( ip, idx[single_point], w[single_point] ) );
                     }
-                    for( size_t i = 0; i < 2; ++i ) {
-                        triplets.push_back( Triplet( ip, idx[edge.idx[i]], w[edge.idx[i]] ) );
+                    else {
+                        if( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) {
+                            edge.swap();
+                        }
+                        for( size_t i = 0; i < 2; ++i ) {
+                            triplets.push_back( Triplet( ip, idx[edge.idx[i]], w[edge.idx[i]] ) );
+                        }
                     }
                 }
                 else {

From caa760fddd7480e545ac9bbdc7963d5dea7461b9 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Fri, 8 Jun 2018 16:32:03 +0100
Subject: [PATCH 113/123] ATLAS-163 Recompute weights on edges

---
 src/atlas/interpolation/Interpolation.cc      | 10 +++++-
 src/atlas/interpolation/element/Quad3D.h      |  7 ++++
 src/atlas/interpolation/element/Triag3D.h     |  6 ++++
 .../interpolation/method/FiniteElement.cc     | 33 +++++++++++++------
 4 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/src/atlas/interpolation/Interpolation.cc b/src/atlas/interpolation/Interpolation.cc
index 579211174..2247d867f 100644
--- a/src/atlas/interpolation/Interpolation.cc
+++ b/src/atlas/interpolation/Interpolation.cc
@@ -24,7 +24,15 @@ Interpolation::Interpolation( const Config& config, const FunctionSpace& source,
         Implementation* impl = interpolation::MethodFactory::build( type, config );
         impl->setup( source, target );
         return impl;
-    }() ) {}
+    }() ) {
+
+        std::string path;
+        if( config.get( "output", path ) ) {
+            std::ofstream file( path );
+            print( file );
+        }
+
+    }
 
 Interpolation::Interpolation( const Interpolation& other ) : implementation_( other.implementation_ ) {}
 
diff --git a/src/atlas/interpolation/element/Quad3D.h b/src/atlas/interpolation/element/Quad3D.h
index 6c5dd1753..122c97324 100644
--- a/src/atlas/interpolation/element/Quad3D.h
+++ b/src/atlas/interpolation/element/Quad3D.h
@@ -52,6 +52,13 @@ class Quad3D {
         return s;
     }
 
+    const Vector3D& p(int i) {
+        if(i==0) return v00;
+        if(i==1) return v10;
+        if(i==2) return v11;
+        if(i==3) return v01;
+    }
+
 private:           // members
     Vector3D v00;  // aka v0
     Vector3D v10;  // aka v1
diff --git a/src/atlas/interpolation/element/Triag3D.h b/src/atlas/interpolation/element/Triag3D.h
index 1bd758c23..d28fdfc31 100644
--- a/src/atlas/interpolation/element/Triag3D.h
+++ b/src/atlas/interpolation/element/Triag3D.h
@@ -55,6 +55,12 @@ class Triag3D {
         return s;
     }
 
+    const Vector3D& p(int i) {
+        if(i==0) return v0;
+        if(i==1) return v1;
+        if(i==2) return v2;
+    }
+
 private:  // members
     Vector3D v0;
     Vector3D v1;
diff --git a/src/atlas/interpolation/method/FiniteElement.cc b/src/atlas/interpolation/method/FiniteElement.cc
index 610ea53ba..7e94d64bb 100644
--- a/src/atlas/interpolation/method/FiniteElement.cc
+++ b/src/atlas/interpolation/method/FiniteElement.cc
@@ -9,6 +9,7 @@
  */
 
 #include <cmath>
+#include <limits>
 
 #include "atlas/interpolation/method/FiniteElement.h"
 
@@ -152,6 +153,7 @@ void FiniteElement::print(std::ostream& out) const
     tgt.gather().gather( stencil_weights_loc, stencil_weights_glb );
 
     if( mpi::comm().rank() == 0 ) {
+        int precision = std::numeric_limits<double>::max_digits10;
         for( idx_t i=0; i<global_size; ++i ) {
             out << std::setw(10) << i+1 << " : ";
             for( idx_t j=0; j<stencil_size_glb(i); ++j ) {
@@ -161,7 +163,7 @@ void FiniteElement::print(std::ostream& out) const
                 out << "          ";
             }
             for( idx_t j=0; j<stencil_size_glb(i); ++j ) {
-                out << std::setw(12) << std::left <<  stencil_weights_glb(i,j);
+                out << std::setw(precision+5) << std::left <<  std::setprecision(precision) << stencil_weights_glb(i,j);
             }
             out << std::endl;
         }
@@ -293,6 +295,7 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd
 
     Triplets triplets;
     Ray ray( PointXYZ{( *ocoords_ )( ip, 0 ), ( *ocoords_ )( ip, 1 ), ( *ocoords_ )( ip, 2 )} );
+    Vector3D p{ ( *ocoords_ )( ip, 0 ), ( *ocoords_ )( ip, 1 ), ( *ocoords_ )( ip, 2 ) };
     ElementEdge edge;
     idx_t single_point;
     for ( ElemIndex3::NodeList::const_iterator itc = elems.begin(); itc != elems.end(); ++itc ) {
@@ -307,18 +310,20 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd
             ASSERT( idx[i] < inp_points );
         }
 
+        const double tolerance = 1.e-12;
+
         auto on_triag_edge = [&]() {
-            if( w[0] < 1.e-15 ) {
+            if( w[0] < tolerance ) {
                 edge.idx[0] = 1;
                 edge.idx[1] = 2;
                 return true;
             }
-            if( w[1] < 1.e-15 ) {
+            if( w[1] < tolerance ) {
                 edge.idx[0] = 0;
                 edge.idx[1] = 2;
                 return true;
             }
-            if( w[2] < 1.e-15 ) {
+            if( w[2] < tolerance ) {
                 edge.idx[0] = 0;
                 edge.idx[1] = 1;
                 return true;
@@ -327,22 +332,22 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd
         };
 
         auto on_quad_edge = [&]() {
-            if( w[0] < 1.e-15 && w[1] < 1.e-15 ) {
+            if( w[0] < tolerance && w[1] < tolerance ) {
                 edge.idx[0] = 2;
                 edge.idx[1] = 3;
                 return true;
             }
-            if( w[1] < 1.e-15 && w[2] < 1.e-15 ) {
+            if( w[1] < tolerance && w[2] < tolerance ) {
                 edge.idx[0] = 0;
                 edge.idx[1] = 3;
                 return true;
             }
-            if( w[2] < 1.e-15 && w[3] < 1.e-15 ) {
+            if( w[2] < tolerance && w[3] < tolerance ) {
                 edge.idx[0] = 0;
                 edge.idx[1] = 1;
                 return true;
             }
-            if( w[3] < 1.e-15 && w[0] < 1.e-15 ) {
+            if( w[3] < tolerance && w[0] < tolerance ) {
                 edge.idx[0] = 1;
                 edge.idx[1] = 2;
                 return true;
@@ -351,17 +356,23 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd
         };
 
         auto on_single_point = [&]() {
-            if( w[edge.idx[0]] < 1.e-15 ) {
+            if( w[edge.idx[0]] < tolerance ) {
                 single_point = edge.idx[1];
                 return true;
             }
-            if( w[edge.idx[1]] < 1.e-15 ) {
+            if( w[edge.idx[1]] < tolerance ) {
                 single_point = edge.idx[0];
                 return true;
             }
             return false;
         };
 
+        auto interpolate_edge = [&]( const Vector3D& p0, const Vector3D& p1 ) {
+            double t = ( p - p0 ).squaredNorm() / ( p1 - p0 ).squaredNorm();
+            w[edge.idx[0]] = t;
+            w[edge.idx[1]] = 1.-t;
+        };
+
         if ( nb_cols == 3 ) {
             /* triangle */
             element::Triag3D triag(
@@ -391,6 +402,7 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd
                         if( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) {
                             edge.swap();
                         }
+                        interpolate_edge( triag.p(edge.idx[0]), triag.p(edge.idx[1] ) );
                         for( size_t i = 0; i < 2; ++i ) {
                             triplets.push_back( Triplet( ip, idx[edge.idx[i]], w[edge.idx[i]] ) );
                         }
@@ -435,6 +447,7 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd
                         if( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) {
                             edge.swap();
                         }
+                        interpolate_edge( quad.p(edge.idx[0]), quad.p(edge.idx[1] ) );
                         for( size_t i = 0; i < 2; ++i ) {
                             triplets.push_back( Triplet( ip, idx[edge.idx[i]], w[edge.idx[i]] ) );
                         }

From 28bbf4bd03c6028fca168341874e8f5bb3bc58be Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Mon, 18 Jun 2018 15:56:00 +0100
Subject: [PATCH 114/123] ATLAS-164 Array uses raw C pointer rather than
 std::vector and is initialised with signaling_NaN for debug builds

---
 CMakeLists.txt                           | 20 ++++++-
 src/CMakeLists.txt                       |  6 ++
 src/atlas/array/native/NativeDataStore.h | 73 ++++++++++++++++--------
 src/atlas/library/defines.h.in           |  1 +
 src/atlas/output/detail/PointCloudIO.cc  | 51 +++++++++++++----
 src/tests/io/test_pointcloud_io.cc       |  6 +-
 6 files changed, 115 insertions(+), 42 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 917d6da60..bd0231591 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -191,15 +191,29 @@ set( ATLAS_BITS_GLOBAL 64 )
 ### Bounds checking
 if( ${CMAKE_BUILD_TYPE} MATCHES "Debug" )
   set( DEFAULT_BOUNDSCHECKING ON )
+  set( DEFAULT_INIT_SNAN ON )
 else()
   set( DEFAULT_BOUNDSCHECKING OFF )
+  set( DEFAULT_INIT_SNAN OFF )
 endif()
+
 ecbuild_add_option( FEATURE BOUNDSCHECKING
                     DEFAULT ${DEFAULT_BOUNDSCHECKING}
                     DESCRIPTION "Bounds checking for atlas::ArrayView and atlas::IndexView" )
-if( ${CMAKE_BUILD_TYPE} MATCHES "Debug" AND NOT ATLAS_HAVE_BOUNDSCHECKING )
-  ecbuild_info( "Turning BOUNDSCHECKING ON for Debug build" )
-  set( ATLAS_HAVE_BOUNDSCHECKING 1 )
+
+ecbuild_add_option( FEATURE INIT_SNAN
+                    DEFAULT ${DEFAULT_INIT_SNAN}
+                    DESCRIPTION "Initialise atlas arrays with signaling_NaN (real types) or other invalid values (other types)" )
+
+if( ${CMAKE_BUILD_TYPE} MATCHES "Debug" )
+  if( NOT ATLAS_HAVE_INIT_SNAN )
+    ecbuild_info( "Turning INIT_SNAN ON for Debug build" )
+    set( ATLAS_HAVE_INIT_SNAN 1 )
+  endif()
+  if( NOT ATLAS_HAVE_BOUNDSCHECKING )
+    ecbuild_info( "Turning BOUNDSCHECKING ON for Debug build" )
+    set( ATLAS_HAVE_BOUNDSCHECKING 1 )
+  endif()
 endif()
 
 ### sandbox
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 98a5e1dd0..8dd72c231 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -48,6 +48,12 @@ else()
   set( ATLAS_HAVE_BOUNDSCHECKING 0 )
 endif()
 
+if( ATLAS_HAVE_INIT_SNAN )
+  set( ATLAS_HAVE_INIT_SNAN 1 )
+else()
+  set( ATLAS_HAVE_INIT_SNAN 0 )
+endif()
+
 if( ATLAS_HAVE_GRIDTOOLS_STORAGE )
   set( ATLAS_HAVE_GRIDTOOLS_STORAGE 1 )
 else()
diff --git a/src/atlas/array/native/NativeDataStore.h b/src/atlas/array/native/NativeDataStore.h
index f85c76de9..cfc29cc9d 100644
--- a/src/atlas/array/native/NativeDataStore.h
+++ b/src/atlas/array/native/NativeDataStore.h
@@ -10,6 +10,9 @@
 
 #pragma once
 
+
+#include <algorithm>    // std::fill
+#include <limits>       // std::numeric_limits<T>::signaling_NaN
 #include "atlas/array/ArrayUtil.h"
 #include "atlas/library/config.h"
 
@@ -19,35 +22,57 @@ namespace atlas {
 namespace array {
 namespace native {
 
+template <typename Value>
+static constexpr Value invalid_value() {
+    return std::numeric_limits<Value>::has_signaling_NaN ? std::numeric_limits<Value>::signaling_NaN() :
+           std::numeric_limits<Value>::has_quiet_NaN     ? std::numeric_limits<Value>::quiet_NaN()     :
+           std::numeric_limits<Value>::has_infinity      ? std::numeric_limits<Value>::infinity()      :
+                                                           std::numeric_limits<Value>::max();
+}
+
+#if ATLAS_INIT_SNAN
+template< typename Value >
+void initialise( Value array[], size_t size ) {
+    std::fill_n( array, size, invalid_value<Value>() );
+}
+#else
+template< typename Value > void initialise( Value[], size_t ) {}
+#endif
+
 template <typename Value>
 class DataStore : public ArrayDataStore {
 public:
-    DataStore( size_t size ) : data_store_( size ) {}
+    DataStore( size_t size ) : data_store_( new Value[size] ), size_( size ) {
+        initialise( data_store_, size_ );
+    }
 
-    void cloneToDevice() const {}
+    virtual ~DataStore() override { delete[] data_store_; }
 
-    void cloneFromDevice() const {}
+    virtual void cloneToDevice() const override {}
 
-    bool valid() const { return true; }
+    virtual void cloneFromDevice() const override {}
 
-    void syncHostDevice() const {}
+    virtual bool valid() const override { return true; }
 
-    bool hostNeedsUpdate() const { return false; }
+    virtual void syncHostDevice() const override {}
 
-    bool deviceNeedsUpdate() const { return false; }
+    virtual bool hostNeedsUpdate() const override { return false; }
 
-    void reactivateDeviceWriteViews() const {}
+    virtual bool deviceNeedsUpdate() const override { return false; }
 
-    void reactivateHostWriteViews() const {}
+    virtual void reactivateDeviceWriteViews() const override {}
 
-    void* voidDataStore() { return static_cast<void*>( &data_store_.front() ); }
+    virtual void reactivateHostWriteViews() const override {}
 
-    void* voidHostData() { return static_cast<void*>( &data_store_.front() ); }
+    virtual void* voidDataStore() override { return static_cast<void*>( data_store_ ); }
 
-    void* voidDeviceData() { return static_cast<void*>( &data_store_.front() ); }
+    virtual void* voidHostData() override { return static_cast<void*>( data_store_ ); }
+
+    virtual void* voidDeviceData() override { return static_cast<void*>( data_store_ ); }
 
 private:
-    std::vector<Value> data_store_;
+    Value* data_store_;
+    size_t size_;
 };
 
 //------------------------------------------------------------------------------
@@ -57,27 +82,27 @@ class WrappedDataStore : public ArrayDataStore {
 public:
     WrappedDataStore( Value* data_store ) : data_store_( data_store ) {}
 
-    void cloneToDevice() const {}
+    virtual void cloneToDevice() const override {}
 
-    void cloneFromDevice() const {}
+    virtual void cloneFromDevice() const override {}
 
-    bool valid() const { return true; }
+    virtual bool valid() const override { return true; }
 
-    void syncHostDevice() const {}
+    virtual void syncHostDevice() const override {}
 
-    bool hostNeedsUpdate() const { return true; }
+    virtual bool hostNeedsUpdate() const override { return true; }
 
-    bool deviceNeedsUpdate() const { return false; }
+    virtual bool deviceNeedsUpdate() const override { return false; }
 
-    void reactivateDeviceWriteViews() const {}
+    virtual void reactivateDeviceWriteViews() const override {}
 
-    void reactivateHostWriteViews() const {}
+    virtual void reactivateHostWriteViews() const override {}
 
-    void* voidDataStore() { return static_cast<void*>( data_store_ ); }
+    virtual void* voidDataStore() override { return static_cast<void*>( data_store_ ); }
 
-    void* voidHostData() { return static_cast<void*>( data_store_ ); }
+    virtual void* voidHostData() override { return static_cast<void*>( data_store_ ); }
 
-    void* voidDeviceData() { return static_cast<void*>( data_store_ ); }
+    virtual void* voidDeviceData() override { return static_cast<void*>( data_store_ ); }
 
 private:
     Value* data_store_;
diff --git a/src/atlas/library/defines.h.in b/src/atlas/library/defines.h.in
index e644fc73e..e8050e1fe 100644
--- a/src/atlas/library/defines.h.in
+++ b/src/atlas/library/defines.h.in
@@ -14,6 +14,7 @@
 #define ATLAS_BITS_GLOBAL                    @ATLAS_BITS_GLOBAL@
 #define ATLAS_ARRAYVIEW_BOUNDS_CHECKING      @ATLAS_HAVE_BOUNDSCHECKING@
 #define ATLAS_INDEXVIEW_BOUNDS_CHECKING      @ATLAS_HAVE_BOUNDSCHECKING@
+#define ATLAS_INIT_SNAN                      @ATLAS_HAVE_INIT_SNAN@
 #define ATLAS_HAVE_GRIDTOOLS_STORAGE         @ATLAS_HAVE_GRIDTOOLS_STORAGE@
 #define ATLAS_GRIDTOOLS_STORAGE_BACKEND_HOST @ATLAS_GRIDTOOLS_STORAGE_BACKEND_HOST@
 #define ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA @ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA@
diff --git a/src/atlas/output/detail/PointCloudIO.cc b/src/atlas/output/detail/PointCloudIO.cc
index d164b57a6..29da1f424 100644
--- a/src/atlas/output/detail/PointCloudIO.cc
+++ b/src/atlas/output/detail/PointCloudIO.cc
@@ -53,6 +53,8 @@ std::string sanitize_field_name( const std::string& s ) {
 Mesh PointCloudIO::read( const eckit::PathName& path, std::vector<std::string>& vfnames ) {
     const std::string msg( "PointCloudIO::read: " );
 
+    Log::debug() << "PointCloudIO reading " << path << std::endl;
+
     Mesh mesh;
 
     vfnames.clear();
@@ -82,14 +84,17 @@ Mesh PointCloudIO::read( const eckit::PathName& path, std::vector<std::string>&
                    << ")";
             throw eckit::BadParameter( errmsg.str(), Here() );
         }
-        if ( nb_pts == 0 ) throw eckit::BadValue( msg + "invalid number of points (failed: nb_pts>0)" );
-        if ( nb_columns < 2 ) throw eckit::BadValue( msg + "invalid number of columns (failed: nb_columns>=2)" );
+        if ( nb_pts == 0 ) throw eckit::BadValue( msg + " invalid number of points (failed: nb_pts>0)" );
+        if ( nb_columns < 2 ) throw eckit::BadValue( msg + " invalid number of columns (failed: nb_columns>=2)" );
 
         mesh.nodes().resize( nb_pts );
 
         mesh::Nodes& nodes             = mesh.nodes();
         array::ArrayView<double, 2> xy = array::make_view<double, 2>( nodes.xy() );
-
+        array::ArrayView<double, 2> lonlat = array::make_view<double, 2>( nodes.lonlat() );
+        array::ArrayView<gidx_t, 1> glb_idx = array::make_view<gidx_t, 1>( nodes.global_index() );
+        array::ArrayView<int, 1> part = array::make_view<int, 1>( nodes.partition() );
+        part.assign(0);
         // header, part 2:
         // determine columns' labels
         // (check end of first line for possible column labels, starting from
@@ -97,9 +102,10 @@ Mesh PointCloudIO::read( const eckit::PathName& path, std::vector<std::string>&
 
         vfnames.resize( nb_columns );
         for ( size_t j = 0; j < nb_columns; ++j ) {
-            oss.str( "column_" );
-            oss << ( j + 1 );
-            vfnames[j] = ( iss && iss >> line ) ? sanitize_field_name( line ) : oss.str();
+            std::stringstream name;
+            name.str( "column_" );
+            name << ( j + 1 );
+            vfnames[j] = ( iss && iss >> line ) ? sanitize_field_name( line ) : name.str();
         }
 
         // (preallocate data, and define fields without the first two columns
@@ -109,7 +115,7 @@ Mesh PointCloudIO::read( const eckit::PathName& path, std::vector<std::string>&
 
         std::vector<array::ArrayView<double, 1>> fields;
         for ( size_t j = 0; j < nb_fld; ++j ) {
-            fields.push_back( array::make_view<double, 1>(
+            fields.emplace_back( array::make_view<double, 1>(
                 nodes.add( Field( vfnames[j], array::make_datatype<double>(), array::make_shape( nb_pts ) ) ) ) );
         }
 
@@ -121,18 +127,25 @@ Mesh PointCloudIO::read( const eckit::PathName& path, std::vector<std::string>&
             iss.str( line );
 
             // NOTE always expects (lon,lat) order, maybe make it configurable?
-            iss >> xy( i, XX ) >> xy( i, YY );
-            ;
+            PointXY pxy;
+            iss >> pxy.x() >>pxy.y();
+
+            xy(i,XX) = pxy.x();
+            xy(i,YY) = pxy.y();
+            lonlat(i,LON) = pxy.x();
+            lonlat(i,LAT) = pxy.y();
+            glb_idx(i) = i+1;
+
             for ( j = 0; iss && j < nb_fld; ++j )
                 iss >> fields[j]( i );
             if ( j < nb_fld ) {
-                oss << "invalid number of fields in data section, on line " << ( i + 1 ) << ", read " << j
+                oss << " Invalid number of fields in data section, on line " << ( i + 1 ) << ", read " << j
                     << " fields, expected " << nb_fld << ".";
                 throw eckit::BadValue( msg + oss.str() );
             }
         }
         if ( i < nb_pts ) {
-            oss << "invalid number of lines in data section, read " << ( i ) << " lines, expected " << nb_pts << ".";
+            oss << " Invalid number of lines in data section, read " << ( i ) << " lines, expected " << nb_pts << ".";
             throw eckit::BadValue( msg + oss.str() );
         }
 
@@ -150,6 +163,8 @@ Mesh PointCloudIO::read( const eckit::PathName& path ) {
 void PointCloudIO::write( const eckit::PathName& path, const Mesh& mesh ) {
     const std::string msg( "PointCloudIO::write: " );
 
+    Log::debug() << "PointCloudIO writing " << path << std::endl;
+
     // operate in mesh function space, creating transversing data structures
     // @warning: several copy operations here
 
@@ -200,13 +215,15 @@ void PointCloudIO::write( const eckit::PathName& path, const FieldSet& fieldset,
                           const functionspace::NodeColumns& function_space ) {
     const std::string msg( "PointCloudIO::write: " );
 
+    Log::debug() << "PointCloudIO writing " << path << std::endl;
+
     // operate in field sets with same grid and consistent size(s), creating
     // transversing data structures
     // @warning: several copy operations here
 
     ASSERT( fieldset.size() );
 
-    array::ArrayView<double, 2> lonlat = array::make_view<double, 2>( function_space.nodes().lonlat() );
+    array::ArrayView<double, 2> lonlat = array::make_view<double, 2>( function_space.nodes().xy() );
     if ( !lonlat.size() ) throw eckit::BadParameter( msg + "invalid number of points (failed: nb_pts>0)" );
 
     // get the fields (sanitized) names and values
@@ -247,6 +264,9 @@ void PointCloudIO::write( const eckit::PathName& path, const FieldSet& fieldset,
 }
 
 void PointCloudIO::write( const eckit::PathName& path, const std::vector<PointLonLat>& pts ) {
+
+    Log::debug() << "PointCloudIO writing " << path << std::endl;
+
     std::ofstream f( path.asString().c_str() );
     if ( !f.is_open() ) throw eckit::CantOpenFile( path.asString() );
 
@@ -262,6 +282,9 @@ void PointCloudIO::write( const eckit::PathName& path, const std::vector<PointLo
 
 void PointCloudIO::write( const eckit::PathName& path, const std::vector<double>& lon, const std::vector<double>& lat,
                           const std::vector<std::vector<double>*>& vfvalues, const std::vector<std::string>& vfnames ) {
+
+    Log::debug() << "PointCloudIO writing " << path << std::endl;
+
     const std::string msg( "PointCloudIO::write: " );
     const size_t Npts( lon.size() ), Nfld( vfvalues.size() );
     if ( Npts != lat.size() ) throw eckit::BadParameter( msg + "number of points inconsistent (failed: #lon == #lat)" );
@@ -295,6 +318,10 @@ void PointCloudIO::write( const eckit::PathName& path, const std::vector<double>
 
 void PointCloudIO::write( const eckit::PathName& path, const int& nb_pts, const double* lon, const double* lat,
                           const int& nb_fld, const double** afvalues, const char** afnames ) {
+
+    Log::debug() << "PointCloudIO writing " << path << std::endl;
+
+
     const std::string msg( "PointCloudIO::write: " );
 
     const size_t Npts( nb_pts > 0 ? nb_pts : 0 ), Nfld( nb_fld > 0 && afvalues && afnames ? nb_fld : 0 );
diff --git a/src/tests/io/test_pointcloud_io.cc b/src/tests/io/test_pointcloud_io.cc
index 56c86dace..e45996df1 100644
--- a/src/tests/io/test_pointcloud_io.cc
+++ b/src/tests/io/test_pointcloud_io.cc
@@ -326,6 +326,8 @@ CASE( "write_read_write_field" ) {
     for ( size_t i = 0; i < field_data.size(); ++i ) {
         EXPECT( eckit::types::is_approximately_equal( funny_formula( i ), field_data( i ),
                                                       0.001 ) );  // 0.001% relative error
+        EXPECT( eckit::types::is_approximately_equal( funny_formula( i ), field_data( i ),
+                                                      0.001 ) );  // 0.001% relative error
     }
 
     // PART 4
@@ -338,6 +340,7 @@ CASE( "write_read_write_field" ) {
     EXPECT_NO_THROW( fieldset.add( field ) );
 
     functionspace::NodeColumns functionspace( mesh );
+
     EXPECT_NO_THROW( output::detail::PointCloudIO::write( "pointcloud_FieldSet.txt", fieldset, functionspace ) );
     EXPECT_NO_THROW( output::detail::PointCloudIO::write( "pointcloud_Grid.txt", mesh ) );
 
@@ -350,9 +353,6 @@ CASE( "write_read_write_field" ) {
     EXPECT( grid_from_FieldSet );
     EXPECT( grid_from_Grid );
 
-    // (guarantee different grid, to make checks useful)
-    EXPECT( grid != grid_from_FieldSet );
-    EXPECT( grid != grid_from_Grid );
 
     // PART 5
     // compare reading of reference data to:

From 67c202a8ec58d2c29ec50087ad5b842433aac572 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Mon, 18 Jun 2018 16:45:38 +0100
Subject: [PATCH 115/123] ATLAS-165 Fix intents in Fortran functions

---
 .../atlas_functionspace_NodeColumns_module.F90 | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/atlas_f/functionspace/atlas_functionspace_NodeColumns_module.F90 b/src/atlas_f/functionspace/atlas_functionspace_NodeColumns_module.F90
index 1cbc9d74d..b8bc99614 100644
--- a/src/atlas_f/functionspace/atlas_functionspace_NodeColumns_module.F90
+++ b/src/atlas_f/functionspace/atlas_functionspace_NodeColumns_module.F90
@@ -1846,8 +1846,8 @@ subroutine minloc_per_level(this,field,minimum,location)
   use atlas_functionspace_NodeColumns_c_binding
   class(atlas_functionspace_NodeColumns), intent(in) :: this
   type(atlas_Field), intent(in) :: field
-  type(atlas_Field), intent(out) :: minimum
-  type(atlas_Field), intent(out) :: location
+  type(atlas_Field), intent(inout) :: minimum
+  type(atlas_Field), intent(inout) :: location
   call atlas__NodesFunctionSpace__minloc_per_level(this%c_ptr(),field%c_ptr(),minimum%c_ptr(),location%c_ptr())
 end subroutine
 
@@ -1857,8 +1857,8 @@ subroutine maxloc_per_level(this,field,maximum,location)
   use atlas_functionspace_NodeColumns_c_binding
   class(atlas_functionspace_NodeColumns), intent(in) :: this
   type(atlas_Field), intent(in) :: field
-  type(atlas_Field), intent(out) :: maximum
-  type(atlas_Field), intent(out) :: location
+  type(atlas_Field), intent(inout) :: maximum
+  type(atlas_Field), intent(inout) :: location
   call atlas__NodesFunctionSpace__maxloc_per_level(this%c_ptr(),field%c_ptr(),maximum%c_ptr(),location%c_ptr())
 end subroutine
 
@@ -1868,7 +1868,7 @@ subroutine minimum_per_level(this,field,minimum)
   use atlas_functionspace_NodeColumns_c_binding
   class(atlas_functionspace_NodeColumns), intent(in) :: this
   type(atlas_Field), intent(in) :: field
-  type(atlas_Field), intent(out) :: minimum
+  type(atlas_Field), intent(inout) :: minimum
   call atlas__NodesFunctionSpace__min_per_level(this%c_ptr(),field%c_ptr(),minimum%c_ptr())
 end subroutine
 
@@ -1878,7 +1878,7 @@ subroutine maximum_per_level(this,field,maximum)
   use atlas_functionspace_NodeColumns_c_binding
   class(atlas_functionspace_NodeColumns), intent(in) :: this
   type(atlas_Field), intent(in) :: field
-  type(atlas_Field), intent(out) :: maximum
+  type(atlas_Field), intent(inout) :: maximum
   call atlas__NodesFunctionSpace__max_per_level(this%c_ptr(),field%c_ptr(),maximum%c_ptr())
 end subroutine
 
@@ -1889,7 +1889,7 @@ subroutine sum_per_level(this,field,sum,N)
   use, intrinsic :: iso_c_binding, only : c_int
   class(atlas_functionspace_NodeColumns), intent(in) :: this
   type(atlas_Field), intent(in) :: field
-  type(atlas_Field), intent(out) :: sum
+  type(atlas_Field), intent(inout) :: sum
   integer(c_int), intent(out), optional :: N
   integer(c_int) :: opt_N
   call atlas__NodesFunctionSpace__sum_per_level(this%c_ptr(),field%c_ptr(),sum%c_ptr(),opt_N)
@@ -1903,7 +1903,7 @@ subroutine order_independent_sum_per_level(this,field,sum,N)
   use, intrinsic :: iso_c_binding, only : c_int
   class(atlas_functionspace_NodeColumns), intent(in) :: this
   type(atlas_Field), intent(in) :: field
-  type(atlas_Field), intent(out) :: sum
+  type(atlas_Field), intent(inout) :: sum
   integer(c_int), intent(out), optional :: N
   integer(c_int) :: opt_N
   call atlas__NodesFunctionSpace__oisum_per_level(this%c_ptr(),field%c_ptr(),sum%c_ptr(),opt_N)
@@ -1917,7 +1917,7 @@ subroutine mean_per_level(this,field,mean,N)
   use, intrinsic :: iso_c_binding, only : c_int
   class(atlas_functionspace_NodeColumns), intent(in) :: this
   type(atlas_Field), intent(in) :: field
-  type(atlas_Field), intent(out) :: mean
+  type(atlas_Field), intent(inout) :: mean
   integer(c_int), intent(out), optional :: N
   integer(c_int) :: opt_N
   call atlas__NodesFunctionSpace__mean_per_level(this%c_ptr(),field%c_ptr(),mean%c_ptr(),opt_N)

From c5bd183c2b84ca04ce8204dfee53fcb151c789dc Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Mon, 18 Jun 2018 16:48:20 +0100
Subject: [PATCH 116/123] Apply clang-format

---
 src/atlas/array/native/NativeDataStore.h      |  23 +--
 src/atlas/grid/Grid.cc                        |   2 +-
 src/atlas/grid/Grid.h                         |   2 +-
 src/atlas/interpolation/Interpolation.h       |   3 +-
 .../interpolation/method/FiniteElement.cc     | 143 +++++++++---------
 src/atlas/interpolation/method/Method.cc      |   2 +-
 src/atlas/interpolation/method/Method.h       |   3 +-
 src/atlas/mesh/actions/BuildCellCentres.cc    |  22 +--
 src/atlas/mesh/actions/BuildDualMesh.cc       |   2 +-
 src/atlas/mesh/actions/BuildEdges.cc          |   2 +-
 src/atlas/mesh/actions/BuildHalo.cc           |  26 ++--
 src/atlas/mesh/actions/BuildParallelFields.cc |  19 ++-
 src/atlas/output/detail/PointCloudIO.cc       |  25 ++-
 src/atlas/trans/local/TransLocal.cc           |  15 +-
 src/atlas/trans/local/TransLocal.h            |   2 +-
 .../atlas-parallel-interpolation.cc           |   4 +-
 src/tests/acceptance_tests/atest_mgrids.cc    |  64 ++++----
 src/tests/trans/test_transgeneral.cc          |   2 +-
 18 files changed, 177 insertions(+), 184 deletions(-)

diff --git a/src/atlas/array/native/NativeDataStore.h b/src/atlas/array/native/NativeDataStore.h
index cfc29cc9d..d11dd7f49 100644
--- a/src/atlas/array/native/NativeDataStore.h
+++ b/src/atlas/array/native/NativeDataStore.h
@@ -11,8 +11,8 @@
 #pragma once
 
 
-#include <algorithm>    // std::fill
-#include <limits>       // std::numeric_limits<T>::signaling_NaN
+#include <algorithm>  // std::fill
+#include <limits>     // std::numeric_limits<T>::signaling_NaN
 #include "atlas/array/ArrayUtil.h"
 #include "atlas/library/config.h"
 
@@ -24,27 +24,28 @@ namespace native {
 
 template <typename Value>
 static constexpr Value invalid_value() {
-    return std::numeric_limits<Value>::has_signaling_NaN ? std::numeric_limits<Value>::signaling_NaN() :
-           std::numeric_limits<Value>::has_quiet_NaN     ? std::numeric_limits<Value>::quiet_NaN()     :
-           std::numeric_limits<Value>::has_infinity      ? std::numeric_limits<Value>::infinity()      :
-                                                           std::numeric_limits<Value>::max();
+    return std::numeric_limits<Value>::has_signaling_NaN
+               ? std::numeric_limits<Value>::signaling_NaN()
+               : std::numeric_limits<Value>::has_quiet_NaN
+                     ? std::numeric_limits<Value>::quiet_NaN()
+                     : std::numeric_limits<Value>::has_infinity ? std::numeric_limits<Value>::infinity()
+                                                                : std::numeric_limits<Value>::max();
 }
 
 #if ATLAS_INIT_SNAN
-template< typename Value >
+template <typename Value>
 void initialise( Value array[], size_t size ) {
     std::fill_n( array, size, invalid_value<Value>() );
 }
 #else
-template< typename Value > void initialise( Value[], size_t ) {}
+template <typename Value>
+void initialise( Value[], size_t ) {}
 #endif
 
 template <typename Value>
 class DataStore : public ArrayDataStore {
 public:
-    DataStore( size_t size ) : data_store_( new Value[size] ), size_( size ) {
-        initialise( data_store_, size_ );
-    }
+    DataStore( size_t size ) : data_store_( new Value[size] ), size_( size ) { initialise( data_store_, size_ ); }
 
     virtual ~DataStore() override { delete[] data_store_; }
 
diff --git a/src/atlas/grid/Grid.cc b/src/atlas/grid/Grid.cc
index a0c18922e..2a130116b 100644
--- a/src/atlas/grid/Grid.cc
+++ b/src/atlas/grid/Grid.cc
@@ -11,8 +11,8 @@
 #include "atlas/grid/Grid.h"
 
 #include <limits>
-#include <vector>
 #include <string>
+#include <vector>
 
 #include "eckit/config/Parametrisation.h"
 #include "eckit/exception/Exceptions.h"
diff --git a/src/atlas/grid/Grid.h b/src/atlas/grid/Grid.h
index 9f45b14b6..69916b108 100644
--- a/src/atlas/grid/Grid.h
+++ b/src/atlas/grid/Grid.h
@@ -316,7 +316,7 @@ class RegularGaussianGrid : public Gaussian<RegularGrid> {
 
 public:
     using grid_t::grid_t;
-    RegularGaussianGrid( int N , const Domain& = Domain() );
+    RegularGaussianGrid( int N, const Domain& = Domain() );
 
     inline double lon( size_t i ) const { return x( i ); }
 
diff --git a/src/atlas/interpolation/Interpolation.h b/src/atlas/interpolation/Interpolation.h
index 8575159d7..2e9468c8f 100644
--- a/src/atlas/interpolation/Interpolation.h
+++ b/src/atlas/interpolation/Interpolation.h
@@ -39,7 +39,8 @@ class Interpolation {
 
     operator bool() const { return implementation_; }
 
-    void print( std::ostream& out ) const { implementation_->print(out); }
+    void print( std::ostream& out ) const { implementation_->print( out ); }
+
 private:
     eckit::SharedPtr<const Implementation> implementation_;
 };
diff --git a/src/atlas/interpolation/method/FiniteElement.cc b/src/atlas/interpolation/method/FiniteElement.cc
index e5f433b5e..975c06a36 100644
--- a/src/atlas/interpolation/method/FiniteElement.cc
+++ b/src/atlas/interpolation/method/FiniteElement.cc
@@ -12,12 +12,12 @@
 
 #include "atlas/interpolation/method/FiniteElement.h"
 
+#include "eckit/exception/Exceptions.h"
 #include "eckit/geometry/Point3.h"
 #include "eckit/log/Plural.h"
 #include "eckit/log/ProgressTimer.h"
 #include "eckit/log/Seconds.h"
 #include "eckit/mpi/Comm.h"
-#include "eckit/exception/Exceptions.h"
 
 #include "atlas/functionspace/NodeColumns.h"
 #include "atlas/functionspace/PointCloud.h"
@@ -28,13 +28,13 @@
 #include "atlas/mesh/Nodes.h"
 #include "atlas/mesh/actions/BuildCellCentres.h"
 #include "atlas/mesh/actions/BuildXYZField.h"
+#include "atlas/parallel/GatherScatter.h"
+#include "atlas/parallel/mpi/Buffer.h"
 #include "atlas/runtime/Log.h"
 #include "atlas/runtime/Trace.h"
 #include "atlas/util/CoordinateEnums.h"
 #include "atlas/util/Earth.h"
 #include "atlas/util/Point.h"
-#include "atlas/parallel/mpi/Buffer.h"
-#include "atlas/parallel/GatherScatter.h"
 
 
 namespace atlas {
@@ -86,50 +86,50 @@ void FiniteElement::setup( const FunctionSpace& source, const FunctionSpace& tar
 }
 
 struct Stencil {
-    enum { max_stencil_size = 4 };
+    enum
+    {
+        max_stencil_size = 4
+    };
     Stencil() {
-        g = -1;
+        g    = -1;
         size = 0;
     }
     void add( gidx_t tgt, gidx_t src, double weight ) {
-        if( g >= 0 ) {
-            ASSERT( tgt == g );
-        }
-        g = tgt;
-        size_t i=size;
-        source[i] = src;
+        if ( g >= 0 ) { ASSERT( tgt == g ); }
+        g          = tgt;
+        size_t i   = size;
+        source[i]  = src;
         weights[i] = weight;
         ++size;
     }
     gidx_t g;
-    std::array<gidx_t,max_stencil_size> source;
-    std::array<double,max_stencil_size> weights;
+    std::array<gidx_t, max_stencil_size> source;
+    std::array<double, max_stencil_size> weights;
     size_t size;
 };
 
-void FiniteElement::print(std::ostream& out) const
-{
-    functionspace::NodeColumns src (source_);
-    functionspace::NodeColumns tgt (target_);
-    if( not tgt ) NOTIMP;
-    auto gidx_src = array::make_view<gidx_t,1>( src.nodes().global_index() );
+void FiniteElement::print( std::ostream& out ) const {
+    functionspace::NodeColumns src( source_ );
+    functionspace::NodeColumns tgt( target_ );
+    if ( not tgt ) NOTIMP;
+    auto gidx_src = array::make_view<gidx_t, 1>( src.nodes().global_index() );
 
     ASSERT( tgt.nodes().size() == matrix_.rows() );
 
 
-    auto field_stencil_points_loc = tgt.createField<gidx_t>( option::variables(Stencil::max_stencil_size) );
-    auto field_stencil_weigths_loc = tgt.createField<double>( option::variables(Stencil::max_stencil_size) );
-    auto field_stencil_size_loc = tgt.createField<int>();
+    auto field_stencil_points_loc  = tgt.createField<gidx_t>( option::variables( Stencil::max_stencil_size ) );
+    auto field_stencil_weigths_loc = tgt.createField<double>( option::variables( Stencil::max_stencil_size ) );
+    auto field_stencil_size_loc    = tgt.createField<int>();
 
-    auto stencil_points_loc = array::make_view<gidx_t,2>( field_stencil_points_loc );
-    auto stencil_weights_loc = array::make_view<double,2>( field_stencil_weigths_loc );
-    auto stencil_size_loc   = array::make_view<int,1>( field_stencil_size_loc );
-    stencil_size_loc.assign(0);
+    auto stencil_points_loc  = array::make_view<gidx_t, 2>( field_stencil_points_loc );
+    auto stencil_weights_loc = array::make_view<double, 2>( field_stencil_weigths_loc );
+    auto stencil_size_loc    = array::make_view<int, 1>( field_stencil_size_loc );
+    stencil_size_loc.assign( 0 );
 
-    for( Matrix::const_iterator it = matrix_.begin(); it!=matrix_.end(); ++it ) {
-        int p = it.row();
-        int& i = stencil_size_loc( p );
-        stencil_points_loc( p, i ) = gidx_src( it.col() );
+    for ( Matrix::const_iterator it = matrix_.begin(); it != matrix_.end(); ++it ) {
+        int p                       = it.row();
+        int& i                      = stencil_size_loc( p );
+        stencil_points_loc( p, i )  = gidx_src( it.col() );
         stencil_weights_loc( p, i ) = *it;
         ++i;
     }
@@ -137,31 +137,32 @@ void FiniteElement::print(std::ostream& out) const
 
     size_t global_size = tgt.gather().glb_dof();
 
-    auto field_stencil_points_glb = tgt.createField<gidx_t>( option::variables(Stencil::max_stencil_size) | option::global(0) );
-    auto field_stencil_weights_glb = tgt.createField<double>( option::variables(Stencil::max_stencil_size) | option::global(0) );
-    auto field_stencil_size_glb = tgt.createField<int>( option::global(0) );
-
+    auto field_stencil_points_glb =
+        tgt.createField<gidx_t>( option::variables( Stencil::max_stencil_size ) | option::global( 0 ) );
+    auto field_stencil_weights_glb =
+        tgt.createField<double>( option::variables( Stencil::max_stencil_size ) | option::global( 0 ) );
+    auto field_stencil_size_glb = tgt.createField<int>( option::global( 0 ) );
 
 
-    auto stencil_points_glb = array::make_view<gidx_t,2>( field_stencil_points_glb );
-    auto stencil_weights_glb = array::make_view<double,2>( field_stencil_weights_glb );
-    auto stencil_size_glb   = array::make_view<int,1>( field_stencil_size_glb );
+    auto stencil_points_glb  = array::make_view<gidx_t, 2>( field_stencil_points_glb );
+    auto stencil_weights_glb = array::make_view<double, 2>( field_stencil_weights_glb );
+    auto stencil_size_glb    = array::make_view<int, 1>( field_stencil_size_glb );
 
     tgt.gather().gather( stencil_size_loc, stencil_size_glb );
     tgt.gather().gather( stencil_points_loc, stencil_points_glb );
     tgt.gather().gather( stencil_weights_loc, stencil_weights_glb );
 
-    if( mpi::comm().rank() == 0 ) {
-        for( idx_t i=0; i<global_size; ++i ) {
-            out << std::setw(10) << i+1 << " : ";
-            for( idx_t j=0; j<stencil_size_glb(i); ++j ) {
-                out << std::setw(10) << stencil_points_glb(i,j);
+    if ( mpi::comm().rank() == 0 ) {
+        for ( idx_t i = 0; i < global_size; ++i ) {
+            out << std::setw( 10 ) << i + 1 << " : ";
+            for ( idx_t j = 0; j < stencil_size_glb( i ); ++j ) {
+                out << std::setw( 10 ) << stencil_points_glb( i, j );
             }
-            for( idx_t j=stencil_size_glb(i); j<Stencil::max_stencil_size; ++j ) {
+            for ( idx_t j = stencil_size_glb( i ); j < Stencil::max_stencil_size; ++j ) {
                 out << "          ";
             }
-            for( idx_t j=0; j<stencil_size_glb(i); ++j ) {
-                out << std::setw(12) << std::left <<  stencil_weights_glb(i,j);
+            for ( idx_t j = 0; j < stencil_size_glb( i ); ++j ) {
+                out << std::setw( 12 ) << std::left << stencil_weights_glb( i, j );
             }
             out << std::endl;
         }
@@ -179,8 +180,8 @@ void FiniteElement::setup( const FunctionSpace& source ) {
 
     // generate barycenters of each triangle & insert them on a kd-tree
     util::Config config;
-    config.set("name", "centre ");
-    config.set("flatten_virtual_elements", false );
+    config.set( "name", "centre " );
+    config.set( "flatten_virtual_elements", false );
     Field cell_centres = mesh::actions::BuildCellCentres( config )( meshSource );
 
     eckit::ScopedPtr<ElemIndex3> eTree( create_element_kdtree( cell_centres ) );
@@ -244,7 +245,7 @@ void FiniteElement::setup( const FunctionSpace& source ) {
                                 "---------------------\n";
                 PointLonLat pll;
                 util::Earth::convertCartesianToSpherical( p, pll );
-                if( pll.lon() < 0 ) pll.lon() += 360.;
+                if ( pll.lon() < 0 ) pll.lon() += 360.;
                 Log::debug() << "Failed to project point (lon,lat)=" << pll << '\n';
                 Log::debug() << failures_log.str();
             }
@@ -261,7 +262,7 @@ void FiniteElement::setup( const FunctionSpace& source ) {
             const PointXYZ p{( *ocoords_ )( *i, 0 ), ( *ocoords_ )( *i, 1 ), ( *ocoords_ )( *i, 2 )};  // lookup point
             PointLonLat pll;
             util::Earth::convertCartesianToSpherical( p, pll );
-            if( pll.lon() < 0 ) pll.lon() += 360.;
+            if ( pll.lon() < 0 ) pll.lon() += 360.;
             msg << "\t(lon,lat) = " << pll << "\n";
         }
 
@@ -275,11 +276,11 @@ void FiniteElement::setup( const FunctionSpace& source ) {
 }
 
 struct ElementEdge {
-    std::array<idx_t,2> idx;
+    std::array<idx_t, 2> idx;
     void swap() {
         idx_t tmp = idx[0];
-        idx[0] = idx[1];
-        idx[1] = tmp;
+        idx[0]    = idx[1];
+        idx[1]    = tmp;
     }
 };
 
@@ -288,8 +289,8 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd
     ASSERT( elems.begin() != elems.end() );
 
     const size_t inp_points = icoords_->shape( 0 );
-    std::array<size_t,4> idx;
-    std::array<double,4> w;
+    std::array<size_t, 4> idx;
+    std::array<double, 4> w;
 
     Triplets triplets;
     Ray ray( PointXYZ{( *ocoords_ )( ip, 0 ), ( *ocoords_ )( ip, 1 ), ( *ocoords_ )( ip, 2 )} );
@@ -306,18 +307,18 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd
             ASSERT( idx[i] < inp_points );
         }
 
-        auto on_triag_edge = [&](ElementEdge& edge) {
-            if( w[0] < 1.e-15 ) {
+        auto on_triag_edge = [&]( ElementEdge& edge ) {
+            if ( w[0] < 1.e-15 ) {
                 edge.idx[0] = 1;
                 edge.idx[1] = 2;
                 return true;
             }
-            if( w[1] < 1.e-15 ) {
+            if ( w[1] < 1.e-15 ) {
                 edge.idx[0] = 0;
                 edge.idx[1] = 2;
                 return true;
             }
-            if( w[2] < 1.e-15 ) {
+            if ( w[2] < 1.e-15 ) {
                 edge.idx[0] = 0;
                 edge.idx[1] = 1;
                 return true;
@@ -325,23 +326,23 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd
             return false;
         };
 
-        auto on_quad_edge = [&](ElementEdge& edge) {
-            if( w[0] < 1.e-15 && w[1] < 1.e-15 ) {
+        auto on_quad_edge = [&]( ElementEdge& edge ) {
+            if ( w[0] < 1.e-15 && w[1] < 1.e-15 ) {
                 edge.idx[0] = 2;
                 edge.idx[1] = 3;
                 return true;
             }
-            if( w[1] < 1.e-15 && w[2] < 1.e-15 ) {
+            if ( w[1] < 1.e-15 && w[2] < 1.e-15 ) {
                 edge.idx[0] = 0;
                 edge.idx[1] = 3;
                 return true;
             }
-            if( w[2] < 1.e-15 && w[3] < 1.e-15 ) {
+            if ( w[2] < 1.e-15 && w[3] < 1.e-15 ) {
                 edge.idx[0] = 0;
                 edge.idx[1] = 1;
                 return true;
             }
-            if( w[3] < 1.e-15 && w[0] < 1.e-15 ) {
+            if ( w[3] < 1.e-15 && w[0] < 1.e-15 ) {
                 edge.idx[0] = 1;
                 edge.idx[1] = 2;
                 return true;
@@ -369,11 +370,9 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd
                 w[1] = is.u;
                 w[2] = is.v;
 
-                if( on_triag_edge( edge) ) {
-                    if( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) {
-                        edge.swap();
-                    }
-                    for( size_t i = 0; i < 2; ++i ) {
+                if ( on_triag_edge( edge ) ) {
+                    if ( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) { edge.swap(); }
+                    for ( size_t i = 0; i < 2; ++i ) {
                         triplets.push_back( Triplet( ip, idx[edge.idx[i]], w[edge.idx[i]] ) );
                     }
                 }
@@ -408,11 +407,9 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd
                 w[2] = is.u * is.v;
                 w[3] = ( 1. - is.u ) * is.v;
 
-                if( on_quad_edge( edge ) ) {
-                    if( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) {
-                        edge.swap();
-                    }
-                    for( size_t i = 0; i < 2; ++i ) {
+                if ( on_quad_edge( edge ) ) {
+                    if ( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) { edge.swap(); }
+                    for ( size_t i = 0; i < 2; ++i ) {
                         triplets.push_back( Triplet( ip, idx[edge.idx[i]], w[edge.idx[i]] ) );
                     }
                 }
diff --git a/src/atlas/interpolation/method/Method.cc b/src/atlas/interpolation/method/Method.cc
index c4d46dd93..f51c31134 100644
--- a/src/atlas/interpolation/method/Method.cc
+++ b/src/atlas/interpolation/method/Method.cc
@@ -23,9 +23,9 @@
 #include "atlas/field/Field.h"
 #include "atlas/field/FieldSet.h"
 #include "atlas/functionspace/NodeColumns.h"
+#include "atlas/mesh/Nodes.h"
 #include "atlas/runtime/Log.h"
 #include "atlas/runtime/Trace.h"
-#include "atlas/mesh/Nodes.h"
 
 // for static linking
 #include "FiniteElement.h"
diff --git a/src/atlas/interpolation/method/Method.h b/src/atlas/interpolation/method/Method.h
index 34d62965a..0f57609f5 100644
--- a/src/atlas/interpolation/method/Method.h
+++ b/src/atlas/interpolation/method/Method.h
@@ -10,9 +10,9 @@
 
 #pragma once
 
+#include <iosfwd>
 #include <string>
 #include <vector>
-#include <iosfwd>
 
 #include "eckit/config/Configuration.h"
 #include "eckit/linalg/SparseMatrix.h"
@@ -61,7 +61,6 @@ class Method : public eckit::Owned {
     //        so do not expose here, even though only linear operators are now
     //        implemented.
     Matrix matrix_;
-
 };
 
 struct MethodFactory {
diff --git a/src/atlas/mesh/actions/BuildCellCentres.cc b/src/atlas/mesh/actions/BuildCellCentres.cc
index 2c2e1f773..2d23c36e9 100644
--- a/src/atlas/mesh/actions/BuildCellCentres.cc
+++ b/src/atlas/mesh/actions/BuildCellCentres.cc
@@ -29,19 +29,18 @@ namespace actions {
 BuildCellCentres::BuildCellCentres( const std::string& field_name, bool force_recompute ) :
     field_name_( field_name ),
     force_recompute_( force_recompute ),
-    flatten_virtual_elements_( true ) {
-}
+    flatten_virtual_elements_( true ) {}
 
 BuildCellCentres::BuildCellCentres( eckit::Configuration& config ) :
     field_name_( config.getString( "name", "centre" ) ),
     force_recompute_( config.getBool( "force_recompute", false ) ),
-    flatten_virtual_elements_( config.getBool( "flatten_virtual_elements", true) ) {
-}
+    flatten_virtual_elements_( config.getBool( "flatten_virtual_elements", true ) ) {}
 
 Field& BuildCellCentres::operator()( Mesh& mesh ) const {
     bool recompute = force_recompute_;
     if ( !mesh.cells().has_field( field_name_ ) ) {
-        mesh.cells().add( Field( field_name_, array::make_datatype<double>(), array::make_shape( mesh.cells().size(), 3 ) ) );
+        mesh.cells().add(
+            Field( field_name_, array::make_datatype<double>(), array::make_shape( mesh.cells().size(), 3 ) ) );
         recompute = true;
     }
     if ( recompute ) {
@@ -51,8 +50,8 @@ Field& BuildCellCentres::operator()( Mesh& mesh ) const {
         size_t firstVirtualPoint = std::numeric_limits<size_t>::max();
         if ( nodes.metadata().has( "NbRealPts" ) ) { firstVirtualPoint = nodes.metadata().get<size_t>( "NbRealPts" ); }
 
-        size_t nb_cells                       = mesh.cells().size();
-        auto centroids = array::make_view<double, 2>( mesh.cells().field( field_name_ ) );
+        size_t nb_cells = mesh.cells().size();
+        auto centroids  = array::make_view<double, 2>( mesh.cells().field( field_name_ ) );
         const mesh::HybridElements::Connectivity& cell_node_connectivity = mesh.cells().node_connectivity();
 
         for ( size_t e = 0; e < nb_cells; ++e ) {
@@ -82,7 +81,7 @@ Field& BuildCellCentres::operator()( Mesh& mesh ) const {
             int nb_unique_nodes = int( nb_cell_nodes ) - nb_equal_nodes;
             if ( nb_unique_nodes < 3 ) { continue; }
 
-            if( flatten_virtual_elements_ ) {
+            if ( flatten_virtual_elements_ ) {
                 // calculate centroid by averaging coordinates (uses only "real" nodes)
                 size_t nb_real_nodes = 0;
                 for ( size_t n = 0; n < nb_cell_nodes; ++n ) {
@@ -101,11 +100,12 @@ Field& BuildCellCentres::operator()( Mesh& mesh ) const {
                     centroids( e, YY ) *= average_coefficient;
                     centroids( e, ZZ ) *= average_coefficient;
                 }
-            } else {
-                const double average_coefficient = 1./ static_cast<double>( nb_cell_nodes );
+            }
+            else {
+                const double average_coefficient = 1. / static_cast<double>( nb_cell_nodes );
                 for ( size_t n = 0; n < nb_cell_nodes; ++n ) {
                     const size_t i = size_t( cell_node_connectivity( e, n ) );
-                    for ( size_t d=0; d<3; ++d ) {
+                    for ( size_t d = 0; d < 3; ++d ) {
                         centroids( e, d ) += coords( i, d ) * average_coefficient;
                     }
                 }
diff --git a/src/atlas/mesh/actions/BuildDualMesh.cc b/src/atlas/mesh/actions/BuildDualMesh.cc
index 2fe9fb693..68543f879 100644
--- a/src/atlas/mesh/actions/BuildDualMesh.cc
+++ b/src/atlas/mesh/actions/BuildDualMesh.cc
@@ -167,7 +167,7 @@ void add_median_dual_volume_contribution_cells( const mesh::HybridElements& cell
     const array::ArrayView<double, 2> edge_centroids = array::make_view<double, 2>( edges.field( "centroids_xy" ) );
     const mesh::HybridElements::Connectivity& cell_edge_connectivity = cells.edge_connectivity();
     const mesh::HybridElements::Connectivity& edge_node_connectivity = edges.node_connectivity();
-    auto field_flags = array::make_view<int, 1>( cells.flags() );
+    auto field_flags                                                 = array::make_view<int, 1>( cells.flags() );
 
     auto patch = [&field_flags]( size_t e ) {
         using Topology = atlas::mesh::Nodes::Topology;
diff --git a/src/atlas/mesh/actions/BuildEdges.cc b/src/atlas/mesh/actions/BuildEdges.cc
index d06296268..5a6067ce2 100644
--- a/src/atlas/mesh/actions/BuildEdges.cc
+++ b/src/atlas/mesh/actions/BuildEdges.cc
@@ -116,7 +116,7 @@ void build_element_to_edge_connectivity( Mesh& mesh ) {
 
     // Verify that all edges have been found
     auto field_flags = array::make_view<int, 1>( mesh.cells().flags() );
-    auto patch = [&field_flags]( size_t e ) {
+    auto patch       = [&field_flags]( size_t e ) {
         using Topology = atlas::mesh::Nodes::Topology;
         return Topology::check( field_flags( e ), Topology::PATCH );
     };
diff --git a/src/atlas/mesh/actions/BuildHalo.cc b/src/atlas/mesh/actions/BuildHalo.cc
index 3fb385ceb..deaefea4a 100644
--- a/src/atlas/mesh/actions/BuildHalo.cc
+++ b/src/atlas/mesh/actions/BuildHalo.cc
@@ -199,16 +199,16 @@ void make_cells_global_index_human_readable( const mesh::actions::BuildHalo& bui
         }
     }
     else {
-        size_t nb_cells_to_edit(0);
-        for( const auto& new_cells : build_halo.periodic_cells_local_index_ ) {
+        size_t nb_cells_to_edit( 0 );
+        for ( const auto& new_cells : build_halo.periodic_cells_local_index_ ) {
             nb_cells_to_edit += new_cells.size();
         }
         cells_to_edit.resize( nb_cells_to_edit );
-        int c{ 0 };
-        int i{ 0 };
+        int c{0};
+        int i{0};
         for ( int t = 0; t < cells.nb_types(); ++t ) {
             for ( idx_t p : build_halo.periodic_cells_local_index_[t] ) {
-               cells_to_edit[i++] = c + p;
+                cells_to_edit[i++] = c + p;
             }
             c += cells.elements( t ).size();
         }
@@ -308,7 +308,7 @@ void build_lookup_node2elem( const Mesh& mesh, Node2Elem& node2elem ) {
 
     const mesh::HybridElements::Connectivity& elem_nodes = mesh.cells().node_connectivity();
     auto field_flags                                     = array::make_view<int, 1>( mesh.cells().flags() );
-    auto patched = [&field_flags]( size_t e ) {
+    auto patched                                         = [&field_flags]( size_t e ) {
         using Topology = atlas::mesh::Nodes::Topology;
         return Topology::check( field_flags( e ), Topology::PATCH );
     };
@@ -699,7 +699,7 @@ class BuildHaloHelper {
             buf.elem_glb_idx[p][jelem] = elem_glb_idx( ielem );
             buf.elem_part[p][jelem]    = elem_part( ielem );
             Topology::set( buf.elem_flags[p][jelem], elem_flags( ielem ) );
-            buf.elem_type[p][jelem]    = mesh.cells().type_idx( ielem );
+            buf.elem_type[p][jelem] = mesh.cells().type_idx( ielem );
             for ( size_t jnode = 0; jnode < elem_nodes->cols( ielem ); ++jnode )
                 buf.elem_nodes_id[p][jelemnode++] = compute_uid( ( *elem_nodes )( ielem, jnode ) );
         }
@@ -762,7 +762,7 @@ class BuildHaloHelper {
             size_t ielem                    = elems[jelem];
             buf.elem_part[p][jelem]         = elem_part( ielem );
             Topology::set( buf.elem_flags[p][jelem], elem_flags( ielem ) | newflags );
-            buf.elem_type[p][jelem]         = mesh.cells().type_idx( ielem );
+            buf.elem_type[p][jelem] = mesh.cells().type_idx( ielem );
             std::vector<double> crds( elem_nodes->cols( ielem ) * 2 );
             for ( size_t jnode = 0; jnode < elem_nodes->cols( ielem ); ++jnode ) {
                 double crd[] = {xy( ( *elem_nodes )( ielem, jnode ), XX ), xy( ( *elem_nodes )( ielem, jnode ), YY )};
@@ -853,8 +853,8 @@ class BuildHaloHelper {
                 lonlat( loc_idx, XX ) = pll.lon();
                 lonlat( loc_idx, YY ) = pll.lat();
 
-                if ( Topology::check( flags( loc_idx ), Topology::PERIODIC ) and not
-                     Topology::check( flags( loc_idx ), Topology::BC ) ) {
+                if ( Topology::check( flags( loc_idx ), Topology::PERIODIC ) and
+                     not Topology::check( flags( loc_idx ), Topology::BC ) ) {
                     status.new_periodic_ghost_points.push_back( loc_idx );
                 }
 
@@ -970,7 +970,7 @@ class BuildHaloHelper {
                             loc_idx, n, uid2node[buf.elem_nodes_id[jpart][buf.elem_nodes_displs[jpart][jelem] + n]] );
                     }
 
-                    if( Topology::check( elem_type_flags( loc_idx ), Topology::PERIODIC ) ) {
+                    if ( Topology::check( elem_type_flags( loc_idx ), Topology::PERIODIC ) ) {
                         status.new_periodic_ghost_cells[t].push_back( old_size + new_elem );
                     }
                     ++new_elem;
@@ -1230,7 +1230,7 @@ void increase_halo_periodic( BuildHaloHelper& helper, const PeriodicPoints& peri
     helper.add_buffers( recvmesh );
 }
 
-BuildHalo::BuildHalo(Mesh& mesh) : mesh_( mesh ), periodic_cells_local_index_( mesh.cells().nb_types() ) {}
+BuildHalo::BuildHalo( Mesh& mesh ) : mesh_( mesh ), periodic_cells_local_index_( mesh.cells().nb_types() ) {}
 
 void BuildHalo::operator()( int nb_elems ) {
     ATLAS_TRACE( "BuildHalo" );
@@ -1275,7 +1275,7 @@ void BuildHalo::operator()( int nb_elems ) {
         }
         for ( int t = 0; t < mesh_.cells().nb_types(); ++t ) {
             for ( idx_t p : helper.status.new_periodic_ghost_cells[t] ) {
-               periodic_cells_local_index_[t].push_back( p );
+                periodic_cells_local_index_[t].push_back( p );
             }
         }
 
diff --git a/src/atlas/mesh/actions/BuildParallelFields.cc b/src/atlas/mesh/actions/BuildParallelFields.cc
index 120a4f86a..be297b962 100644
--- a/src/atlas/mesh/actions/BuildParallelFields.cc
+++ b/src/atlas/mesh/actions/BuildParallelFields.cc
@@ -413,9 +413,7 @@ Field& build_edges_partition( Mesh& mesh ) {
             // if( not domain_bdry(jedge) ) {
             bdry_edges.push_back( edge_glb_idx( jedge ) );
             p = elem_part( elem1 );
-            if( pn1 != p && pn2 == pn1 && elem_halo( elem1 ) > 0 ) {
-                p = pn1;
-            }
+            if ( pn1 != p && pn2 == pn1 && elem_halo( elem1 ) > 0 ) { p = pn1; }
             // }
         }
         else if ( p != elem_part( elem1 ) && p != elem_part( elem2 ) ) {
@@ -508,12 +506,12 @@ Field& build_edges_partition( Mesh& mesh ) {
         if ( edge_is_partition_boundary ) {
             if ( not edge_partition_is_same_as_one_of_nodes ) {
                 if ( elem1 != edge_to_elem.missing_value() ) {
-                    Log::error() << "[" << mypart << "] " << EDGE( jedge ) << " [p" << p << "] is not correct elem1[p" << elem_part( elem1 )
-                                 << "]" << std::endl;
+                    Log::error() << "[" << mypart << "] " << EDGE( jedge ) << " [p" << p << "] is not correct elem1[p"
+                                 << elem_part( elem1 ) << "]" << std::endl;
                 }
                 else {
-                    Log::error() << "[" << mypart << "] " << EDGE( jedge ) << " [p" << p << "] is not correct elem2[p" << elem_part( elem2 )
-                                 << "]" << std::endl;
+                    Log::error() << "[" << mypart << "] " << EDGE( jedge ) << " [p" << p << "] is not correct elem2[p"
+                                 << elem_part( elem2 ) << "]" << std::endl;
                 }
                 insane = 1;
             }
@@ -665,13 +663,14 @@ Field& build_edges_remote_idx( Mesh& mesh ) {
             else {
                 std::stringstream msg;
 #ifdef DEBUGGING_PARFIELDS
-                msg << "Edge(" << recv_edge[ jedge * varsize + 2 ] << "[p" << recv_edge[ jedge * varsize + 4 ] << "] "
-                    << recv_edge[ jedge *varsize + 3 ] << "[p" << recv_edge[ jedge * varsize + 5 ] << "])";
+                msg << "Edge(" << recv_edge[jedge * varsize + 2] << "[p" << recv_edge[jedge * varsize + 4] << "] "
+                    << recv_edge[jedge * varsize + 3] << "[p" << recv_edge[jedge * varsize + 5] << "])";
 #else
                 msg << "Edge with uid " << recv_uid;
 #endif
                 msg << " requested by rank [" << jpart << "]";
-                msg << " that should be owned by " << mpi::comm().rank() << " is not found. This could be because no "
+                msg << " that should be owned by " << mpi::comm().rank()
+                    << " is not found. This could be because no "
                        "halo was built.";
                 // throw eckit::SeriousBug(msg.str(),Here());
                 Log::warning() << msg.str() << " @ " << Here() << std::endl;
diff --git a/src/atlas/output/detail/PointCloudIO.cc b/src/atlas/output/detail/PointCloudIO.cc
index 29da1f424..1e24441d7 100644
--- a/src/atlas/output/detail/PointCloudIO.cc
+++ b/src/atlas/output/detail/PointCloudIO.cc
@@ -89,12 +89,12 @@ Mesh PointCloudIO::read( const eckit::PathName& path, std::vector<std::string>&
 
         mesh.nodes().resize( nb_pts );
 
-        mesh::Nodes& nodes             = mesh.nodes();
-        array::ArrayView<double, 2> xy = array::make_view<double, 2>( nodes.xy() );
-        array::ArrayView<double, 2> lonlat = array::make_view<double, 2>( nodes.lonlat() );
+        mesh::Nodes& nodes                  = mesh.nodes();
+        array::ArrayView<double, 2> xy      = array::make_view<double, 2>( nodes.xy() );
+        array::ArrayView<double, 2> lonlat  = array::make_view<double, 2>( nodes.lonlat() );
         array::ArrayView<gidx_t, 1> glb_idx = array::make_view<gidx_t, 1>( nodes.global_index() );
-        array::ArrayView<int, 1> part = array::make_view<int, 1>( nodes.partition() );
-        part.assign(0);
+        array::ArrayView<int, 1> part       = array::make_view<int, 1>( nodes.partition() );
+        part.assign( 0 );
         // header, part 2:
         // determine columns' labels
         // (check end of first line for possible column labels, starting from
@@ -128,13 +128,13 @@ Mesh PointCloudIO::read( const eckit::PathName& path, std::vector<std::string>&
 
             // NOTE always expects (lon,lat) order, maybe make it configurable?
             PointXY pxy;
-            iss >> pxy.x() >>pxy.y();
+            iss >> pxy.x() >> pxy.y();
 
-            xy(i,XX) = pxy.x();
-            xy(i,YY) = pxy.y();
-            lonlat(i,LON) = pxy.x();
-            lonlat(i,LAT) = pxy.y();
-            glb_idx(i) = i+1;
+            xy( i, XX )      = pxy.x();
+            xy( i, YY )      = pxy.y();
+            lonlat( i, LON ) = pxy.x();
+            lonlat( i, LAT ) = pxy.y();
+            glb_idx( i )     = i + 1;
 
             for ( j = 0; iss && j < nb_fld; ++j )
                 iss >> fields[j]( i );
@@ -264,7 +264,6 @@ void PointCloudIO::write( const eckit::PathName& path, const FieldSet& fieldset,
 }
 
 void PointCloudIO::write( const eckit::PathName& path, const std::vector<PointLonLat>& pts ) {
-
     Log::debug() << "PointCloudIO writing " << path << std::endl;
 
     std::ofstream f( path.asString().c_str() );
@@ -282,7 +281,6 @@ void PointCloudIO::write( const eckit::PathName& path, const std::vector<PointLo
 
 void PointCloudIO::write( const eckit::PathName& path, const std::vector<double>& lon, const std::vector<double>& lat,
                           const std::vector<std::vector<double>*>& vfvalues, const std::vector<std::string>& vfnames ) {
-
     Log::debug() << "PointCloudIO writing " << path << std::endl;
 
     const std::string msg( "PointCloudIO::write: " );
@@ -318,7 +316,6 @@ void PointCloudIO::write( const eckit::PathName& path, const std::vector<double>
 
 void PointCloudIO::write( const eckit::PathName& path, const int& nb_pts, const double* lon, const double* lat,
                           const int& nb_fld, const double** afvalues, const char** afnames ) {
-
     Log::debug() << "PointCloudIO writing " << path << std::endl;
 
 
diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc
index 1e3520557..03b4a0802 100644
--- a/src/atlas/trans/local/TransLocal.cc
+++ b/src/atlas/trans/local/TransLocal.cc
@@ -230,7 +230,7 @@ struct FFTW_Data {
     std::vector<fftw_plan> plans;
 #endif
 };
-}
+}  // namespace detail
 
 
 // --------------------------------------------------------------------------------------------------------------------
@@ -495,8 +495,8 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
             {
                 ATLAS_TRACE( "Fourier precomputations (FFTW)" );
                 int num_complex = ( nlonsMaxGlobal_ / 2 ) + 1;
-                fftw_->in         = fftw_alloc_complex( nlats * num_complex );
-                fftw_->out        = fftw_alloc_real( nlats * nlonsMaxGlobal_ );
+                fftw_->in       = fftw_alloc_complex( nlats * num_complex );
+                fftw_->out      = fftw_alloc_real( nlats * nlonsMaxGlobal_ );
 
                 if ( fft_cache_ ) {
                     Log::debug() << "Import FFTW wisdom from cache" << std::endl;
@@ -516,8 +516,9 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
                 //                if ( wisdomString.length() > 0 ) { fftw_import_wisdom_from_string( &wisdomString[0u] ); }
                 if ( grid::RegularGrid( gridGlobal_ ) ) {
                     fftw_->plans.resize( 1 );
-                    fftw_->plans[0] = fftw_plan_many_dft_c2r( 1, &nlonsMaxGlobal_, nlats, fftw_->in, NULL, 1, num_complex,
-                                                              fftw_->out, NULL, 1, nlonsMaxGlobal_, FFTW_ESTIMATE );
+                    fftw_->plans[0] =
+                        fftw_plan_many_dft_c2r( 1, &nlonsMaxGlobal_, nlats, fftw_->in, NULL, 1, num_complex, fftw_->out,
+                                                NULL, 1, nlonsMaxGlobal_, FFTW_ESTIMATE );
                 }
                 else {
                     fftw_->plans.resize( nlatsLegDomain_ );
@@ -546,7 +547,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma
                 //                    write.close();
                 //                }
             }
-            // other FFT implementations should be added with #elif statements
+                // other FFT implementations should be added with #elif statements
 #else
             useFFT_               = false;  // no FFT implemented => default to dgemm
             std::string file_path = TransParameters( config ).write_fft();
@@ -981,7 +982,7 @@ void TransLocal::invtrans_fourier_reduced( const int nlats, const grid::Structur
                     for ( int jlat = 0; jlat < nlats; jlat++ ) {
                         int idx = 0;
                         //Log::info() << jlat << "in:" << std::endl;
-                        int num_complex   = ( nlonsGlobal_[jlat] / 2 ) + 1;
+                        int num_complex     = ( nlonsGlobal_[jlat] / 2 ) + 1;
                         fftw_->in[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0, nb_fields, nlats )];
                         //Log::info() << fftw_->in[0][0] << " ";
                         for ( int jm = 1; jm < num_complex; jm++, idx++ ) {
diff --git a/src/atlas/trans/local/TransLocal.h b/src/atlas/trans/local/TransLocal.h
index 9d3d69140..4e66473ee 100644
--- a/src/atlas/trans/local/TransLocal.h
+++ b/src/atlas/trans/local/TransLocal.h
@@ -10,8 +10,8 @@
 
 #pragma once
 
-#include <vector>
 #include <memory>
+#include <vector>
 
 #include "atlas/array.h"
 #include "atlas/grid/Grid.h"
diff --git a/src/sandbox/interpolation/atlas-parallel-interpolation.cc b/src/sandbox/interpolation/atlas-parallel-interpolation.cc
index 6a4efe4ef..18bad95c6 100644
--- a/src/sandbox/interpolation/atlas-parallel-interpolation.cc
+++ b/src/sandbox/interpolation/atlas-parallel-interpolation.cc
@@ -152,9 +152,7 @@ void AtlasParallelInterpolation::execute( const AtlasTool::Args& args ) {
             Interpolation( option::type( interpolation_method ), tgt_functionspace, src_functionspace );
     }
 
-    if( args.getBool( "forward-interpolator-output", false ) ) {
-        interpolator_forward.print( Log::info() );
-    }
+    if ( args.getBool( "forward-interpolator-output", false ) ) { interpolator_forward.print( Log::info() ); }
 
     // Create source FunctionSpace and fields
 
diff --git a/src/tests/acceptance_tests/atest_mgrids.cc b/src/tests/acceptance_tests/atest_mgrids.cc
index c71df090f..837b782f5 100644
--- a/src/tests/acceptance_tests/atest_mgrids.cc
+++ b/src/tests/acceptance_tests/atest_mgrids.cc
@@ -16,19 +16,19 @@
 #include <sstream>
 #include <vector>
 
+#include "atlas/field.h"
+#include "atlas/functionspace.h"
 #include "atlas/grid.h"
+#include "atlas/interpolation/Interpolation.h"
 #include "atlas/mesh.h"
-#include "atlas/functionspace.h"
-#include "atlas/field.h"
 #include "atlas/meshgenerator.h"
+#include "atlas/numerics/fvm/Method.h"
 #include "atlas/option.h"
+#include "atlas/output/Gmsh.h"
 #include "atlas/parallel/mpi/mpi.h"
 #include "atlas/runtime/AtlasTool.h"
 #include "atlas/runtime/Log.h"
 #include "atlas/util/Config.h"
-#include "atlas/output/Gmsh.h"
-#include "atlas/numerics/fvm/Method.h"
-#include "atlas/interpolation/Interpolation.h"
 
 #include "atlas/mesh/actions/BuildHalo.h"
 
@@ -38,9 +38,10 @@ using namespace atlas;
 
 class Program : public AtlasTool {
     virtual void execute( const Args& args );
+
 public:
     Program( int argc, char** argv );
-  };
+};
 
 //-----------------------------------------------------------------------------
 
@@ -50,49 +51,48 @@ Program::Program( int argc, char** argv ) : AtlasTool( argc, argv ) {
     add_option( new SimpleOption<bool>( "ghost", "Output ghost elements" ) );
     add_option( new SimpleOption<long>( "haloA", "Halo size" ) );
     add_option( new SimpleOption<long>( "haloB", "Halo size" ) );
-    add_option( new SimpleOption<bool>( "no-forward",  "no forward interpolation" ) );
+    add_option( new SimpleOption<bool>( "no-forward", "no forward interpolation" ) );
     add_option( new SimpleOption<bool>( "no-backward", "no backward interpolation" ) );
 }
 
 //-----------------------------------------------------------------------------
 
 void Program::execute( const Args& args ) {
+    auto ghost = util::Config( "ghost", args.getBool( "ghost", false ) );
+    auto haloA = option::halo( args.getLong( "haloA", 1 ) );
+    auto haloB = option::halo( args.getLong( "haloB", 1 ) );
 
-  auto ghost = util::Config("ghost",args.getBool("ghost",false));
-  auto haloA = option::halo( args.getLong("haloA",1) );
-  auto haloB = option::halo( args.getLong("haloB",1) );
-
-  auto gridA = Grid( args.getString("gridA") );
-  auto gridB = Grid( args.getString("gridB") );
+    auto gridA = Grid( args.getString( "gridA" ) );
+    auto gridB = Grid( args.getString( "gridB" ) );
 
-  auto meshgenerator = MeshGenerator( "structured" );
+    auto meshgenerator = MeshGenerator( "structured" );
 
-  auto distA = grid::Distribution( gridA, grid::Partitioner( "trans" ) );
+    auto distA = grid::Distribution( gridA, grid::Partitioner( "trans" ) );
 
-  auto meshA = meshgenerator.generate( gridA, distA );
+    auto meshA = meshgenerator.generate( gridA, distA );
 
-  numerics::fvm::Method fvmA(meshA,haloA);
-  auto gmshA = output::Gmsh( "meshA.msh", ghost );
-  gmshA.write(meshA);
+    numerics::fvm::Method fvmA( meshA, haloA );
+    auto gmshA = output::Gmsh( "meshA.msh", ghost );
+    gmshA.write( meshA );
 
-  auto distB = grid::Distribution( gridB, grid::MatchingMeshPartitioner( meshA ) );
+    auto distB = grid::Distribution( gridB, grid::MatchingMeshPartitioner( meshA ) );
 
-  auto meshB = meshgenerator.generate( gridB, distB );
+    auto meshB = meshgenerator.generate( gridB, distB );
 
-  numerics::fvm::Method fvmB(meshB,haloB);
+    numerics::fvm::Method fvmB( meshB, haloB );
 
-  Field fieldB = fvmB.node_columns().createField<double>();
+    Field fieldB = fvmB.node_columns().createField<double>();
 
-  output::Gmsh gmshB( "meshB.msh", ghost );
-  gmshB.write(meshB);
-  gmshB.write(fieldB);
+    output::Gmsh gmshB( "meshB.msh", ghost );
+    gmshB.write( meshB );
+    gmshB.write( fieldB );
 
-  if( not args.getBool("no-forward",false) ) {
-    Interpolation AtoB( option::type("finite-element"), fvmA.node_columns(), fvmB.node_columns() );
-  }
-  if( not args.getBool("no-backward",false) ) {  
-    Interpolation BtoA( option::type("finite-element"), fvmB.node_columns(), fvmA.node_columns() );
-  }
+    if ( not args.getBool( "no-forward", false ) ) {
+        Interpolation AtoB( option::type( "finite-element" ), fvmA.node_columns(), fvmB.node_columns() );
+    }
+    if ( not args.getBool( "no-backward", false ) ) {
+        Interpolation BtoA( option::type( "finite-element" ), fvmB.node_columns(), fvmA.node_columns() );
+    }
 }
 
 //------------------------------------------------------------------------------
diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc
index f0ed2f679..e2ccea7de 100644
--- a/src/tests/trans/test_transgeneral.cc
+++ b/src/tests/trans/test_transgeneral.cc
@@ -942,7 +942,7 @@ CASE( "test_trans_unstructured" ) {
 }
 #endif
 
-//-----------------------------------------------------------------------------
+    //-----------------------------------------------------------------------------
 
 #if 0
 CASE( "test_trans_fourier_truncation" ) {

From 279bae10d6b9382085a1e270fb5211e7363c9357 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Tue, 19 Jun 2018 17:43:58 +0100
Subject: [PATCH 117/123] Fix a wrong template type

---
 src/atlas/functionspace/NodeColumns.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/atlas/functionspace/NodeColumns.cc b/src/atlas/functionspace/NodeColumns.cc
index 8ffff7276..1e34143ec 100644
--- a/src/atlas/functionspace/NodeColumns.cc
+++ b/src/atlas/functionspace/NodeColumns.cc
@@ -1593,7 +1593,7 @@ void dispatch_minimum_and_location_per_level( const NodeColumns& fs, const Field
             }
         }
 
-        array::ArrayT<T> glb_idx_private( glb_idx.shape( 0 ), glb_idx.shape( 1 ) );
+        array::ArrayT<gidx_t> glb_idx_private( glb_idx.shape( 0 ), glb_idx.shape( 1 ) );
         array::ArrayView<gidx_t, 2> glb_idx_private_view = array::make_view<gidx_t, 2>( glb_idx_private );
         const size_t npts                                = arr.shape( 0 );
         atlas_omp_for( size_t n = 0; n < npts; ++n ) {
@@ -1692,7 +1692,7 @@ void dispatch_maximum_and_location_per_level( const NodeColumns& fs, const Field
             }
         }
 
-        array::ArrayT<T> glb_idx_private( glb_idx.shape( 0 ), glb_idx.shape( 1 ) );
+        array::ArrayT<gidx_t> glb_idx_private( glb_idx.shape( 0 ), glb_idx.shape( 1 ) );
         array::ArrayView<gidx_t, 2> glb_idx_private_view = array::make_view<gidx_t, 2>( glb_idx_private );
         const size_t npts                                = arr.shape( 0 );
         atlas_omp_for( size_t n = 0; n < npts; ++n ) {

From 3d23761f6824e4f15867ab6eb4b206d7dcba629e Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Tue, 19 Jun 2018 18:29:38 +0100
Subject: [PATCH 118/123] Version 0.15.0

---
 CHANGELOG.md  | 10 ++++++++++
 VERSION.cmake |  2 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 386b4815a..f352289ad 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,15 @@ This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html
 
 ## [Unreleased]
 
+## [0.15.0] - 2018-06-19
+### Changed
+- Native Array data storage uses now a raw C pointer instead of std::vector
+- Significant performance improvements to Spherical harmonics transforms
+
+### Fixed
+- Various bugs related to parallel halos
+- Bit reproducibility for parallel interpolation
+
 ## [0.14.0] - 2018-03-22
 ### Added
 - Spherical Harmonics transforms can receive a cache memory handle
@@ -30,6 +39,7 @@ This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html
 ## 0.13.0 - 2018-02-16
 
 [Unreleased]: https://github.com/ecmwf/atlas/compare/master...develop
+[0.15.0]: https://github.com/ecmwf/atlas/compare/0.14.0...0.15.0
 [0.14.0]: https://github.com/ecmwf/atlas/compare/0.13.2...0.14.0
 [0.13.2]: https://github.com/ecmwf/atlas/compare/0.13.1...0.13.2
 [0.13.1]: https://github.com/ecmwf/atlas/compare/0.13.0...0.13.1
diff --git a/VERSION.cmake b/VERSION.cmake
index 7cd7d9260..eed3fc5f0 100644
--- a/VERSION.cmake
+++ b/VERSION.cmake
@@ -6,5 +6,5 @@
 # granted to it by virtue of its status as an intergovernmental organisation nor
 # does it submit to any jurisdiction.
 
-set  ( ${PROJECT_NAME}_VERSION_STR  "0.14.0-develop" )
+set  ( ${PROJECT_NAME}_VERSION_STR  "0.15.0" )
 

From 1ce7e7037f50f622d89674e6f585c11a316b4c4d Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Tue, 19 Jun 2018 19:06:48 +0100
Subject: [PATCH 119/123] Fix various warnings shown by pgi/18.4 -- still
 failing tests for pgi

---
 src/apps/atlas-benchmark.cc                   |  7 ------
 src/atlas/domain/detail/ZonalBandDomain.cc    |  2 ++
 src/atlas/field/detail/FieldImpl.cc           |  2 --
 src/atlas/functionspace/EdgeColumns.cc        | 24 -------------------
 src/atlas/functionspace/NodeColumns.cc        |  1 -
 src/atlas/functionspace/PointCloud.cc         |  1 -
 src/atlas/functionspace/StructuredColumns.cc  |  1 -
 src/atlas/grid/detail/grid/GridBuilder.cc     |  9 +++++--
 src/atlas/grid/detail/grid/Structured.h       |  3 ++-
 src/atlas/grid/detail/grid/Unstructured.h     |  2 ++
 src/atlas/interpolation/element/Quad3D.h      |  1 +
 src/atlas/interpolation/element/Triag3D.h     |  1 +
 src/atlas/mesh/ElementType.cc                 |  1 -
 src/atlas/mesh/actions/BuildDualMesh.cc       |  4 ----
 src/atlas/numerics/fvm/Nabla.cc               |  1 -
 src/atlas/output/detail/GmshIO.cc             |  5 ++--
 src/atlas/parallel/Checksum.h                 |  1 -
 src/atlas/parallel/GatherScatter.cc           |  4 ----
 src/atlas/runtime/trace/Timings.cc            |  1 -
 src/atlas/trans/local/TransLocal.cc           |  1 -
 .../functionspace/test_structuredcolumns.cc   |  1 -
 21 files changed, 17 insertions(+), 56 deletions(-)

diff --git a/src/apps/atlas-benchmark.cc b/src/apps/atlas-benchmark.cc
index f9ac36505..8186332e5 100644
--- a/src/apps/atlas-benchmark.cc
+++ b/src/apps/atlas-benchmark.cc
@@ -89,13 +89,6 @@ using namespace atlas::functionspace;
 using namespace atlas::meshgenerator;
 using atlas::AtlasTool;
 
-namespace {
-void usage( const std::string& tool ) {
-    Log::info() << "Usage: " << tool << " [OPTIONS]..." << std::endl;
-}
-
-}  // namespace
-
 //----------------------------------------------------------------------------------------------------------------------
 
 struct TimerStats {
diff --git a/src/atlas/domain/detail/ZonalBandDomain.cc b/src/atlas/domain/detail/ZonalBandDomain.cc
index 0f9d57eb1..509e70ba6 100644
--- a/src/atlas/domain/detail/ZonalBandDomain.cc
+++ b/src/atlas/domain/detail/ZonalBandDomain.cc
@@ -20,9 +20,11 @@ static std::array<double, 2> get_interval_y( const eckit::Parametrisation& param
     return {ymin, ymax};
 }
 
+/*
 constexpr std::array<double, 2> interval_x() {
     return {0., 360.};
 }
+*/
 }  // namespace
 
 constexpr char ZonalBandDomain::units_[];
diff --git a/src/atlas/field/detail/FieldImpl.cc b/src/atlas/field/detail/FieldImpl.cc
index 67f7c015f..19500835c 100644
--- a/src/atlas/field/detail/FieldImpl.cc
+++ b/src/atlas/field/detail/FieldImpl.cc
@@ -40,8 +40,6 @@ FieldImpl* FieldImpl::create( const eckit::Parametrisation& params ) {
         throw eckit::Exception(
             "Could not find parameter 'creator' "
             "in Parametrisation for call to FieldImpl::create()" );
-
-    return 0;
 }
 
 FieldImpl* FieldImpl::create( const std::string& name, array::DataType datatype, const array::ArrayShape& shape ) {
diff --git a/src/atlas/functionspace/EdgeColumns.cc b/src/atlas/functionspace/EdgeColumns.cc
index 8c35306e9..e90d6d78f 100644
--- a/src/atlas/functionspace/EdgeColumns.cc
+++ b/src/atlas/functionspace/EdgeColumns.cc
@@ -526,30 +526,6 @@ const parallel::Checksum& EdgeColumns::checksum() const {
     return *checksum_;
 }
 
-namespace {
-void reverse_copy( const int variables[], const int size, std::vector<size_t>& reverse ) {
-    int r = size;
-    for ( int i = 0; i < size; ++i ) {
-        reverse[--r] = static_cast<size_t>( variables[i] );
-    }
-}
-
-void copy( const int variables[], const int size, std::vector<size_t>& cpy ) {
-    for ( int i = 0; i < size; ++i ) {
-        cpy[i] = static_cast<size_t>( variables[i] );
-    }
-}
-
-std::vector<size_t> variables_to_vector( const int variables[], const int size, bool fortran_ordering ) {
-    std::vector<size_t> vec( size );
-    if ( fortran_ordering )
-        reverse_copy( variables, size, vec );
-    else
-        copy( variables, size, vec );
-    return vec;
-}
-}  // namespace
-
 //------------------------------------------------------------------------------
 //------------------------------------------------------------------------------
 //------------------------------------------------------------------------------
diff --git a/src/atlas/functionspace/NodeColumns.cc b/src/atlas/functionspace/NodeColumns.cc
index 1e34143ec..d73e60f53 100644
--- a/src/atlas/functionspace/NodeColumns.cc
+++ b/src/atlas/functionspace/NodeColumns.cc
@@ -428,7 +428,6 @@ void NodeColumns::haloExchange( FieldSet& fieldset, bool on_device ) const {
                 break;
             default:
                 throw eckit::Exception( "Rank not supported", Here() );
-                break;
         }
     }
 }
diff --git a/src/atlas/functionspace/PointCloud.cc b/src/atlas/functionspace/PointCloud.cc
index 084f42eef..48793d6ba 100644
--- a/src/atlas/functionspace/PointCloud.cc
+++ b/src/atlas/functionspace/PointCloud.cc
@@ -39,7 +39,6 @@ const Field& PointCloud::ghost() const {
 
 Field PointCloud::createField( const eckit::Configuration& options ) const {
     NOTIMP;
-    return Field();
 }
 
 Field PointCloud::createField( const Field& other, const eckit::Configuration& config ) const {
diff --git a/src/atlas/functionspace/StructuredColumns.cc b/src/atlas/functionspace/StructuredColumns.cc
index ac199f8a3..2997b2e14 100644
--- a/src/atlas/functionspace/StructuredColumns.cc
+++ b/src/atlas/functionspace/StructuredColumns.cc
@@ -774,7 +774,6 @@ void StructuredColumns::haloExchange( FieldSet& fieldset ) const {
                 break;
             default:
                 throw eckit::Exception( "Rank not supported", Here() );
-                break;
         }
     }
 }
diff --git a/src/atlas/grid/detail/grid/GridBuilder.cc b/src/atlas/grid/detail/grid/GridBuilder.cc
index 957084d84..859ff7ec2 100644
--- a/src/atlas/grid/detail/grid/GridBuilder.cc
+++ b/src/atlas/grid/detail/grid/GridBuilder.cc
@@ -70,14 +70,21 @@ int regex_match_impl( const std::string& string, const std::string& regex, std::
 class Regex {
 public:
     Regex( const std::string& regex, bool use_case = true ) : regex_( regex ), use_case_( use_case ) {}
+/*
+    // unused
     bool match( const std::string& string ) const {
         std::vector<std::string> substr;
         return regex_match_impl( string, regex_, substr, false, use_case_ );
     }
+*/
     bool match( const std::string& string, std::vector<std::string>& substr ) const {
         return regex_match_impl( string, regex_, substr, true, use_case_ );
     }
+
+/*
+    // unused
     operator std::string() const { return regex_; }
+*/
 
 private:
     std::string regex_;
@@ -183,8 +190,6 @@ const Grid::Implementation* GridBuilder::create( const Grid::Config& config ) co
     else {
         throw eckit::BadParameter( "name or type in configuration don't exist", Here() );
     }
-
-    return nullptr;
 }
 
 bool GridBuilder::match( const std::string& string, std::vector<std::string>& matches, int& id ) const {
diff --git a/src/atlas/grid/detail/grid/Structured.h b/src/atlas/grid/detail/grid/Structured.h
index d26891f20..4cb402be8 100644
--- a/src/atlas/grid/detail/grid/Structured.h
+++ b/src/atlas/grid/detail/grid/Structured.h
@@ -101,7 +101,7 @@ class Structured : public Grid {
 
         virtual bool next( PointXY& xy ) {
             NOTIMP;
-
+#if 0
             if ( j_ < grid_.ny() && i_ < grid_.nx( j_ ) ) {
                 xy = grid_.xy( i_++, j_ );
 
@@ -112,6 +112,7 @@ class Structured : public Grid {
                 return true;
             }
             return false;
+#endif
         }
 
         virtual const PointXY operator*() const { return grid_.xy( i_, j_ ); }
diff --git a/src/atlas/grid/detail/grid/Unstructured.h b/src/atlas/grid/detail/grid/Unstructured.h
index ab9c5c427..135b6d726 100644
--- a/src/atlas/grid/detail/grid/Unstructured.h
+++ b/src/atlas/grid/detail/grid/Unstructured.h
@@ -82,6 +82,7 @@ class Unstructured : public Grid {
 
         virtual bool next( PointXY& xy ) {
             NOTIMP;
+#if 0
             if ( n_ != grid_.points_->size() ) {
                 xy = grid_.xy( n_++ );
                 return true;
@@ -89,6 +90,7 @@ class Unstructured : public Grid {
             else {
                 return false;
             }
+#endif
         }
 
         virtual const PointXY operator*() const { return grid_.xy( n_ ); }
diff --git a/src/atlas/interpolation/element/Quad3D.h b/src/atlas/interpolation/element/Quad3D.h
index 36d0b4597..e6af6527a 100644
--- a/src/atlas/interpolation/element/Quad3D.h
+++ b/src/atlas/interpolation/element/Quad3D.h
@@ -57,6 +57,7 @@ class Quad3D {
         if ( i == 1 ) return v10;
         if ( i == 2 ) return v11;
         if ( i == 3 ) return v01;
+        return Vector3D();
     }
 
 private:           // members
diff --git a/src/atlas/interpolation/element/Triag3D.h b/src/atlas/interpolation/element/Triag3D.h
index 8b1ed3fa8..1f7455e7c 100644
--- a/src/atlas/interpolation/element/Triag3D.h
+++ b/src/atlas/interpolation/element/Triag3D.h
@@ -59,6 +59,7 @@ class Triag3D {
         if ( i == 0 ) return v0;
         if ( i == 1 ) return v1;
         if ( i == 2 ) return v2;
+        return Vector3D();
     }
 
 private:  // members
diff --git a/src/atlas/mesh/ElementType.cc b/src/atlas/mesh/ElementType.cc
index d4ea081e7..09407a8c0 100644
--- a/src/atlas/mesh/ElementType.cc
+++ b/src/atlas/mesh/ElementType.cc
@@ -19,7 +19,6 @@ namespace mesh {
 
 ElementType* ElementType::create( const std::string& ) {
     NOTIMP;
-    return 0;
 }
 
 ElementType::ElementType() {}
diff --git a/src/atlas/mesh/actions/BuildDualMesh.cc b/src/atlas/mesh/actions/BuildDualMesh.cc
index 68543f879..af13bb413 100644
--- a/src/atlas/mesh/actions/BuildDualMesh.cc
+++ b/src/atlas/mesh/actions/BuildDualMesh.cc
@@ -76,10 +76,6 @@ struct Node {
     bool operator<( const Node& other ) const { return ( g < other.g ); }
 };
 
-inline double sqr( double a ) {
-    return a * a;
-}
-
 }  // namespace
 
 array::Array* build_centroids_xy( const mesh::HybridElements&, const Field& xy );
diff --git a/src/atlas/numerics/fvm/Nabla.cc b/src/atlas/numerics/fvm/Nabla.cc
index 95f244a90..676127c8e 100644
--- a/src/atlas/numerics/fvm/Nabla.cc
+++ b/src/atlas/numerics/fvm/Nabla.cc
@@ -72,7 +72,6 @@ void Nabla::gradient( const Field& field, Field& grad_field ) const {
     else {
         return gradient_of_scalar( field, grad_field );
     }
-    throw eckit::SeriousBug( "Cannot figure out if field is a scalar or vector field", Here() );
 }
 
 void Nabla::gradient_of_scalar( const Field& scalar_field, Field& grad_field ) const {
diff --git a/src/atlas/output/detail/GmshIO.cc b/src/atlas/output/detail/GmshIO.cc
index c8c2a60e4..456bafead 100644
--- a/src/atlas/output/detail/GmshIO.cc
+++ b/src/atlas/output/detail/GmshIO.cc
@@ -502,7 +502,6 @@ mesh::ElementType* make_element_type( int type ) {
     if ( type == TRIAG ) return new mesh::temporary::Triangle();
     if ( type == LINE ) return new mesh::temporary::Line();
     throw eckit::SeriousBug( "Element type not supported", Here() );
-    return 0;
 }
 }  // namespace
 
@@ -956,7 +955,7 @@ void GmshIO::write_delegate( const FieldSet& fieldset, const functionspace::Node
     bool binary( !options.get<bool>( "ascii" ) );
     if ( binary ) mode |= std::ios_base::binary;
     bool gather = options.has( "gather" ) ? options.get<bool>( "gather" ) : false;
-    GmshFile file( file_path, mode, gather ? -1 : atlas::mpi::comm().rank() );
+    GmshFile file( file_path, mode, gather ? -1 : int(atlas::mpi::comm().rank()) );
 
     // Header
     if ( is_new_file ) { write_header_ascii( file ); }
@@ -995,7 +994,7 @@ void GmshIO::write_delegate( const FieldSet& fieldset, const functionspace::Stru
 
     bool gather = options.has( "gather" ) ? options.get<bool>( "gather" ) : false;
 
-    GmshFile file( file_path, mode, gather ? -1 : atlas::mpi::comm().rank() );
+    GmshFile file( file_path, mode, gather ? -1 : int(atlas::mpi::comm().rank()) );
 
     // Header
     if ( is_new_file ) write_header_ascii( file );
diff --git a/src/atlas/parallel/Checksum.h b/src/atlas/parallel/Checksum.h
index db5289a40..111f716bc 100644
--- a/src/atlas/parallel/Checksum.h
+++ b/src/atlas/parallel/Checksum.h
@@ -149,7 +149,6 @@ std::string Checksum::execute( const array::ArrayView<DATA_TYPE, LRANK>& lfield
         Log::error() << "lfield.shape(0) = " << lfield.shape( 0 );
         NOTIMP;  // Need to implement with parallel ranks > 1
     }
-    return std::string( "" );
 }
 
 // ------------------------------------------------------------------
diff --git a/src/atlas/parallel/GatherScatter.cc b/src/atlas/parallel/GatherScatter.cc
index 47242218d..e0e91069e 100644
--- a/src/atlas/parallel/GatherScatter.cc
+++ b/src/atlas/parallel/GatherScatter.cc
@@ -59,10 +59,6 @@ struct Node {
     bool operator==( const Node& other ) const { return ( g == other.g ); }
 };
 
-bool operator<( const gidx_t g, const Node& n ) {
-    return ( g < n.g );
-}
-
 }  // namespace
 
 GatherScatter::GatherScatter() : name_(), is_setup_( false ) {
diff --git a/src/atlas/runtime/trace/Timings.cc b/src/atlas/runtime/trace/Timings.cc
index d3cb5c599..b6c191b68 100644
--- a/src/atlas/runtime/trace/Timings.cc
+++ b/src/atlas/runtime/trace/Timings.cc
@@ -340,7 +340,6 @@ std::string TimingsRegistry::filter_filepath( const std::string& filepath ) cons
     std::regex filepath_re( "(.*)?/atlas/src/(.*)" );
     std::smatch matches;
     std::string filtered( "" );
-    bool is_atlas = false;
     if ( std::regex_search( filepath, matches, filepath_re ) ) {
         // filtered = matches[2];
         filtered = "[atlas] ";
diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc
index 03b4a0802..9ed9536b1 100644
--- a/src/atlas/trans/local/TransLocal.cc
+++ b/src/atlas/trans/local/TransLocal.cc
@@ -1033,7 +1033,6 @@ void TransLocal::invtrans_unstructured_precomp( const int truncation, const int
 
     const int nlats        = grid_.size();
     const int size_fourier = nb_fields * 2;
-    double* legendre;
     double* scl_fourier;
     double* scl_fourier_tp;
     double* fouriertp;
diff --git a/src/tests/functionspace/test_structuredcolumns.cc b/src/tests/functionspace/test_structuredcolumns.cc
index c0e974af5..aa8eaeaa7 100644
--- a/src/tests/functionspace/test_structuredcolumns.cc
+++ b/src/tests/functionspace/test_structuredcolumns.cc
@@ -92,7 +92,6 @@ CASE( "test_functionspace_StructuredColumns_no_halo" ) {
 
 CASE( "test_functionspace_StructuredColumns_halo" ) {
     ATLAS_DEBUG_VAR( mpi::comm().size() );
-    int root = 0;
     //  grid::StructuredGrid grid(
     //      grid::StructuredGrid::XSpace( {0.,360.} , {2,4,6,6,4,2} , false ),
     //      grid::StructuredGrid::YSpace( grid::LinearSpacing( {80.,-80.}, 6 ) ),

From 31f329b8c02bc56325fa645f3275e7ed620a71ff Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Wed, 20 Jun 2018 17:00:02 +0100
Subject: [PATCH 120/123] Remove warnings related to unreachable statements
 detected by PGI compiler

---
 src/atlas/array/native/NativeArray.cc        | 1 -
 src/atlas/functionspace/Spectral.cc          | 1 -
 src/atlas/functionspace/StructuredColumns.cc | 2 +-
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/atlas/array/native/NativeArray.cc b/src/atlas/array/native/NativeArray.cc
index 3e5f3c346..ef87cad2f 100644
--- a/src/atlas/array/native/NativeArray.cc
+++ b/src/atlas/array/native/NativeArray.cc
@@ -69,7 +69,6 @@ Array* Array::create( DataType datatype, const ArrayShape& shape ) {
             throw eckit::BadParameter( err.str(), Here() );
         }
     }
-    return 0;
 }
 
 template <typename Value>
diff --git a/src/atlas/functionspace/Spectral.cc b/src/atlas/functionspace/Spectral.cc
index 2a833f453..f5aab2087 100644
--- a/src/atlas/functionspace/Spectral.cc
+++ b/src/atlas/functionspace/Spectral.cc
@@ -287,7 +287,6 @@ void Spectral::scatter( const Field& global, Field& local ) const {
 std::string Spectral::checksum( const FieldSet& fieldset ) const {
     eckit::MD5 md5;
     NOTIMP;
-    return md5;
 }
 std::string Spectral::checksum( const Field& field ) const {
     FieldSet fieldset;
diff --git a/src/atlas/functionspace/StructuredColumns.cc b/src/atlas/functionspace/StructuredColumns.cc
index 2997b2e14..3a10f19ba 100644
--- a/src/atlas/functionspace/StructuredColumns.cc
+++ b/src/atlas/functionspace/StructuredColumns.cc
@@ -86,7 +86,7 @@ struct GridPoint {
         return false;
     }
 
-    bool operator==( const GridPoint& other ) const { return ( j == other.j && i == other.i ); }
+    //bool operator==( const GridPoint& other ) const { return ( j == other.j && i == other.i ); }
 };
 
 struct GridPointSet {

From 2abad35cb219d973c5f7c30f09486e6437962fac Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Wed, 20 Jun 2018 17:11:31 +0100
Subject: [PATCH 121/123] Reduce test times

---
 src/atlas/interpolation/element/Quad3D.h  | 2 +-
 src/atlas/interpolation/element/Triag3D.h | 3 ++-
 src/tests/numerics/test_fvm_nabla.cc      | 2 +-
 src/tests/util/test_footprint.cc          | 6 +++++-
 4 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/atlas/interpolation/element/Quad3D.h b/src/atlas/interpolation/element/Quad3D.h
index e6af6527a..f0d4a2b92 100644
--- a/src/atlas/interpolation/element/Quad3D.h
+++ b/src/atlas/interpolation/element/Quad3D.h
@@ -57,7 +57,7 @@ class Quad3D {
         if ( i == 1 ) return v10;
         if ( i == 2 ) return v11;
         if ( i == 3 ) return v01;
-        return Vector3D();
+        throw eckit::OutOfRange(i,4,Here());
     }
 
 private:           // members
diff --git a/src/atlas/interpolation/element/Triag3D.h b/src/atlas/interpolation/element/Triag3D.h
index 1f7455e7c..f4da50450 100644
--- a/src/atlas/interpolation/element/Triag3D.h
+++ b/src/atlas/interpolation/element/Triag3D.h
@@ -12,6 +12,7 @@
 
 #include <limits>
 
+#include "eckit/exception/Exceptions.h"
 #include "atlas/interpolation/Vector3D.h"
 #include "atlas/interpolation/method/Intersect.h"
 #include "atlas/util/Point.h"
@@ -59,7 +60,7 @@ class Triag3D {
         if ( i == 0 ) return v0;
         if ( i == 1 ) return v1;
         if ( i == 2 ) return v2;
-        return Vector3D();
+        throw eckit::OutOfRange(i,3,Here());
     }
 
 private:  // members
diff --git a/src/tests/numerics/test_fvm_nabla.cc b/src/tests/numerics/test_fvm_nabla.cc
index 5983ad56f..115ce4c08 100644
--- a/src/tests/numerics/test_fvm_nabla.cc
+++ b/src/tests/numerics/test_fvm_nabla.cc
@@ -106,7 +106,7 @@ void rotated_flow_magnitude( const fvm::Method& fvm, Field& field, const double&
 }
 
 static std::string griduid() {
-    return "Slat80";
+    return "Slat20";
 }
 
 //-----------------------------------------------------------------------------
diff --git a/src/tests/util/test_footprint.cc b/src/tests/util/test_footprint.cc
index 281474e98..19876a460 100644
--- a/src/tests/util/test_footprint.cc
+++ b/src/tests/util/test_footprint.cc
@@ -30,6 +30,10 @@ using namespace atlas::util;
 namespace atlas {
 namespace test {
 
+static std::string griduid() {
+    return "O64";
+}
+
 //-----------------------------------------------------------------------------
 
 CASE( "test_broadcast_to_self" ) {
@@ -39,7 +43,7 @@ CASE( "test_broadcast_to_self" ) {
     Field field( "field", array::make_datatype<double>(), array::make_shape( 10, 2 ) );
     Log::info() << "field.footprint = " << eckit::Bytes( field.footprint() ) << std::endl;
 
-    Grid grid( "O640" );
+    Grid grid( griduid() );
     MeshGenerator meshgen( "structured" );
     Mesh mesh = meshgen.generate( grid );
 

From 4ba33483d5b4d8b0338784d94c3999e3a47fdad3 Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Wed, 20 Jun 2018 19:34:51 +0100
Subject: [PATCH 122/123] Print more configuration information

---
 src/atlas/library/Library.cc | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/atlas/library/Library.cc b/src/atlas/library/Library.cc
index 084311533..92e99b845 100644
--- a/src/atlas/library/Library.cc
+++ b/src/atlas/library/Library.cc
@@ -220,11 +220,18 @@ void Library::Information::print( std::ostream& out ) const {
     bool feature_fortran( ATLAS_HAVE_FORTRAN );
     bool feature_OpenMP( ATLAS_HAVE_OMP );
     bool feature_Trans( ATLAS_HAVE_TRANS );
+    bool feature_FFTW( ATLAS_HAVE_FFTW );
+    bool feature_Eigen( ATLAS_HAVE_EIGEN );
     bool feature_Tesselation( ATLAS_HAVE_TESSELATION );
     bool feature_BoundsChecking( ATLAS_ARRAYVIEW_BOUNDS_CHECKING );
+    bool feature_Init_sNaN( ATLAS_INIT_SNAN );
     bool feature_MPI( false );
 #ifdef ECKIT_HAVE_MPI
     feature_MPI = true;
+#endif
+    bool feature_MKL( false );
+#ifdef ECKIT_HAVE_MKL
+    feature_MKL = true;
 #endif
     std::string array_data_store = "Native";
 #if ATLAS_HAVE_GRIDTOOLS_STORAGE
@@ -238,7 +245,11 @@ void Library::Information::print( std::ostream& out ) const {
         << "    MPI            : " << str( feature_MPI ) << '\n'
         << "    OpenMP         : " << str( feature_OpenMP ) << '\n'
         << "    BoundsChecking : " << str( feature_BoundsChecking ) << '\n'
+        << "    Init_sNaN      : " << str( feature_Init_sNaN ) << '\n'
         << "    Trans          : " << str( feature_Trans ) << '\n'
+        << "    FFTW           : " << str( feature_FFTW ) << '\n'
+        << "    Eigen          : " << str( feature_Eigen ) << '\n'
+        << "    MKL            : " << str( feature_MKL ) << '\n'
         << "    Tesselation    : " << str( feature_Tesselation ) << '\n'
         << "    ArrayDataStore : " << array_data_store << '\n'
         << "    gidx_t         : " << ATLAS_BITS_GLOBAL << " bit integer" << '\n'

From e8a347f8e605e7caef487bff05e1308a5baeec4b Mon Sep 17 00:00:00 2001
From: Willem Deconinck <willem.deconinck@ecmwf.int>
Date: Wed, 20 Jun 2018 19:39:59 +0100
Subject: [PATCH 123/123] Disable PGI build from travis ci

---
 .travis.yml | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index f02f88aa1..d32f179b3 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -61,16 +61,6 @@ matrix:
           sources:  ['ubuntu-toolchain-r-test']
           packages: ['g++-7', 'gcc-7', 'gfortran-7', 'libfftw3-dev', 'lcov']
 
-    - os: linux
-      compiler: gcc
-      env:
-        - CACHE_NAME=linux-pgi-openmpi
-        - CXX_COMPILER='pgc++' C_COMPILER='pgcc' Fortran_COMPILER='pgfortran'
-        - MPI='openmpi'
-        - PGI_VERSION="CommunityEdition"
-        - ECKIT_CMAKE_OPTIONS="-DRT_LIB=/usr/lib/x86_64-linux-gnu/librt.so -DCURSES_LIBRARY=/usr/lib/x86_64-linux-gnu/libcurses.so"
-        - ATLAS_CMAKE_OPTIONS="-DCMAKE_BUILD_TYPE=DEBUG -DENABLE_FORTRAN=OFF" # Fortran tests known to be broken with pgi/17.10
-
     - os: osx
       env:
         - CACHE_NAME=osx-clang-openmpi
@@ -90,6 +80,16 @@ matrix:
     #     - MPI=mpich
     #   osx_image: xcode9
 
+    # - os: linux
+    #   compiler: gcc
+    #   env:
+    #     - CACHE_NAME=linux-pgi-openmpi
+    #     - CXX_COMPILER='pgc++' C_COMPILER='pgcc' Fortran_COMPILER='pgfortran'
+    #     - MPI='openmpi'
+    #     - PGI_VERSION="CommunityEdition"
+    #     - ECKIT_CMAKE_OPTIONS="-DRT_LIB=/usr/lib/x86_64-linux-gnu/librt.so -DCURSES_LIBRARY=/usr/lib/x86_64-linux-gnu/libcurses.so"
+    #     - ATLAS_CMAKE_OPTIONS="-DCMAKE_BUILD_TYPE=DEBUG -DENABLE_FORTRAN=OFF" # Fortran tests known to be broken with pgi/17.10
+
 before_install:
 
   #################################################################