From d62afa05a09119df1dec7610cecb4fa0c9580777 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Fri, 23 Mar 2018 11:46:03 +0000 Subject: [PATCH 001/123] ATLAS-158 Fix assertion in Polygon --- src/atlas/util/Polygon.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/atlas/util/Polygon.cc b/src/atlas/util/Polygon.cc index 7c365768d..ba4f24edf 100644 --- a/src/atlas/util/Polygon.cc +++ b/src/atlas/util/Polygon.cc @@ -118,6 +118,8 @@ PolygonCoordinates::PolygonCoordinates( const Polygon& poly, const atlas::Field& coordinatesMin_ = PointLonLat( ll( poly[0], LON ), ll( poly[0], LAT ) ); coordinatesMax_ = coordinatesMin_; + size_t nb_removed_points_due_to_alignment = 0; + for ( size_t i = 0; i < poly.size(); ++i ) { PointLonLat A( ll( poly[i], LON ), ll( poly[i], LAT ) ); coordinatesMin_ = PointLonLat::componentsMin( coordinatesMin_, A ); @@ -130,6 +132,7 @@ PolygonCoordinates::PolygonCoordinates( const Polygon& poly, const atlas::Field& const PointLonLat& C = coordinates_[coordinates_.size() - 2]; if ( eckit::types::is_approximately_equal( 0., cross_product_analog( A, B, C ) ) ) { coordinates_.back() = A; + ++nb_removed_points_due_to_alignment; continue; } } @@ -137,7 +140,7 @@ PolygonCoordinates::PolygonCoordinates( const Polygon& poly, const atlas::Field& coordinates_.push_back( A ); } - ASSERT( coordinates_.size() == poly.size() ); + ASSERT( coordinates_.size() == poly.size() - nb_removed_points_due_to_alignment ); } PolygonCoordinates::PolygonCoordinates( const std::vector& points ) : coordinates_( points ) { From d9d237cc0221f99e579725a6f3b6c562d462c262 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 18 Apr 2018 08:26:18 +0100 Subject: [PATCH 002/123] bamboo: CLANG-env to use gfortran 6.3.0 --- bamboo/CLANG-env.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bamboo/CLANG-env.sh b/bamboo/CLANG-env.sh index 33c46d1dc..7549c741d 100644 --- a/bamboo/CLANG-env.sh +++ b/bamboo/CLANG-env.sh @@ -11,3 +11,5 @@ module unload 
fftw module unload libemos module switch gnu clang +export FC=/usr/local/apps/gcc/6.3.0/bin/gfortran + From 4f2c483d4489da220c82efc71f4251863ea29ecd Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 27 Feb 2018 16:52:19 +0000 Subject: [PATCH 003/123] duplicated TransLocal into TransLocalopt. Test_transgeneral is using both to compare their results and timings --- src/atlas/CMakeLists.txt | 10 + src/atlas/trans/Trans.cc | 2 + src/atlas/trans/VorDivToUV.cc | 2 + .../trans/localopt/FourierTransformsopt.cc | 78 +++++ .../trans/localopt/FourierTransformsopt.h | 38 ++ .../trans/localopt/LegendrePolynomialsopt.cc | 153 ++++++++ .../trans/localopt/LegendrePolynomialsopt.h | 43 +++ .../trans/localopt/LegendreTransformsopt.cc | 61 ++++ .../trans/localopt/LegendreTransformsopt.h | 36 ++ src/atlas/trans/localopt/TransLocalopt.cc | 327 ++++++++++++++++++ src/atlas/trans/localopt/TransLocalopt.h | 121 +++++++ .../trans/localopt/VorDivToUVLocalopt.cc | 184 ++++++++++ src/atlas/trans/localopt/VorDivToUVLocalopt.h | 67 ++++ src/tests/trans/test_transgeneral.cc | 16 +- 14 files changed, 1136 insertions(+), 2 deletions(-) create mode 100644 src/atlas/trans/localopt/FourierTransformsopt.cc create mode 100644 src/atlas/trans/localopt/FourierTransformsopt.h create mode 100644 src/atlas/trans/localopt/LegendrePolynomialsopt.cc create mode 100644 src/atlas/trans/localopt/LegendrePolynomialsopt.h create mode 100644 src/atlas/trans/localopt/LegendreTransformsopt.cc create mode 100644 src/atlas/trans/localopt/LegendreTransformsopt.h create mode 100644 src/atlas/trans/localopt/TransLocalopt.cc create mode 100644 src/atlas/trans/localopt/TransLocalopt.h create mode 100644 src/atlas/trans/localopt/VorDivToUVLocalopt.cc create mode 100644 src/atlas/trans/localopt/VorDivToUVLocalopt.h diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt index 1675b693e..ecbbbbb06 100644 --- a/src/atlas/CMakeLists.txt +++ b/src/atlas/CMakeLists.txt @@ -331,6 +331,16 @@ 
trans/local/FourierTransforms.h trans/local/FourierTransforms.cc trans/local/VorDivToUVLocal.h trans/local/VorDivToUVLocal.cc +trans/localopt/TransLocalopt.h +trans/localopt/TransLocalopt.cc +trans/localopt/LegendrePolynomialsopt.h +trans/localopt/LegendrePolynomialsopt.cc +trans/localopt/LegendreTransformsopt.h +trans/localopt/LegendreTransformsopt.cc +trans/localopt/FourierTransformsopt.h +trans/localopt/FourierTransformsopt.cc +trans/localopt/VorDivToUVLocalopt.h +trans/localopt/VorDivToUVLocalopt.cc ) if( ATLAS_HAVE_TRANS ) diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc index c10408891..9c555170d 100644 --- a/src/atlas/trans/Trans.cc +++ b/src/atlas/trans/Trans.cc @@ -28,6 +28,7 @@ #define TRANS_DEFAULT "local" #endif #include "atlas/trans/local/TransLocal.h" +#include "atlas/trans/localopt/TransLocalopt.h" namespace atlas { namespace trans { @@ -62,6 +63,7 @@ struct force_link { load_builder_grid(); #endif load_builder_grid(); + load_builder_grid(); } }; diff --git a/src/atlas/trans/VorDivToUV.cc b/src/atlas/trans/VorDivToUV.cc index f71e2a6c2..b958599fb 100644 --- a/src/atlas/trans/VorDivToUV.cc +++ b/src/atlas/trans/VorDivToUV.cc @@ -27,6 +27,7 @@ #define TRANS_DEFAULT "local" #endif #include "atlas/trans/local/VorDivToUVLocal.h" +#include "atlas/trans/localopt/VorDivToUVLocalopt.h" namespace atlas { namespace trans { @@ -55,6 +56,7 @@ struct force_link { load_builder(); #endif load_builder(); + load_builder(); } }; diff --git a/src/atlas/trans/localopt/FourierTransformsopt.cc b/src/atlas/trans/localopt/FourierTransformsopt.cc new file mode 100644 index 000000000..ede59cea8 --- /dev/null +++ b/src/atlas/trans/localopt/FourierTransformsopt.cc @@ -0,0 +1,78 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
namespace atlas {
namespace trans {

//-----------------------------------------------------------------------------
// Inverse Fourier transformation at a single longitude.
//
// For every field jfld this evaluates
//   rgp[jfld] = sum over jm = 0..trcFT of
//                 cos(jm*lon) * rlegReal[jm*nb_fields + jfld]
//               - sin(jm*lon) * rlegImag[jm*nb_fields + jfld]
//
//   trcFT     : highest zonal wavenumber included in the sum (in)
//   lon       : longitude in radians (in)
//   nb_fields : number of fields (in)
//   rlegReal  : real parts, (trcFT+1)*nb_fields values, field index fastest (in)
//   rlegImag  : imaginary parts, same layout as rlegReal (in)
//   rgp       : grid-point values, nb_fields entries, overwritten (out)
void invtrans_fourieropt( const size_t trcFT, const double lon, const int nb_fields, const double rlegReal[],
                          const double rlegImag[], double rgp[] ) {
    for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
        rgp[jfld] = 0.;
    }
    // local Fourier transformation (unsigned counter: the bound is a size_t)
    for ( size_t jm = 0; jm <= trcFT; ++jm ) {
        const double cos_mlon = std::cos( jm * lon );
        const double sin_mlon = std::sin( jm * lon );
        const double* real_m  = rlegReal + jm * nb_fields;
        const double* imag_m  = rlegImag + jm * nb_fields;
        for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
            const double real = cos_mlon * real_m[jfld];
            const double imag = sin_mlon * imag_m[jfld];
            rgp[jfld] += real - imag;
        }
    }
}

//-----------------------------------------------------------------------------
// Fourier truncation to use on one latitude circle.
//
//   truncation : spectral truncation (in)
//   nx         : number of longitudes on this latitude (in)
//   nxmax      : maximum nx of the grid (in, currently not used)
//   ndgl       : number of latitudes (in)
//   lat        : latitude in radians (in)
//   fullgrid   : true for a regular grid (in)
//
// Returns a value in [0, truncation] (0 if truncation is negative).
int fourier_truncationopt( const int truncation, const int nx, const int nxmax, const int ndgl, const double lat,
                           const bool fullgrid ) {
    const int trclin  = ndgl - 1;
    const int trcquad = ndgl * 2 / 3 - 1;

    int trc = truncation;
    if ( truncation >= trclin || fullgrid ) {
        // linear
        trc = ( nx - 1 ) / 2;
    }
    else {
        const double coslat = std::cos( lat );
        const double sqcos  = coslat * coslat;
        if ( truncation >= trcquad ) {
            // quadratic: the weight must be evaluated in floating point. With
            // integer division it collapses to 0 (or 1 at truncation == trcquad),
            // which silently turns this branch into the linear one.
            const double weight = 3. * ( trclin - truncation ) / ndgl;

            trc = ( nx - 1 ) / ( 2 + weight * sqcos );
        }
        else {
            // cubic
            trc = ( nx - 1 ) / ( 2 + sqcos ) - 1;
        }
    }
    // Never exceed the spectral truncation, and never return a negative
    // wavenumber (the cubic formula yields -1 for nx == 1).
    return std::max( 0, std::min( truncation, trc ) );
}

// --------------------------------------------------------------------------------------------------------------------

}  // namespace trans
}  // namespace atlas
namespace atlas {
namespace trans {

//-----------------------------------------------------------------------------
// Normalised associated Legendre functions at one latitude, following Belousov.
//
//   trc    : truncation (in)
//   lat    : latitude in radians (in)
//   legpol : output, (trc+1)*(trc+2)/2 values. Entry (jm, jn) with
//            0 <= jm <= jn <= trc is stored jm-major, i.e. in the order
//            (0,0), (0,1), ..., (0,trc), (1,1), ..., (trc,trc) (out)
void compute_legendre_polynomialsopt( const size_t trc, const double lat, double legpol[] ) {
    const int nmax   = static_cast<int>( trc );
    const int stride = nmax + 1;

    // Position of (jm, jn) in the packed output: row jm starts after
    // sum_{i<jm} (trc + 1 - i) = jm * (2*trc + 3 - jm) / 2 entries.
    auto idxmn = [nmax]( int jm, int jn ) -> size_t {
        return static_cast<size_t>( jm ) * static_cast<size_t>( 2 * nmax + 3 - jm ) / 2 +
               static_cast<size_t>( jn - jm );
    };

    // Scratch table of series coefficients, addressed as zfn( jn, jk )
    std::vector<double> zfn_data( static_cast<size_t>( stride ) * stride, 0. );
    auto zfn = [&zfn_data, stride]( int jn, int jk ) -> double& {
        return zfn_data[static_cast<size_t>( jn ) * stride + jk];
    };

    // Compute coefficients for Taylor series in Belousov (19) and (21)
    // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.)
    // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1
    zfn( 0, 0 ) = 2.;
    double zfnn = zfn( 0, 0 );
    for ( int jn = 1; jn <= nmax; ++jn ) {
        // Running product zfn(0,0) * prod_{j=1..jn} sqrt(1 - 0.25/j^2); the
        // factors are applied in the same order as a restart from zfn(0,0).
        zfnn *= std::sqrt( 1. - 0.25 / ( static_cast<double>( jn ) * jn ) );
        const int iodd = jn % 2;
        zfn( jn, jn )  = zfnn;
        for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) {
            const double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) );  // new factor numerator
            const double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) );           // new factor denominator

            zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd;
        }
    }

    // --------------------
    // 1. First two columns
    // --------------------
    const double zdlx1 = ( M_PI_2 - lat );                  // theta
    double zdlx        = std::cos( zdlx1 );                 // cos(theta)
    double zdlsita     = std::sqrt( 1. - zdlx * zdlx );     // sin(theta) (this is how trans library does it)

    legpol[0]       = 1.;
    double zdl1sita = 0.;

    // if sin(theta) is below sqrt(machine epsilon), treat the point as the pole
    if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits<double>::epsilon() ) ) {
        zdlx    = 1.;
        zdlsita = 0.;
    }
    else {
        zdl1sita = 1. / zdlsita;
    }

    // ordinary Legendre polynomials from series expansion
    // ---------------------------------------------------

    // even N
    for ( int jn = 2; jn <= nmax; jn += 2 ) {
        double zdlk       = 0.5 * zfn( jn, 0 );
        double zdlldn     = 0.0;
        const double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) );
        // represented by only even k
        for ( int jk = 2; jk <= jn; jk += 2 ) {
            // normalised ordinary Legendre polynomial == \overbar{P_n}^0
            zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
            // normalised associated Legendre polynomial == \overbar{P_n}^1
            zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
        }
        legpol[idxmn( 0, jn )] = zdlk;
        legpol[idxmn( 1, jn )] = zdlldn;
    }

    // odd N
    for ( int jn = 1; jn <= nmax; jn += 2 ) {
        zfn( jn, 0 )      = 0.;
        double zdlk       = 0.;
        double zdlldn     = 0.0;
        const double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) );
        // represented by only odd k
        for ( int jk = 1; jk <= jn; jk += 2 ) {
            // normalised ordinary Legendre polynomial == \overbar{P_n}^0
            zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
            // normalised associated Legendre polynomial == \overbar{P_n}^1
            zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
        }
        legpol[idxmn( 0, jn )] = zdlk;
        legpol[idxmn( 1, jn )] = zdlldn;
    }

    // --------------------------------------------------------------
    // 2. Diagonal (the terms 0,0 and 1,1 have already been computed)
    //    Belousov, equation (23)
    // --------------------------------------------------------------

    // values on the diagonal below this threshold are flushed to zero
    const double zdls = zdl1sita * std::numeric_limits<double>::min();
    for ( int jn = 2; jn <= nmax; ++jn ) {
        const double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) );

        double& diag = legpol[idxmn( jn, jn )];
        diag         = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq;
        if ( std::abs( diag ) < zdls ) { diag = 0.0; }
    }

    // ---------------------------------------------
    // 3. General recurrence (Belousov, equation 17)
    // ---------------------------------------------

    for ( int jn = 3; jn <= nmax; ++jn ) {
        for ( int jm = 2; jm < jn; ++jm ) {
            const double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) );  // numerator of c in Belousov
            const double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of c in Belousov
            const double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) );  // numerator of d in Belousov
            const double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of d in Belousov
            const double en = ( ( 2. * jn + 1. ) * ( jn - jm ) );                          // numerator of e in Belousov
            const double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) );                          // denominator of e in Belousov

            legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] -
                                      std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx +
                                      std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx;
        }
    }
}

// --------------------------------------------------------------------------------------------------------------------

}  // namespace trans
}  // namespace atlas
+ * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#pragma once + +#include + +namespace atlas { +namespace trans { + +//----------------------------------------------------------------------------- +// Routine to compute the Legendre polynomials in serial according to Belousov +// (using correction by Swarztrauber) +// +// Reference: +// S.L. Belousov, Tables of normalized associated Legendre Polynomials, Pergamon +// Press (1962) +// P.N. Swarztrauber, On computing the points and weights for Gauss-Legendre +// quadrature, +// SIAM J. Sci. Comput. Vol. 24 (3) pp. 945-954 (2002) +// +// Author of Fortran version: +// Mats Hamrud, Philippe Courtier, Nils Wedi *ECMWF* +// +// Ported to C++ by: +// Andreas Mueller *ECMWF* +// +void compute_legendre_polynomialsopt( + const size_t trc, // truncation (in) + const double lat, // latitude in radians (in) + double legpol[] ); // values of associated Legendre functions, size (trc+1)*trc/2 (out) + +// -------------------------------------------------------------------------------------------------------------------- + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt/LegendreTransformsopt.cc b/src/atlas/trans/localopt/LegendreTransformsopt.cc new file mode 100644 index 000000000..e88feda69 --- /dev/null +++ b/src/atlas/trans/localopt/LegendreTransformsopt.cc @@ -0,0 +1,61 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
namespace atlas {
namespace trans {

//-----------------------------------------------------------------------------
// Inverse Legendre transformation at one latitude.
//
//   trc       : truncation of the spectral data (in)
//   trcFT     : truncation for the Fourier transformation, i.e. the highest
//               zonal wavenumber jm for which output is produced (in)
//   trcLP     : truncation the table legpol was computed for, needs to be >= trc (in)
//   legpol    : associated Legendre functions, packed jm-major for
//               jn = jm..trcLP (in)
//   nb_fields : number of fields (in)
//   spec      : spectral data; for the k-th (jm, jn <= trc) pair the real parts
//               of all fields are at spec[(2*k)*nb_fields + jfld] and the
//               imaginary parts at spec[(2*k+1)*nb_fields + jfld] (in)
//   leg_real  : real parts per zonal wavenumber, (trcFT+1)*nb_fields values (out)
//   leg_imag  : imaginary parts per zonal wavenumber, same layout (out)
void invtrans_legendreopt( const size_t trc, const size_t trcFT, const size_t trcLP, const double legpol[],
                           const int nb_fields, const double spec[], double leg_real[], double leg_imag[] ) {
    // All offsets are computed in size_t: the loop bounds are size_t and the
    // products k * nb_fields would otherwise be evaluated in int.
    const size_t nfld = nb_fields > 0 ? static_cast<size_t>( nb_fields ) : 0;

    // Legendre transformation:
    size_t k   = 0;  // running index over spectral coefficients (jn <= trc)
    size_t klp = 0;  // running index into legpol (jn <= trcLP)
    for ( size_t jm = 0; jm <= trcFT; ++jm ) {
        double* real_m = leg_real + jm * nfld;
        double* imag_m = leg_imag + jm * nfld;
        for ( size_t jfld = 0; jfld < nfld; ++jfld ) {
            real_m[jfld] = 0.;
            imag_m[jfld] = 0.;
        }
        for ( size_t jn = jm; jn <= trcLP; ++jn, ++klp ) {
            // legpol entries beyond the spectral truncation are only stepped over
            if ( jn > trc ) { continue; }

            const double* spec_real = spec + ( 2 * k ) * nfld;
            const double* spec_imag = spec + ( 2 * k + 1 ) * nfld;
            for ( size_t jfld = 0; jfld < nfld; ++jfld ) {
                // not completely sure where this factor 2 comes from. One possible
                // explanation: normalization of trigonometric functions in the
                // spherical harmonics; integral over square of trig function is 1
                // for m=0 and 0.5 (?) for m>0
                real_m[jfld] += 2. * spec_real[jfld] * legpol[klp];
                imag_m[jfld] += 2. * spec_imag[jfld] * legpol[klp];
            }
            ++k;
        }
    }
    // Undo factor 2 for (jm == 0)
    for ( size_t jfld = 0; jfld < nfld; ++jfld ) {
        leg_real[jfld] /= 2.;
        leg_imag[jfld] /= 2.;
    }
}

// --------------------------------------------------------------------------------------------------------------------

}  // namespace trans
}  // namespace atlas
Needs to be >= trc (in) + const double legpol[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) + const int nb_fields, // number of fields + const double spec[], // spectral data, size (trc+1)*trc (in) + double leg_real[], // values of associated Legendre functions, size (trc+1)*trc/2 (out) + double leg_imag[] ); // values of associated Legendre functions, size (trc+1)*trc/2 (out) + +// -------------------------------------------------------------------------------------------------------------------- + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc new file mode 100644 index 000000000..0a1477519 --- /dev/null +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -0,0 +1,327 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. 
+ */ + +#include "atlas/trans/localopt/TransLocalopt.h" +#include "atlas/array.h" +#include "atlas/option.h" +#include "atlas/parallel/mpi/mpi.h" +#include "atlas/runtime/ErrorHandling.h" +#include "atlas/runtime/Log.h" +#include "atlas/trans/VorDivToUV.h" +#include "atlas/trans/localopt/FourierTransformsopt.h" +#include "atlas/trans/localopt/LegendrePolynomialsopt.h" +#include "atlas/trans/localopt/LegendreTransformsopt.h" +#include "atlas/util/Constants.h" + +namespace atlas { +namespace trans { + +namespace { +static TransBuilderGrid builder( "localopt" ); +} + +// -------------------------------------------------------------------------------------------------------------------- +// Helper functions +// -------------------------------------------------------------------------------------------------------------------- +namespace { // anonymous + +size_t legendre_size( const size_t truncation ) { + return ( truncation + 2 ) * ( truncation + 1 ) / 2; +} + +} // namespace + +// -------------------------------------------------------------------------------------------------------------------- +// Class TransLocalopt +// -------------------------------------------------------------------------------------------------------------------- + +TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long truncation, + const eckit::Configuration& config ) : + grid_( grid ), + truncation_( truncation ), + precompute_( config.getBool( "precompute", true ) ) { + if ( precompute_ ) { + if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { + ATLAS_TRACE( "Precompute legendre structured opt" ); + grid::StructuredGrid g( grid_ ); + size_t size( 0 ); + legendre_begin_.resize( g.ny() ); + for ( size_t j = 0; j < g.ny(); ++j ) { + legendre_begin_[j] = size; + size += legendre_size( truncation_ + 1 ); + } + legendre_.resize( size ); + + for ( size_t j = 0; j < g.ny(); ++j ) { + double lat = g.y( j ) * util::Constants::degreesToRadians(); + 
compute_legendre_polynomialsopt( truncation_ + 1, lat, legendre_data( j ) ); + } + } + else { + ATLAS_TRACE( "Precompute legendre unstructured opt" ); + size_t size( 0 ); + legendre_begin_.resize( grid_.size() ); + for ( size_t j = 0; j < grid_.size(); ++j ) { + legendre_begin_[j] = size; + size += legendre_size( truncation_ + 1 ); + } + legendre_.resize( size ); + int j( 0 ); + for ( PointXY p : grid_.xy() ) { + double lat = p.y() * util::Constants::degreesToRadians(); + compute_legendre_polynomialsopt( truncation_ + 1, lat, legendre_data( j++ ) ); + } + } + } +} + +// -------------------------------------------------------------------------------------------------------------------- + +TransLocalopt::TransLocalopt( const Grid& grid, const long truncation, const eckit::Configuration& config ) : + TransLocalopt( Cache(), grid, truncation, config ) {} + +// -------------------------------------------------------------------------------------------------------------------- + +TransLocalopt::~TransLocalopt() {} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt::invtrans( const Field& spfield, Field& gpfield, const eckit::Configuration& config ) const { + NOTIMP; +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt::invtrans( const FieldSet& spfields, FieldSet& gpfields, const eckit::Configuration& config ) const { + NOTIMP; +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt::invtrans_grad( const Field& spfield, Field& gradfield, const eckit::Configuration& config ) const { + NOTIMP; +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt::invtrans_grad( const FieldSet& spfields, FieldSet& 
gradfields, + const eckit::Configuration& config ) const { + NOTIMP; +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind, + const eckit::Configuration& config ) const { + NOTIMP; +} + +void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields, config ); +} + +void gp_transposeopt( const int nb_size, const int nb_fields, const double gp_tmp[], double gp_fields[] ) { + for ( int jgp = 0; jgp < nb_size; jgp++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + gp_fields[jfld * nb_size + jgp] = gp_tmp[jgp * nb_fields + jfld]; + } + } +} + +//----------------------------------------------------------------------------- +// Routine to compute the spectral transform by using a localopt Fourier +// transformation +// for a grid (same latitude for all longitudes, allows to compute Legendre +// functions +// once for all longitudes). U and v components are divided by cos(latitude) for +// nb_vordiv_fields > 0. 
+// +// Author: +// Andreas Mueller *ECMWF* +// +void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, + const double scalar_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + if ( nb_scalar_fields > 0 ) { + int nb_fields = nb_scalar_fields; + + // Depending on "precompute_legendre_", we have to compute the + // legendre polynomials for every latitute + std::vector recomputed_legendre_; + + auto legPol = [&]( double lat, int j ) -> const double* { + if ( precompute_ ) { return legendre_data( j ); } + else { + recomputed_legendre_.resize( legendre_size( truncation ) ); + compute_legendre_polynomialsopt( truncation, lat, recomputed_legendre_.data() ); + return recomputed_legendre_.data(); + } + }; + + // Temporary storage for legendre space + std::vector legReal( nb_fields * ( truncation + 1 ) ); + std::vector legImag( nb_fields * ( truncation + 1 ) ); + std::vector gp_tmp( nb_fields * grid_.size(), 0. ); + + // Transform + if ( grid::StructuredGrid g = grid_ ) { + ATLAS_TRACE( "invtrans_uv structured opt" ); + int idx = 0; + for ( size_t j = 0; j < g.ny(); ++j ) { + double lat = g.y( j ) * util::Constants::degreesToRadians(); + double trcFT = + fourier_truncationopt( truncation, g.nx( j ), g.nxmax(), g.ny(), lat, grid::RegularGrid( grid_ ) ); + + // Legendre transform: + invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, j ), nb_fields, scalar_spectra, + legReal.data(), legImag.data() ); + + // Fourier transform: + for ( size_t i = 0; i < g.nx( j ); ++i ) { + double lon = g.x( i, j ) * util::Constants::degreesToRadians(); + invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(), + gp_tmp.data() + ( nb_fields * idx ) ); + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + } + ++idx; + } + } + } + else { + ATLAS_TRACE( "invtrans_uv unstructured opt" ); + int idx = 0; + for ( PointXY p 
: grid_.xy() ) { + double lon = p.x() * util::Constants::degreesToRadians(); + double lat = p.y() * util::Constants::degreesToRadians(); + double trcFT = truncation; + + // Legendre transform: + invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, + legReal.data(), legImag.data() ); + + // Fourier transform: + invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(), + gp_tmp.data() + ( nb_fields * idx ) ); + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + } + ++idx; + } + } + + // transpose result (gp_tmp: jfld is fastest index. gp_fields: jfld needs to + // be slowest index) + gp_transposeopt( grid_.size(), nb_fields, gp_tmp.data(), gp_fields ); + } +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], + const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); +} + +void extend_truncationopt( const int old_truncation, const int nb_fields, const double old_spectra[], + double new_spectra[] ) { + int k = 0, k_old = 0; + for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber + for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber + for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { // field + if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; } + else { + new_spectra[k++] = old_spectra[k_old++]; + } + } + } + } + } +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt::invtrans( const int nb_scalar_fields, 
const double scalar_spectra[], const int nb_vordiv_fields, + const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + ATLAS_TRACE( "TransLocalopt::invtrans" ); + int nb_gp = grid_.size(); + + // increase truncation in vorticity_spectra and divergence_spectra: + int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; + std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); + extend_truncationopt( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() ); + extend_truncationopt( truncation_, nb_vordiv_fields, divergence_spectra, divergence_spectra_extended.data() ); + + // call vd2uv to compute u and v in spectral space + std::vector U_ext( nb_vordiv_spec_ext, 0. ); + std::vector V_ext( nb_vordiv_spec_ext, 0. ); + trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) ); + vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), + divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); + + // perform spectral transform to compute all fields in grid point space + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), + gp_fields + nb_gp * nb_vordiv_fields, config ); + invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. 
+} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt::dirtrans( const FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, + const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], + const eckit::Configuration& ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], + double divergence_spectra[], const eckit::Configuration& ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. +} + +// -------------------------------------------------------------------------------------------------------------------- + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt/TransLocalopt.h b/src/atlas/trans/localopt/TransLocalopt.h new file mode 100644 index 000000000..7b4a4d202 --- /dev/null +++ b/src/atlas/trans/localopt/TransLocalopt.h @@ -0,0 +1,121 @@ +/* + * (C) Copyright 2013 ECMWF. 
+ * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#pragma once + +#include + +#include "atlas/grid/Grid.h" +#include "atlas/trans/Trans.h" + +//----------------------------------------------------------------------------- +// Forward declarations + +namespace atlas { +class Field; +class FieldSet; +} // namespace atlas + +//----------------------------------------------------------------------------- + +namespace atlas { +namespace trans { + +//----------------------------------------------------------------------------- + +/// @class TransLocalopt +/// +/// Localopt spherical harmonics transformations to any grid +/// Optimisations are present for structured grids +/// For global grids, please consider using TransIFS instead. +/// +/// @todo: +/// - support multiple fields +/// - support atlas::Field and atlas::FieldSet based on function spaces +/// +/// @note: Direct transforms are not implemented and cannot be unless +/// the grid is global. There are no plans to support this at the moment. 
+class TransLocalopt : public trans::TransImpl { +public: + TransLocalopt( const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransLocalopt( const Cache&, const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() ); + + virtual ~TransLocalopt(); + + virtual int truncation() const override { return truncation_; } + virtual size_t spectralCoefficients() const override { return ( truncation_ + 1 ) * ( truncation_ + 2 ); } + + virtual const Grid& grid() const override { return grid_; } + + virtual void invtrans( const Field& spfield, Field& gpfield, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void invtrans( const FieldSet& spfields, FieldSet& gpfields, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void invtrans_grad( const Field& spfield, Field& gradfield, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void invtrans_grad( const FieldSet& spfields, FieldSet& gradfields, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind, + const eckit::Configuration& = util::NoConfig() ) const override; + + // -- IFS style API -- + + virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, + const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[], + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], + const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& = util::NoConfig() ) const override; + + // -- NOT SUPPORTED -- // + + virtual 
void dirtrans( const Field& gpfield, Field& spfield, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void dirtrans( const FieldSet& gpfields, FieldSet& spfields, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], + double divergence_spectra[], const eckit::Configuration& = util::NoConfig() ) const override; + +private: + const double* legendre_data( int j ) const { return legendre_.data() + legendre_begin_[j]; } + double* legendre_data( int j ) { return legendre_.data() + legendre_begin_[j]; } + + void invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, + const double scalar_spectra[], double gp_fields[], + const eckit::Configuration& = util::NoConfig() ) const; + +private: + Grid grid_; + int truncation_; + bool precompute_; + std::vector legendre_; + std::vector legendre_begin_; +}; + +//----------------------------------------------------------------------------- + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt/VorDivToUVLocalopt.cc b/src/atlas/trans/localopt/VorDivToUVLocalopt.cc new file mode 100644 index 000000000..8ed6ed09c --- /dev/null +++ b/src/atlas/trans/localopt/VorDivToUVLocalopt.cc @@ -0,0 +1,184 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+ * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "atlas/trans/localopt/VorDivToUVLocalopt.h" +#include // for std::sqrt +#include "atlas/functionspace/Spectral.h" +#include "atlas/runtime/Log.h" +#include "atlas/util/Earth.h" + +using atlas::FunctionSpace; +using atlas::functionspace::Spectral; + +namespace atlas { +namespace trans { + +namespace { +static VorDivToUVBuilder builder( "localopt" ); +} + +// -------------------------------------------------------------------------------------------------------------------- +// Routine to copy spectral data into internal storage form of IFS trans +// Ported to C++ by: Andreas Mueller *ECMWF* +void prfi1bopt( const int truncation, + const int km, // zonal wavenumber + const int nb_fields, // number of fields + const double rspec[], // spectral data + double pia[] ) // spectral components in data layout of trans library +{ + int ilcm = truncation + 1 - km, ioff = ( 2 * truncation - km + 3 ) * km, + nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2; + for ( int j = 1; j <= ilcm; j++ ) { + int inm = ioff + ( ilcm - j ) * 2; + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int ir = 2 * jfld, ii = ir + 1; + pia[ir * nlei1 + j + 1] = rspec[inm * nb_fields + jfld]; + pia[ii * nlei1 + j + 1] = rspec[( inm + 1 ) * nb_fields + jfld]; + } + } + + for ( int jfld = 0; jfld < 2 * nb_fields; jfld++ ) { + pia[jfld * nlei1] = 0.; + pia[jfld * nlei1 + 1] = 0.; + pia[jfld * nlei1 + ilcm + 2] = 0.; + } +} + +// -------------------------------------------------------------------------------------------------------------------- +// Routine to compute spectral velocities (*cos(latitude)) out of spectral +// vorticity and divergence +// Reference: +// ECMWF Research Department documentation of the IFS +// Temperton, 1991, MWR 119 p1303 +// Ported to C++ by: Andreas 
Mueller *ECMWF* +void vd2uvopt( const int truncation, // truncation + const int km, // zonal wavenumber + const int nb_vordiv_fields, // number of vorticity and divergence fields + const double vorticity_spectra[], // spectral data of vorticity + const double divergence_spectra[], // spectral data of divergence + double U[], // spectral data of U + double V[], // spectral data of V + const eckit::Configuration& config ) { + int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2; + + // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991] + std::vector repsnm( ( truncation + 1 ) * ( truncation + 6 ) / 2 ); + int idx = 0; + for ( int jm = 0; jm <= truncation; ++jm ) { + for ( int jn = jm; jn <= truncation + 2; ++jn, ++idx ) { + repsnm[idx] = std::sqrt( ( jn * jn - jm * jm ) / ( 4. * jn * jn - 1. ) ); + } + } + repsnm[0] = 0.; + + // rlapin: constant factor from eq.(2.2) and (2.3) in [Temperton 1991] + double ra = util::Earth::radius(); + std::vector rlapin( truncation + 3 ); + for ( int jn = 1; jn <= truncation + 2; ++jn ) { + rlapin[jn] = -ra * ra / ( jn * ( jn + 1. 
) ); + } + rlapin[0] = 0.; + + // inverse the order of repsnm and rlapin for improved accuracy + std::vector zepsnm( truncation + 6 ); + std::vector zlapin( truncation + 6 ); + std::vector zn( truncation + 6 ); + for ( int jn = km - 1; jn <= truncation + 2; ++jn ) { + int ij = truncation + 3 - jn; + if ( jn >= 0 ) { + zlapin[ij] = rlapin[jn]; + if ( jn < km ) { zepsnm[ij] = 0.; } + else { + zepsnm[ij] = repsnm[jn + ( 2 * truncation - km + 5 ) * km / 2]; + } + } + else { + zlapin[ij] = 0.; + zepsnm[ij] = 0.; + } + zn[ij] = jn; + } + zn[0] = truncation + 3; + + // copy spectral data into internal trans storage: + std::vector rvor( 2 * nb_vordiv_fields * nlei1 ); + std::vector rdiv( 2 * nb_vordiv_fields * nlei1 ); + std::vector ru( 2 * nb_vordiv_fields * nlei1 ); + std::vector rv( 2 * nb_vordiv_fields * nlei1 ); + prfi1bopt( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() ); + prfi1bopt( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() ); + + // compute eq.(2.12) and (2.13) in [Temperton 1991]: + if ( km == 0 ) { + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + int ir = 2 * jfld * nlei1 - 1; + for ( int ji = 2; ji < truncation + 4 - km; ++ji ) { + double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1]; + double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1]; + ru[ir + ji] = +psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1]; + rv[ir + ji] = -psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1]; + } + } + } + else { + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + int ir = 2 * jfld * nlei1 - 1, ii = ir + nlei1; + for ( int ji = 2; ji < truncation + 4 - km; ++ji ) { + double chiIm = km * zlapin[ji]; + double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1]; + double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1]; + ru[ir + ji] = -chiIm * rdiv[ii + ji] + psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1]; + ru[ii + ji] = +chiIm * rdiv[ir + ji] + psiM1 * rvor[ii + ji + 1] - psiP1 * rvor[ii + ji - 
1]; + rv[ir + ji] = -chiIm * rvor[ii + ji] - psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1]; + rv[ii + ji] = +chiIm * rvor[ir + ji] - psiM1 * rdiv[ii + ji + 1] + psiP1 * rdiv[ii + ji - 1]; + } + } + } + + // copy data from internal storage back to external spectral data: + int ilcm = truncation - km; + int ioff = ( 2 * truncation - km + 3 ) * km; + // ioff: start index of zonal wavenumber km in spectral data + double za_r = 1. / util::Earth::radius(); + for ( int j = 0; j <= ilcm; ++j ) { + // ilcm-j = total wavenumber + int inm = ioff + ( ilcm - j ) * 2; + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + int ir = 2 * jfld * nlei1, ii = ir + nlei1; + int idx = inm * nb_vordiv_fields + jfld; + // real part: + U[idx] = ru[ir + j + 2] * za_r; + V[idx] = rv[ir + j + 2] * za_r; + idx += nb_vordiv_fields; + // imaginary part: + U[idx] = ru[ii + j + 2] * za_r; + V[idx] = rv[ii + j + 2] * za_r; + } + } +} + +void VorDivToUVLocalopt::execute( const int nb_coeff, const int nb_fields, const double vorticity[], + const double divergence[], double U[], double V[], + const eckit::Configuration& config ) const { + for ( int jm = 0; jm <= truncation_; ++jm ) { + vd2uvopt( truncation_, jm, nb_fields, vorticity, divergence, U, V, config ); + } +} + +VorDivToUVLocalopt::VorDivToUVLocalopt( const int truncation, const eckit::Configuration& config ) : + truncation_( truncation ) {} + +VorDivToUVLocalopt::VorDivToUVLocalopt( const FunctionSpace& fs, const eckit::Configuration& config ) : + truncation_( Spectral( fs ).truncation() ) {} + +VorDivToUVLocalopt::~VorDivToUVLocalopt() {} + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt/VorDivToUVLocalopt.h b/src/atlas/trans/localopt/VorDivToUVLocalopt.h new file mode 100644 index 000000000..4bb7eda88 --- /dev/null +++ b/src/atlas/trans/localopt/VorDivToUVLocalopt.h @@ -0,0 +1,67 @@ +/* + * (C) Copyright 2013 ECMWF. 
+ * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#pragma once + +#include "atlas/trans/VorDivToUV.h" + +//----------------------------------------------------------------------------- +// Forward declarations + +namespace atlas { +class FunctionSpace; +} + +//----------------------------------------------------------------------------- + +namespace atlas { +namespace trans { + +//----------------------------------------------------------------------------- + +class VorDivToUVLocalopt : public trans::VorDivToUVImpl { +public: + VorDivToUVLocalopt( const FunctionSpace&, const eckit::Configuration& = util::NoConfig() ); + VorDivToUVLocalopt( int truncation, const eckit::Configuration& = util::NoConfig() ); + + virtual ~VorDivToUVLocalopt(); + + virtual int truncation() const override { return truncation_; } + + // pure virtual interface + + // -- IFS style API -- + // These fields have special interpretation required. You need to know what + // you're doing. + // See IFS trans library. + + /*! 
+ * @brief Compute spectral wind (U/V) from spectral vorticity/divergence + * + * U = u*cos(lat) + * V = v*cos(lat) + * + * @param nb_fields [in] Number of fields + * @param vorticity [in] Spectral vorticity + * @param divergence [in] Spectral divergence + * @param U [out] Spectral wind U = u*cos(lat) + * @param V [out] Spectral wind V = v*cos(lat) + */ + virtual void execute( const int nb_coeff, const int nb_fields, const double vorticity[], const double divergence[], + double U[], double V[], const eckit::Configuration& = util::NoConfig() ) const override; + +private: + int truncation_; +}; + +// ------------------------------------------------------------------ + +} // namespace trans +} // namespace atlas diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 03b4536b0..5991f2a26 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -721,6 +721,7 @@ CASE( "test_trans_vordiv_with_translib" ) { trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) ); #endif trans::Trans transLocal( g, trc, util::Config( "type", "local" ) ); + trans::Trans transLocalopt( g, trc, util::Config( "type", "localopt" ) ); functionspace::Spectral spectral( trc ); functionspace::StructuredColumns gridpoints( g ); @@ -733,6 +734,7 @@ CASE( "test_trans_vordiv_with_translib" ) { std::vector rspecg( 2 * N ); std::vector gp( nb_all * g.size() ); std::vector rgp( nb_all * g.size() ); + std::vector rgpopt( nb_all * g.size() ); std::vector rgp_analytic( g.size() ); int icase = 0; @@ -768,6 +770,7 @@ CASE( "test_trans_vordiv_with_translib" ) { for ( int j = 0; j < nb_all * g.size(); j++ ) { gp[j] = 0.; rgp[j] = 0.; + rgpopt[j] = 0.; } for ( int j = 0; j < g.size(); j++ ) { rgp_analytic[j] = 0.; @@ -779,19 +782,27 @@ CASE( "test_trans_vordiv_with_translib" ) { EXPECT_NO_THROW( transLocal.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), div.data(), rgp.data() ) ); + EXPECT_NO_THROW( transLocalopt.invtrans( 
nb_scalar, sp.data(), nb_vordiv, vor.data(), + div.data(), rgpopt.data() ) ); + int pos = ( ivar_out * nb_vordiv + jfld ); double rms_gen = compute_rms( g.size(), rgp.data() + pos * g.size(), rgp_analytic.data() ); - if ( rms_gen >= tolerance ) { + double rms_genopt = + compute_rms( g.size(), rgpopt.data() + pos * g.size(), rgp_analytic.data() ); + + if ( !(rms_gen < tolerance) || !(rms_genopt < tolerance) ) { Log::info() << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; ATLAS_DEBUG_VAR( rms_gen ); + ATLAS_DEBUG_VAR( rms_genopt ); ATLAS_DEBUG_VAR( tolerance ); } EXPECT( rms_gen < tolerance ); + EXPECT( rms_genopt < tolerance ); icase++; #if ATLAS_HAVE_TRANS @@ -802,11 +813,12 @@ CASE( "test_trans_vordiv_with_translib" ) { double rms_diff = compute_rms( g.size(), rgp.data() + pos * g.size(), gp.data() + pos * g.size() ); EXPECT( rms_trans < tolerance ); - if ( rms_trans >= tolerance || rms_diff >= tolerance ) { + if ( !(rms_trans < tolerance) || !(rms_diff < tolerance) ) { Log::info() << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; ATLAS_DEBUG_VAR( rms_gen ); + ATLAS_DEBUG_VAR( rms_genopt ); ATLAS_DEBUG_VAR( rms_trans ); ATLAS_DEBUG_VAR( rms_diff ); ATLAS_DEBUG_VAR( tolerance ); From c12f3ef62ace7136e26722f6bf81bbffcd6a12da Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 27 Feb 2018 18:03:56 +0000 Subject: [PATCH 004/123] added functionality to compute Legendre polynomials for all latitudes. This is needed to compute them directly in the order in which they will be used by dgemm. 
--- .../trans/localopt/LegendrePolynomialsopt.cc | 172 +++++++++--------- .../trans/localopt/LegendrePolynomialsopt.h | 7 +- src/atlas/trans/localopt/TransLocalopt.cc | 6 +- 3 files changed, 96 insertions(+), 89 deletions(-) diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc index 948cf90a1..78ebdf8ee 100644 --- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc +++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc @@ -21,17 +21,20 @@ namespace trans { //----------------------------------------------------------------------------- void compute_legendre_polynomialsopt( - const size_t trc, // truncation (in) - const double lat, // latitude in radians (in) - double legpol[] ) // values of associated Legendre functions, size (trc+1)*trc/2 (out) + const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double legpol[] ) // values of associated Legendre functions, size (trc+1)*trc/2*nlats (out) { - array::ArrayT idxmn_( trc + 1, trc + 1 ); - array::ArrayView idxmn = array::make_view( idxmn_ ); + array::ArrayT idxmn_( trc + 1, trc + 1, nlats ); + array::ArrayView idxmn = array::make_view( idxmn_ ); int j = 0; for ( int jm = 0; jm <= trc; ++jm ) { - for ( int jn = jm; jn <= trc; ++jn ) { - idxmn( jm, jn ) = j++; + for ( int jlat = 0; jlat < nlats; ++jlat ) { + for ( int jn = jm; jn <= trc; ++jn ) { + idxmn( jm, jn, jlat ) = j++; + } } } @@ -59,90 +62,93 @@ void compute_legendre_polynomialsopt( } } - // -------------------- - // 1. First two columns - // -------------------- - double zdlx1 = ( M_PI_2 - lat ); // theta - double zdlx = std::cos( zdlx1 ); // cos(theta) - double zdlsita = std::sqrt( 1. 
- zdlx * zdlx ); // sin(theta) (this is how trans library does it) - - legpol[0] = 1.; - double zdl1sita = 0.; - - // if we are less than 1 meter from the pole, - if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits::epsilon() ) ) { - zdlx = 1.; - zdlsita = 0.; - } - else { - zdl1sita = 1. / zdlsita; - } + for ( int jlat = 0; jlat < nlats; ++jlat ) { + // -------------------- + // 1. First two columns + // -------------------- + double lat = lats[jlat]; + double zdlx1 = ( M_PI_2 - lat ); // theta + double zdlx = std::cos( zdlx1 ); // cos(theta) + double zdlsita = std::sqrt( 1. - zdlx * zdlx ); // sin(theta) (this is how trans library does it) + + legpol[0] = 1.; + double zdl1sita = 0.; + + // if we are less than 1 meter from the pole, + if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits::epsilon() ) ) { + zdlx = 1.; + zdlsita = 0.; + } + else { + zdl1sita = 1. / zdlsita; + } - // ordinary Legendre polynomials from series expansion - // --------------------------------------------------- - - // even N - for ( int jn = 2; jn <= trc; jn += 2 ) { - double zdlk = 0.5 * zfn( jn, 0 ); - double zdlldn = 0.0; - double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); - // represented by only even k - for ( int jk = 2; jk <= jn; jk += 2 ) { - // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); - // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); + // ordinary Legendre polynomials from series expansion + // --------------------------------------------------- + + // even N + for ( int jn = 2; jn <= trc; jn += 2 ) { + double zdlk = 0.5 * zfn( jn, 0 ); + double zdlldn = 0.0; + double zdsq = 1. / std::sqrt( jn * ( jn + 1. 
) ); + // represented by only even k + for ( int jk = 2; jk <= jn; jk += 2 ) { + // normalised ordinary Legendre polynomial == \overbar{P_n}^0 + zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); + // normalised associated Legendre polynomial == \overbar{P_n}^1 + zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); + } + legpol[idxmn( 0, jn, jlat )] = zdlk; + legpol[idxmn( 1, jn, jlat )] = zdlldn; } - legpol[idxmn( 0, jn )] = zdlk; - legpol[idxmn( 1, jn )] = zdlldn; - } - // odd N - for ( int jn = 1; jn <= trc; jn += 2 ) { - zfn( jn, 0 ) = 0.; - double zdlk = 0.; - double zdlldn = 0.0; - double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); - // represented by only even k - for ( int jk = 1; jk <= jn; jk += 2 ) { - // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); - // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); + // odd N + for ( int jn = 1; jn <= trc; jn += 2 ) { + zfn( jn, 0 ) = 0.; + double zdlk = 0.; + double zdlldn = 0.0; + double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); + // represented by only even k + for ( int jk = 1; jk <= jn; jk += 2 ) { + // normalised ordinary Legendre polynomial == \overbar{P_n}^0 + zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); + // normalised associated Legendre polynomial == \overbar{P_n}^1 + zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); + } + legpol[idxmn( 0, jn, jlat )] = zdlk; + legpol[idxmn( 1, jn, jlat )] = zdlldn; } - legpol[idxmn( 0, jn )] = zdlk; - legpol[idxmn( 1, jn )] = zdlldn; - } - // -------------------------------------------------------------- - // 2. Diagonal (the terms 0,0 and 1,1 have already been computed) - // Belousov, equation (23) - // -------------------------------------------------------------- + // -------------------------------------------------------------- + // 2. 
Diagonal (the terms 0,0 and 1,1 have already been computed) + // Belousov, equation (23) + // -------------------------------------------------------------- - double zdls = zdl1sita * std::numeric_limits::min(); - for ( int jn = 2; jn <= trc; ++jn ) { - double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) ); + double zdls = zdl1sita * std::numeric_limits::min(); + for ( int jn = 2; jn <= trc; ++jn ) { + double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) ); - legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq; - if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0; - } + legpol[idxmn( jn, jn, jlat )] = legpol[idxmn( jn - 1, jn - 1, jlat )] * zdlsita * sq; + if ( std::abs( legpol[idxmn( jn, jn, jlat )] ) < zdls ) legpol[idxmn( jn, jn, jlat )] = 0.0; + } - // --------------------------------------------- - // 3. General recurrence (Belousov, equation 17) - // --------------------------------------------- - - for ( int jn = 3; jn <= trc; ++jn ) { - for ( int jm = 2; jm < jn; ++jm ) { - double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) ); // numerator of c in Belousov - double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of c in Belousov - double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) ); // numerator of d in Belousov - double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of d in Belousov - double en = ( ( 2. * jn + 1. ) * ( jn - jm ) ); // numerator of e in Belousov - double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) ); // denominator of e in Belousov - - legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] - - std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx + - std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx; + // --------------------------------------------- + // 3. 
General recurrence (Belousov, equation 17) + // --------------------------------------------- + + for ( int jn = 3; jn <= trc; ++jn ) { + for ( int jm = 2; jm < jn; ++jm ) { + double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) ); // numerator of c in Belousov + double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of c in Belousov + double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) ); // numerator of d in Belousov + double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of d in Belousov + double en = ( ( 2. * jn + 1. ) * ( jn - jm ) ); // numerator of e in Belousov + double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) ); // denominator of e in Belousov + + legpol[idxmn( jm, jn, jlat )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2, jlat )] - + std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1, jlat )] * zdlx + + std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1, jlat )] * zdlx; + } } } } diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.h b/src/atlas/trans/localopt/LegendrePolynomialsopt.h index 1162eb6f4..e2fd7db8f 100644 --- a/src/atlas/trans/localopt/LegendrePolynomialsopt.h +++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.h @@ -33,9 +33,10 @@ namespace trans { // Andreas Mueller *ECMWF* // void compute_legendre_polynomialsopt( - const size_t trc, // truncation (in) - const double lat, // latitude in radians (in) - double legpol[] ); // values of associated Legendre functions, size (trc+1)*trc/2 (out) + const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double legpol[] ); // values of associated Legendre functions, size (trc+1)*trc/2*nlats (out) // -------------------------------------------------------------------------------------------------------------------- diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index 
0a1477519..c71826e32 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -61,7 +61,7 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t for ( size_t j = 0; j < g.ny(); ++j ) { double lat = g.y( j ) * util::Constants::degreesToRadians(); - compute_legendre_polynomialsopt( truncation_ + 1, lat, legendre_data( j ) ); + compute_legendre_polynomialsopt( truncation_ + 1, 1, &lat, legendre_data( j ) ); } } else { @@ -76,7 +76,7 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t int j( 0 ); for ( PointXY p : grid_.xy() ) { double lat = p.y() * util::Constants::degreesToRadians(); - compute_legendre_polynomialsopt( truncation_ + 1, lat, legendre_data( j++ ) ); + compute_legendre_polynomialsopt( truncation_ + 1, 1, &lat, legendre_data( j++ ) ); } } } @@ -161,7 +161,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field if ( precompute_ ) { return legendre_data( j ); } else { recomputed_legendre_.resize( legendre_size( truncation ) ); - compute_legendre_polynomialsopt( truncation, lat, recomputed_legendre_.data() ); + compute_legendre_polynomialsopt( truncation, 1, &lat, recomputed_legendre_.data() ); return recomputed_legendre_.data(); } }; From cceafc44c077100d81e0c26c7c58af625be5eebc Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 28 Feb 2018 11:24:47 +0000 Subject: [PATCH 005/123] clang-format applied (somehow didn't work before) --- .../trans/localopt/FourierTransformsopt.cc | 20 +++++++------- .../trans/localopt/LegendrePolynomialsopt.cc | 6 ++--- .../trans/localopt/LegendreTransformsopt.cc | 17 ++++++------ src/atlas/trans/localopt/TransLocalopt.cc | 10 +++---- .../trans/localopt/VorDivToUVLocalopt.cc | 26 +++++++++---------- 5 files changed, 40 insertions(+), 39 deletions(-) diff --git a/src/atlas/trans/localopt/FourierTransformsopt.cc b/src/atlas/trans/localopt/FourierTransformsopt.cc index 
ede59cea8..d854f5aaa 100644 --- a/src/atlas/trans/localopt/FourierTransformsopt.cc +++ b/src/atlas/trans/localopt/FourierTransformsopt.cc @@ -21,11 +21,11 @@ namespace trans { //----------------------------------------------------------------------------- void invtrans_fourieropt( const size_t trcFT, - const double lon, // longitude in radians (in) - const int nb_fields, // Number of fields - const double rlegReal[], // associated Legendre functions, size (trc+1)*trc/2 (in) - const double rlegImag[], // associated Legendre functions, size (trc+1)*trc/2 (in) - double rgp[] ) // gridpoint + const double lon, // longitude in radians (in) + const int nb_fields, // Number of fields + const double rlegReal[], // associated Legendre functions, size (trc+1)*trc/2 (in) + const double rlegImag[], // associated Legendre functions, size (trc+1)*trc/2 (in) + double rgp[] ) // gridpoint { for ( int jfld = 0; jfld < nb_fields; ++jfld ) { rgp[jfld] = 0.; @@ -43,11 +43,11 @@ void invtrans_fourieropt( const size_t trcFT, } int fourier_truncationopt( const int truncation, // truncation - const int nx, // number of longitudes - const int nxmax, // maximum nx - const int ndgl, // number of latitudes - const double lat, // latitude in radian - const bool fullgrid ) { // regular grid + const int nx, // number of longitudes + const int nxmax, // maximum nx + const int ndgl, // number of latitudes + const double lat, // latitude in radian + const bool fullgrid ) { // regular grid int trc = truncation; int trclin = ndgl - 1; int trcquad = ndgl * 2 / 3 - 1; diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc index 78ebdf8ee..b935c1bd8 100644 --- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc +++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc @@ -66,7 +66,7 @@ void compute_legendre_polynomialsopt( // -------------------- // 1. 
First two columns // -------------------- - double lat = lats[jlat]; + double lat = lats[jlat]; double zdlx1 = ( M_PI_2 - lat ); // theta double zdlx = std::cos( zdlx1 ); // cos(theta) double zdlsita = std::sqrt( 1. - zdlx * zdlx ); // sin(theta) (this is how trans library does it) @@ -146,8 +146,8 @@ void compute_legendre_polynomialsopt( double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) ); // denominator of e in Belousov legpol[idxmn( jm, jn, jlat )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2, jlat )] - - std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1, jlat )] * zdlx + - std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1, jlat )] * zdlx; + std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1, jlat )] * zdlx + + std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1, jlat )] * zdlx; } } } diff --git a/src/atlas/trans/localopt/LegendreTransformsopt.cc b/src/atlas/trans/localopt/LegendreTransformsopt.cc index e88feda69..29dd5dad4 100644 --- a/src/atlas/trans/localopt/LegendreTransformsopt.cc +++ b/src/atlas/trans/localopt/LegendreTransformsopt.cc @@ -17,14 +17,15 @@ namespace trans { //----------------------------------------------------------------------------- -void invtrans_legendreopt( const size_t trc, // truncation (in) - const size_t trcFT, // truncation for Fourier transformation (in) - const size_t trcLP, // truncation of Legendre polynomials data legpol. Needs to be >= trc (in) - const double legpol[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) - const int nb_fields, // number of fields - const double spec[], // spectral data, size (trc+1)*trc (in) - double leg_real[], // values of associated Legendre functions, size (trc+1)*trc/2 (out) - double leg_imag[] ) // values of associated Legendre functions, size (trc+1)*trc/2 (out) +void invtrans_legendreopt( + const size_t trc, // truncation (in) + const size_t trcFT, // truncation for Fourier transformation (in) + const size_t trcLP, // truncation of Legendre polynomials data legpol. 
Needs to be >= trc (in) + const double legpol[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) + const int nb_fields, // number of fields + const double spec[], // spectral data, size (trc+1)*trc (in) + double leg_real[], // values of associated Legendre functions, size (trc+1)*trc/2 (out) + double leg_imag[] ) // values of associated Legendre functions, size (trc+1)*trc/2 (out) { // Legendre transformation: int k = 0, klp = 0; diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index c71826e32..11de7630c 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -182,13 +182,13 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field // Legendre transform: invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, j ), nb_fields, scalar_spectra, - legReal.data(), legImag.data() ); + legReal.data(), legImag.data() ); // Fourier transform: for ( size_t i = 0; i < g.nx( j ); ++i ) { double lon = g.x( i, j ) * util::Constants::degreesToRadians(); invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(), - gp_tmp.data() + ( nb_fields * idx ) ); + gp_tmp.data() + ( nb_fields * idx ) ); for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); } @@ -206,11 +206,11 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field // Legendre transform: invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, - legReal.data(), legImag.data() ); + legReal.data(), legImag.data() ); // Fourier transform: invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(), - gp_tmp.data() + ( nb_fields * idx ) ); + gp_tmp.data() + ( nb_fields * idx ) ); for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); } @@ -233,7 +233,7 @@ void 
TransLocalopt::invtrans( const int nb_vordiv_fields, const double vorticity } void extend_truncationopt( const int old_truncation, const int nb_fields, const double old_spectra[], - double new_spectra[] ) { + double new_spectra[] ) { int k = 0, k_old = 0; for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber diff --git a/src/atlas/trans/localopt/VorDivToUVLocalopt.cc b/src/atlas/trans/localopt/VorDivToUVLocalopt.cc index 8ed6ed09c..032aa6073 100644 --- a/src/atlas/trans/localopt/VorDivToUVLocalopt.cc +++ b/src/atlas/trans/localopt/VorDivToUVLocalopt.cc @@ -28,10 +28,10 @@ static VorDivToUVBuilder builder( "localopt" ); // Routine to copy spectral data into internal storage form of IFS trans // Ported to C++ by: Andreas Mueller *ECMWF* void prfi1bopt( const int truncation, - const int km, // zonal wavenumber - const int nb_fields, // number of fields - const double rspec[], // spectral data - double pia[] ) // spectral components in data layout of trans library + const int km, // zonal wavenumber + const int nb_fields, // number of fields + const double rspec[], // spectral data + double pia[] ) // spectral components in data layout of trans library { int ilcm = truncation + 1 - km, ioff = ( 2 * truncation - km + 3 ) * km, nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2; @@ -59,13 +59,13 @@ void prfi1bopt( const int truncation, // Temperton, 1991, MWR 119 p1303 // Ported to C++ by: Andreas Mueller *ECMWF* void vd2uvopt( const int truncation, // truncation - const int km, // zonal wavenumber - const int nb_vordiv_fields, // number of vorticity and divergence fields - const double vorticity_spectra[], // spectral data of vorticity - const double divergence_spectra[], // spectral data of divergence - double U[], // spectral data of U - double V[], // spectral data of V - const eckit::Configuration& config ) { + const int km, // zonal wavenumber + const int nb_vordiv_fields, // 
number of vorticity and divergence fields + const double vorticity_spectra[], // spectral data of vorticity + const double divergence_spectra[], // spectral data of divergence + double U[], // spectral data of U + double V[], // spectral data of V + const eckit::Configuration& config ) { int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2; // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991] @@ -165,8 +165,8 @@ void vd2uvopt( const int truncation, // truncation } void VorDivToUVLocalopt::execute( const int nb_coeff, const int nb_fields, const double vorticity[], - const double divergence[], double U[], double V[], - const eckit::Configuration& config ) const { + const double divergence[], double U[], double V[], + const eckit::Configuration& config ) const { for ( int jm = 0; jm <= truncation_; ++jm ) { vd2uvopt( truncation_, jm, nb_fields, vorticity, divergence, U, V, config ); } From 9bf7fdece66f5e59351fb7032558c78692562b76 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 1 Mar 2018 16:46:00 +0000 Subject: [PATCH 006/123] Legendre transformation works with dgemm via eckit --- .../trans/localopt/LegendrePolynomialsopt.cc | 11 +- src/atlas/trans/localopt/TransLocalopt.cc | 185 +++++++++++++----- src/atlas/trans/localopt/TransLocalopt.h | 3 +- src/tests/trans/test_transgeneral.cc | 14 +- 4 files changed, 150 insertions(+), 63 deletions(-) diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc index b935c1bd8..25fda7d04 100644 --- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc +++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc @@ -71,9 +71,9 @@ void compute_legendre_polynomialsopt( double zdlx = std::cos( zdlx1 ); // cos(theta) double zdlsita = std::sqrt( 1. 
- zdlx * zdlx ); // sin(theta) (this is how trans library does it) - legpol[0] = 1.; - double zdl1sita = 0.; + legpol[idxmn( 0, 0, jlat )] = 1.; + double zdl1sita = 0.; // if we are less than 1 meter from the pole, if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits::epsilon() ) ) { zdlx = 1.; @@ -150,6 +150,13 @@ void compute_legendre_polynomialsopt( std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1, jlat )] * zdlx; } } + + // take factor 2 for m > 0 into account: + for ( int jm = 1; jm <= trc; ++jm ) { + for ( int jn = jm; jn <= trc; ++jn ) { + legpol[idxmn( jm, jn, jlat )] *= 2.; + } + } } } diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index 11de7630c..8859a7c0a 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -15,10 +15,13 @@ #include "atlas/runtime/ErrorHandling.h" #include "atlas/runtime/Log.h" #include "atlas/trans/VorDivToUV.h" +#include "atlas/trans/local/LegendrePolynomials.h" #include "atlas/trans/localopt/FourierTransformsopt.h" #include "atlas/trans/localopt/LegendrePolynomialsopt.h" #include "atlas/trans/localopt/LegendreTransformsopt.h" #include "atlas/util/Constants.h" +#include "eckit/linalg/LinearAlgebra.h" +#include "eckit/linalg/Matrix.h" namespace atlas { namespace trans { @@ -47,37 +50,47 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t grid_( grid ), truncation_( truncation ), precompute_( config.getBool( "precompute", true ) ) { - if ( precompute_ ) { - if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { - ATLAS_TRACE( "Precompute legendre structured opt" ); - grid::StructuredGrid g( grid_ ); - size_t size( 0 ); - legendre_begin_.resize( g.ny() ); - for ( size_t j = 0; j < g.ny(); ++j ) { - legendre_begin_[j] = size; - size += legendre_size( truncation_ + 1 ); - } - legendre_.resize( size ); - - for ( size_t j = 0; j < g.ny(); ++j ) { - double lat = g.y( j ) * 
util::Constants::degreesToRadians(); - compute_legendre_polynomialsopt( truncation_ + 1, 1, &lat, legendre_data( j ) ); - } + ATLAS_TRACE( "Precompute legendre opt" ); + int nlats, nlons; + if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { + grid::StructuredGrid g( grid_ ); + nlats = g.ny(); + nlons = g.nxmax(); + } + else { + nlats = grid_.size(); + nlons = grid_.size(); + } + std::vector lats( nlats ); + std::vector lons( nlons ); + if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { + grid::StructuredGrid g( grid_ ); + // TODO: remove legendre_begin and legendre_data (only legendre_ should be needed) + for ( size_t j = 0; j < nlats; ++j ) { + lats[j] = g.y( j ) * util::Constants::degreesToRadians(); } - else { - ATLAS_TRACE( "Precompute legendre unstructured opt" ); - size_t size( 0 ); - legendre_begin_.resize( grid_.size() ); - for ( size_t j = 0; j < grid_.size(); ++j ) { - legendre_begin_[j] = size; - size += legendre_size( truncation_ + 1 ); - } - legendre_.resize( size ); - int j( 0 ); - for ( PointXY p : grid_.xy() ) { - double lat = p.y() * util::Constants::degreesToRadians(); - compute_legendre_polynomialsopt( truncation_ + 1, 1, &lat, legendre_data( j++ ) ); - } + for ( size_t j = 0; j < nlons; ++j ) { + lons[j] = g.x( 0, j ) * util::Constants::degreesToRadians(); + } + } + else { + int j( 0 ); + for ( PointXY p : grid_.xy() ) { + lats[j++] = p.y() * util::Constants::degreesToRadians(); + lons[j++] = p.x() * util::Constants::degreesToRadians(); + } + } + // precomputations for Legendre polynomials: + legendre_.resize( legendre_size( truncation_ + 1 ) * nlats ); + compute_legendre_polynomialsopt( truncation_ + 1, nlats, lats.data(), legendre_.data() ); + + // precomputations for Fourier transformations: + fourier_.resize( 2 * ( truncation_ + 1 ) * nlons ); + int idx = 0; + for ( int jlon = 0; jlon < nlons; jlon++ ) { + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + fourier_[idx++] = +std::cos( jm * lons[jlon] ); // real 
part + fourier_[idx++] = -std::sin( jm * lons[jlon] ); // imaginary part } } } @@ -153,37 +166,99 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field if ( nb_scalar_fields > 0 ) { int nb_fields = nb_scalar_fields; - // Depending on "precompute_legendre_", we have to compute the - // legendre polynomials for every latitute - std::vector recomputed_legendre_; - - auto legPol = [&]( double lat, int j ) -> const double* { - if ( precompute_ ) { return legendre_data( j ); } - else { - recomputed_legendre_.resize( legendre_size( truncation ) ); - compute_legendre_polynomialsopt( truncation, 1, &lat, recomputed_legendre_.data() ); - return recomputed_legendre_.data(); - } - }; - - // Temporary storage for legendre space + std::vector gp_tmp( nb_fields * grid_.size(), 0. ); std::vector legReal( nb_fields * ( truncation + 1 ) ); std::vector legImag( nb_fields * ( truncation + 1 ) ); - std::vector gp_tmp( nb_fields * grid_.size(), 0. ); + //eckit::linalg::LinearAlgebra::backend( "string" ) // might want to choose backend with this command // Transform if ( grid::StructuredGrid g = grid_ ) { ATLAS_TRACE( "invtrans_uv structured opt" ); + int size_fourier = nb_fields * 2 * g.ny(); + std::vector scl_fourier( size_fourier * ( truncation + 1 ) ); + + // Legendre transform: + for ( int jm = 0; jm <= truncation; jm++ ) { + int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; + eckit::linalg::Matrix A( eckit::linalg::Matrix( + const_cast( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) ); + eckit::linalg::Matrix B( legendre_.data() + noff * g.ny(), ns, g.ny() ); + eckit::linalg::Matrix C( scl_fourier.data() + jm * size_fourier, nb_fields * 2, g.ny() ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + + // Transposition in Fourier space: + std::vector scl_fourier_tp( size_fourier * ( truncation + 1 ) ); + { + int idx = 0; + for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { + for ( int jlat = 0; jlat < 
g.ny(); jlat++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int pos_tp = jfld + nb_fields * ( jlat + g.ny() * ( imag + 2 * ( jm ) ) ); + //int pos = jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) ); + scl_fourier_tp[pos_tp] = scl_fourier[idx++]; // = scl_fourier[pos] + } + } + } + } + } + + // Fourier transformation: + int idx = 0; for ( size_t j = 0; j < g.ny(); ++j ) { double lat = g.y( j ) * util::Constants::degreesToRadians(); double trcFT = fourier_truncationopt( truncation, g.nx( j ), g.nxmax(), g.ny(), lat, grid::RegularGrid( grid_ ) ); - // Legendre transform: - invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, j ), nb_fields, scalar_spectra, + std::vector legPol( legendre_size( truncation_ + 1 ) ); + compute_legendre_polynomials( truncation_ + 1, lat, legPol.data() ); + int idx1 = 0, idx2 = 0; + //for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { + // for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + // for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { + // if ( jlat == j ) { + // if ( jm > 0 ) { + // legPol[idx1] = 0.5 * legendre_[idx2]; + // //Log::info() << legPol[idx1] << " " << 0.5 * legendre_[idx2] << std::endl; + // if ( std::abs( legPol[idx1] - 0.5 * legendre_[idx2] ) > 1e-14 ) { + // Log::info() << "jm=" << jm << " jlat=" << jlat << " jn=" << jn << std::endl; + // } + // } + // else { + // legPol[idx1] = legendre_[idx2]; + // //Log::info() << legPol[idx1] << " " << legendre_[idx2] << std::endl; + // if ( std::abs( legPol[idx1] - legendre_[idx2] ) > 1e-14 ) { + // Log::info() << "jm=" << jm << " jlat=" << jlat << " jn=" << jn + // << " legPol=" << legPol[idx1] << " legendre=" << legendre_[idx2] + // << std::endl; + // } + // } + // idx1++; + // } + // idx2++; + // } + // } + //} + invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol.data(), nb_fields, scalar_spectra, legReal.data(), legImag.data() ); - + idx1 = 0; + for ( int jm = 0; jm <= truncation_ 
+ 1; jm++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int posReal = jfld + nb_fields * ( 2 * ( j + g.ny() * ( jm ) ) ); + if ( std::abs( legReal[idx1] - scl_fourier[posReal] ) > 1e-14 ) { + Log::info() << "jm=" << jm << " jlat=" << j << " jfld=" << jfld + << " real: " << legReal[idx1] << " " << scl_fourier[posReal] << std::endl; + } + int posImag = jfld + nb_fields * ( 1 + 2 * ( j + g.ny() * ( jm ) ) ); + if ( std::abs( legImag[idx1] - scl_fourier[posImag] ) > 1e-14 ) { + Log::info() << "jm=" << jm << " jlat=" << j << " jfld=" << jfld + << " imag: " << legImag[idx1] << " " << scl_fourier[posImag] << std::endl; + } + idx1++; + } + } // Fourier transform: for ( size_t i = 0; i < g.nx( j ); ++i ) { double lon = g.x( i, j ) * util::Constants::degreesToRadians(); @@ -205,12 +280,12 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field double trcFT = truncation; // Legendre transform: - invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, - legReal.data(), legImag.data() ); + //invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, + // legReal.data(), legImag.data() ); // Fourier transform: - invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(), - gp_tmp.data() + ( nb_fields * idx ) ); + //invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(), + // gp_tmp.data() + ( nb_fields * idx ) ); for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); } @@ -275,7 +350,11 @@ void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_sp invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), gp_fields + nb_gp * nb_vordiv_fields, config ); - invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, 
gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); + int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; + std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. ); + extend_truncationopt( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); + invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), + gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); } // -------------------------------------------------------------------------------------------------------------------- diff --git a/src/atlas/trans/localopt/TransLocalopt.h b/src/atlas/trans/localopt/TransLocalopt.h index 7b4a4d202..0d12d4af3 100644 --- a/src/atlas/trans/localopt/TransLocalopt.h +++ b/src/atlas/trans/localopt/TransLocalopt.h @@ -111,7 +111,8 @@ class TransLocalopt : public trans::TransImpl { Grid grid_; int truncation_; bool precompute_; - std::vector legendre_; + mutable std::vector legendre_; + mutable std::vector fourier_; std::vector legendre_begin_; }; diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 5991f2a26..31cb78c37 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -712,7 +712,7 @@ CASE( "test_trans_vordiv_with_translib" ) { double tolerance = 1.e-13; // Grid: (Adjust the following line if the test takes too long!) 
- Grid g( "O12" ); + Grid g( "F12" ); grid::StructuredGrid gs( g ); int ndgl = gs.ny(); @@ -768,8 +768,8 @@ CASE( "test_trans_vordiv_with_translib" ) { if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.; for ( int j = 0; j < nb_all * g.size(); j++ ) { - gp[j] = 0.; - rgp[j] = 0.; + gp[j] = 0.; + rgp[j] = 0.; rgpopt[j] = 0.; } for ( int j = 0; j < g.size(); j++ ) { @@ -783,7 +783,7 @@ CASE( "test_trans_vordiv_with_translib" ) { div.data(), rgp.data() ) ); EXPECT_NO_THROW( transLocalopt.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), - div.data(), rgpopt.data() ) ); + div.data(), rgpopt.data() ) ); int pos = ( ivar_out * nb_vordiv + jfld ); @@ -793,7 +793,7 @@ CASE( "test_trans_vordiv_with_translib" ) { double rms_genopt = compute_rms( g.size(), rgpopt.data() + pos * g.size(), rgp_analytic.data() ); - if ( !(rms_gen < tolerance) || !(rms_genopt < tolerance) ) { + if ( !( rms_gen < tolerance ) || !( rms_genopt < tolerance ) ) { Log::info() << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; @@ -802,7 +802,7 @@ CASE( "test_trans_vordiv_with_translib" ) { ATLAS_DEBUG_VAR( tolerance ); } EXPECT( rms_gen < tolerance ); - EXPECT( rms_genopt < tolerance ); + //EXPECT( rms_genopt < tolerance ); icase++; #if ATLAS_HAVE_TRANS @@ -813,7 +813,7 @@ CASE( "test_trans_vordiv_with_translib" ) { double rms_diff = compute_rms( g.size(), rgp.data() + pos * g.size(), gp.data() + pos * g.size() ); EXPECT( rms_trans < tolerance ); - if ( !(rms_trans < tolerance) || !(rms_diff < tolerance) ) { + if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) { Log::info() << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; From 0d483cbaadfa66684a66ae5e5dad0a0f4886ad2f Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 2 Mar 2018 09:37:16 +0000 Subject: [PATCH 007/123] Fourier transform 
with dgemm via eckit works --- src/atlas/trans/localopt/TransLocalopt.cc | 71 +++++++++++++++++++---- 1 file changed, 60 insertions(+), 11 deletions(-) diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index 8859a7c0a..e9ea8b87d 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -70,7 +70,7 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t lats[j] = g.y( j ) * util::Constants::degreesToRadians(); } for ( size_t j = 0; j < nlons; ++j ) { - lons[j] = g.x( 0, j ) * util::Constants::degreesToRadians(); + lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians(); } } else { @@ -174,6 +174,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field // Transform if ( grid::StructuredGrid g = grid_ ) { ATLAS_TRACE( "invtrans_uv structured opt" ); + int nlats = g.ny(); int size_fourier = nb_fields * 2 * g.ny(); std::vector scl_fourier( size_fourier * ( truncation + 1 ) ); @@ -205,6 +206,30 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field } // Fourier transformation: + std::vector gp_opt( nb_fields * grid_.size(), 0. 
); + eckit::linalg::Matrix A( scl_fourier_tp.data(), nb_fields * g.ny(), ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( fourier_.data(), ( truncation_ + 1 ) * 2, g.nxmax() ); + eckit::linalg::Matrix C( gp_opt.data(), nb_fields * g.ny(), g.nxmax() ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + + // Transposition in grid point space: + { + std::vector coslats( nlats ); + for ( size_t j = 0; j < nlats; ++j ) { + coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); + } + int idx = 0; + for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int pos_tp = jlon + g.nxmax() * ( jlat + g.ny() * ( jfld ) ); + //int pos = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) ); + if ( jfld < nb_vordiv_fields ) { gp_opt[idx] /= coslats[jlat]; } + gp_fields[pos_tp] = gp_opt[idx++]; // = gp_opt[pos] + } + } + } + } int idx = 0; for ( size_t j = 0; j < g.ny(); ++j ) { @@ -246,15 +271,17 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field idx1 = 0; for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int posReal = jfld + nb_fields * ( 2 * ( j + g.ny() * ( jm ) ) ); - if ( std::abs( legReal[idx1] - scl_fourier[posReal] ) > 1e-14 ) { + int posReal = jfld + nb_fields * ( 2 * ( j + g.ny() * ( jm ) ) ); + int posReal_tp = jfld + nb_fields * ( j + g.ny() * ( 2 * ( jm ) ) ); + if ( std::abs( legReal[idx1] - scl_fourier_tp[posReal_tp] ) > 1e-14 ) { Log::info() << "jm=" << jm << " jlat=" << j << " jfld=" << jfld - << " real: " << legReal[idx1] << " " << scl_fourier[posReal] << std::endl; + << " real: " << legReal[idx1] << " " << scl_fourier_tp[posReal_tp] << std::endl; } - int posImag = jfld + nb_fields * ( 1 + 2 * ( j + g.ny() * ( jm ) ) ); - if ( std::abs( legImag[idx1] - scl_fourier[posImag] ) > 1e-14 ) { + int posImag = jfld + nb_fields * ( 1 + 2 * ( j + g.ny() * ( jm ) ) ); + 
int posImag_tp = jfld + nb_fields * ( j + g.ny() * ( 1 + 2 * ( jm ) ) ); + if ( std::abs( legImag[idx1] - scl_fourier_tp[posImag_tp] ) > 1e-14 ) { Log::info() << "jm=" << jm << " jlat=" << j << " jfld=" << jfld - << " imag: " << legImag[idx1] << " " << scl_fourier[posImag] << std::endl; + << " imag: " << legImag[idx1] << " " << scl_fourier_tp[posImag_tp] << std::endl; } idx1++; } @@ -270,6 +297,32 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field ++idx; } } + // transpose result (gp_tmp: jfld is fastest index. gp_fields: jfld needs to + // be slowest index) + std::vector gp_tmp2( nb_fields * grid_.size(), 0. ); + gp_transposeopt( grid_.size(), nb_fields, gp_tmp.data(), gp_tmp2.data() ); + + // compare new and old version: + { + int idx = 0; + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { + int pos = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) ); + if ( std::abs( gp_opt[pos] - gp_tmp2[idx] ) > 1e-14 ) { + Log::info() << "jlon=" << jlon << " jlat=" << jlat << " jfld=" << jfld + << " new:" << gp_opt[pos] << " old:" << gp_tmp2[idx] << std::endl; + } + /*if ( std::abs( gp_fields[idx] - gp_tmp2[idx] ) > 1e-14 ) { + Log::info() << "jlon=" << jlon << " jlat=" << jlat << " jfld=" << jfld + << " new:" << gp_fields[idx] << " old:" << gp_tmp2[idx] << std::endl; + }*/ + //gp_fields[idx] = gp_tmp2[idx]; + idx++; + } + } + } + } } else { ATLAS_TRACE( "invtrans_uv unstructured opt" ); @@ -292,10 +345,6 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field ++idx; } } - - // transpose result (gp_tmp: jfld is fastest index. 
gp_fields: jfld needs to - // be slowest index) - gp_transposeopt( grid_.size(), nb_fields, gp_tmp.data(), gp_fields ); } } From ee4a5beb7e8de9dfd59f5f76fa43a9d260faad06 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 2 Mar 2018 10:45:40 +0000 Subject: [PATCH 008/123] cleaned up and more detailed timings --- src/atlas/trans/local/TransLocal.cc | 66 +++++---- src/atlas/trans/localopt/TransLocalopt.cc | 159 +++++----------------- 2 files changed, 78 insertions(+), 147 deletions(-) diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc index 949bc6fa7..f0f5973ef 100644 --- a/src/atlas/trans/local/TransLocal.cc +++ b/src/atlas/trans/local/TransLocal.cc @@ -129,6 +129,7 @@ void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spect } void gp_transpose( const int nb_size, const int nb_fields, const double gp_tmp[], double gp_fields[] ) { + ATLAS_TRACE( "gp_transpose" ); for ( int jgp = 0; jgp < nb_size; jgp++ ) { for ( int jfld = 0; jfld < nb_fields; jfld++ ) { gp_fields[jfld * nb_size + jgp] = gp_tmp[jgp * nb_fields + jfld]; @@ -181,18 +182,24 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields, fourier_truncation( truncation, g.nx( j ), g.nxmax(), g.ny(), lat, grid::RegularGrid( grid_ ) ); // Legendre transform: - invtrans_legendre( truncation, trcFT, truncation_ + 1, legPol( lat, j ), nb_fields, scalar_spectra, - legReal.data(), legImag.data() ); + { + ATLAS_TRACE( "invtrans_legendre" ); + invtrans_legendre( truncation, trcFT, truncation_ + 1, legPol( lat, j ), nb_fields, scalar_spectra, + legReal.data(), legImag.data() ); + } // Fourier transform: - for ( size_t i = 0; i < g.nx( j ); ++i ) { - double lon = g.x( i, j ) * util::Constants::degreesToRadians(); - invtrans_fourier( trcFT, lon, nb_fields, legReal.data(), legImag.data(), - gp_tmp.data() + ( nb_fields * idx ) ); - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - gp_tmp[nb_fields * idx + jfld] /= std::cos( lat 
); + { + ATLAS_TRACE( "invtrans_fourier" ); + for ( size_t i = 0; i < g.nx( j ); ++i ) { + double lon = g.x( i, j ) * util::Constants::degreesToRadians(); + invtrans_fourier( trcFT, lon, nb_fields, legReal.data(), legImag.data(), + gp_tmp.data() + ( nb_fields * idx ) ); + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + } + ++idx; } - ++idx; } } } @@ -205,14 +212,20 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields, double trcFT = truncation; // Legendre transform: - invtrans_legendre( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, - legReal.data(), legImag.data() ); + { + ATLAS_TRACE( "invtrans_legendre" ); + invtrans_legendre( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, + scalar_spectra, legReal.data(), legImag.data() ); + } // Fourier transform: - invtrans_fourier( trcFT, lon, nb_fields, legReal.data(), legImag.data(), - gp_tmp.data() + ( nb_fields * idx ) ); - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + { + ATLAS_TRACE( "invtrans_fourier" ); + invtrans_fourier( trcFT, lon, nb_fields, legReal.data(), legImag.data(), + gp_tmp.data() + ( nb_fields * idx ) ); + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + } } ++idx; } @@ -255,21 +268,24 @@ void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spect const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], const eckit::Configuration& config ) const { ATLAS_TRACE( "TransLocal::invtrans" ); - int nb_gp = grid_.size(); - - // increase truncation in vorticity_spectra and divergence_spectra: + int nb_gp = grid_.size(); int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. 
); std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); - extend_truncation( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() ); - extend_truncation( truncation_, nb_vordiv_fields, divergence_spectra, divergence_spectra_extended.data() ); - - // call vd2uv to compute u and v in spectral space std::vector U_ext( nb_vordiv_spec_ext, 0. ); std::vector V_ext( nb_vordiv_spec_ext, 0. ); - trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "local" ) ); - vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), - divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); + + { + ATLAS_TRACE( "vordiv to UV" ); + // increase truncation in vorticity_spectra and divergence_spectra: + extend_truncation( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() ); + extend_truncation( truncation_, nb_vordiv_fields, divergence_spectra, divergence_spectra_extended.data() ); + + // call vd2uv to compute u and v in spectral space + trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) ); + vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), + divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); + } // perform spectral transform to compute all fields in grid point space invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index e9ea8b87d..7aaca7f05 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -166,9 +166,6 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field if ( nb_scalar_fields > 0 ) { int nb_fields = nb_scalar_fields; - std::vector gp_tmp( nb_fields * grid_.size(), 0. 
); - std::vector legReal( nb_fields * ( truncation + 1 ) ); - std::vector legImag( nb_fields * ( truncation + 1 ) ); //eckit::linalg::LinearAlgebra::backend( "string" ) // might want to choose backend with this command // Transform @@ -179,18 +176,22 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field std::vector scl_fourier( size_fourier * ( truncation + 1 ) ); // Legendre transform: - for ( int jm = 0; jm <= truncation; jm++ ) { - int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; - eckit::linalg::Matrix A( eckit::linalg::Matrix( - const_cast( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) ); - eckit::linalg::Matrix B( legendre_.data() + noff * g.ny(), ns, g.ny() ); - eckit::linalg::Matrix C( scl_fourier.data() + jm * size_fourier, nb_fields * 2, g.ny() ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + { + ATLAS_TRACE( "opt Legendre dgemm" ); + for ( int jm = 0; jm <= truncation; jm++ ) { + int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; + eckit::linalg::Matrix A( eckit::linalg::Matrix( + const_cast( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) ); + eckit::linalg::Matrix B( legendre_.data() + noff * g.ny(), ns, g.ny() ); + eckit::linalg::Matrix C( scl_fourier.data() + jm * size_fourier, nb_fields * 2, g.ny() ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } } // Transposition in Fourier space: std::vector scl_fourier_tp( size_fourier * ( truncation + 1 ) ); { + ATLAS_TRACE( "opt transposition in Fourier" ); int idx = 0; for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { for ( int jlat = 0; jlat < g.ny(); jlat++ ) { @@ -207,17 +208,21 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field // Fourier transformation: std::vector gp_opt( nb_fields * grid_.size(), 0. 
); - eckit::linalg::Matrix A( scl_fourier_tp.data(), nb_fields * g.ny(), ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( fourier_.data(), ( truncation_ + 1 ) * 2, g.nxmax() ); - eckit::linalg::Matrix C( gp_opt.data(), nb_fields * g.ny(), g.nxmax() ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + { + ATLAS_TRACE( "opt Fourier dgemm" ); + eckit::linalg::Matrix A( scl_fourier_tp.data(), nb_fields * g.ny(), ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( fourier_.data(), ( truncation_ + 1 ) * 2, g.nxmax() ); + eckit::linalg::Matrix C( gp_opt.data(), nb_fields * g.ny(), g.nxmax() ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } // Transposition in grid point space: + std::vector coslats( nlats ); + for ( size_t j = 0; j < nlats; ++j ) { + coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); + } { - std::vector coslats( nlats ); - for ( size_t j = 0; j < nlats; ++j ) { - coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); - } + ATLAS_TRACE( "opt transposition in gp-space" ); int idx = 0; for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { for ( int jlat = 0; jlat < g.ny(); jlat++ ) { @@ -230,99 +235,6 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field } } } - - int idx = 0; - for ( size_t j = 0; j < g.ny(); ++j ) { - double lat = g.y( j ) * util::Constants::degreesToRadians(); - double trcFT = - fourier_truncationopt( truncation, g.nx( j ), g.nxmax(), g.ny(), lat, grid::RegularGrid( grid_ ) ); - - std::vector legPol( legendre_size( truncation_ + 1 ) ); - compute_legendre_polynomials( truncation_ + 1, lat, legPol.data() ); - int idx1 = 0, idx2 = 0; - //for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { - // for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - // for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { - // if ( jlat == j ) { - // if ( jm > 0 ) { - // legPol[idx1] = 0.5 * legendre_[idx2]; - // //Log::info() << legPol[idx1] << " " << 0.5 * legendre_[idx2] 
<< std::endl; - // if ( std::abs( legPol[idx1] - 0.5 * legendre_[idx2] ) > 1e-14 ) { - // Log::info() << "jm=" << jm << " jlat=" << jlat << " jn=" << jn << std::endl; - // } - // } - // else { - // legPol[idx1] = legendre_[idx2]; - // //Log::info() << legPol[idx1] << " " << legendre_[idx2] << std::endl; - // if ( std::abs( legPol[idx1] - legendre_[idx2] ) > 1e-14 ) { - // Log::info() << "jm=" << jm << " jlat=" << jlat << " jn=" << jn - // << " legPol=" << legPol[idx1] << " legendre=" << legendre_[idx2] - // << std::endl; - // } - // } - // idx1++; - // } - // idx2++; - // } - // } - //} - invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol.data(), nb_fields, scalar_spectra, - legReal.data(), legImag.data() ); - idx1 = 0; - for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int posReal = jfld + nb_fields * ( 2 * ( j + g.ny() * ( jm ) ) ); - int posReal_tp = jfld + nb_fields * ( j + g.ny() * ( 2 * ( jm ) ) ); - if ( std::abs( legReal[idx1] - scl_fourier_tp[posReal_tp] ) > 1e-14 ) { - Log::info() << "jm=" << jm << " jlat=" << j << " jfld=" << jfld - << " real: " << legReal[idx1] << " " << scl_fourier_tp[posReal_tp] << std::endl; - } - int posImag = jfld + nb_fields * ( 1 + 2 * ( j + g.ny() * ( jm ) ) ); - int posImag_tp = jfld + nb_fields * ( j + g.ny() * ( 1 + 2 * ( jm ) ) ); - if ( std::abs( legImag[idx1] - scl_fourier_tp[posImag_tp] ) > 1e-14 ) { - Log::info() << "jm=" << jm << " jlat=" << j << " jfld=" << jfld - << " imag: " << legImag[idx1] << " " << scl_fourier_tp[posImag_tp] << std::endl; - } - idx1++; - } - } - // Fourier transform: - for ( size_t i = 0; i < g.nx( j ); ++i ) { - double lon = g.x( i, j ) * util::Constants::degreesToRadians(); - invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(), - gp_tmp.data() + ( nb_fields * idx ) ); - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); - } - ++idx; - } - } - // 
transpose result (gp_tmp: jfld is fastest index. gp_fields: jfld needs to - // be slowest index) - std::vector gp_tmp2( nb_fields * grid_.size(), 0. ); - gp_transposeopt( grid_.size(), nb_fields, gp_tmp.data(), gp_tmp2.data() ); - - // compare new and old version: - { - int idx = 0; - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { - int pos = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) ); - if ( std::abs( gp_opt[pos] - gp_tmp2[idx] ) > 1e-14 ) { - Log::info() << "jlon=" << jlon << " jlat=" << jlat << " jfld=" << jfld - << " new:" << gp_opt[pos] << " old:" << gp_tmp2[idx] << std::endl; - } - /*if ( std::abs( gp_fields[idx] - gp_tmp2[idx] ) > 1e-14 ) { - Log::info() << "jlon=" << jlon << " jlat=" << jlat << " jfld=" << jfld - << " new:" << gp_fields[idx] << " old:" << gp_tmp2[idx] << std::endl; - }*/ - //gp_fields[idx] = gp_tmp2[idx]; - idx++; - } - } - } - } } else { ATLAS_TRACE( "invtrans_uv unstructured opt" ); @@ -340,7 +252,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field //invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(), // gp_tmp.data() + ( nb_fields * idx ) ); for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); } ++idx; } @@ -379,21 +291,24 @@ void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_sp const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], const eckit::Configuration& config ) const { ATLAS_TRACE( "TransLocalopt::invtrans" ); - int nb_gp = grid_.size(); - - // increase truncation in vorticity_spectra and divergence_spectra: + int nb_gp = grid_.size(); int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. 
); std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); - extend_truncationopt( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() ); - extend_truncationopt( truncation_, nb_vordiv_fields, divergence_spectra, divergence_spectra_extended.data() ); - - // call vd2uv to compute u and v in spectral space std::vector U_ext( nb_vordiv_spec_ext, 0. ); std::vector V_ext( nb_vordiv_spec_ext, 0. ); - trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) ); - vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), - divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); + + { + ATLAS_TRACE( "vordiv to UV opt" ); + // increase truncation in vorticity_spectra and divergence_spectra: + extend_truncationopt( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() ); + extend_truncationopt( truncation_, nb_vordiv_fields, divergence_spectra, divergence_spectra_extended.data() ); + + // call vd2uv to compute u and v in spectral space + trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) ); + vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), + divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); + } // perform spectral transform to compute all fields in grid point space invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); From b71afc12681bb3163bf08c5e3a36f26955ed1427 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 6 Mar 2018 10:50:24 +0000 Subject: [PATCH 009/123] added some more timings --- src/atlas/trans/localopt/TransLocalopt.cc | 78 +++++++++++++---------- 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index 7aaca7f05..a088b8a13 100644 --- 
a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -81,16 +81,22 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t } } // precomputations for Legendre polynomials: - legendre_.resize( legendre_size( truncation_ + 1 ) * nlats ); - compute_legendre_polynomialsopt( truncation_ + 1, nlats, lats.data(), legendre_.data() ); + { + ATLAS_TRACE( "opt precomp Legendre" ); + legendre_.resize( legendre_size( truncation_ + 1 ) * nlats ); + compute_legendre_polynomialsopt( truncation_ + 1, nlats, lats.data(), legendre_.data() ); + } // precomputations for Fourier transformations: - fourier_.resize( 2 * ( truncation_ + 1 ) * nlons ); - int idx = 0; - for ( int jlon = 0; jlon < nlons; jlon++ ) { - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - fourier_[idx++] = +std::cos( jm * lons[jlon] ); // real part - fourier_[idx++] = -std::sin( jm * lons[jlon] ); // imaginary part + { + ATLAS_TRACE( "opt precomp Fourier" ); + fourier_.resize( 2 * ( truncation_ + 1 ) * nlons ); + int idx = 0; + for ( int jlon = 0; jlon < nlons; jlon++ ) { + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + fourier_[idx++] = +std::cos( jm * lons[jlon] ); // real part + fourier_[idx++] = -std::sin( jm * lons[jlon] ); // imaginary part + } } } } @@ -293,32 +299,40 @@ void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_sp ATLAS_TRACE( "TransLocalopt::invtrans" ); int nb_gp = grid_.size(); int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; - std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector U_ext( nb_vordiv_spec_ext, 0. ); - std::vector V_ext( nb_vordiv_spec_ext, 0. ); + if ( nb_vordiv_fields > 0 ) { + std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector U_ext( nb_vordiv_spec_ext, 0. 
); + std::vector V_ext( nb_vordiv_spec_ext, 0. ); + + { + ATLAS_TRACE( "opt extend vordiv" ); + // increase truncation in vorticity_spectra and divergence_spectra: + extend_truncationopt( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() ); + extend_truncationopt( truncation_, nb_vordiv_fields, divergence_spectra, + divergence_spectra_extended.data() ); + } - { - ATLAS_TRACE( "vordiv to UV opt" ); - // increase truncation in vorticity_spectra and divergence_spectra: - extend_truncationopt( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() ); - extend_truncationopt( truncation_, nb_vordiv_fields, divergence_spectra, divergence_spectra_extended.data() ); - - // call vd2uv to compute u and v in spectral space - trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) ); - vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), - divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); - } + { + ATLAS_TRACE( "vordiv to UV opt" ); + // call vd2uv to compute u and v in spectral space + trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) ); + vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), + divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); + } - // perform spectral transform to compute all fields in grid point space - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), - gp_fields + nb_gp * nb_vordiv_fields, config ); - int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; - std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. 
); - extend_truncationopt( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); - invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), - gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); + // perform spectral transform to compute all fields in grid point space + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), + gp_fields + nb_gp * nb_vordiv_fields, config ); + } + if ( nb_scalar_fields > 0 ) { + int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; + std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. ); + extend_truncationopt( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); + invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), + gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); + } } // -------------------------------------------------------------------------------------------------------------------- From abe488481c3d0304694ae62f705f9e48b2af5ab5 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 6 Mar 2018 14:30:15 +0000 Subject: [PATCH 010/123] added option without transposition in gp-space --- src/atlas/trans/localopt/TransLocalopt.cc | 70 +++++++++++- src/atlas/trans/localopt/TransLocalopt.h | 1 + src/tests/trans/test_transgeneral.cc | 132 +++++++++++----------- 3 files changed, 132 insertions(+), 71 deletions(-) diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index a088b8a13..7e157d80c 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -99,6 +99,19 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t } } } + { + ATLAS_TRACE( "opt precomp Fourier tp" ); + fouriertp_.resize( 2 * ( truncation_ + 1 ) * nlons ); + int idx = 0; + 
for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + for ( int jlon = 0; jlon < nlons; jlon++ ) { + fouriertp_[idx++] = +std::cos( jm * lons[jlon] ); // real part + } + for ( int jlon = 0; jlon < nlons; jlon++ ) { + fouriertp_[idx++] = -std::sin( jm * lons[jlon] ); // imaginary part + } + } + } } // -------------------------------------------------------------------------------------------------------------------- @@ -193,13 +206,14 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } } +#if 0 // 1: better for small number of columns, large truncation; 0: better for large number of columns // Transposition in Fourier space: std::vector scl_fourier_tp( size_fourier * ( truncation + 1 ) ); { ATLAS_TRACE( "opt transposition in Fourier" ); int idx = 0; - for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { for ( int jlat = 0; jlat < g.ny(); jlat++ ) { for ( int imag = 0; imag < 2; imag++ ) { for ( int jfld = 0; jfld < nb_fields; jfld++ ) { @@ -223,10 +237,6 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field } // Transposition in grid point space: - std::vector coslats( nlats ); - for ( size_t j = 0; j < nlats; ++j ) { - coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); - } { ATLAS_TRACE( "opt transposition in gp-space" ); int idx = 0; @@ -235,12 +245,60 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field for ( int jfld = 0; jfld < nb_fields; jfld++ ) { int pos_tp = jlon + g.nxmax() * ( jlat + g.ny() * ( jfld ) ); //int pos = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) ); - if ( jfld < nb_vordiv_fields ) { gp_opt[idx] /= coslats[jlat]; } gp_fields[pos_tp] = gp_opt[idx++]; // = gp_opt[pos] } } } } +#else + // Transposition in Fourier space: + std::vector scl_fourier_tp( size_fourier * ( truncation + 1 ) ); + { + ATLAS_TRACE( "opt transposition in 
Fourier" ); + int idx = 0; + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int pos_tp = imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + g.ny() * ( jfld ) ) ); + //int pos = jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) ); + scl_fourier_tp[pos_tp] = scl_fourier[idx++]; // = scl_fourier[pos] + } + } + } + } + } + + // Fourier transformation: + std::vector gp_opt( nb_fields * grid_.size(), 0. ); + { + ATLAS_TRACE( "opt Fourier dgemm" ); + eckit::linalg::Matrix A( fouriertp_.data(), g.nxmax(), ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( scl_fourier_tp.data(), ( truncation_ + 1 ) * 2, nb_fields * g.ny() ); + eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + +#endif + // Computing u,v from U,V: + { + if ( nb_vordiv_fields > 0 ) { + ATLAS_TRACE( "opt u,v from U,V" ); + std::vector coslats( nlats ); + for ( size_t j = 0; j < nlats; ++j ) { + coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); + } + int idx = 0; + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { + gp_fields[idx] /= coslats[jlat]; + idx++; + } + } + } + } + } } else { ATLAS_TRACE( "invtrans_uv unstructured opt" ); diff --git a/src/atlas/trans/localopt/TransLocalopt.h b/src/atlas/trans/localopt/TransLocalopt.h index 0d12d4af3..ff77db4a9 100644 --- a/src/atlas/trans/localopt/TransLocalopt.h +++ b/src/atlas/trans/localopt/TransLocalopt.h @@ -113,6 +113,7 @@ class TransLocalopt : public trans::TransImpl { bool precompute_; mutable std::vector legendre_; mutable std::vector fourier_; + mutable std::vector fouriertp_; std::vector legendre_begin_; }; diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 
31cb78c37..6cd0c0cb5 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -756,74 +756,76 @@ CASE( "test_trans_vordiv_with_translib" ) { for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. ) { - for ( int j = 0; j < 2 * N * nb_scalar; j++ ) { - sp[j] = 0.; - } - for ( int j = 0; j < 2 * N * nb_vordiv; j++ ) { - vor[j] = 0.; - div[j] = 0.; - } - if ( ivar_in == 0 ) vor[k * nb_vordiv + jfld] = 1.; - if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.; - if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.; - - for ( int j = 0; j < nb_all * g.size(); j++ ) { - gp[j] = 0.; - rgp[j] = 0.; - rgpopt[j] = 0.; - } - for ( int j = 0; j < g.size(); j++ ) { - rgp_analytic[j] = 0.; - } - - spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(), - rgp_analytic.data(), ivar_in, ivar_out ); - - EXPECT_NO_THROW( transLocal.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), - div.data(), rgp.data() ) ); - - EXPECT_NO_THROW( transLocalopt.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), - div.data(), rgpopt.data() ) ); - - int pos = ( ivar_out * nb_vordiv + jfld ); - - double rms_gen = - compute_rms( g.size(), rgp.data() + pos * g.size(), rgp_analytic.data() ); - - double rms_genopt = - compute_rms( g.size(), rgpopt.data() + pos * g.size(), rgp_analytic.data() ); - - if ( !( rms_gen < tolerance ) || !( rms_genopt < tolerance ) ) { - Log::info() - << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out - << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; - ATLAS_DEBUG_VAR( rms_gen ); - ATLAS_DEBUG_VAR( rms_genopt ); - ATLAS_DEBUG_VAR( tolerance ); - } - EXPECT( rms_gen < tolerance ); - //EXPECT( rms_genopt < tolerance ); - icase++; + if ( true ) { //if ( icase == 378 ) { + for ( int j = 0; j < 2 * N * nb_scalar; j++ ) { + sp[j] = 0.; + } + for ( int j = 0; j < 2 * N * 
nb_vordiv; j++ ) { + vor[j] = 0.; + div[j] = 0.; + } + if ( ivar_in == 0 ) vor[k * nb_vordiv + jfld] = 1.; + if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.; + if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.; + + for ( int j = 0; j < nb_all * g.size(); j++ ) { + gp[j] = 0.; + rgp[j] = 0.; + rgpopt[j] = 0.; + } + for ( int j = 0; j < g.size(); j++ ) { + rgp_analytic[j] = 0.; + } + + spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(), + rgp_analytic.data(), ivar_in, ivar_out ); + + EXPECT_NO_THROW( transLocal.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), + div.data(), rgp.data() ) ); + + EXPECT_NO_THROW( transLocalopt.invtrans( nb_scalar, sp.data(), nb_vordiv, + vor.data(), div.data(), rgpopt.data() ) ); + + int pos = ( ivar_out * nb_vordiv + jfld ); + + double rms_gen = + compute_rms( g.size(), rgp.data() + pos * g.size(), rgp_analytic.data() ); + + double rms_genopt = + compute_rms( g.size(), rgpopt.data() + pos * g.size(), rgp_analytic.data() ); + + if ( !( rms_gen < tolerance ) || !( rms_genopt < tolerance ) ) { + Log::info() + << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out + << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; + ATLAS_DEBUG_VAR( rms_gen ); + ATLAS_DEBUG_VAR( rms_genopt ); + ATLAS_DEBUG_VAR( tolerance ); + } + EXPECT( rms_gen < tolerance ); + //EXPECT( rms_genopt < tolerance ); #if ATLAS_HAVE_TRANS - EXPECT_NO_THROW( transIFS.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), - div.data(), gp.data() ) ); - double rms_trans = - compute_rms( g.size(), gp.data() + pos * g.size(), rgp_analytic.data() ); - double rms_diff = - compute_rms( g.size(), rgp.data() + pos * g.size(), gp.data() + pos * g.size() ); - EXPECT( rms_trans < tolerance ); - if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) { - Log::info() - << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out - << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << 
std::endl; - ATLAS_DEBUG_VAR( rms_gen ); - ATLAS_DEBUG_VAR( rms_genopt ); - ATLAS_DEBUG_VAR( rms_trans ); - ATLAS_DEBUG_VAR( rms_diff ); - ATLAS_DEBUG_VAR( tolerance ); - } + EXPECT_NO_THROW( transIFS.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), + div.data(), gp.data() ) ); + double rms_trans = + compute_rms( g.size(), gp.data() + pos * g.size(), rgp_analytic.data() ); + double rms_diff = compute_rms( g.size(), rgp.data() + pos * g.size(), + gp.data() + pos * g.size() ); + EXPECT( rms_trans < tolerance ); + if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) { + Log::info() + << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out + << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; + ATLAS_DEBUG_VAR( rms_gen ); + ATLAS_DEBUG_VAR( rms_genopt ); + ATLAS_DEBUG_VAR( rms_trans ); + ATLAS_DEBUG_VAR( rms_diff ); + ATLAS_DEBUG_VAR( tolerance ); + } #endif + } + icase++; } k++; } From 2fdd12c4e7c43d4cbc0277c0f9435e18942b58b1 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 6 Mar 2018 14:56:46 +0000 Subject: [PATCH 011/123] created second optimised local transform to compare two optimisations --- src/tests/trans/test_transgeneral.cc | 145 ++++++++++++++------------- 1 file changed, 73 insertions(+), 72 deletions(-) diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 6cd0c0cb5..b4c659430 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -712,11 +712,12 @@ CASE( "test_trans_vordiv_with_translib" ) { double tolerance = 1.e-13; // Grid: (Adjust the following line if the test takes too long!) - Grid g( "F12" ); + Grid g( "F320" ); grid::StructuredGrid gs( g ); int ndgl = gs.ny(); - int trc = ndgl - 1; // linear + //int trc = ndgl - 1; // linear + int trc = ndgl / 2. 
- 1; // cubic #if ATLAS_HAVE_TRANS trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) ); #endif @@ -726,7 +727,7 @@ CASE( "test_trans_vordiv_with_translib" ) { functionspace::Spectral spectral( trc ); functionspace::StructuredColumns gridpoints( g ); - int nb_scalar = 2, nb_vordiv = 2; + int nb_scalar = 1, nb_vordiv = 0; int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; std::vector sp( 2 * N * nb_scalar ); std::vector vor( 2 * N * nb_vordiv ); @@ -738,8 +739,8 @@ CASE( "test_trans_vordiv_with_translib" ) { std::vector rgp_analytic( g.size() ); int icase = 0; - for ( int ivar_in = 0; ivar_in < 3; ivar_in++ ) { // vorticity, divergence, scalar - for ( int ivar_out = 0; ivar_out < 3; ivar_out++ ) { // u, v, scalar + for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) { // vorticity, divergence, scalar + for ( int ivar_out = 2; ivar_out < 3; ivar_out++ ) { // u, v, scalar int nb_fld = 1; if ( ivar_out == 2 ) { tolerance = 1.e-13; @@ -756,76 +757,76 @@ CASE( "test_trans_vordiv_with_translib" ) { for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. 
) { - if ( true ) { //if ( icase == 378 ) { - for ( int j = 0; j < 2 * N * nb_scalar; j++ ) { - sp[j] = 0.; - } - for ( int j = 0; j < 2 * N * nb_vordiv; j++ ) { - vor[j] = 0.; - div[j] = 0.; - } - if ( ivar_in == 0 ) vor[k * nb_vordiv + jfld] = 1.; - if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.; - if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.; - - for ( int j = 0; j < nb_all * g.size(); j++ ) { - gp[j] = 0.; - rgp[j] = 0.; - rgpopt[j] = 0.; - } - for ( int j = 0; j < g.size(); j++ ) { - rgp_analytic[j] = 0.; - } - - spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(), - rgp_analytic.data(), ivar_in, ivar_out ); - - EXPECT_NO_THROW( transLocal.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), - div.data(), rgp.data() ) ); - - EXPECT_NO_THROW( transLocalopt.invtrans( nb_scalar, sp.data(), nb_vordiv, - vor.data(), div.data(), rgpopt.data() ) ); - - int pos = ( ivar_out * nb_vordiv + jfld ); - - double rms_gen = - compute_rms( g.size(), rgp.data() + pos * g.size(), rgp_analytic.data() ); - - double rms_genopt = - compute_rms( g.size(), rgpopt.data() + pos * g.size(), rgp_analytic.data() ); - - if ( !( rms_gen < tolerance ) || !( rms_genopt < tolerance ) ) { - Log::info() - << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out - << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; - ATLAS_DEBUG_VAR( rms_gen ); - ATLAS_DEBUG_VAR( rms_genopt ); - ATLAS_DEBUG_VAR( tolerance ); - } - EXPECT( rms_gen < tolerance ); - //EXPECT( rms_genopt < tolerance ); + for ( int j = 0; j < 2 * N * nb_scalar; j++ ) { + sp[j] = 0.; + } + for ( int j = 0; j < 2 * N * nb_vordiv; j++ ) { + vor[j] = 0.; + div[j] = 0.; + } + if ( ivar_in == 0 ) vor[k * nb_vordiv + jfld] = 1.; + if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.; + if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.; + + for ( int j = 0; j < nb_all * g.size(); j++ ) { + gp[j] = 0.; + rgp[j] = 0.; + rgpopt[j] = 0.; + } + for ( int j = 0; j < g.size(); j++ 
) { + rgp_analytic[j] = 0.; + } -#if ATLAS_HAVE_TRANS - EXPECT_NO_THROW( transIFS.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), - div.data(), gp.data() ) ); - double rms_trans = - compute_rms( g.size(), gp.data() + pos * g.size(), rgp_analytic.data() ); - double rms_diff = compute_rms( g.size(), rgp.data() + pos * g.size(), - gp.data() + pos * g.size() ); - EXPECT( rms_trans < tolerance ); - if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) { - Log::info() - << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out - << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; - ATLAS_DEBUG_VAR( rms_gen ); - ATLAS_DEBUG_VAR( rms_genopt ); - ATLAS_DEBUG_VAR( rms_trans ); - ATLAS_DEBUG_VAR( rms_diff ); - ATLAS_DEBUG_VAR( tolerance ); - } -#endif + spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(), + rgp_analytic.data(), ivar_in, ivar_out ); + + //EXPECT_NO_THROW( transLocal.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), + // div.data(), rgp.data() ) ); + + EXPECT_NO_THROW( transLocalopt.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), + div.data(), rgpopt.data() ) ); + + int pos = ( ivar_out * nb_vordiv + jfld ); + + double rms_gen = + compute_rms( g.size(), rgpopt.data() + pos * g.size(), rgp_analytic.data() ); + + double rms_genopt = + compute_rms( g.size(), rgpopt.data() + pos * g.size(), rgp_analytic.data() ); + + if ( !( rms_gen < tolerance ) || !( rms_genopt < tolerance ) ) { + Log::info() + << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out + << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; + ATLAS_DEBUG_VAR( rms_gen ); + ATLAS_DEBUG_VAR( rms_genopt ); + ATLAS_DEBUG_VAR( tolerance ); } + EXPECT( rms_gen < tolerance ); + //EXPECT( rms_genopt < tolerance ); icase++; + +#if ATLAS_HAVE_TRANS + EXPECT_NO_THROW( transIFS.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), + div.data(), gp.data() ) ); + double rms_trans = + 
compute_rms( g.size(), gp.data() + pos * g.size(), rgp_analytic.data() ); + double rms_diff = + compute_rms( g.size(), rgpopt.data() + pos * g.size(), gp.data() + pos * g.size() ); + EXPECT( rms_trans < tolerance ); + if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) { + Log::info() + << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out + << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; + ATLAS_DEBUG_VAR( rms_gen ); + ATLAS_DEBUG_VAR( rms_genopt ); + ATLAS_DEBUG_VAR( rms_trans ); + ATLAS_DEBUG_VAR( rms_diff ); + ATLAS_DEBUG_VAR( tolerance ); + } +#endif + if ( icase > 2 ) EXPECT( false ); + Log::info() << "test " << icase << std::endl; } k++; } From 26481b562fe14607b1a9307c50597cae96a8f6a4 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 6 Mar 2018 14:57:10 +0000 Subject: [PATCH 012/123] missing files for previous commit --- src/atlas/CMakeLists.txt | 10 + src/atlas/trans/Trans.cc | 2 + src/atlas/trans/VorDivToUV.cc | 2 + src/atlas/trans/localopt/TransLocalopt.cc | 2 +- .../trans/localopt2/FourierTransformsopt2.cc | 78 +++ .../trans/localopt2/FourierTransformsopt2.h | 38 ++ .../localopt2/LegendrePolynomialsopt2.cc | 166 +++++++ .../trans/localopt2/LegendrePolynomialsopt2.h | 44 ++ .../trans/localopt2/LegendreTransformsopt2.cc | 62 +++ .../trans/localopt2/LegendreTransformsopt2.h | 37 ++ src/atlas/trans/localopt2/TransLocalopt2.cc | 445 ++++++++++++++++++ src/atlas/trans/localopt2/TransLocalopt2.h | 124 +++++ .../trans/localopt2/VorDivToUVLocalopt2.cc | 184 ++++++++ .../trans/localopt2/VorDivToUVLocalopt2.h | 67 +++ src/tests/trans/test_transgeneral.cc | 10 +- 15 files changed, 1265 insertions(+), 6 deletions(-) create mode 100644 src/atlas/trans/localopt2/FourierTransformsopt2.cc create mode 100644 src/atlas/trans/localopt2/FourierTransformsopt2.h create mode 100644 src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc create mode 100644 src/atlas/trans/localopt2/LegendrePolynomialsopt2.h create 
mode 100644 src/atlas/trans/localopt2/LegendreTransformsopt2.cc create mode 100644 src/atlas/trans/localopt2/LegendreTransformsopt2.h create mode 100644 src/atlas/trans/localopt2/TransLocalopt2.cc create mode 100644 src/atlas/trans/localopt2/TransLocalopt2.h create mode 100644 src/atlas/trans/localopt2/VorDivToUVLocalopt2.cc create mode 100644 src/atlas/trans/localopt2/VorDivToUVLocalopt2.h diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt index ecbbbbb06..8cb63ed1f 100644 --- a/src/atlas/CMakeLists.txt +++ b/src/atlas/CMakeLists.txt @@ -341,6 +341,16 @@ trans/localopt/FourierTransformsopt.h trans/localopt/FourierTransformsopt.cc trans/localopt/VorDivToUVLocalopt.h trans/localopt/VorDivToUVLocalopt.cc +trans/localopt2/TransLocalopt2.h +trans/localopt2/TransLocalopt2.cc +trans/localopt2/LegendrePolynomialsopt2.h +trans/localopt2/LegendrePolynomialsopt2.cc +trans/localopt2/LegendreTransformsopt2.h +trans/localopt2/LegendreTransformsopt2.cc +trans/localopt2/FourierTransformsopt2.h +trans/localopt2/FourierTransformsopt2.cc +trans/localopt2/VorDivToUVLocalopt2.h +trans/localopt2/VorDivToUVLocalopt2.cc ) if( ATLAS_HAVE_TRANS ) diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc index 9c555170d..efd8eabe9 100644 --- a/src/atlas/trans/Trans.cc +++ b/src/atlas/trans/Trans.cc @@ -29,6 +29,7 @@ #endif #include "atlas/trans/local/TransLocal.h" #include "atlas/trans/localopt/TransLocalopt.h" +#include "atlas/trans/localopt2/TransLocalopt2.h" namespace atlas { namespace trans { @@ -64,6 +65,7 @@ struct force_link { #endif load_builder_grid(); load_builder_grid(); + load_builder_grid(); } }; diff --git a/src/atlas/trans/VorDivToUV.cc b/src/atlas/trans/VorDivToUV.cc index b958599fb..566303c52 100644 --- a/src/atlas/trans/VorDivToUV.cc +++ b/src/atlas/trans/VorDivToUV.cc @@ -28,6 +28,7 @@ #endif #include "atlas/trans/local/VorDivToUVLocal.h" #include "atlas/trans/localopt/VorDivToUVLocalopt.h" +#include "atlas/trans/localopt2/VorDivToUVLocalopt2.h" 
namespace atlas { namespace trans { @@ -57,6 +58,7 @@ struct force_link { #endif load_builder(); load_builder(); + load_builder(); } }; diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index 7e157d80c..907be3e24 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -206,7 +206,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } } -#if 0 // 1: better for small number of columns, large truncation; 0: better for large number of columns +#if 1 // 1: better for small number of columns, large truncation; 0: better for large number of columns // Transposition in Fourier space: std::vector scl_fourier_tp( size_fourier * ( truncation + 1 ) ); diff --git a/src/atlas/trans/localopt2/FourierTransformsopt2.cc b/src/atlas/trans/localopt2/FourierTransformsopt2.cc new file mode 100644 index 000000000..71d3202dd --- /dev/null +++ b/src/atlas/trans/localopt2/FourierTransformsopt2.cc @@ -0,0 +1,78 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor + * does it submit to any jurisdiction. 
+ */ + +#include +#include +#include + +#include "atlas/trans/localopt2/FourierTransformsopt2.h" + +namespace atlas { +namespace trans { + +//----------------------------------------------------------------------------- + +void invtrans_fourieropt2( const size_t trcFT, + const double lon, // longitude in radians (in) + const int nb_fields, // Number of fields + const double rlegReal[], // associated Legendre functions, size (trc+1)*trc/2 (in) + const double rlegImag[], // associated Legendre functions, size (trc+1)*trc/2 (in) + double rgp[] ) // gridpoint +{ + for ( int jfld = 0; jfld < nb_fields; ++jfld ) { + rgp[jfld] = 0.; + } + // local Fourier transformation: + for ( int jm = 0; jm <= trcFT; ++jm ) { + const double cos = std::cos( jm * lon ); + const double sin = std::sin( jm * lon ); + for ( int jfld = 0; jfld < nb_fields; ++jfld ) { + double real = cos * rlegReal[jm * nb_fields + jfld]; + double imag = sin * rlegImag[jm * nb_fields + jfld]; + rgp[jfld] += real - imag; + } + } +} + +int fourier_truncationopt2( const int truncation, // truncation + const int nx, // number of longitudes + const int nxmax, // maximum nx + const int ndgl, // number of latitudes + const double lat, // latitude in radian + const bool fullgrid ) { // regular grid + int trc = truncation; + int trclin = ndgl - 1; + int trcquad = ndgl * 2 / 3 - 1; + if ( truncation >= trclin || fullgrid ) { + // linear + trc = ( nx - 1 ) / 2; + } + else if ( truncation >= trcquad ) { + // quadratic + double weight = 3 * ( trclin - truncation ) / ndgl; + double sqcos = std::pow( std::cos( lat ), 2 ); + + trc = ( nx - 1 ) / ( 2 + weight * sqcos ); + } + else { + // cubic + double sqcos = std::pow( std::cos( lat ), 2 ); + + trc = ( nx - 1 ) / ( 2 + sqcos ) - 1; + } + trc = std::min( truncation, trc ); + return trc; +} + +// -------------------------------------------------------------------------------------------------------------------- + +} // namespace trans +} // namespace atlas diff --git 
a/src/atlas/trans/localopt2/FourierTransformsopt2.h b/src/atlas/trans/localopt2/FourierTransformsopt2.h new file mode 100644 index 000000000..4281a92cc --- /dev/null +++ b/src/atlas/trans/localopt2/FourierTransformsopt2.h @@ -0,0 +1,38 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#pragma once + +#include + +namespace atlas { +namespace trans { + +//----------------------------------------------------------------------------- +// Routine to compute the local Fourier transformation +// +// Author: +// Andreas Mueller *ECMWF* +// + +void invtrans_fourieropt2( const size_t trcFT, + const double lon, // longitude in radians (in) + const int nb_fields, // Number of fields + const double rlegReal[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) + const double rlegImag[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) + double rgp[] ); // gridpoint + +int fourier_truncationopt2( const int truncation, const int nx, const int nxmax, const int ndgl, const double lat, + const bool fullgrid ); + +// -------------------------------------------------------------------------------------------------------------------- + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc new file mode 100644 index 000000000..8f55231d5 --- /dev/null +++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc @@ -0,0 +1,166 @@ +/* + * (C) Copyright 2013 ECMWF. 
+ * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor + * does it submit to any jurisdiction. + */ + +#include +#include + +#include "atlas/array.h" +#include "atlas/trans/localopt2/LegendrePolynomialsopt2.h" + +namespace atlas { +namespace trans { + +//----------------------------------------------------------------------------- + +void compute_legendre_polynomialsopt2( + const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double legpol[] ) // values of associated Legendre functions, size (trc+1)*trc/2*nlats (out) +{ + array::ArrayT idxmn_( trc + 1, trc + 1, nlats ); + array::ArrayView idxmn = array::make_view( idxmn_ ); + + int j = 0; + for ( int jm = 0; jm <= trc; ++jm ) { + for ( int jlat = 0; jlat < nlats; ++jlat ) { + for ( int jn = jm; jn <= trc; ++jn ) { + idxmn( jm, jn, jlat ) = j++; + } + } + } + + array::ArrayT zfn_( trc + 1, trc + 1 ); + array::ArrayView zfn = array::make_view( zfn_ ); + + int iodd; + + // Compute coefficients for Taylor series in Belousov (19) and (21) + // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.) + // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1 + zfn( 0, 0 ) = 2.; + for ( int jn = 1; jn <= trc; ++jn ) { + double zfnn = zfn( 0, 0 ); + for ( int jgl = 1; jgl <= jn; ++jgl ) { + zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) ); + } + iodd = jn % 2; + zfn( jn, jn ) = zfnn; + for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) { + double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) ); // new factor numerator + double zfjd = ( jgl * ( 2. * jn - jgl + 1. 
) ); // new factor denominator + + zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd; + } + } + + for ( int jlat = 0; jlat < nlats; ++jlat ) { + // -------------------- + // 1. First two columns + // -------------------- + double lat = lats[jlat]; + double zdlx1 = ( M_PI_2 - lat ); // theta + double zdlx = std::cos( zdlx1 ); // cos(theta) + double zdlsita = std::sqrt( 1. - zdlx * zdlx ); // sin(theta) (this is how trans library does it) + + legpol[idxmn( 0, 0, jlat )] = 1.; + + double zdl1sita = 0.; + // if we are less than 1 meter from the pole, + if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits::epsilon() ) ) { + zdlx = 1.; + zdlsita = 0.; + } + else { + zdl1sita = 1. / zdlsita; + } + + // ordinary Legendre polynomials from series expansion + // --------------------------------------------------- + + // even N + for ( int jn = 2; jn <= trc; jn += 2 ) { + double zdlk = 0.5 * zfn( jn, 0 ); + double zdlldn = 0.0; + double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); + // represented by only even k + for ( int jk = 2; jk <= jn; jk += 2 ) { + // normalised ordinary Legendre polynomial == \overbar{P_n}^0 + zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); + // normalised associated Legendre polynomial == \overbar{P_n}^1 + zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); + } + legpol[idxmn( 0, jn, jlat )] = zdlk; + legpol[idxmn( 1, jn, jlat )] = zdlldn; + } + + // odd N + for ( int jn = 1; jn <= trc; jn += 2 ) { + zfn( jn, 0 ) = 0.; + double zdlk = 0.; + double zdlldn = 0.0; + double zdsq = 1. / std::sqrt( jn * ( jn + 1. 
) ); + // represented by only even k + for ( int jk = 1; jk <= jn; jk += 2 ) { + // normalised ordinary Legendre polynomial == \overbar{P_n}^0 + zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); + // normalised associated Legendre polynomial == \overbar{P_n}^1 + zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); + } + legpol[idxmn( 0, jn, jlat )] = zdlk; + legpol[idxmn( 1, jn, jlat )] = zdlldn; + } + + // -------------------------------------------------------------- + // 2. Diagonal (the terms 0,0 and 1,1 have already been computed) + // Belousov, equation (23) + // -------------------------------------------------------------- + + double zdls = zdl1sita * std::numeric_limits::min(); + for ( int jn = 2; jn <= trc; ++jn ) { + double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) ); + + legpol[idxmn( jn, jn, jlat )] = legpol[idxmn( jn - 1, jn - 1, jlat )] * zdlsita * sq; + if ( std::abs( legpol[idxmn( jn, jn, jlat )] ) < zdls ) legpol[idxmn( jn, jn, jlat )] = 0.0; + } + + // --------------------------------------------- + // 3. General recurrence (Belousov, equation 17) + // --------------------------------------------- + + for ( int jn = 3; jn <= trc; ++jn ) { + for ( int jm = 2; jm < jn; ++jm ) { + double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) ); // numerator of c in Belousov + double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of c in Belousov + double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) ); // numerator of d in Belousov + double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of d in Belousov + double en = ( ( 2. * jn + 1. ) * ( jn - jm ) ); // numerator of e in Belousov + double ed = ( ( 2. * jn - 1. 
) * ( jn + jm ) ); // denominator of e in Belousov + + legpol[idxmn( jm, jn, jlat )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2, jlat )] - + std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1, jlat )] * zdlx + + std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1, jlat )] * zdlx; + } + } + + // take factor 2 for m > 0 into account: + for ( int jm = 1; jm <= trc; ++jm ) { + for ( int jn = jm; jn <= trc; ++jn ) { + legpol[idxmn( jm, jn, jlat )] *= 2.; + } + } + } +} + +// -------------------------------------------------------------------------------------------------------------------- + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h new file mode 100644 index 000000000..ae550d30d --- /dev/null +++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h @@ -0,0 +1,44 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#pragma once + +#include + +namespace atlas { +namespace trans { + +//----------------------------------------------------------------------------- +// Routine to compute the Legendre polynomials in serial according to Belousov +// (using correction by Swarztrauber) +// +// Reference: +// S.L. Belousov, Tables of normalized associated Legendre Polynomials, Pergamon +// Press (1962) +// P.N. Swarztrauber, On computing the points and weights for Gauss-Legendre +// quadrature, +// SIAM J. Sci. Comput. Vol. 24 (3) pp. 
945-954 (2002) +// +// Author of Fortran version: +// Mats Hamrud, Philippe Courtier, Nils Wedi *ECMWF* +// +// Ported to C++ by: +// Andreas Mueller *ECMWF* +// +void compute_legendre_polynomialsopt2( + const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double legpol[] ); // values of associated Legendre functions, size (trc+1)*trc/2*nlats (out) + +// -------------------------------------------------------------------------------------------------------------------- + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt2/LegendreTransformsopt2.cc b/src/atlas/trans/localopt2/LegendreTransformsopt2.cc new file mode 100644 index 000000000..ddb71a967 --- /dev/null +++ b/src/atlas/trans/localopt2/LegendreTransformsopt2.cc @@ -0,0 +1,62 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include + +#include "atlas/trans/localopt2/LegendreTransformsopt2.h" + +namespace atlas { +namespace trans { + +//----------------------------------------------------------------------------- + +void invtrans_legendreopt2( + const size_t trc, // truncation (in) + const size_t trcFT, // truncation for Fourier transformation (in) + const size_t trcLP, // truncation of Legendre polynomials data legpol. 
Needs to be >= trc (in) + const double legpol[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) + const int nb_fields, // number of fields + const double spec[], // spectral data, size (trc+1)*trc (in) + double leg_real[], // values of associated Legendre functions, size (trc+1)*trc/2 (out) + double leg_imag[] ) // values of associated Legendre functions, size (trc+1)*trc/2 (out) +{ + // Legendre transformation: + int k = 0, klp = 0; + for ( int jm = 0; jm <= trcFT; ++jm ) { + for ( int jfld = 0; jfld < nb_fields; ++jfld ) { + leg_real[jm * nb_fields + jfld] = 0.; + leg_imag[jm * nb_fields + jfld] = 0.; + } + for ( int jn = jm; jn <= trcLP; ++jn, ++klp ) { + if ( jn <= trc ) { + for ( int jfld = 0; jfld < nb_fields; ++jfld ) { + // not completely sure where this factor 2 comes from. One possible + // explanation: + // normalization of trigonometric functions in the spherical harmonics + // integral over square of trig function is 1 for m=0 and 0.5 (?) for + // m>0 + leg_real[jm * nb_fields + jfld] += 2. * spec[( 2 * k ) * nb_fields + jfld] * legpol[klp]; + leg_imag[jm * nb_fields + jfld] += 2. * spec[( 2 * k + 1 ) * nb_fields + jfld] * legpol[klp]; + } + ++k; + } + } + } + // Undo factor 2 for (jm == 0) + for ( int jfld = 0; jfld < nb_fields; ++jfld ) { + leg_real[jfld] /= 2.; + leg_imag[jfld] /= 2.; + } +} + +// -------------------------------------------------------------------------------------------------------------------- + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt2/LegendreTransformsopt2.h b/src/atlas/trans/localopt2/LegendreTransformsopt2.h new file mode 100644 index 000000000..ef10eb885 --- /dev/null +++ b/src/atlas/trans/localopt2/LegendreTransformsopt2.h @@ -0,0 +1,37 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+ * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#pragma once + +#include + +namespace atlas { +namespace trans { + +//----------------------------------------------------------------------------- +// Routine to compute the Legendre transformation +// +// Author: +// Andreas Mueller *ECMWF* +// +void invtrans_legendreopt2( + const size_t trc, // truncation (in) + const size_t trcFT, // truncation for Fourier transformation (in) + const size_t trcLP, // truncation of Legendre polynomials data legpol. Needs to be >= trc (in) + const double legpol[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) + const int nb_fields, // number of fields + const double spec[], // spectral data, size (trc+1)*trc (in) + double leg_real[], // values of associated Legendre functions, size (trc+1)*trc/2 (out) + double leg_imag[] ); // values of associated Legendre functions, size (trc+1)*trc/2 (out) + +// -------------------------------------------------------------------------------------------------------------------- + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc new file mode 100644 index 000000000..45548ce12 --- /dev/null +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -0,0 +1,445 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. 
+ */ + +#include "atlas/trans/localopt2/TransLocalopt2.h" +#include "atlas/array.h" +#include "atlas/option.h" +#include "atlas/parallel/mpi/mpi.h" +#include "atlas/runtime/ErrorHandling.h" +#include "atlas/runtime/Log.h" +#include "atlas/trans/VorDivToUV.h" +#include "atlas/trans/local/LegendrePolynomials.h" +#include "atlas/trans/localopt2/FourierTransformsopt2.h" +#include "atlas/trans/localopt2/LegendrePolynomialsopt2.h" +#include "atlas/trans/localopt2/LegendreTransformsopt2.h" +#include "atlas/util/Constants.h" +#include "eckit/linalg/LinearAlgebra.h" +#include "eckit/linalg/Matrix.h" + +namespace atlas { +namespace trans { + +namespace { +static TransBuilderGrid builder( "localopt2" ); +} + +// -------------------------------------------------------------------------------------------------------------------- +// Helper functions +// -------------------------------------------------------------------------------------------------------------------- +namespace { // anonymous + +size_t legendre_size( const size_t truncation ) { + return ( truncation + 2 ) * ( truncation + 1 ) / 2; +} + +} // namespace + +// -------------------------------------------------------------------------------------------------------------------- +// Class TransLocalopt2 +// -------------------------------------------------------------------------------------------------------------------- + +TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long truncation, + const eckit::Configuration& config ) : + grid_( grid ), + truncation_( truncation ), + precompute_( config.getBool( "precompute", true ) ) { + ATLAS_TRACE( "Precompute legendre opt2" ); + int nlats, nlons; + if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { + grid::StructuredGrid g( grid_ ); + nlats = g.ny(); + nlons = g.nxmax(); + } + else { + nlats = grid_.size(); + nlons = grid_.size(); + } + std::vector lats( nlats ); + std::vector lons( nlons ); + if ( grid::StructuredGrid( grid_ ) 
&& not grid_.projection() ) { + grid::StructuredGrid g( grid_ ); + // TODO: remove legendre_begin and legendre_data (only legendre_ should be needed) + for ( size_t j = 0; j < nlats; ++j ) { + lats[j] = g.y( j ) * util::Constants::degreesToRadians(); + } + for ( size_t j = 0; j < nlons; ++j ) { + lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians(); + } + } + else { + int j( 0 ); + for ( PointXY p : grid_.xy() ) { + lats[j++] = p.y() * util::Constants::degreesToRadians(); + lons[j++] = p.x() * util::Constants::degreesToRadians(); + } + } + // precomputations for Legendre polynomials: + { + ATLAS_TRACE( "opt2 precomp Legendre" ); + legendre_.resize( legendre_size( truncation_ + 1 ) * nlats ); + compute_legendre_polynomialsopt2( truncation_ + 1, nlats, lats.data(), legendre_.data() ); + } + + // precomputations for Fourier transformations: + { + ATLAS_TRACE( "opt2 precomp Fourier" ); + fourier_.resize( 2 * ( truncation_ + 1 ) * nlons ); + int idx = 0; + for ( int jlon = 0; jlon < nlons; jlon++ ) { + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + fourier_[idx++] = +std::cos( jm * lons[jlon] ); // real part + fourier_[idx++] = -std::sin( jm * lons[jlon] ); // imaginary part + } + } + } + { + ATLAS_TRACE( "opt2 precomp Fourier tp" ); + fouriertp_.resize( 2 * ( truncation_ + 1 ) * nlons ); + int idx = 0; + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + for ( int jlon = 0; jlon < nlons; jlon++ ) { + fouriertp_[idx++] = +std::cos( jm * lons[jlon] ); // real part + } + for ( int jlon = 0; jlon < nlons; jlon++ ) { + fouriertp_[idx++] = -std::sin( jm * lons[jlon] ); // imaginary part + } + } + } +} + +// -------------------------------------------------------------------------------------------------------------------- + +TransLocalopt2::TransLocalopt2( const Grid& grid, const long truncation, const eckit::Configuration& config ) : + TransLocalopt2( Cache(), grid, truncation, config ) {} + +// 
-------------------------------------------------------------------------------------------------------------------- + +TransLocalopt2::~TransLocalopt2() {} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt2::invtrans( const Field& spfield, Field& gpfield, const eckit::Configuration& config ) const { + NOTIMP; +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt2::invtrans( const FieldSet& spfields, FieldSet& gpfields, + const eckit::Configuration& config ) const { + NOTIMP; +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt2::invtrans_grad( const Field& spfield, Field& gradfield, const eckit::Configuration& config ) const { + NOTIMP; +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt2::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields, + const eckit::Configuration& config ) const { + NOTIMP; +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt2::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind, + const eckit::Configuration& config ) const { + NOTIMP; +} + +void TransLocalopt2::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields, config ); +} + +void gp_transposeopt2( const int nb_size, const int nb_fields, const double gp_tmp[], double gp_fields[] ) { + for ( int jgp = 0; jgp < nb_size; jgp++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + gp_fields[jfld * nb_size + jgp] = gp_tmp[jgp * 
nb_fields + jfld]; + } + } +} + +//----------------------------------------------------------------------------- +// Routine to compute the spectral transform by using a localopt2 Fourier +// transformation +// for a grid (same latitude for all longitudes, allows to compute Legendre +// functions +// once for all longitudes). U and v components are divided by cos(latitude) for +// nb_vordiv_fields > 0. +// +// Author: +// Andreas Mueller *ECMWF* +// +void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, + const double scalar_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + if ( nb_scalar_fields > 0 ) { + int nb_fields = nb_scalar_fields; + + //eckit::linalg::LinearAlgebra::backend( "string" ) // might want to choose backend with this command + + // Transform + if ( grid::StructuredGrid g = grid_ ) { + ATLAS_TRACE( "invtrans_uv structured opt2" ); + int nlats = g.ny(); + int size_fourier = nb_fields * 2 * g.ny(); + std::vector scl_fourier( size_fourier * ( truncation + 1 ) ); + + // Legendre transform: + { + ATLAS_TRACE( "opt2 Legendre dgemm" ); + for ( int jm = 0; jm <= truncation; jm++ ) { + int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; + eckit::linalg::Matrix A( eckit::linalg::Matrix( + const_cast( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) ); + eckit::linalg::Matrix B( legendre_.data() + noff * g.ny(), ns, g.ny() ); + eckit::linalg::Matrix C( scl_fourier.data() + jm * size_fourier, nb_fields * 2, g.ny() ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + } +#if 0 // 1: better for small number of columns, large truncation; 0: better for large number of columns + + // Transposition in Fourier space: + std::vector scl_fourier_tp( size_fourier * ( truncation + 1 ) ); + { + ATLAS_TRACE( "opt2 transposition in Fourier" ); + int idx = 0; + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + for ( int jlat = 0; jlat < 
g.ny(); jlat++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int pos_tp = jfld + nb_fields * ( jlat + g.ny() * ( imag + 2 * ( jm ) ) ); + //int pos = jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) ); + scl_fourier_tp[pos_tp] = scl_fourier[idx++]; // = scl_fourier[pos] + } + } + } + } + } + + // Fourier transformation: + std::vector gp_opt2( nb_fields * grid_.size(), 0. ); + { + ATLAS_TRACE( "opt2 Fourier dgemm" ); + eckit::linalg::Matrix A( scl_fourier_tp.data(), nb_fields * g.ny(), ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( fourier_.data(), ( truncation_ + 1 ) * 2, g.nxmax() ); + eckit::linalg::Matrix C( gp_opt2.data(), nb_fields * g.ny(), g.nxmax() ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + + // Transposition in grid point space: + { + ATLAS_TRACE( "opt2 transposition in gp-space" ); + int idx = 0; + for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int pos_tp = jlon + g.nxmax() * ( jlat + g.ny() * ( jfld ) ); + //int pos = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) ); + gp_fields[pos_tp] = gp_opt2[idx++]; // = gp_opt2[pos] + } + } + } + } +#else + // Transposition in Fourier space: + std::vector scl_fourier_tp( size_fourier * ( truncation + 1 ) ); + { + ATLAS_TRACE( "opt2 transposition in Fourier" ); + int idx = 0; + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int pos_tp = imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + g.ny() * ( jfld ) ) ); + //int pos = jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) ); + scl_fourier_tp[pos_tp] = scl_fourier[idx++]; // = scl_fourier[pos] + } + } + } + } + } + + // Fourier transformation: + std::vector gp_opt2( nb_fields * grid_.size(), 0. 
); + { + ATLAS_TRACE( "opt2 Fourier dgemm" ); + eckit::linalg::Matrix A( fouriertp_.data(), g.nxmax(), ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( scl_fourier_tp.data(), ( truncation_ + 1 ) * 2, nb_fields * g.ny() ); + eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + +#endif + // Computing u,v from U,V: + { + if ( nb_vordiv_fields > 0 ) { + ATLAS_TRACE( "opt2 u,v from U,V" ); + std::vector coslats( nlats ); + for ( size_t j = 0; j < nlats; ++j ) { + coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); + } + int idx = 0; + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { + gp_fields[idx] /= coslats[jlat]; + idx++; + } + } + } + } + } + } + else { + ATLAS_TRACE( "invtrans_uv unstructured opt2" ); + int idx = 0; + for ( PointXY p : grid_.xy() ) { + double lon = p.x() * util::Constants::degreesToRadians(); + double lat = p.y() * util::Constants::degreesToRadians(); + double trcFT = truncation; + + // Legendre transform: + //invtrans_legendreopt2( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, + // legReal.data(), legImag.data() ); + + // Fourier transform: + //invtrans_fourieropt2( trcFT, lon, nb_fields, legReal.data(), legImag.data(), + // gp_tmp.data() + ( nb_fields * idx ) ); + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + } + ++idx; + } + } + } +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt2::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], + const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, 
config ); +} + +void extend_truncationopt2( const int old_truncation, const int nb_fields, const double old_spectra[], + double new_spectra[] ) { + int k = 0, k_old = 0; + for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber + for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber + for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { // field + if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; } + else { + new_spectra[k++] = old_spectra[k_old++]; + } + } + } + } + } +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt2::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, + const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + ATLAS_TRACE( "TransLocalopt2::invtrans" ); + int nb_gp = grid_.size(); + int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; + if ( nb_vordiv_fields > 0 ) { + std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector U_ext( nb_vordiv_spec_ext, 0. ); + std::vector V_ext( nb_vordiv_spec_ext, 0. 
); + + { + ATLAS_TRACE( "opt2 extend vordiv" ); + // increase truncation in vorticity_spectra and divergence_spectra: + extend_truncationopt2( truncation_, nb_vordiv_fields, vorticity_spectra, + vorticity_spectra_extended.data() ); + extend_truncationopt2( truncation_, nb_vordiv_fields, divergence_spectra, + divergence_spectra_extended.data() ); + } + + { + ATLAS_TRACE( "vordiv to UV opt2" ); + // call vd2uv to compute u and v in spectral space + trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt2" ) ); + vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), + divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); + } + + // perform spectral transform to compute all fields in grid point space + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), + gp_fields + nb_gp * nb_vordiv_fields, config ); + } + if ( nb_scalar_fields > 0 ) { + int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; + std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. ); + extend_truncationopt2( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); + invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), + gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); + } +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt2::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. 
// --------------------------------------------------------------------------------------------------------------------

/// Direct transform of a single field (grid point -> spectral).
/// Always raises NOTIMP: not implemented and not planned for this class.
void TransLocalopt2::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const {
    NOTIMP;
    // Not implemented and not planned.
    // Use the TransIFS implementation instead.
}

// --------------------------------------------------------------------------------------------------------------------

/// Direct transform of a set of fields (grid point -> spectral).
/// Always raises NOTIMP: not implemented and not planned for this class.
void TransLocalopt2::dirtrans( const FieldSet& gpfields, FieldSet& spfields,
                               const eckit::Configuration& config ) const {
    NOTIMP;
    // Not implemented and not planned.
    // Use the TransIFS implementation instead.
}

// --------------------------------------------------------------------------------------------------------------------

/// Direct transform of a grid-point wind field to spectral vorticity and divergence.
/// Always raises NOTIMP: not implemented and not planned for this class.
void TransLocalopt2::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv,
                                           const eckit::Configuration& config ) const {
    NOTIMP;
    // Not implemented and not planned.
    // Use the TransIFS implementation instead.
}

// --------------------------------------------------------------------------------------------------------------------

/// IFS-style direct transform of scalar fields given as raw arrays.
/// Always raises NOTIMP: not implemented and not planned for this class.
void TransLocalopt2::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[],
                               const eckit::Configuration& ) const {
    NOTIMP;
    // Not implemented and not planned.
    // Use the TransIFS implementation instead.
}

// --------------------------------------------------------------------------------------------------------------------

/// IFS-style direct transform of wind fields to vorticity/divergence spectra, given as raw arrays.
/// Always raises NOTIMP: not implemented and not planned for this class.
void TransLocalopt2::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[],
                               double divergence_spectra[], const eckit::Configuration& ) const {
    NOTIMP;
    // Not implemented and not planned.
    // Use the TransIFS implementation instead.
}
+ * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#pragma once + +#include + +#include "atlas/grid/Grid.h" +#include "atlas/trans/Trans.h" + +//----------------------------------------------------------------------------- +// Forward declarations + +namespace atlas { +class Field; +class FieldSet; +} // namespace atlas + +//----------------------------------------------------------------------------- + +namespace atlas { +namespace trans { + +//----------------------------------------------------------------------------- + +/// @class TransLocalopt2 +/// +/// Localopt2 spherical harmonics transformations to any grid +/// Optimisations are present for structured grids +/// For global grids, please consider using TransIFS instead. +/// +/// @todo: +/// - support multiple fields +/// - support atlas::Field and atlas::FieldSet based on function spaces +/// +/// @note: Direct transforms are not implemented and cannot be unless +/// the grid is global. There are no plans to support this at the moment. 
+class TransLocalopt2 : public trans::TransImpl { +public: + TransLocalopt2( const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransLocalopt2( const Cache&, const Grid& g, const long truncation, + const eckit::Configuration& = util::NoConfig() ); + + virtual ~TransLocalopt2(); + + virtual int truncation() const override { return truncation_; } + virtual size_t spectralCoefficients() const override { return ( truncation_ + 1 ) * ( truncation_ + 2 ); } + + virtual const Grid& grid() const override { return grid_; } + + virtual void invtrans( const Field& spfield, Field& gpfield, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void invtrans( const FieldSet& spfields, FieldSet& gpfields, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void invtrans_grad( const Field& spfield, Field& gradfield, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void invtrans_grad( const FieldSet& spfields, FieldSet& gradfields, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind, + const eckit::Configuration& = util::NoConfig() ) const override; + + // -- IFS style API -- + + virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, + const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[], + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], + const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& = util::NoConfig() ) const override; + + // -- NOT SUPPORTED -- // + + 
virtual void dirtrans( const Field& gpfield, Field& spfield, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void dirtrans( const FieldSet& gpfields, FieldSet& spfields, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], + double divergence_spectra[], const eckit::Configuration& = util::NoConfig() ) const override; + +private: + const double* legendre_data( int j ) const { return legendre_.data() + legendre_begin_[j]; } + double* legendre_data( int j ) { return legendre_.data() + legendre_begin_[j]; } + + void invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, + const double scalar_spectra[], double gp_fields[], + const eckit::Configuration& = util::NoConfig() ) const; + +private: + Grid grid_; + int truncation_; + bool precompute_; + mutable std::vector legendre_; + mutable std::vector fourier_; + mutable std::vector fouriertp_; + std::vector legendre_begin_; +}; + +//----------------------------------------------------------------------------- + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt2/VorDivToUVLocalopt2.cc b/src/atlas/trans/localopt2/VorDivToUVLocalopt2.cc new file mode 100644 index 000000000..72c5a3ac9 --- /dev/null +++ b/src/atlas/trans/localopt2/VorDivToUVLocalopt2.cc @@ -0,0 +1,184 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
// --------------------------------------------------------------------------------------------------------------------
// Routine to copy spectral data into internal storage form of IFS trans
// Ported to C++ by: Andreas Mueller *ECMWF*
//
// For zonal wavenumber km, the coefficients of every field are copied into
// rows of length nlei1 inside pia: row 2*jfld holds the real parts and row
// 2*jfld+1 the imaginary parts. Within a row the total wavenumbers are stored
// in descending order (n = truncation first) starting at column 2; columns 0,
// 1 and ilcm+2 are set to zero as padding.
//
// rspec is read with the coefficient pair index as the slow dimension and the
// field index as the fast one: value = rspec[(pair * 2 + imag) * nb_fields + jfld].
void prfi1bopt2( const int truncation,
                 const int km,          // zonal wavenumber
                 const int nb_fields,   // number of fields
                 const double rspec[],  // spectral data
                 double pia[] )         // spectral components in data layout of trans library
{
    const int ilcm  = truncation + 1 - km;                          // number of total wavenumbers for this km
    const int ioff  = ( 2 * truncation - km + 3 ) * km;             // first (real) entry of wavenumber km
    const int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2;  // row length, always odd

    for ( int j = 1; j <= ilcm; j++ ) {
        // Offsets into rspec are widened to std::size_t before multiplying by
        // nb_fields: the product overflows int for high truncation and many fields.
        const std::size_t inm = static_cast<std::size_t>( ioff + ( ilcm - j ) * 2 );
        for ( int jfld = 0; jfld < nb_fields; jfld++ ) {
            const int ir            = 2 * jfld;  // row of real parts
            const int ii            = ir + 1;    // row of imaginary parts
            pia[ir * nlei1 + j + 1] = rspec[inm * nb_fields + jfld];
            pia[ii * nlei1 + j + 1] = rspec[( inm + 1 ) * nb_fields + jfld];
        }
    }

    // zero padding at both ends of every row
    for ( int jfld = 0; jfld < 2 * nb_fields; jfld++ ) {
        pia[jfld * nlei1]            = 0.;
        pia[jfld * nlei1 + 1]        = 0.;
        pia[jfld * nlei1 + ilcm + 2] = 0.;
    }
}
Mueller *ECMWF* +void vd2uvopt2( const int truncation, // truncation + const int km, // zonal wavenumber + const int nb_vordiv_fields, // number of vorticity and divergence fields + const double vorticity_spectra[], // spectral data of vorticity + const double divergence_spectra[], // spectral data of divergence + double U[], // spectral data of U + double V[], // spectral data of V + const eckit::Configuration& config ) { + int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2; + + // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991] + std::vector repsnm( ( truncation + 1 ) * ( truncation + 6 ) / 2 ); + int idx = 0; + for ( int jm = 0; jm <= truncation; ++jm ) { + for ( int jn = jm; jn <= truncation + 2; ++jn, ++idx ) { + repsnm[idx] = std::sqrt( ( jn * jn - jm * jm ) / ( 4. * jn * jn - 1. ) ); + } + } + repsnm[0] = 0.; + + // rlapin: constant factor from eq.(2.2) and (2.3) in [Temperton 1991] + double ra = util::Earth::radius(); + std::vector rlapin( truncation + 3 ); + for ( int jn = 1; jn <= truncation + 2; ++jn ) { + rlapin[jn] = -ra * ra / ( jn * ( jn + 1. 
) ); + } + rlapin[0] = 0.; + + // inverse the order of repsnm and rlapin for improved accuracy + std::vector zepsnm( truncation + 6 ); + std::vector zlapin( truncation + 6 ); + std::vector zn( truncation + 6 ); + for ( int jn = km - 1; jn <= truncation + 2; ++jn ) { + int ij = truncation + 3 - jn; + if ( jn >= 0 ) { + zlapin[ij] = rlapin[jn]; + if ( jn < km ) { zepsnm[ij] = 0.; } + else { + zepsnm[ij] = repsnm[jn + ( 2 * truncation - km + 5 ) * km / 2]; + } + } + else { + zlapin[ij] = 0.; + zepsnm[ij] = 0.; + } + zn[ij] = jn; + } + zn[0] = truncation + 3; + + // copy spectral data into internal trans storage: + std::vector rvor( 2 * nb_vordiv_fields * nlei1 ); + std::vector rdiv( 2 * nb_vordiv_fields * nlei1 ); + std::vector ru( 2 * nb_vordiv_fields * nlei1 ); + std::vector rv( 2 * nb_vordiv_fields * nlei1 ); + prfi1bopt2( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() ); + prfi1bopt2( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() ); + + // compute eq.(2.12) and (2.13) in [Temperton 1991]: + if ( km == 0 ) { + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + int ir = 2 * jfld * nlei1 - 1; + for ( int ji = 2; ji < truncation + 4 - km; ++ji ) { + double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1]; + double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1]; + ru[ir + ji] = +psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1]; + rv[ir + ji] = -psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1]; + } + } + } + else { + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + int ir = 2 * jfld * nlei1 - 1, ii = ir + nlei1; + for ( int ji = 2; ji < truncation + 4 - km; ++ji ) { + double chiIm = km * zlapin[ji]; + double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1]; + double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1]; + ru[ir + ji] = -chiIm * rdiv[ii + ji] + psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1]; + ru[ii + ji] = +chiIm * rdiv[ir + ji] + psiM1 * rvor[ii + ji + 1] - psiP1 * rvor[ii + ji 
- 1]; + rv[ir + ji] = -chiIm * rvor[ii + ji] - psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1]; + rv[ii + ji] = +chiIm * rvor[ir + ji] - psiM1 * rdiv[ii + ji + 1] + psiP1 * rdiv[ii + ji - 1]; + } + } + } + + // copy data from internal storage back to external spectral data: + int ilcm = truncation - km; + int ioff = ( 2 * truncation - km + 3 ) * km; + // ioff: start index of zonal wavenumber km in spectral data + double za_r = 1. / util::Earth::radius(); + for ( int j = 0; j <= ilcm; ++j ) { + // ilcm-j = total wavenumber + int inm = ioff + ( ilcm - j ) * 2; + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + int ir = 2 * jfld * nlei1, ii = ir + nlei1; + int idx = inm * nb_vordiv_fields + jfld; + // real part: + U[idx] = ru[ir + j + 2] * za_r; + V[idx] = rv[ir + j + 2] * za_r; + idx += nb_vordiv_fields; + // imaginary part: + U[idx] = ru[ii + j + 2] * za_r; + V[idx] = rv[ii + j + 2] * za_r; + } + } +} + +void VorDivToUVLocalopt2::execute( const int nb_coeff, const int nb_fields, const double vorticity[], + const double divergence[], double U[], double V[], + const eckit::Configuration& config ) const { + for ( int jm = 0; jm <= truncation_; ++jm ) { + vd2uvopt2( truncation_, jm, nb_fields, vorticity, divergence, U, V, config ); + } +} + +VorDivToUVLocalopt2::VorDivToUVLocalopt2( const int truncation, const eckit::Configuration& config ) : + truncation_( truncation ) {} + +VorDivToUVLocalopt2::VorDivToUVLocalopt2( const FunctionSpace& fs, const eckit::Configuration& config ) : + truncation_( Spectral( fs ).truncation() ) {} + +VorDivToUVLocalopt2::~VorDivToUVLocalopt2() {} + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt2/VorDivToUVLocalopt2.h b/src/atlas/trans/localopt2/VorDivToUVLocalopt2.h new file mode 100644 index 000000000..30c386a92 --- /dev/null +++ b/src/atlas/trans/localopt2/VorDivToUVLocalopt2.h @@ -0,0 +1,67 @@ +/* + * (C) Copyright 2013 ECMWF. 
/// Implementation of the trans::VorDivToUVImpl interface: converts spectral
/// vorticity/divergence into spectral wind components U and V.
class VorDivToUVLocalopt2 : public trans::VorDivToUVImpl {
public:
    /// Construct from a function space.
    VorDivToUVLocalopt2( const FunctionSpace&, const eckit::Configuration& = util::NoConfig() );
    /// Construct for an explicit spectral truncation.
    VorDivToUVLocalopt2( int truncation, const eckit::Configuration& = util::NoConfig() );

    virtual ~VorDivToUVLocalopt2();

    /// Spectral truncation this object was built for.
    virtual int truncation() const override { return truncation_; }

    // pure virtual interface

    // -- IFS style API --
    // These fields have special interpretation required. You need to know what
    // you're doing.
    // See IFS trans library.

    /*!
   * @brief Compute spectral wind (U/V) from spectral vorticity/divergence
   *
   * U = u*cos(lat)
   * V = v*cos(lat)
   *
   * @param nb_coeff [in] Number of spectral coefficients
   *                      (NOTE(review): exact meaning is not visible here - confirm against the interface)
   * @param nb_fields [in] Number of fields
   * @param vorticity [in] Spectral vorticity
   * @param divergence [in] Spectral divergence
   * @param U [out] Spectral wind U = u*cos(lat)
   * @param V [out] Spectral wind V = v*cos(lat)
   */
    virtual void execute( const int nb_coeff, const int nb_fields, const double vorticity[], const double divergence[],
                          double U[], double V[], const eckit::Configuration& = util::NoConfig() ) const override;

private:
    int truncation_;
};
); @@ -803,7 +803,7 @@ CASE( "test_trans_vordiv_with_translib" ) { ATLAS_DEBUG_VAR( tolerance ); } EXPECT( rms_gen < tolerance ); - //EXPECT( rms_genopt < tolerance ); + EXPECT( rms_genopt < tolerance ); icase++; #if ATLAS_HAVE_TRANS From 5b52a5393f2a5e33e9e6653b235bcedc6a1d568e Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Mar 2018 12:06:00 +0000 Subject: [PATCH 013/123] using the symmetry of Legendre polynomials works --- src/atlas/trans/localopt2/TransLocalopt2.cc | 144 +++++++++++++-- src/atlas/trans/localopt2/TransLocalopt2.h | 4 + src/tests/trans/test_transgeneral.cc | 187 +++++++++++++++++--- 3 files changed, 298 insertions(+), 37 deletions(-) diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index 45548ce12..745c6f065 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -9,6 +9,7 @@ */ #include "atlas/trans/localopt2/TransLocalopt2.h" +#include #include "atlas/array.h" #include "atlas/option.h" #include "atlas/parallel/mpi/mpi.h" @@ -39,6 +40,37 @@ size_t legendre_size( const size_t truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; } +int nlats_northernHemisphere( const int nlats ) { + return ceil( nlats / 2. 
/// Number of latitudes from the first one up to and including the centre one.
int nlats_northernHemisphere( const int nlats ) {
    // using ceil here should make it possible to have odd number of latitudes (with the centre latitude being the equator)
    return static_cast<int>( std::ceil( nlats / 2. ) );
}

/// Number of offsets jn in [0, truncation - m] with the requested parity:
/// even offsets (0, 2, 4, ...) if `symmetric`, odd offsets (1, 3, 5, ...) otherwise.
int num_n( const int truncation, const int m, const bool symmetric ) {
    const int parity_shift = symmetric ? 2 : 1;
    return ( truncation - m + parity_shift ) / 2;
}

/// Offsets jn in [0, truncation - m] with the requested parity, in ascending
/// order: {0, 2, 4, ...} if `symmetric`, {1, 3, 5, ...} otherwise.
///
/// The result has exactly num_n( truncation, m, symmetric ) entries. It is
/// filled by position, so every write is inside the vector by construction and
/// no bounds check is needed after the fact.
std::vector<int> n_indices( const int truncation, const int m, const bool symmetric ) {
    const int len = num_n( truncation, m, symmetric );
    const int jn0 = symmetric ? 0 : 1;
    std::vector<int> jns( len );
    for ( int i = 0; i < len; ++i ) {
        jns[i] = jn0 + 2 * i;
    }
    return jns;
}
util::Constants::degreesToRadians(); } for ( size_t j = 0; j < nlons; ++j ) { @@ -83,8 +119,37 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long // precomputations for Legendre polynomials: { ATLAS_TRACE( "opt2 precomp Legendre" ); - legendre_.resize( legendre_size( truncation_ + 1 ) * nlats ); - compute_legendre_polynomialsopt2( truncation_ + 1, nlats, lats.data(), legendre_.data() ); + legendre_.resize( legendre_size( truncation_ + 1 ) * nlatsNH ); + compute_legendre_polynomialsopt2( truncation_ + 1, nlatsNH, lats.data(), legendre_.data() ); + } + { + ATLAS_TRACE( "opt2 split Legendre" ); + int size_sym = 0; + int size_asym = 0; + legendre_sym_begin_.resize( truncation_ + 3 ); + legendre_asym_begin_.resize( truncation_ + 3 ); + legendre_sym_begin_[0] = 0; + legendre_asym_begin_[0] = 0; + for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { + size_sym += num_n( truncation_ + 1, jm, true ); + size_asym += num_n( truncation_ + 1, jm, false ); + legendre_sym_begin_[jm + 1] = size_sym; + legendre_asym_begin_[jm + 1] = size_asym; + } + legendre_sym_.resize( size_sym * nlatsNH ); + legendre_asym_.resize( size_asym * nlatsNH ); + int idx = 0, is = 0, ia = 0; + for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + for ( int jn = 0; jn <= truncation_ - jm + 1; jn++, idx++ ) { + if ( jn % 2 == 0 ) { legendre_sym_[is++] = legendre_[idx]; } + else { + legendre_asym_[ia++] = legendre_[idx]; + } + } + } + } + ASSERT( ia == size_asym * nlatsNH && is == size_sym * nlatsNH ); } // precomputations for Fourier transformations: @@ -192,19 +257,76 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel if ( grid::StructuredGrid g = grid_ ) { ATLAS_TRACE( "invtrans_uv structured opt2" ); int nlats = g.ny(); + int nlatsNH = nlats_northernHemisphere( nlats ); int size_fourier = nb_fields * 2 * g.ny(); std::vector scl_fourier( size_fourier * ( truncation + 1 ) ); // Legendre 
transform: { ATLAS_TRACE( "opt2 Legendre dgemm" ); - for ( int jm = 0; jm <= truncation; jm++ ) { + for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { +#if 1 // 0: no symmetry, 1: use symmetry + int size_sym = num_n( truncation_ + 1, jm, true ); + int size_asym = num_n( truncation_ + 1, jm, false ); + std::vector scalar_sym( 2 * nb_fields * size_sym, -1234. ); + std::vector scalar_asym( 2 * nb_fields * size_asym, -1234. ); + std::vector scl_fourier_sym( size_fourier ); + std::vector scl_fourier_asym( size_fourier ); + { + //ATLAS_TRACE( "opt2 Legendre split" ); + int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2; + for ( int jn = 0; jn <= truncation_ - jm + 1; jn++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + if ( jn % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } + else { + scalar_asym[ia++] = scalar_spectra[idx + ioff]; + } + } + } + } + ASSERT( ia == 2 * nb_fields * size_asym && is == 2 * nb_fields * size_sym ); + } + { + eckit::linalg::Matrix A( scalar_sym.data(), nb_fields * 2, size_sym ); + eckit::linalg::Matrix B( legendre_sym_.data() + legendre_sym_begin_[jm] * nlatsNH, size_sym, + nlatsNH ); + eckit::linalg::Matrix C( scl_fourier_sym.data(), nb_fields * 2, nlatsNH ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + if ( size_asym > 0 ) { + eckit::linalg::Matrix A( scalar_asym.data(), nb_fields * 2, size_asym ); + eckit::linalg::Matrix B( legendre_asym_.data() + legendre_asym_begin_[jm] * nlatsNH, size_asym, + nlatsNH ); + eckit::linalg::Matrix C( scl_fourier_asym.data(), nb_fields * 2, nlatsNH ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + { + //ATLAS_TRACE( "opt2 merge spheres" ); + // northern hemisphere: + int ioff = jm * size_fourier; + for ( int j = 0; j < 2 * nb_fields * nlatsNH; j++ ) { + scl_fourier[j + ioff] = scl_fourier_sym[j] + scl_fourier_asym[j]; + } + // southern hemisphere: + int idx = 0; + for ( 
int jlat = 0; jlat < nlatsNH; jlat++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + int pos = jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) ); + scl_fourier[pos + ioff] = scl_fourier_sym[idx] - scl_fourier_asym[idx]; + } + } + } + } + } +#else int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; eckit::linalg::Matrix A( eckit::linalg::Matrix( const_cast( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) ); eckit::linalg::Matrix B( legendre_.data() + noff * g.ny(), ns, g.ny() ); eckit::linalg::Matrix C( scl_fourier.data() + jm * size_fourier, nb_fields * 2, g.ny() ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); +#endif } } #if 0 // 1: better for small number of columns, large truncation; 0: better for large number of columns @@ -323,7 +445,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel } } } -} +} // namespace trans // -------------------------------------------------------------------------------------------------------------------- diff --git a/src/atlas/trans/localopt2/TransLocalopt2.h b/src/atlas/trans/localopt2/TransLocalopt2.h index 8f3de137c..92fb292f1 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.h +++ b/src/atlas/trans/localopt2/TransLocalopt2.h @@ -113,9 +113,13 @@ class TransLocalopt2 : public trans::TransImpl { int truncation_; bool precompute_; mutable std::vector legendre_; + mutable std::vector legendre_sym_; + mutable std::vector legendre_asym_; mutable std::vector fourier_; mutable std::vector fouriertp_; std::vector legendre_begin_; + std::vector legendre_sym_begin_; + std::vector legendre_asym_begin_; }; //----------------------------------------------------------------------------- diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 5838bd796..a3c5285fe 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ 
-712,7 +712,7 @@ CASE( "test_trans_vordiv_with_translib" ) { double tolerance = 1.e-13; // Grid: (Adjust the following line if the test takes too long!) - Grid g( "F320" ); + Grid g( "F120" ); grid::StructuredGrid gs( g ); int ndgl = gs.ny(); @@ -721,8 +721,8 @@ CASE( "test_trans_vordiv_with_translib" ) { #if ATLAS_HAVE_TRANS trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) ); #endif - trans::Trans transLocal( g, trc, util::Config( "type", "localopt" ) ); - trans::Trans transLocalopt( g, trc, util::Config( "type", "localopt2" ) ); + trans::Trans transLocal1( g, trc, util::Config( "type", "localopt" ) ); + trans::Trans transLocal2( g, trc, util::Config( "type", "localopt2" ) ); functionspace::Spectral spectral( trc ); functionspace::StructuredColumns gridpoints( g ); @@ -734,8 +734,8 @@ CASE( "test_trans_vordiv_with_translib" ) { std::vector div( 2 * N * nb_vordiv ); std::vector rspecg( 2 * N ); std::vector gp( nb_all * g.size() ); - std::vector rgp( nb_all * g.size() ); - std::vector rgpopt( nb_all * g.size() ); + std::vector rgp1( nb_all * g.size() ); + std::vector rgp2( nb_all * g.size() ); std::vector rgp_analytic( g.size() ); int icase = 0; @@ -769,9 +769,9 @@ CASE( "test_trans_vordiv_with_translib" ) { if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.; for ( int j = 0; j < nb_all * g.size(); j++ ) { - gp[j] = 0.; - rgp[j] = 0.; - rgpopt[j] = 0.; + gp[j] = 0.; + rgp1[j] = 0.; + rgp2[j] = 0.; } for ( int j = 0; j < g.size(); j++ ) { rgp_analytic[j] = 0.; @@ -780,30 +780,30 @@ CASE( "test_trans_vordiv_with_translib" ) { spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(), rgp_analytic.data(), ivar_in, ivar_out ); - EXPECT_NO_THROW( transLocal.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), - div.data(), rgp.data() ) ); + EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), + div.data(), rgp1.data() ) ); - EXPECT_NO_THROW( transLocalopt.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), - 
div.data(), rgpopt.data() ) ); + EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), + div.data(), rgp2.data() ) ); int pos = ( ivar_out * nb_vordiv + jfld ); - double rms_gen = - compute_rms( g.size(), rgpopt.data() + pos * g.size(), rgp_analytic.data() ); + double rms_gen1 = + compute_rms( g.size(), rgp1.data() + pos * g.size(), rgp_analytic.data() ); - double rms_genopt = - compute_rms( g.size(), rgpopt.data() + pos * g.size(), rgp_analytic.data() ); + double rms_gen2 = + compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() ); - if ( !( rms_gen < tolerance ) || !( rms_genopt < tolerance ) ) { + if ( !( rms_gen1 < tolerance ) || !( rms_gen2 < tolerance ) ) { Log::info() << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; - ATLAS_DEBUG_VAR( rms_gen ); - ATLAS_DEBUG_VAR( rms_genopt ); + ATLAS_DEBUG_VAR( rms_gen1 ); + ATLAS_DEBUG_VAR( rms_gen2 ); ATLAS_DEBUG_VAR( tolerance ); } - EXPECT( rms_gen < tolerance ); - EXPECT( rms_genopt < tolerance ); + EXPECT( rms_gen1 < tolerance ); + EXPECT( rms_gen2 < tolerance ); icase++; #if ATLAS_HAVE_TRANS @@ -812,21 +812,156 @@ CASE( "test_trans_vordiv_with_translib" ) { double rms_trans = compute_rms( g.size(), gp.data() + pos * g.size(), rgp_analytic.data() ); double rms_diff = - compute_rms( g.size(), rgpopt.data() + pos * g.size(), gp.data() + pos * g.size() ); + compute_rms( g.size(), rgp1.data() + pos * g.size(), gp.data() + pos * g.size() ); EXPECT( rms_trans < tolerance ); if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) { Log::info() << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; - ATLAS_DEBUG_VAR( rms_gen ); - ATLAS_DEBUG_VAR( rms_genopt ); + ATLAS_DEBUG_VAR( rms_gen1 ); + ATLAS_DEBUG_VAR( rms_gen2 ); ATLAS_DEBUG_VAR( rms_trans ); ATLAS_DEBUG_VAR( rms_diff 
); ATLAS_DEBUG_VAR( tolerance ); } #endif - if ( icase > 2 ) EXPECT( false ); - Log::info() << "test " << icase << std::endl; + EXPECT( icase < 300 ); + } + k++; + } + } + } + } + } + } + Log::info() << "Vordiv+scalar comparison with trans: all " << icase << " cases successfully passed!" << std::endl; +} + +//----------------------------------------------------------------------------- + +CASE( "test_trans_hires" ) { + Log::info() << "test_trans_hires" << std::endl; + // test transgeneral by comparing its result with the trans library + // this test is based on the test_nomesh case in test_trans.cc + + std::ostream& out = Log::info(); + double tolerance = 1.e-13; + + // Grid: (Adjust the following line if the test takes too long!) + Grid g( "F128" ); + + grid::StructuredGrid gs( g ); + int ndgl = gs.ny(); + //int trc = ndgl - 1; // linear + int trc = ndgl / 2. - 1; // cubic +#if 0 + trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) ); +#endif + trans::Trans transLocal1( g, trc, util::Config( "type", "localopt2" ) ); + //trans::Trans transLocal2( g, trc, util::Config( "type", "localopt2" ) ); + + functionspace::Spectral spectral( trc ); + functionspace::StructuredColumns gridpoints( g ); + + int nb_scalar = 1, nb_vordiv = 0; + int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; + std::vector sp( 2 * N * nb_scalar ); + std::vector vor( 2 * N * nb_vordiv ); + std::vector div( 2 * N * nb_vordiv ); + std::vector rspecg( 2 * N ); + std::vector gp( nb_all * g.size() ); + std::vector rgp1( nb_all * g.size() ); + //std::vector rgp2( nb_all * g.size() ); + std::vector rgp_analytic( g.size() ); + + int icase = 0; + for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) { // vorticity, divergence, scalar + for ( int ivar_out = 2; ivar_out < 3; ivar_out++ ) { // u, v, scalar + int nb_fld = 1; + if ( ivar_out == 2 ) { + tolerance = 1.e-13; + nb_fld = nb_scalar; + } + else { + tolerance = 2.e-6; + nb_fld = nb_vordiv; + } + for ( int jfld = 0; jfld < 
nb_fld; jfld++ ) { // multiple fields + int k = 0; + for ( int m = 0; m <= trc; m++ ) { // zonal wavenumber + for ( int n = m; n <= trc; n++ ) { // total wavenumber + for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part + + if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. ) { + for ( int j = 0; j < 2 * N * nb_scalar; j++ ) { + sp[j] = 0.; + } + for ( int j = 0; j < 2 * N * nb_vordiv; j++ ) { + vor[j] = 0.; + div[j] = 0.; + } + if ( ivar_in == 0 ) vor[k * nb_vordiv + jfld] = 1.; + if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.; + if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.; + + for ( int j = 0; j < nb_all * g.size(); j++ ) { + gp[j] = 0.; + rgp1[j] = 0.; + //rgp2[j] = 0.; + } + for ( int j = 0; j < g.size(); j++ ) { + rgp_analytic[j] = 0.; + } + + spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(), + rgp_analytic.data(), ivar_in, ivar_out ); + + EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), + div.data(), rgp1.data() ) ); + + //EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), + // div.data(), rgp2.data() ) ); + + int pos = ( ivar_out * nb_vordiv + jfld ); + + double rms_gen1 = + compute_rms( g.size(), rgp1.data() + pos * g.size(), rgp_analytic.data() ); + + //double rms_gen2 = + // compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() ); + + if ( !( rms_gen1 < tolerance ) ) { // || !( rms_gen2 < tolerance ) ) { + Log::info() + << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out + << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; + ATLAS_DEBUG_VAR( rms_gen1 ); + //ATLAS_DEBUG_VAR( rms_gen2 ); + ATLAS_DEBUG_VAR( tolerance ); + } + EXPECT( rms_gen1 < tolerance ); + //EXPECT( rms_gen2 < tolerance ); + icase++; + +#if 0 + EXPECT_NO_THROW( transIFS.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), + div.data(), gp.data() ) ); + double rms_trans = + 
compute_rms( g.size(), gp.data() + pos * g.size(), rgp_analytic.data() ); + double rms_diff = + compute_rms( g.size(), rgp1.data() + pos * g.size(), gp.data() + pos * g.size() ); + EXPECT( rms_trans < tolerance ); + if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) { + Log::info() + << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out + << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; + ATLAS_DEBUG_VAR( rms_gen1 ); + //ATLAS_DEBUG_VAR( rms_gen2 ); + ATLAS_DEBUG_VAR( rms_trans ); + ATLAS_DEBUG_VAR( rms_diff ); + ATLAS_DEBUG_VAR( tolerance ); + } +#endif + EXPECT( icase < 300 ); } k++; } From 7663d891d6aecb7dfbe230dd6434cca3ec26651f Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Mar 2018 17:56:30 +0000 Subject: [PATCH 014/123] fftw is working. With lots of debug output --- CMakeLists.txt | 6 ++ src/CMakeLists.txt | 6 ++ src/atlas/CMakeLists.txt | 4 +- src/atlas/library/defines.h.in | 1 + src/atlas/trans/localopt/TransLocalopt.cc | 2 +- src/atlas/trans/localopt2/TransLocalopt2.cc | 102 +++++++++++++++++++- src/tests/trans/test_transgeneral.cc | 25 +++-- 7 files changed, 130 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cf31981d9..787c96898 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,6 +99,12 @@ else() ecbuild_enable_ompstubs() endif() +### FFTW ... + +ecbuild_add_option( FEATURE FFTW + DESCRIPTION "Support for fftw" + REQUIRED_PACKAGES "FFTW COMPONENTS double" ) + ### trans ... 
ecbuild_add_option( FEATURE TRANS diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2a48b5b55..98a5e1dd0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -36,6 +36,12 @@ else() set( ATLAS_HAVE_TRANS 0 ) endif() +if( ATLAS_HAVE_FFTW ) + set( ATLAS_HAVE_FFTW 1 ) +else() + set( ATLAS_HAVE_FFTW 0 ) +endif() + if( ATLAS_HAVE_BOUNDSCHECKING ) set( ATLAS_HAVE_BOUNDSCHECKING 1 ) else() diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt index 8cb63ed1f..b55db3e40 100644 --- a/src/atlas/CMakeLists.txt +++ b/src/atlas/CMakeLists.txt @@ -576,6 +576,7 @@ ecbuild_add_library( TARGET atlas "${CGAL_INCLUDE_DIRS}" "${TRANSI_INCLUDE_DIRS}" "${MPI_CXX_INCLUDE_DIRS}" + "${FFTW_INCLUDES}" LIBS eckit_geometry eckit_linalg @@ -584,9 +585,10 @@ ecbuild_add_library( TARGET atlas "${CGAL_LIBRARIES}" "${TRANSI_LIBRARIES}" "${FCKIT_LIBRARIES}" + "${FFTW_LIBRARIES}" DEFINITIONS ${ATLAS_DEFINITIONS} - ) +) if( ATLAS_HAVE_GRIDTOOLS_STORAGE ) target_link_libraries( atlas gridtools::storage ) diff --git a/src/atlas/library/defines.h.in b/src/atlas/library/defines.h.in index 22ba76c0c..e644fc73e 100644 --- a/src/atlas/library/defines.h.in +++ b/src/atlas/library/defines.h.in @@ -10,6 +10,7 @@ #define ATLAS_HAVE_TESSELATION @ATLAS_HAVE_TESSELATION@ #define ATLAS_HAVE_FORTRAN @ATLAS_HAVE_FORTRAN@ #define ATLAS_HAVE_EIGEN @ATLAS_HAVE_EIGEN@ +#define ATLAS_HAVE_FFTW @ATLAS_HAVE_FFTW@ #define ATLAS_BITS_GLOBAL @ATLAS_BITS_GLOBAL@ #define ATLAS_ARRAYVIEW_BOUNDS_CHECKING @ATLAS_HAVE_BOUNDSCHECKING@ #define ATLAS_INDEXVIEW_BOUNDS_CHECKING @ATLAS_HAVE_BOUNDSCHECKING@ diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index 907be3e24..7e157d80c 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -206,7 +206,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } } -#if 1 // 1: better for 
small number of columns, large truncation; 0: better for large number of columns +#if 0 // 1: better for small number of columns, large truncation; 0: better for large number of columns // Transposition in Fourier space: std::vector scl_fourier_tp( size_fourier * ( truncation + 1 ) ); diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index 745c6f065..1ccaffa5c 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -23,6 +23,9 @@ #include "atlas/util/Constants.h" #include "eckit/linalg/LinearAlgebra.h" #include "eckit/linalg/Matrix.h" +#if ATLAS_HAVE_FFTW +#include +#endif namespace atlas { namespace trans { @@ -82,6 +85,11 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long grid_( grid ), truncation_( truncation ), precompute_( config.getBool( "precompute", true ) ) { +#if ATLAS_HAVE_FFTW + Log::info() << "Atlas has FFTW" << std::endl; +#else + Log::info() << "Atlas has no FFTW" << std::endl; +#endif ATLAS_TRACE( "Precompute legendre opt2" ); int nlats = 0; int nlons = 0; @@ -257,6 +265,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel if ( grid::StructuredGrid g = grid_ ) { ATLAS_TRACE( "invtrans_uv structured opt2" ); int nlats = g.ny(); + int nlons = g.nxmax(); int nlatsNH = nlats_northernHemisphere( nlats ); int size_fourier = nb_fields * 2 * g.ny(); std::vector scl_fourier( size_fourier * ( truncation + 1 ) ); @@ -265,7 +274,8 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel { ATLAS_TRACE( "opt2 Legendre dgemm" ); for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { -#if 1 // 0: no symmetry, 1: use symmetry +#if 1 // 0: no symmetry, 1: use symmetry \ + // TODO: 0 is currently not working because it requires all latitudes to be included in legendre_ (which is currently not done) int size_sym = num_n( truncation_ + 1, jm, true ); int size_asym = num_n( 
truncation_ + 1, jm, false ); std::vector scalar_sym( 2 * nb_fields * size_sym, -1234. ); @@ -329,6 +339,95 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel #endif } } +#if ATLAS_HAVE_FFTW + std::vector scl_fourier_tp( size_fourier * ( truncation + 1 ) ); + { + // Transposition in Fourier space: + { + ATLAS_TRACE( "opt2 transposition in Fourier" ); + int idx = 0; + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int pos_tp = imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + g.ny() * ( jfld ) ) ); + //int pos = jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) ); + scl_fourier_tp[pos_tp] = scl_fourier[idx++]; // = scl_fourier[pos] + } + } + } + } + } + + // Fourier transformation: + std::vector gp_opt2( nb_fields * grid_.size(), 0. ); + { + ATLAS_TRACE( "opt2 Fourier dgemm" ); + eckit::linalg::Matrix A( fouriertp_.data(), g.nxmax(), ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( scl_fourier_tp.data(), ( truncation_ + 1 ) * 2, nb_fields * g.ny() ); + eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + } + { + auto position = [&]( int jfld, int imag, int jlat, int jm ) { + return jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) ); + }; + + { + ATLAS_TRACE( "opt2 transposition in Fourier for FFTW" ); + int num_complex = ( nlons / 2 ) + 1; + fftw_complex* in = fftw_alloc_complex( num_complex ); + double* out = fftw_alloc_real( nlons ); + fftw_plan plan = fftw_plan_dft_c2r_1d( nlons, in, out, FFTW_ESTIMATE ); + int idx0 = 0, idx1 = 0; + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + Log::info() << "scl_fourier_tp: " << std::endl; + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + for ( int imag = 0; imag < 2; imag++, idx1++ ) { + 
Log::info() << scl_fourier_tp[idx1] << " "; + } + } + Log::info() << std::endl; + for ( int jm = 0; jm < num_complex; jm++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + if ( jm <= truncation_ ) { + in[jm][imag] = scl_fourier[position( jfld, imag, jlat, jm )] / 2.; + } + else { + in[jm][imag] = 0.; + } + } + } + in[0][0] *= 2.; + Log::info() << "fft:in: " << std::endl; + for ( int jm = 0; jm < num_complex; jm++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + Log::info() << in[jm][imag] << " "; + } + } + Log::info() << std::endl; + fftw_execute( plan ); + Log::info() << "fft:out: " << std::endl; + for ( int jlon = 0; jlon < nlons; jlon++ ) { + Log::info() << out[jlon] << " "; + } + Log::info() << std::endl; + Log::info() << "gp_fields: old: " << std::endl; + for ( int jlon = 0; jlon < nlons; jlon++, idx0++ ) { + Log::info() << gp_fields[idx0] << " "; + gp_fields[idx0] = out[jlon]; + } + Log::info() << std::endl; + } + } + fftw_destroy_plan( plan ); + fftw_free( in ); + fftw_free( out ); + } + } +#else #if 0 // 1: better for small number of columns, large truncation; 0: better for large number of columns // Transposition in Fourier space: @@ -402,6 +501,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } +#endif #endif // Computing u,v from U,V: { diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index a3c5285fe..2135ca994 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -712,7 +712,7 @@ CASE( "test_trans_vordiv_with_translib" ) { double tolerance = 1.e-13; // Grid: (Adjust the following line if the test takes too long!) 
- Grid g( "F120" ); + Grid g( "F3" ); grid::StructuredGrid gs( g ); int ndgl = gs.ny(); @@ -814,16 +814,15 @@ CASE( "test_trans_vordiv_with_translib" ) { double rms_diff = compute_rms( g.size(), rgp1.data() + pos * g.size(), gp.data() + pos * g.size() ); EXPECT( rms_trans < tolerance ); - if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) { - Log::info() - << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out - << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; - ATLAS_DEBUG_VAR( rms_gen1 ); - ATLAS_DEBUG_VAR( rms_gen2 ); - ATLAS_DEBUG_VAR( rms_trans ); - ATLAS_DEBUG_VAR( rms_diff ); - ATLAS_DEBUG_VAR( tolerance ); - } + //if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) { + Log::info() << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out + << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; + ATLAS_DEBUG_VAR( rms_gen1 ); + ATLAS_DEBUG_VAR( rms_gen2 ); + ATLAS_DEBUG_VAR( rms_trans ); + ATLAS_DEBUG_VAR( rms_diff ); + ATLAS_DEBUG_VAR( tolerance ); + //} #endif EXPECT( icase < 300 ); } @@ -838,7 +837,7 @@ CASE( "test_trans_vordiv_with_translib" ) { } //----------------------------------------------------------------------------- - +#if 0 CASE( "test_trans_hires" ) { Log::info() << "test_trans_hires" << std::endl; // test transgeneral by comparing its result with the trans library @@ -972,7 +971,7 @@ CASE( "test_trans_hires" ) { } Log::info() << "Vordiv+scalar comparison with trans: all " << icase << " cases successfully passed!" 
<< std::endl; } - +#endif //----------------------------------------------------------------------------- CASE( "test_trans_invtrans" ) { From e3c8cacb9e1f8bc9e4e2e903163e0f44327fd7c3 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Mar 2018 18:42:59 +0000 Subject: [PATCH 015/123] fftw works with all latitudes in one execution --- src/atlas/trans/localopt2/TransLocalopt2.cc | 54 ++++++++++++--------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index 1ccaffa5c..9f3c4f92f 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -373,58 +373,68 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel auto position = [&]( int jfld, int imag, int jlat, int jm ) { return jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) ); }; + auto factor = [&]( int jm ) { + if ( jm > 0 ) { return 2.; } + else { + return 1.; + } + }; { ATLAS_TRACE( "opt2 transposition in Fourier for FFTW" ); - int num_complex = ( nlons / 2 ) + 1; - fftw_complex* in = fftw_alloc_complex( num_complex ); - double* out = fftw_alloc_real( nlons ); - fftw_plan plan = fftw_plan_dft_c2r_1d( nlons, in, out, FFTW_ESTIMATE ); - int idx0 = 0, idx1 = 0; + int num_complex = ( nlons / 2 ) + 1; + fftw_complex* fft_in = fftw_alloc_complex( nlats * num_complex ); + double* fft_out = fftw_alloc_real( nlats * nlons ); + fftw_plan plan = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in, NULL, 1, num_complex, fft_out, + NULL, 1, nlons, FFTW_ESTIMATE ); for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = 0; for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - Log::info() << "scl_fourier_tp: " << std::endl; - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - for ( int imag = 0; imag < 2; imag++, idx1++ ) { - Log::info() << scl_fourier_tp[idx1] << " "; - } - } - Log::info() << std::endl; - for ( int jm = 0; jm < 
num_complex; jm++ ) { + for ( int jm = 0; jm < num_complex; jm++, idx++ ) { for ( int imag = 0; imag < 2; imag++ ) { if ( jm <= truncation_ ) { - in[jm][imag] = scl_fourier[position( jfld, imag, jlat, jm )] / 2.; + fft_in[idx][imag] = + scl_fourier[position( jfld, imag, jlat, jm )] / factor( jm ); } else { - in[jm][imag] = 0.; + fft_in[idx][imag] = 0.; } } } - in[0][0] *= 2.; + } + fftw_execute( plan ); + int idx0 = 0, idx1 = 0; + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + Log::info() << "scl_fourier_tp: " << std::endl; + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + for ( int imag = 0; imag < 2; imag++, idx1++ ) { + Log::info() << scl_fourier_tp[idx1] << " "; + } + } + Log::info() << std::endl; Log::info() << "fft:in: " << std::endl; for ( int jm = 0; jm < num_complex; jm++ ) { for ( int imag = 0; imag < 2; imag++ ) { - Log::info() << in[jm][imag] << " "; + Log::info() << fft_in[jm + num_complex * jlat][imag] << " "; } } Log::info() << std::endl; - fftw_execute( plan ); Log::info() << "fft:out: " << std::endl; for ( int jlon = 0; jlon < nlons; jlon++ ) { - Log::info() << out[jlon] << " "; + Log::info() << fft_out[jlon + nlons * jlat] << " "; } Log::info() << std::endl; Log::info() << "gp_fields: old: " << std::endl; for ( int jlon = 0; jlon < nlons; jlon++, idx0++ ) { Log::info() << gp_fields[idx0] << " "; - gp_fields[idx0] = out[jlon]; + //gp_fields[idx0] = fft_out[jlon + nlons * jlat]; } Log::info() << std::endl; } } fftw_destroy_plan( plan ); - fftw_free( in ); - fftw_free( out ); + fftw_free( fft_in ); + fftw_free( fft_out ); } } #else From 3800fa0c88c3e2268476f4070bcef8799aab524f Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Mar 2018 19:19:52 +0000 Subject: [PATCH 016/123] opt2 is now reusing the FFTW plan. 
Opt is now using symmetry (no FFTW) --- src/atlas/trans/localopt/TransLocalopt.cc | 146 ++++++++++++++++++-- src/atlas/trans/localopt/TransLocalopt.h | 4 + src/atlas/trans/localopt2/TransLocalopt2.cc | 101 ++++---------- src/atlas/trans/localopt2/TransLocalopt2.h | 9 ++ src/tests/trans/test_transgeneral.cc | 21 +-- 5 files changed, 183 insertions(+), 98 deletions(-) diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index 7e157d80c..d5281db77 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -9,6 +9,7 @@ */ #include "atlas/trans/localopt/TransLocalopt.h" +#include #include "atlas/array.h" #include "atlas/option.h" #include "atlas/parallel/mpi/mpi.h" @@ -39,6 +40,37 @@ size_t legendre_size( const size_t truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; } +int nlats_northernHemisphere( const int nlats ) { + return ceil( nlats / 2. ); + // using ceil here should make it possible to have odd number of latitudes (with the centre latitude being the equator) +} + +int num_n( const int truncation, const int m, const bool symmetric ) { + int len = 0; + if ( symmetric ) { len = ( truncation - m + 2 ) / 2; } + else { + len = ( truncation - m + 1 ) / 2; + } + return len; +} + +std::vector n_indices( const int truncation, const int m, const bool symmetric ) { + int len = num_n( truncation, m, symmetric ), jn0 = 0; + if ( !symmetric ) { jn0 = 1; } + std::vector jns( len ); + int ia = 0, id = len - 1; + for ( int jn = jn0; jn <= truncation - m; jn += 2, ia++, id-- ) { +#if 1 // 1: ascending, 0: descending + int idx = ia; +#else + int idx = id; +#endif + jns[idx] = jn; + ASSERT( idx < len && idx >= 0 ); + } + return jns; +} + } // namespace // -------------------------------------------------------------------------------------------------------------------- @@ -51,22 +83,26 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t 
truncation_( truncation ), precompute_( config.getBool( "precompute", true ) ) { ATLAS_TRACE( "Precompute legendre opt" ); - int nlats, nlons; + int nlats = 0; + int nlons = 0; + int nlatsNH = nlats_northernHemisphere( nlats ); if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { grid::StructuredGrid g( grid_ ); - nlats = g.ny(); - nlons = g.nxmax(); + nlats = g.ny(); + nlons = g.nxmax(); + nlatsNH = nlats_northernHemisphere( nlats ); } else { - nlats = grid_.size(); - nlons = grid_.size(); + nlats = grid_.size(); + nlons = grid_.size(); + nlatsNH = nlats; } - std::vector lats( nlats ); + std::vector lats( nlatsNH ); std::vector lons( nlons ); if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { grid::StructuredGrid g( grid_ ); // TODO: remove legendre_begin and legendre_data (only legendre_ should be needed) - for ( size_t j = 0; j < nlats; ++j ) { + for ( size_t j = 0; j < nlatsNH; ++j ) { lats[j] = g.y( j ) * util::Constants::degreesToRadians(); } for ( size_t j = 0; j < nlons; ++j ) { @@ -83,8 +119,37 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t // precomputations for Legendre polynomials: { ATLAS_TRACE( "opt precomp Legendre" ); - legendre_.resize( legendre_size( truncation_ + 1 ) * nlats ); - compute_legendre_polynomialsopt( truncation_ + 1, nlats, lats.data(), legendre_.data() ); + legendre_.resize( legendre_size( truncation_ + 1 ) * nlatsNH ); + compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre_.data() ); + } + { + ATLAS_TRACE( "opt split Legendre" ); + int size_sym = 0; + int size_asym = 0; + legendre_sym_begin_.resize( truncation_ + 3 ); + legendre_asym_begin_.resize( truncation_ + 3 ); + legendre_sym_begin_[0] = 0; + legendre_asym_begin_[0] = 0; + for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { + size_sym += num_n( truncation_ + 1, jm, true ); + size_asym += num_n( truncation_ + 1, jm, false ); + legendre_sym_begin_[jm + 1] = size_sym; + 
legendre_asym_begin_[jm + 1] = size_asym; + } + legendre_sym_.resize( size_sym * nlatsNH ); + legendre_asym_.resize( size_asym * nlatsNH ); + int idx = 0, is = 0, ia = 0; + for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + for ( int jn = 0; jn <= truncation_ - jm + 1; jn++, idx++ ) { + if ( jn % 2 == 0 ) { legendre_sym_[is++] = legendre_[idx]; } + else { + legendre_asym_[ia++] = legendre_[idx]; + } + } + } + } + ASSERT( ia == size_asym * nlatsNH && is == size_sym * nlatsNH ); } // precomputations for Fourier transformations: @@ -191,19 +256,78 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field if ( grid::StructuredGrid g = grid_ ) { ATLAS_TRACE( "invtrans_uv structured opt" ); int nlats = g.ny(); + int nlons = g.nxmax(); + int nlatsNH = nlats_northernHemisphere( nlats ); int size_fourier = nb_fields * 2 * g.ny(); std::vector scl_fourier( size_fourier * ( truncation + 1 ) ); // Legendre transform: { ATLAS_TRACE( "opt Legendre dgemm" ); - for ( int jm = 0; jm <= truncation; jm++ ) { + for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { +#if 1 // 0: no symmetry, 1: use symmetry \ + // TODO: 0 is currently not working because it requires all latitudes to be included in legendre_ (which is currently not done) + int size_sym = num_n( truncation_ + 1, jm, true ); + int size_asym = num_n( truncation_ + 1, jm, false ); + std::vector scalar_sym( 2 * nb_fields * size_sym, -1234. ); + std::vector scalar_asym( 2 * nb_fields * size_asym, -1234. 
); + std::vector scl_fourier_sym( size_fourier ); + std::vector scl_fourier_asym( size_fourier ); + { + //ATLAS_TRACE( "opt Legendre split" ); + int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2; + for ( int jn = 0; jn <= truncation_ - jm + 1; jn++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + if ( jn % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } + else { + scalar_asym[ia++] = scalar_spectra[idx + ioff]; + } + } + } + } + ASSERT( ia == 2 * nb_fields * size_asym && is == 2 * nb_fields * size_sym ); + } + { + eckit::linalg::Matrix A( scalar_sym.data(), nb_fields * 2, size_sym ); + eckit::linalg::Matrix B( legendre_sym_.data() + legendre_sym_begin_[jm] * nlatsNH, size_sym, + nlatsNH ); + eckit::linalg::Matrix C( scl_fourier_sym.data(), nb_fields * 2, nlatsNH ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + if ( size_asym > 0 ) { + eckit::linalg::Matrix A( scalar_asym.data(), nb_fields * 2, size_asym ); + eckit::linalg::Matrix B( legendre_asym_.data() + legendre_asym_begin_[jm] * nlatsNH, size_asym, + nlatsNH ); + eckit::linalg::Matrix C( scl_fourier_asym.data(), nb_fields * 2, nlatsNH ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + { + //ATLAS_TRACE( "opt merge spheres" ); + // northern hemisphere: + int ioff = jm * size_fourier; + for ( int j = 0; j < 2 * nb_fields * nlatsNH; j++ ) { + scl_fourier[j + ioff] = scl_fourier_sym[j] + scl_fourier_asym[j]; + } + // southern hemisphere: + int idx = 0; + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + int pos = jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) ); + scl_fourier[pos + ioff] = scl_fourier_sym[idx] - scl_fourier_asym[idx]; + } + } + } + } + } +#else int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; eckit::linalg::Matrix A( 
eckit::linalg::Matrix( const_cast( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) ); eckit::linalg::Matrix B( legendre_.data() + noff * g.ny(), ns, g.ny() ); eckit::linalg::Matrix C( scl_fourier.data() + jm * size_fourier, nb_fields * 2, g.ny() ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); +#endif } } #if 0 // 1: better for small number of columns, large truncation; 0: better for large number of columns @@ -322,7 +446,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field } } } -} +} // namespace trans // -------------------------------------------------------------------------------------------------------------------- diff --git a/src/atlas/trans/localopt/TransLocalopt.h b/src/atlas/trans/localopt/TransLocalopt.h index ff77db4a9..dca01cd2f 100644 --- a/src/atlas/trans/localopt/TransLocalopt.h +++ b/src/atlas/trans/localopt/TransLocalopt.h @@ -112,9 +112,13 @@ class TransLocalopt : public trans::TransImpl { int truncation_; bool precompute_; mutable std::vector legendre_; + mutable std::vector legendre_sym_; + mutable std::vector legendre_asym_; mutable std::vector fourier_; mutable std::vector fouriertp_; std::vector legendre_begin_; + std::vector legendre_sym_begin_; + std::vector legendre_asym_begin_; }; //----------------------------------------------------------------------------- diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index 9f3c4f92f..3f4edafb2 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -23,9 +23,6 @@ #include "atlas/util/Constants.h" #include "eckit/linalg/LinearAlgebra.h" #include "eckit/linalg/Matrix.h" -#if ATLAS_HAVE_FFTW -#include -#endif namespace atlas { namespace trans { @@ -85,11 +82,6 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long grid_( grid ), truncation_( truncation ), precompute_( config.getBool( "precompute", true ) ) { 
-#if ATLAS_HAVE_FFTW - Log::info() << "Atlas has FFTW" << std::endl; -#else - Log::info() << "Atlas has no FFTW" << std::endl; -#endif ATLAS_TRACE( "Precompute legendre opt2" ); int nlats = 0; int nlons = 0; @@ -185,6 +177,16 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long } } } +#if ATLAS_HAVE_FFTW + { + ATLAS_TRACE( "opt2 precomp FFTW" ); + int num_complex = ( nlons / 2 ) + 1; + fft_in_ = fftw_alloc_complex( nlats * num_complex ); + fft_out_ = fftw_alloc_real( nlats * nlons ); + plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons, + FFTW_ESTIMATE ); + } +#endif } // -------------------------------------------------------------------------------------------------------------------- @@ -194,7 +196,13 @@ TransLocalopt2::TransLocalopt2( const Grid& grid, const long truncation, const e // -------------------------------------------------------------------------------------------------------------------- -TransLocalopt2::~TransLocalopt2() {} +TransLocalopt2::~TransLocalopt2() { +#if ATLAS_HAVE_FFTW + fftw_destroy_plan( plan_ ); + fftw_free( fft_in_ ); + fftw_free( fft_out_ ); +#endif +} // -------------------------------------------------------------------------------------------------------------------- @@ -340,35 +348,6 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel } } #if ATLAS_HAVE_FFTW - std::vector scl_fourier_tp( size_fourier * ( truncation + 1 ) ); - { - // Transposition in Fourier space: - { - ATLAS_TRACE( "opt2 transposition in Fourier" ); - int idx = 0; - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int imag = 0; imag < 2; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int pos_tp = imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + g.ny() * ( jfld ) ) ); - //int pos = jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) ); - scl_fourier_tp[pos_tp] = 
scl_fourier[idx++]; // = scl_fourier[pos] - } - } - } - } - } - - // Fourier transformation: - std::vector gp_opt2( nb_fields * grid_.size(), 0. ); - { - ATLAS_TRACE( "opt2 Fourier dgemm" ); - eckit::linalg::Matrix A( fouriertp_.data(), g.nxmax(), ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( scl_fourier_tp.data(), ( truncation_ + 1 ) * 2, nb_fields * g.ny() ); - eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - } { auto position = [&]( int jfld, int imag, int jlat, int jm ) { return jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) ); @@ -380,61 +359,29 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel } }; + int num_complex = ( nlons / 2 ) + 1; { - ATLAS_TRACE( "opt2 transposition in Fourier for FFTW" ); - int num_complex = ( nlons / 2 ) + 1; - fftw_complex* fft_in = fftw_alloc_complex( nlats * num_complex ); - double* fft_out = fftw_alloc_real( nlats * nlons ); - fftw_plan plan = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in, NULL, 1, num_complex, fft_out, - NULL, 1, nlons, FFTW_ESTIMATE ); + ATLAS_TRACE( "opt2 FFTW" ); for ( int jfld = 0; jfld < nb_fields; jfld++ ) { int idx = 0; for ( int jlat = 0; jlat < g.ny(); jlat++ ) { for ( int jm = 0; jm < num_complex; jm++, idx++ ) { for ( int imag = 0; imag < 2; imag++ ) { if ( jm <= truncation_ ) { - fft_in[idx][imag] = + fft_in_[idx][imag] = scl_fourier[position( jfld, imag, jlat, jm )] / factor( jm ); } else { - fft_in[idx][imag] = 0.; + fft_in_[idx][imag] = 0.; } } } } - fftw_execute( plan ); - int idx0 = 0, idx1 = 0; - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - Log::info() << "scl_fourier_tp: " << std::endl; - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - for ( int imag = 0; imag < 2; imag++, idx1++ ) { - Log::info() << scl_fourier_tp[idx1] << " "; - } - } - Log::info() << std::endl; - Log::info() << "fft:in: " << std::endl; - for ( int jm = 0; jm < num_complex; jm++ 
) { - for ( int imag = 0; imag < 2; imag++ ) { - Log::info() << fft_in[jm + num_complex * jlat][imag] << " "; - } - } - Log::info() << std::endl; - Log::info() << "fft:out: " << std::endl; - for ( int jlon = 0; jlon < nlons; jlon++ ) { - Log::info() << fft_out[jlon + nlons * jlat] << " "; - } - Log::info() << std::endl; - Log::info() << "gp_fields: old: " << std::endl; - for ( int jlon = 0; jlon < nlons; jlon++, idx0++ ) { - Log::info() << gp_fields[idx0] << " "; - //gp_fields[idx0] = fft_out[jlon + nlons * jlat]; - } - Log::info() << std::endl; + fftw_execute( plan_ ); + for ( int j = 0; j < nlats * nlons; j++ ) { + gp_fields[j + jfld * nlats * nlons] = fft_out_[j]; } } - fftw_destroy_plan( plan ); - fftw_free( fft_in ); - fftw_free( fft_out ); } } #else diff --git a/src/atlas/trans/localopt2/TransLocalopt2.h b/src/atlas/trans/localopt2/TransLocalopt2.h index 92fb292f1..0cec4e84a 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.h +++ b/src/atlas/trans/localopt2/TransLocalopt2.h @@ -12,8 +12,12 @@ #include +#include "atlas/array.h" #include "atlas/grid/Grid.h" #include "atlas/trans/Trans.h" +#if ATLAS_HAVE_FFTW +#include +#endif //----------------------------------------------------------------------------- // Forward declarations @@ -120,6 +124,11 @@ class TransLocalopt2 : public trans::TransImpl { std::vector legendre_begin_; std::vector legendre_sym_begin_; std::vector legendre_asym_begin_; +#if ATLAS_HAVE_FFTW + fftw_complex* fft_in_; + double* fft_out_; + fftw_plan plan_; +#endif }; //----------------------------------------------------------------------------- diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 2135ca994..22d874416 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -712,7 +712,7 @@ CASE( "test_trans_vordiv_with_translib" ) { double tolerance = 1.e-13; // Grid: (Adjust the following line if the test takes too long!) 
- Grid g( "F3" ); + Grid g( "F640" ); grid::StructuredGrid gs( g ); int ndgl = gs.ny(); @@ -814,15 +814,16 @@ CASE( "test_trans_vordiv_with_translib" ) { double rms_diff = compute_rms( g.size(), rgp1.data() + pos * g.size(), gp.data() + pos * g.size() ); EXPECT( rms_trans < tolerance ); - //if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) { - Log::info() << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out - << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; - ATLAS_DEBUG_VAR( rms_gen1 ); - ATLAS_DEBUG_VAR( rms_gen2 ); - ATLAS_DEBUG_VAR( rms_trans ); - ATLAS_DEBUG_VAR( rms_diff ); - ATLAS_DEBUG_VAR( tolerance ); - //} + if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) { + Log::info() + << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out + << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; + ATLAS_DEBUG_VAR( rms_gen1 ); + ATLAS_DEBUG_VAR( rms_gen2 ); + ATLAS_DEBUG_VAR( rms_trans ); + ATLAS_DEBUG_VAR( rms_diff ); + ATLAS_DEBUG_VAR( tolerance ); + } #endif EXPECT( icase < 300 ); } From 60886e09cb3e35dd6ba8d81bc61cae2f48118ba3 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 9 Mar 2018 15:33:27 +0000 Subject: [PATCH 017/123] choosing the eckit backend explicitly --- src/atlas/trans/localopt/TransLocalopt.cc | 1 + src/atlas/trans/localopt2/TransLocalopt2.cc | 3 +-- src/tests/trans/test_transgeneral.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index d5281db77..4b7bb6bc5 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -83,6 +83,7 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t truncation_( truncation ), precompute_( config.getBool( "precompute", true ) ) { ATLAS_TRACE( "Precompute legendre opt" ); + eckit::linalg::LinearAlgebra::backend( 
"generic" ); // might want to choose backend with this command int nlats = 0; int nlons = 0; int nlatsNH = nlats_northernHemisphere( nlats ); diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index 3f4edafb2..87b7dc0ee 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -83,6 +83,7 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long truncation_( truncation ), precompute_( config.getBool( "precompute", true ) ) { ATLAS_TRACE( "Precompute legendre opt2" ); + eckit::linalg::LinearAlgebra::backend( "generic" ); // might want to choose backend with this command int nlats = 0; int nlons = 0; int nlatsNH = nlats_northernHemisphere( nlats ); @@ -267,8 +268,6 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel if ( nb_scalar_fields > 0 ) { int nb_fields = nb_scalar_fields; - //eckit::linalg::LinearAlgebra::backend( "string" ) // might want to choose backend with this command - // Transform if ( grid::StructuredGrid g = grid_ ) { ATLAS_TRACE( "invtrans_uv structured opt2" ); diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 22d874416..ddf85054a 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -712,7 +712,7 @@ CASE( "test_trans_vordiv_with_translib" ) { double tolerance = 1.e-13; // Grid: (Adjust the following line if the test takes too long!) - Grid g( "F640" ); + Grid g( "F120" ); grid::StructuredGrid gs( g ); int ndgl = gs.ny(); From e006fb421c7fdeb6af9db3f5d85982c5692666ac Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 12 Mar 2018 19:28:23 +0000 Subject: [PATCH 018/123] optimised precomputation of Legendre polynomials; should now be about as fast as trans library; peak RAM consumption should be two times lower than in trans library when both use Belousov. 
This allows to run a few scalar fields on cubic F1280 with about 10GB RAM --- cmake/CompileFlags.cmake | 4 + .../trans/localopt/LegendrePolynomialsopt.cc | 54 ++--- src/atlas/trans/localopt/TransLocalopt.cc | 56 ++--- .../localopt2/LegendrePolynomialsopt2.cc | 225 ++++++++++-------- .../trans/localopt2/LegendrePolynomialsopt2.h | 9 +- src/atlas/trans/localopt2/TransLocalopt2.cc | 21 +- src/atlas/trans/localopt2/TransLocalopt2.h | 4 - src/tests/trans/test_transgeneral.cc | 191 +++++++-------- 8 files changed, 281 insertions(+), 283 deletions(-) diff --git a/cmake/CompileFlags.cmake b/cmake/CompileFlags.cmake index 522e4a6cb..c02df915c 100644 --- a/cmake/CompileFlags.cmake +++ b/cmake/CompileFlags.cmake @@ -8,3 +8,7 @@ if( CMAKE_CXX_COMPILER_ID MATCHES Cray ) # directives, ACC directives, or ASM intrinsics. endif() + +#ecbuild_add_cxx_flags("-fsanitize=address") +#ecbuild_add_cxx_flags("-fsanitize=thread") +#ecbuild_add_cxx_flags("-fsanitize=memory") diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc index 25fda7d04..e872eb20b 100644 --- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc +++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc @@ -13,6 +13,7 @@ #include #include "atlas/array.h" +#include "atlas/parallel/mpi/mpi.h" #include "atlas/trans/localopt/LegendrePolynomialsopt.h" namespace atlas { @@ -26,42 +27,37 @@ void compute_legendre_polynomialsopt( const double lats[], // latitudes in radians (in) double legpol[] ) // values of associated Legendre functions, size (trc+1)*trc/2*nlats (out) { - array::ArrayT idxmn_( trc + 1, trc + 1, nlats ); - array::ArrayView idxmn = array::make_view( idxmn_ ); - - int j = 0; - for ( int jm = 0; jm <= trc; ++jm ) { - for ( int jlat = 0; jlat < nlats; ++jlat ) { - for ( int jn = jm; jn <= trc; ++jn ) { - idxmn( jm, jn, jlat ) = j++; - } - } - } - array::ArrayT zfn_( trc + 1, trc + 1 ); array::ArrayView zfn = array::make_view( zfn_ ); - int iodd; + auto 
idxmn = [&]( int jm, int jn, int jlat ) { + return ( 2 * trc + 3 - jm ) * jm / 2 * nlats + jlat * ( trc - jm + 1 ) + jn - jm; + }; - // Compute coefficients for Taylor series in Belousov (19) and (21) - // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.) - // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1 - zfn( 0, 0 ) = 2.; - for ( int jn = 1; jn <= trc; ++jn ) { - double zfnn = zfn( 0, 0 ); - for ( int jgl = 1; jgl <= jn; ++jgl ) { - zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) ); - } - iodd = jn % 2; - zfn( jn, jn ) = zfnn; - for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) { - double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) ); // new factor numerator - double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) ); // new factor denominator + { + ATLAS_TRACE( "init arrays" ); + + int iodd; + + // Compute coefficients for Taylor series in Belousov (19) and (21) + // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.) + // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1 + zfn( 0, 0 ) = 2.; + for ( int jn = 1; jn <= trc; ++jn ) { + double zfnn = zfn( 0, 0 ); + for ( int jgl = 1; jgl <= jn; ++jgl ) { + zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) ); + } + iodd = jn % 2; + zfn( jn, jn ) = zfnn; + for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) { + double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) ); // new factor numerator + double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) ); // new factor denominator - zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd; + zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd; + } } } - for ( int jlat = 0; jlat < nlats; ++jlat ) { // -------------------- // 1. 
First two columns diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index 4b7bb6bc5..5b31b75ad 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -119,38 +119,40 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t } // precomputations for Legendre polynomials: { - ATLAS_TRACE( "opt precomp Legendre" ); - legendre_.resize( legendre_size( truncation_ + 1 ) * nlatsNH ); - compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre_.data() ); - } - { - ATLAS_TRACE( "opt split Legendre" ); - int size_sym = 0; - int size_asym = 0; - legendre_sym_begin_.resize( truncation_ + 3 ); - legendre_asym_begin_.resize( truncation_ + 3 ); - legendre_sym_begin_[0] = 0; - legendre_asym_begin_[0] = 0; - for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { - size_sym += num_n( truncation_ + 1, jm, true ); - size_asym += num_n( truncation_ + 1, jm, false ); - legendre_sym_begin_[jm + 1] = size_sym; - legendre_asym_begin_[jm + 1] = size_asym; + std::vector legendre( legendre_size( truncation_ + 1 ) * nlatsNH ); + { + ATLAS_TRACE( "opt precomp Legendre" ); + compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre.data() ); } - legendre_sym_.resize( size_sym * nlatsNH ); - legendre_asym_.resize( size_asym * nlatsNH ); - int idx = 0, is = 0, ia = 0; - for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - for ( int jn = 0; jn <= truncation_ - jm + 1; jn++, idx++ ) { - if ( jn % 2 == 0 ) { legendre_sym_[is++] = legendre_[idx]; } - else { - legendre_asym_[ia++] = legendre_[idx]; + { + ATLAS_TRACE( "opt split Legendre" ); + int size_sym = 0; + int size_asym = 0; + legendre_sym_begin_.resize( truncation_ + 3 ); + legendre_asym_begin_.resize( truncation_ + 3 ); + legendre_sym_begin_[0] = 0; + legendre_asym_begin_[0] = 0; + for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { + 
size_sym += num_n( truncation_ + 1, jm, true ); + size_asym += num_n( truncation_ + 1, jm, false ); + legendre_sym_begin_[jm + 1] = size_sym; + legendre_asym_begin_[jm + 1] = size_asym; + } + legendre_sym_.resize( size_sym * nlatsNH ); + legendre_asym_.resize( size_asym * nlatsNH ); + int idx = 0, is = 0, ia = 0; + for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + for ( int jn = 0; jn <= truncation_ - jm + 1; jn++, idx++ ) { + if ( jn % 2 == 0 ) { legendre_sym_[is++] = legendre[idx]; } + else { + legendre_asym_[ia++] = legendre[idx]; + } } } } + ASSERT( ia == size_asym * nlatsNH && is == size_sym * nlatsNH ); } - ASSERT( ia == size_asym * nlatsNH && is == size_sym * nlatsNH ); } // precomputations for Fourier transformations: diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc index 8f55231d5..bbff0a1a8 100644 --- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc +++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc @@ -13,6 +13,7 @@ #include #include "atlas/array.h" +#include "atlas/parallel/mpi/mpi.h" #include "atlas/trans/localopt2/LegendrePolynomialsopt2.h" namespace atlas { @@ -20,27 +21,20 @@ namespace trans { //----------------------------------------------------------------------------- -void compute_legendre_polynomialsopt2( - const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // latitudes in radians (in) - double legpol[] ) // values of associated Legendre functions, size (trc+1)*trc/2*nlats (out) -{ - array::ArrayT idxmn_( trc + 1, trc + 1, nlats ); - array::ArrayView idxmn = array::make_view( idxmn_ ); - - int j = 0; - for ( int jm = 0; jm <= trc; ++jm ) { - for ( int jlat = 0; jlat < nlats; ++jlat ) { - for ( int jn = jm; jn <= trc; ++jn ) { - idxmn( jm, jn, jlat ) = j++; - } - } - } +size_t legendre_size( const size_t truncation ) { + return ( truncation + 2 ) * ( 
truncation + 1 ) / 2; +} +void compute_legendre_polynomialsopt2( const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double leg_sym[], // values of associated Legendre functions, symmetric part + double leg_asym[] ) // values of associated Legendre functions, asymmetric part +{ array::ArrayT zfn_( trc + 1, trc + 1 ); array::ArrayView zfn = array::make_view( zfn_ ); - + std::vector legpol( legendre_size( trc ) ); + auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; }; int iodd; // Compute coefficients for Taylor series in Belousov (19) and (21) @@ -62,99 +56,132 @@ void compute_legendre_polynomialsopt2( } } + // Loop over latitudes: for ( int jlat = 0; jlat < nlats; ++jlat ) { - // -------------------- - // 1. First two columns - // -------------------- - double lat = lats[jlat]; - double zdlx1 = ( M_PI_2 - lat ); // theta - double zdlx = std::cos( zdlx1 ); // cos(theta) - double zdlsita = std::sqrt( 1. - zdlx * zdlx ); // sin(theta) (this is how trans library does it) - - legpol[idxmn( 0, 0, jlat )] = 1.; - - double zdl1sita = 0.; - // if we are less than 1 meter from the pole, - if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits::epsilon() ) ) { - zdlx = 1.; - zdlsita = 0.; - } - else { - zdl1sita = 1. / zdlsita; - } + { + //ATLAS_TRACE( "compute Legendre polynomials" ); + // -------------------- + // 1. First two columns + // -------------------- + double lat = lats[jlat]; + double zdlx1 = ( M_PI_2 - lat ); // theta + double zdlx = std::cos( zdlx1 ); // cos(theta) + double zdlsita = std::sqrt( 1. - zdlx * zdlx ); // sin(theta) (this is how trans library does it) + + legpol[idxmn( 0, 0 )] = 1.; + + double zdl1sita = 0.; + // if we are less than 1 meter from the pole, + if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits::epsilon() ) ) { + zdlx = 1.; + zdlsita = 0.; + } + else { + zdl1sita = 1. 
/ zdlsita; + } - // ordinary Legendre polynomials from series expansion - // --------------------------------------------------- - - // even N - for ( int jn = 2; jn <= trc; jn += 2 ) { - double zdlk = 0.5 * zfn( jn, 0 ); - double zdlldn = 0.0; - double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); - // represented by only even k - for ( int jk = 2; jk <= jn; jk += 2 ) { - // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); - // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); + // ordinary Legendre polynomials from series expansion + // --------------------------------------------------- + + // even N + for ( int jn = 2; jn <= trc; jn += 2 ) { + double zdlk = 0.5 * zfn( jn, 0 ); + double zdlldn = 0.0; + double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); + // represented by only even k + for ( int jk = 2; jk <= jn; jk += 2 ) { + // normalised ordinary Legendre polynomial == \overbar{P_n}^0 + zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); + // normalised associated Legendre polynomial == \overbar{P_n}^1 + zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); + } + legpol[idxmn( 0, jn )] = zdlk; + legpol[idxmn( 1, jn )] = zdlldn; } - legpol[idxmn( 0, jn, jlat )] = zdlk; - legpol[idxmn( 1, jn, jlat )] = zdlldn; - } - // odd N - for ( int jn = 1; jn <= trc; jn += 2 ) { - zfn( jn, 0 ) = 0.; - double zdlk = 0.; - double zdlldn = 0.0; - double zdsq = 1. / std::sqrt( jn * ( jn + 1. 
) ); - // represented by only even k - for ( int jk = 1; jk <= jn; jk += 2 ) { - // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); - // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); + // odd N + for ( int jn = 1; jn <= trc; jn += 2 ) { + zfn( jn, 0 ) = 0.; + double zdlk = 0.; + double zdlldn = 0.0; + double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); + // represented by only even k + for ( int jk = 1; jk <= jn; jk += 2 ) { + // normalised ordinary Legendre polynomial == \overbar{P_n}^0 + zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); + // normalised associated Legendre polynomial == \overbar{P_n}^1 + zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); + } + legpol[idxmn( 0, jn )] = zdlk; + legpol[idxmn( 1, jn )] = zdlldn; } - legpol[idxmn( 0, jn, jlat )] = zdlk; - legpol[idxmn( 1, jn, jlat )] = zdlldn; - } - // -------------------------------------------------------------- - // 2. Diagonal (the terms 0,0 and 1,1 have already been computed) - // Belousov, equation (23) - // -------------------------------------------------------------- + // -------------------------------------------------------------- + // 2. Diagonal (the terms 0,0 and 1,1 have already been computed) + // Belousov, equation (23) + // -------------------------------------------------------------- - double zdls = zdl1sita * std::numeric_limits::min(); - for ( int jn = 2; jn <= trc; ++jn ) { - double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) ); + double zdls = zdl1sita * std::numeric_limits::min(); + for ( int jn = 2; jn <= trc; ++jn ) { + double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. 
* jn ) ); - legpol[idxmn( jn, jn, jlat )] = legpol[idxmn( jn - 1, jn - 1, jlat )] * zdlsita * sq; - if ( std::abs( legpol[idxmn( jn, jn, jlat )] ) < zdls ) legpol[idxmn( jn, jn, jlat )] = 0.0; - } + legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq; + if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0; + } - // --------------------------------------------- - // 3. General recurrence (Belousov, equation 17) - // --------------------------------------------- - - for ( int jn = 3; jn <= trc; ++jn ) { - for ( int jm = 2; jm < jn; ++jm ) { - double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) ); // numerator of c in Belousov - double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of c in Belousov - double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) ); // numerator of d in Belousov - double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of d in Belousov - double en = ( ( 2. * jn + 1. ) * ( jn - jm ) ); // numerator of e in Belousov - double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) ); // denominator of e in Belousov - - legpol[idxmn( jm, jn, jlat )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2, jlat )] - - std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1, jlat )] * zdlx + - std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1, jlat )] * zdlx; + // --------------------------------------------- + // 3. General recurrence (Belousov, equation 17) + // --------------------------------------------- + + for ( int jn = 3; jn <= trc; ++jn ) { + for ( int jm = 2; jm < jn; ++jm ) { + double cn = + ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) ); // numerator of c in Belousov + double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of c in Belousov + double dn = + ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) ); // numerator of d in Belousov + double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. 
) * ( jn + jm ) ); // denominator of d in Belousov + double en = ( ( 2. * jn + 1. ) * ( jn - jm ) ); // numerator of e in Belousov + double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) ); // denominator of e in Belousov + + legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] - + std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx + + std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx; + } } } - // take factor 2 for m > 0 into account: - for ( int jm = 1; jm <= trc; ++jm ) { - for ( int jn = jm; jn <= trc; ++jn ) { - legpol[idxmn( jm, jn, jlat )] *= 2.; + { + //ATLAS_TRACE( "add to global arrays" ); + + // take factor 2 for m > 0 into account: + for ( int jm = 1; jm <= trc; ++jm ) { + for ( int jn = jm; jn <= trc; ++jn ) { + legpol[idxmn( jm, jn )] *= 2.; + } + } + int is0 = 0, ia0 = 0; + for ( int jm = 0; jm <= trc; jm++ ) { + int is1 = 0, ia1 = 0; + for ( int jn = jm; jn <= trc; jn++ ) { + if ( ( jn - jm ) % 2 == 0 ) { is1++; } + else { + ia1++; + } + } + int is2 = 0, ia2 = 0; + for ( int jn = jm; jn <= trc; jn++ ) { + if ( ( jn - jm ) % 2 == 0 ) { + int is = is0 * nlats + is1 * jlat + is2++; + leg_sym[is] = legpol[idxmn( jm, jn )]; + } + else { + int ia = ia0 * nlats + ia1 * jlat + ia2++; + leg_asym[ia] = legpol[idxmn( jm, jn )]; + } + } + is0 += is2; + ia0 += ia2; } } } diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h index ae550d30d..0de2a5f69 100644 --- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h +++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h @@ -33,10 +33,11 @@ namespace trans { // Andreas Mueller *ECMWF* // void compute_legendre_polynomialsopt2( - const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // latitudes in radians (in) - double legpol[] ); // values of associated Legendre functions, size (trc+1)*trc/2*nlats (out) + const size_t trc, // truncation (in) + const int nlats, // 
number of latitudes + const double lats[], // latitudes in radians (in) + double legendre_sym[], // values of associated Legendre functions, symmetric part + double legendre_asym[] ); // values of associated Legendre functions, asymmetric part // -------------------------------------------------------------------------------------------------------------------- diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index 87b7dc0ee..ea6724472 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -120,11 +120,6 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long // precomputations for Legendre polynomials: { ATLAS_TRACE( "opt2 precomp Legendre" ); - legendre_.resize( legendre_size( truncation_ + 1 ) * nlatsNH ); - compute_legendre_polynomialsopt2( truncation_ + 1, nlatsNH, lats.data(), legendre_.data() ); - } - { - ATLAS_TRACE( "opt2 split Legendre" ); int size_sym = 0; int size_asym = 0; legendre_sym_begin_.resize( truncation_ + 3 ); @@ -139,18 +134,8 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long } legendre_sym_.resize( size_sym * nlatsNH ); legendre_asym_.resize( size_asym * nlatsNH ); - int idx = 0, is = 0, ia = 0; - for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - for ( int jn = 0; jn <= truncation_ - jm + 1; jn++, idx++ ) { - if ( jn % 2 == 0 ) { legendre_sym_[is++] = legendre_[idx]; } - else { - legendre_asym_[ia++] = legendre_[idx]; - } - } - } - } - ASSERT( ia == size_asym * nlatsNH && is == size_sym * nlatsNH ); + compute_legendre_polynomialsopt2( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_.data(), + legendre_asym_.data() ); } // precomputations for Fourier transformations: @@ -188,7 +173,7 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long FFTW_ESTIMATE ); } #endif -} +} // namespace atlas // 
-------------------------------------------------------------------------------------------------------------------- diff --git a/src/atlas/trans/localopt2/TransLocalopt2.h b/src/atlas/trans/localopt2/TransLocalopt2.h index 0cec4e84a..51644430d 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.h +++ b/src/atlas/trans/localopt2/TransLocalopt2.h @@ -105,9 +105,6 @@ class TransLocalopt2 : public trans::TransImpl { double divergence_spectra[], const eckit::Configuration& = util::NoConfig() ) const override; private: - const double* legendre_data( int j ) const { return legendre_.data() + legendre_begin_[j]; } - double* legendre_data( int j ) { return legendre_.data() + legendre_begin_[j]; } - void invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, const double scalar_spectra[], double gp_fields[], const eckit::Configuration& = util::NoConfig() ) const; @@ -116,7 +113,6 @@ class TransLocalopt2 : public trans::TransImpl { Grid grid_; int truncation_; bool precompute_; - mutable std::vector legendre_; mutable std::vector legendre_sym_; mutable std::vector legendre_asym_; mutable std::vector fourier_; diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index ddf85054a..f5dc61f1e 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -569,6 +569,7 @@ CASE( "test_transgeneral_legendrepolynomials" ) } #endif //----------------------------------------------------------------------------- +#if 0 #if 1 CASE( "test_transgeneral_point" ) { std::ostream& out = Log::info(); @@ -700,9 +701,10 @@ CASE( "test_transgeneral_with_translib" ) { } } } - +#endif +#endif //----------------------------------------------------------------------------- - +#if 0 CASE( "test_trans_vordiv_with_translib" ) { Log::info() << "test_trans_vordiv_with_translib" << std::endl; // test transgeneral by comparing its result with the trans library @@ -727,7 +729,7 @@ CASE( 
"test_trans_vordiv_with_translib" ) { functionspace::Spectral spectral( trc ); functionspace::StructuredColumns gridpoints( g ); - int nb_scalar = 1, nb_vordiv = 0; + int nb_scalar = 2, nb_vordiv = 0; int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; std::vector sp( 2 * N * nb_scalar ); std::vector vor( 2 * N * nb_vordiv ); @@ -836,9 +838,9 @@ CASE( "test_trans_vordiv_with_translib" ) { } Log::info() << "Vordiv+scalar comparison with trans: all " << icase << " cases successfully passed!" << std::endl; } - +#endif //----------------------------------------------------------------------------- -#if 0 +#if 1 CASE( "test_trans_hires" ) { Log::info() << "test_trans_hires" << std::endl; // test transgeneral by comparing its result with the trans library @@ -848,17 +850,17 @@ CASE( "test_trans_hires" ) { double tolerance = 1.e-13; // Grid: (Adjust the following line if the test takes too long!) - Grid g( "F128" ); - + Grid g( "F1280" ); +#if ATLAS_HAVE_TRANS + //std::string transTypes[1] = {"localopt2"}; + std::string transTypes[2] = {"localopt2", "ifs"}; +#else + std::string transTypes[1] = {"localopt2"}; +#endif grid::StructuredGrid gs( g ); int ndgl = gs.ny(); //int trc = ndgl - 1; // linear int trc = ndgl / 2. 
- 1; // cubic -#if 0 - trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) ); -#endif - trans::Trans transLocal1( g, trc, util::Config( "type", "localopt2" ) ); - //trans::Trans transLocal2( g, trc, util::Config( "type", "localopt2" ) ); functionspace::Spectral spectral( trc ); functionspace::StructuredColumns gridpoints( g ); @@ -874,107 +876,92 @@ CASE( "test_trans_hires" ) { //std::vector rgp2( nb_all * g.size() ); std::vector rgp_analytic( g.size() ); - int icase = 0; - for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) { // vorticity, divergence, scalar - for ( int ivar_out = 2; ivar_out < 3; ivar_out++ ) { // u, v, scalar - int nb_fld = 1; - if ( ivar_out == 2 ) { - tolerance = 1.e-13; - nb_fld = nb_scalar; - } - else { - tolerance = 2.e-6; - nb_fld = nb_vordiv; - } - for ( int jfld = 0; jfld < nb_fld; jfld++ ) { // multiple fields - int k = 0; - for ( int m = 0; m <= trc; m++ ) { // zonal wavenumber - for ( int n = m; n <= trc; n++ ) { // total wavenumber - for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part - - if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. 
) { - for ( int j = 0; j < 2 * N * nb_scalar; j++ ) { - sp[j] = 0.; - } - for ( int j = 0; j < 2 * N * nb_vordiv; j++ ) { - vor[j] = 0.; - div[j] = 0.; - } - if ( ivar_in == 0 ) vor[k * nb_vordiv + jfld] = 1.; - if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.; - if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.; - - for ( int j = 0; j < nb_all * g.size(); j++ ) { - gp[j] = 0.; - rgp1[j] = 0.; - //rgp2[j] = 0.; - } - for ( int j = 0; j < g.size(); j++ ) { - rgp_analytic[j] = 0.; - } - - spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(), - rgp_analytic.data(), ivar_in, ivar_out ); - - EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), - div.data(), rgp1.data() ) ); - - //EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), - // div.data(), rgp2.data() ) ); - - int pos = ( ivar_out * nb_vordiv + jfld ); - - double rms_gen1 = - compute_rms( g.size(), rgp1.data() + pos * g.size(), rgp_analytic.data() ); - - //double rms_gen2 = - // compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() ); - - if ( !( rms_gen1 < tolerance ) ) { // || !( rms_gen2 < tolerance ) ) { - Log::info() - << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out - << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; - ATLAS_DEBUG_VAR( rms_gen1 ); - //ATLAS_DEBUG_VAR( rms_gen2 ); - ATLAS_DEBUG_VAR( tolerance ); - } - EXPECT( rms_gen1 < tolerance ); - //EXPECT( rms_gen2 < tolerance ); - icase++; - -#if 0 - EXPECT_NO_THROW( transIFS.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), - div.data(), gp.data() ) ); - double rms_trans = - compute_rms( g.size(), gp.data() + pos * g.size(), rgp_analytic.data() ); - double rms_diff = - compute_rms( g.size(), rgp1.data() + pos * g.size(), gp.data() + pos * g.size() ); - EXPECT( rms_trans < tolerance ); - if ( !( rms_trans < tolerance ) || !( rms_diff < tolerance ) ) { - Log::info() - << "Case " << icase << " 
ivar_in=" << ivar_in << " ivar_out=" << ivar_out - << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; - ATLAS_DEBUG_VAR( rms_gen1 ); - //ATLAS_DEBUG_VAR( rms_gen2 ); - ATLAS_DEBUG_VAR( rms_trans ); - ATLAS_DEBUG_VAR( rms_diff ); - ATLAS_DEBUG_VAR( tolerance ); + for ( auto transType : transTypes ) { + int icase = 0; + trans::Trans trans( g, trc, util::Config( "type", transType ) ); + for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) { // vorticity, divergence, scalar + for ( int ivar_out = 2; ivar_out < 3; ivar_out++ ) { // u, v, scalar + int nb_fld = 1; + if ( ivar_out == 2 ) { + tolerance = 1.e-13; + nb_fld = nb_scalar; + } + else { + tolerance = 2.e-6; + nb_fld = nb_vordiv; + } + for ( int jfld = 0; jfld < nb_fld; jfld++ ) { // multiple fields + int k = 0; + for ( int m = 0; m <= trc; m++ ) { // zonal wavenumber + for ( int n = m; n <= trc; n++ ) { // total wavenumber + for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part + + if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. 
) { + for ( int j = 0; j < 2 * N * nb_scalar; j++ ) { + sp[j] = 0.; + } + for ( int j = 0; j < 2 * N * nb_vordiv; j++ ) { + vor[j] = 0.; + div[j] = 0.; + } + if ( ivar_in == 0 ) vor[k * nb_vordiv + jfld] = 1.; + if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.; + if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.; + + for ( int j = 0; j < nb_all * g.size(); j++ ) { + gp[j] = 0.; + rgp1[j] = 0.; + //rgp2[j] = 0.; + } + for ( int j = 0; j < g.size(); j++ ) { + rgp_analytic[j] = 0.; + } + + spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(), + rgp_analytic.data(), ivar_in, ivar_out ); + + EXPECT_NO_THROW( trans.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), + div.data(), rgp1.data() ) ); + + //EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), + // div.data(), rgp2.data() ) ); + + int pos = ( ivar_out * nb_vordiv + jfld ); + + double rms_gen1 = + compute_rms( g.size(), rgp1.data() + pos * g.size(), rgp_analytic.data() ); + + //double rms_gen2 = + // compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() ); + + if ( !( rms_gen1 < tolerance ) ) { // || !( rms_gen2 < tolerance ) ) { + Log::info() + << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out + << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; + ATLAS_DEBUG_VAR( rms_gen1 ); + //ATLAS_DEBUG_VAR( rms_gen2 ); + ATLAS_DEBUG_VAR( tolerance ); + } + EXPECT( rms_gen1 < tolerance ); + //EXPECT( rms_gen2 < tolerance ); + icase++; + + EXPECT( icase < 300 ); } -#endif - EXPECT( icase < 300 ); + k++; } - k++; } } } } } + Log::info() << "Vordiv+scalar comparison with trans::" << transType << ": all " << icase + << " cases successfully passed!" << std::endl; } - Log::info() << "Vordiv+scalar comparison with trans: all " << icase << " cases successfully passed!" 
<< std::endl; } #endif //----------------------------------------------------------------------------- - +#if 0 CASE( "test_trans_invtrans" ) { trans::Trans trans( Grid( "O64" ), 63, util::Config( "type", "local" ) ); From 8ccfbd112b7e5b2782139e19f95431c4827076c4 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 12 Mar 2018 22:59:43 +0000 Subject: [PATCH 019/123] testing scaling with nb_scalar --- src/tests/trans/test_transgeneral.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index f5dc61f1e..941b86664 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -704,7 +704,7 @@ CASE( "test_transgeneral_with_translib" ) { #endif #endif //----------------------------------------------------------------------------- -#if 0 +#if 1 CASE( "test_trans_vordiv_with_translib" ) { Log::info() << "test_trans_vordiv_with_translib" << std::endl; // test transgeneral by comparing its result with the trans library @@ -714,7 +714,7 @@ CASE( "test_trans_vordiv_with_translib" ) { double tolerance = 1.e-13; // Grid: (Adjust the following line if the test takes too long!) 
- Grid g( "F120" ); + Grid g( "F640" ); grid::StructuredGrid gs( g ); int ndgl = gs.ny(); @@ -729,7 +729,7 @@ CASE( "test_trans_vordiv_with_translib" ) { functionspace::Spectral spectral( trc ); functionspace::StructuredColumns gridpoints( g ); - int nb_scalar = 2, nb_vordiv = 0; + int nb_scalar = 100, nb_vordiv = 0; int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; std::vector sp( 2 * N * nb_scalar ); std::vector vor( 2 * N * nb_vordiv ); @@ -827,7 +827,7 @@ CASE( "test_trans_vordiv_with_translib" ) { ATLAS_DEBUG_VAR( tolerance ); } #endif - EXPECT( icase < 300 ); + EXPECT( icase < 100 ); } k++; } @@ -840,7 +840,7 @@ CASE( "test_trans_vordiv_with_translib" ) { } #endif //----------------------------------------------------------------------------- -#if 1 +#if 0 CASE( "test_trans_hires" ) { Log::info() << "test_trans_hires" << std::endl; // test transgeneral by comparing its result with the trans library From 79fdd110a9a4a1263acf241464b5068070877076 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 13 Mar 2018 11:00:56 +0000 Subject: [PATCH 020/123] reversed order of total wavenumbers in Legendre transformation (the same is done in trans library for improving accuracy of summation); also added average RMS errors to test_trans_vordiv_with_translib --- .../localopt2/LegendrePolynomialsopt2.cc | 9 ++++++- src/atlas/trans/localopt2/TransLocalopt2.cc | 14 +++++++--- src/tests/trans/test_transgeneral.cc | 27 ++++++++++++++----- 3 files changed, 39 insertions(+), 11 deletions(-) diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc index bbff0a1a8..99f54586a 100644 --- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc +++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc @@ -170,7 +170,14 @@ void compute_legendre_polynomialsopt2( const size_t trc, // truncation (in) } } int is2 = 0, ia2 = 0; - for ( int jn = jm; jn <= trc; jn++ ) { + // the choice between the 
following two code lines determines whether + // total wavenumbers are summed in an ascending or descending order. + // The trans library in IFS uses descending order because it should + // be more accurate (higher wavenumbers have smaller contributions). + // This also needs to be changed when splitting the spectral data in + // TransLocalopt2::invtrans_uv! + //for ( int jn = jm; jn <= trc; jn++ ) { + for ( int jn = trc; jn >= jm; jn-- ) { if ( ( jn - jm ) % 2 == 0 ) { int is = is0 * nlats + is1 * jlat + is2++; leg_sym[is] = legpol[idxmn( jm, jn )]; diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index ea6724472..084eca68f 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -277,10 +277,18 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel { //ATLAS_TRACE( "opt2 Legendre split" ); int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2; - for ( int jn = 0; jn <= truncation_ - jm + 1; jn++ ) { + // the choice between the following two code lines determines whether + // total wavenumbers are summed in an ascending or descending order. + // The trans library in IFS uses descending order because it should + // be more accurate (higher wavenumbers have smaller contributions). + // This also needs to be changed when splitting the spectral data in + // compute_legendre_polynomialsopt2! 
+ //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { + for ( int jn = truncation_ + 1; jn >= jm; jn-- ) { for ( int imag = 0; imag < 2; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { - if ( jn % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) ); + if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } else { scalar_asym[ia++] = scalar_spectra[idx + ioff]; } diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 941b86664..cd1b21478 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -704,7 +704,7 @@ CASE( "test_transgeneral_with_translib" ) { #endif #endif //----------------------------------------------------------------------------- -#if 1 +#if 0 CASE( "test_trans_vordiv_with_translib" ) { Log::info() << "test_trans_vordiv_with_translib" << std::endl; // test transgeneral by comparing its result with the trans library @@ -714,7 +714,7 @@ CASE( "test_trans_vordiv_with_translib" ) { double tolerance = 1.e-13; // Grid: (Adjust the following line if the test takes too long!) - Grid g( "F640" ); + Grid g( "F120" ); grid::StructuredGrid gs( g ); int ndgl = gs.ny(); @@ -722,14 +722,16 @@ CASE( "test_trans_vordiv_with_translib" ) { int trc = ndgl / 2. 
- 1; // cubic #if ATLAS_HAVE_TRANS trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) ); + double rav = 0.; // compute average rms error of trans library in rav #endif trans::Trans transLocal1( g, trc, util::Config( "type", "localopt" ) ); trans::Trans transLocal2( g, trc, util::Config( "type", "localopt2" ) ); + double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 functionspace::Spectral spectral( trc ); functionspace::StructuredColumns gridpoints( g ); - int nb_scalar = 100, nb_vordiv = 0; + int nb_scalar = 1, nb_vordiv = 0; int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; std::vector sp( 2 * N * nb_scalar ); std::vector vor( 2 * N * nb_vordiv ); @@ -796,6 +798,8 @@ CASE( "test_trans_vordiv_with_translib" ) { double rms_gen2 = compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() ); + rav1 += rms_gen1; + rav2 += rms_gen2; if ( !( rms_gen1 < tolerance ) || !( rms_gen2 < tolerance ) ) { Log::info() << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out @@ -813,6 +817,7 @@ CASE( "test_trans_vordiv_with_translib" ) { div.data(), gp.data() ) ); double rms_trans = compute_rms( g.size(), gp.data() + pos * g.size(), rgp_analytic.data() ); + rav += rms_trans; double rms_diff = compute_rms( g.size(), rgp1.data() + pos * g.size(), gp.data() + pos * g.size() ); EXPECT( rms_trans < tolerance ); @@ -827,7 +832,7 @@ CASE( "test_trans_vordiv_with_translib" ) { ATLAS_DEBUG_VAR( tolerance ); } #endif - EXPECT( icase < 100 ); + EXPECT( icase < 300 ); } k++; } @@ -837,10 +842,18 @@ CASE( "test_trans_vordiv_with_translib" ) { } } Log::info() << "Vordiv+scalar comparison with trans: all " << icase << " cases successfully passed!" 
<< std::endl; + rav1 /= icase; + Log::info() << "average RMS error of transLocal1: " << rav1 << std::endl; + rav2 /= icase; + Log::info() << "average RMS error of transLocal2: " << rav2 << std::endl; +#if ATLAS_HAVE_TRANS + rav /= icase; + Log::info() << "average RMS error of transIFS: " << rav << std::endl; +#endif } #endif //----------------------------------------------------------------------------- -#if 0 +#if 1 CASE( "test_trans_hires" ) { Log::info() << "test_trans_hires" << std::endl; // test transgeneral by comparing its result with the trans library @@ -865,7 +878,7 @@ CASE( "test_trans_hires" ) { functionspace::Spectral spectral( trc ); functionspace::StructuredColumns gridpoints( g ); - int nb_scalar = 1, nb_vordiv = 0; + int nb_scalar = 100, nb_vordiv = 0; int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; std::vector sp( 2 * N * nb_scalar ); std::vector vor( 2 * N * nb_vordiv ); @@ -946,7 +959,7 @@ CASE( "test_trans_hires" ) { //EXPECT( rms_gen2 < tolerance ); icase++; - EXPECT( icase < 300 ); + EXPECT( icase < 50 ); } k++; } From 7dc00ff369e57791c9e3eee22c64a7445d29d3ac Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 13 Mar 2018 11:42:18 +0000 Subject: [PATCH 021/123] printing case number to make hires test a little more entertaining --- src/tests/trans/test_transgeneral.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index cd1b21478..51a0d5796 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -958,8 +958,8 @@ CASE( "test_trans_hires" ) { EXPECT( rms_gen1 < tolerance ); //EXPECT( rms_gen2 < tolerance ); icase++; - - EXPECT( icase < 50 ); + Log::info() << transType << ": case " << icase << std::endl; + EXPECT( icase < 25 ); } k++; } From aa31ad62776279be644afa54443232eb982e2902 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 15 Mar 2018 15:50:40 +0000 Subject: 
[PATCH 022/123] updated localopt with changes from localopt2; removed scl_fourier_sym but seems to make code slower; switch to fftw_execute_dft_c2r, doesn't seem to affect performance --- .../trans/localopt/LegendrePolynomialsopt.cc | 259 ++++++++++-------- .../trans/localopt/LegendrePolynomialsopt.h | 9 +- src/atlas/trans/localopt/TransLocalopt.cc | 128 ++++++--- src/atlas/trans/localopt/TransLocalopt.h | 13 +- .../localopt2/LegendrePolynomialsopt2.cc | 16 +- src/atlas/trans/localopt2/TransLocalopt2.cc | 38 +-- src/tests/trans/test_transgeneral.cc | 94 ++----- 7 files changed, 300 insertions(+), 257 deletions(-) diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc index e872eb20b..3ed9e7544 100644 --- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc +++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc @@ -21,136 +21,171 @@ namespace trans { //----------------------------------------------------------------------------- -void compute_legendre_polynomialsopt( - const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // latitudes in radians (in) - double legpol[] ) // values of associated Legendre functions, size (trc+1)*trc/2*nlats (out) +void compute_legendre_polynomialsopt( const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double leg_sym[], // values of associated Legendre functions, symmetric part + double leg_asym[] ) // values of associated Legendre functions, asymmetric part { + auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; }; array::ArrayT zfn_( trc + 1, trc + 1 ); array::ArrayView zfn = array::make_view( zfn_ ); + std::vector legpol( legendre_size( trc ) ); + auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; }; + int iodd; + + // Compute coefficients for Taylor series in 
Belousov (19) and (21) + // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.) + // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1 + zfn( 0, 0 ) = 2.; + for ( int jn = 1; jn <= trc; ++jn ) { + double zfnn = zfn( 0, 0 ); + for ( int jgl = 1; jgl <= jn; ++jgl ) { + zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) ); + } + iodd = jn % 2; + zfn( jn, jn ) = zfnn; + for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) { + double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) ); // new factor numerator + double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) ); // new factor denominator - auto idxmn = [&]( int jm, int jn, int jlat ) { - return ( 2 * trc + 3 - jm ) * jm / 2 * nlats + jlat * ( trc - jm + 1 ) + jn - jm; - }; - - { - ATLAS_TRACE( "init arrays" ); - - int iodd; - - // Compute coefficients for Taylor series in Belousov (19) and (21) - // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.) - // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1 - zfn( 0, 0 ) = 2.; - for ( int jn = 1; jn <= trc; ++jn ) { - double zfnn = zfn( 0, 0 ); - for ( int jgl = 1; jgl <= jn; ++jgl ) { - zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) ); - } - iodd = jn % 2; - zfn( jn, jn ) = zfnn; - for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) { - double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) ); // new factor numerator - double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) ); // new factor denominator - - zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd; - } + zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd; } } + + // Loop over latitudes: for ( int jlat = 0; jlat < nlats; ++jlat ) { - // -------------------- - // 1. First two columns - // -------------------- - double lat = lats[jlat]; - double zdlx1 = ( M_PI_2 - lat ); // theta - double zdlx = std::cos( zdlx1 ); // cos(theta) - double zdlsita = std::sqrt( 1. 
- zdlx * zdlx ); // sin(theta) (this is how trans library does it) - - legpol[idxmn( 0, 0, jlat )] = 1.; - - double zdl1sita = 0.; - // if we are less than 1 meter from the pole, - if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits::epsilon() ) ) { - zdlx = 1.; - zdlsita = 0.; - } - else { - zdl1sita = 1. / zdlsita; - } + { + //ATLAS_TRACE( "compute Legendre polynomials" ); + // -------------------- + // 1. First two columns + // -------------------- + double lat = lats[jlat]; + double zdlx1 = ( M_PI_2 - lat ); // theta + double zdlx = std::cos( zdlx1 ); // cos(theta) + double zdlsita = std::sqrt( 1. - zdlx * zdlx ); // sin(theta) (this is how trans library does it) + + legpol[idxmn( 0, 0 )] = 1.; + + double zdl1sita = 0.; + // if we are less than 1 meter from the pole, + if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits::epsilon() ) ) { + zdlx = 1.; + zdlsita = 0.; + } + else { + zdl1sita = 1. / zdlsita; + } - // ordinary Legendre polynomials from series expansion - // --------------------------------------------------- - - // even N - for ( int jn = 2; jn <= trc; jn += 2 ) { - double zdlk = 0.5 * zfn( jn, 0 ); - double zdlldn = 0.0; - double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); - // represented by only even k - for ( int jk = 2; jk <= jn; jk += 2 ) { - // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); - // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); + // ordinary Legendre polynomials from series expansion + // --------------------------------------------------- + + // even N + for ( int jn = 2; jn <= trc; jn += 2 ) { + double zdlk = 0.5 * zfn( jn, 0 ); + double zdlldn = 0.0; + double zdsq = 1. / std::sqrt( jn * ( jn + 1. 
) ); + // represented by only even k + for ( int jk = 2; jk <= jn; jk += 2 ) { + // normalised ordinary Legendre polynomial == \overbar{P_n}^0 + zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); + // normalised associated Legendre polynomial == \overbar{P_n}^1 + zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); + } + legpol[idxmn( 0, jn )] = zdlk; + legpol[idxmn( 1, jn )] = zdlldn; } - legpol[idxmn( 0, jn, jlat )] = zdlk; - legpol[idxmn( 1, jn, jlat )] = zdlldn; - } - // odd N - for ( int jn = 1; jn <= trc; jn += 2 ) { - zfn( jn, 0 ) = 0.; - double zdlk = 0.; - double zdlldn = 0.0; - double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); - // represented by only even k - for ( int jk = 1; jk <= jn; jk += 2 ) { - // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); - // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); + // odd N + for ( int jn = 1; jn <= trc; jn += 2 ) { + zfn( jn, 0 ) = 0.; + double zdlk = 0.; + double zdlldn = 0.0; + double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); + // represented by only even k + for ( int jk = 1; jk <= jn; jk += 2 ) { + // normalised ordinary Legendre polynomial == \overbar{P_n}^0 + zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); + // normalised associated Legendre polynomial == \overbar{P_n}^1 + zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); + } + legpol[idxmn( 0, jn )] = zdlk; + legpol[idxmn( 1, jn )] = zdlldn; } - legpol[idxmn( 0, jn, jlat )] = zdlk; - legpol[idxmn( 1, jn, jlat )] = zdlldn; - } - // -------------------------------------------------------------- - // 2. Diagonal (the terms 0,0 and 1,1 have already been computed) - // Belousov, equation (23) - // -------------------------------------------------------------- + // -------------------------------------------------------------- + // 2. 
Diagonal (the terms 0,0 and 1,1 have already been computed) + // Belousov, equation (23) + // -------------------------------------------------------------- - double zdls = zdl1sita * std::numeric_limits::min(); - for ( int jn = 2; jn <= trc; ++jn ) { - double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) ); + double zdls = zdl1sita * std::numeric_limits::min(); + for ( int jn = 2; jn <= trc; ++jn ) { + double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) ); - legpol[idxmn( jn, jn, jlat )] = legpol[idxmn( jn - 1, jn - 1, jlat )] * zdlsita * sq; - if ( std::abs( legpol[idxmn( jn, jn, jlat )] ) < zdls ) legpol[idxmn( jn, jn, jlat )] = 0.0; - } + legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq; + if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0; + } - // --------------------------------------------- - // 3. General recurrence (Belousov, equation 17) - // --------------------------------------------- - - for ( int jn = 3; jn <= trc; ++jn ) { - for ( int jm = 2; jm < jn; ++jm ) { - double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) ); // numerator of c in Belousov - double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of c in Belousov - double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) ); // numerator of d in Belousov - double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of d in Belousov - double en = ( ( 2. * jn + 1. ) * ( jn - jm ) ); // numerator of e in Belousov - double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) ); // denominator of e in Belousov - - legpol[idxmn( jm, jn, jlat )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2, jlat )] - - std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1, jlat )] * zdlx + - std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1, jlat )] * zdlx; + // --------------------------------------------- + // 3. 
General recurrence (Belousov, equation 17) + // --------------------------------------------- + + for ( int jn = 3; jn <= trc; ++jn ) { + for ( int jm = 2; jm < jn; ++jm ) { + double cn = + ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) ); // numerator of c in Belousov + double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of c in Belousov + double dn = + ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) ); // numerator of d in Belousov + double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of d in Belousov + double en = ( ( 2. * jn + 1. ) * ( jn - jm ) ); // numerator of e in Belousov + double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) ); // denominator of e in Belousov + + legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] - + std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx + + std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx; + } } } - // take factor 2 for m > 0 into account: - for ( int jm = 1; jm <= trc; ++jm ) { - for ( int jn = jm; jn <= trc; ++jn ) { - legpol[idxmn( jm, jn, jlat )] *= 2.; + { + //ATLAS_TRACE( "add to global arrays" ); + + // take factor 2 for m > 0 into account: + for ( int jm = 1; jm <= trc; ++jm ) { + for ( int jn = jm; jn <= trc; ++jn ) { + legpol[idxmn( jm, jn )] *= 2.; + } + } + int is0 = 0, ia0 = 0; + for ( int jm = 0; jm <= trc; jm++ ) { + int is1 = 0, ia1 = 0; + for ( int jn = jm; jn <= trc; jn++ ) { + if ( ( jn - jm ) % 2 == 0 ) { is1++; } + else { + ia1++; + } + } + int is2 = 0, ia2 = 0; + // the choice between the following two code lines determines whether + // total wavenumbers are summed in an ascending or descending order. + // The trans library in IFS uses descending order because it should + // be more accurate (higher wavenumbers have smaller contributions). + // This also needs to be changed when splitting the spectral data in + // TransLocalopt::invtrans_uv! 
+ //for ( int jn = jm; jn <= trc; jn++ ) { + for ( int jn = trc; jn >= jm; jn-- ) { + if ( ( jn - jm ) % 2 == 0 ) { + int is = is0 * nlats + is1 * jlat + is2++; + leg_sym[is] = legpol[idxmn( jm, jn )]; + } + else { + int ia = ia0 * nlats + ia1 * jlat + ia2++; + leg_asym[ia] = legpol[idxmn( jm, jn )]; + } + } + is0 += is2; + ia0 += ia2; } } } diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.h b/src/atlas/trans/localopt/LegendrePolynomialsopt.h index e2fd7db8f..4dc3ce2e5 100644 --- a/src/atlas/trans/localopt/LegendrePolynomialsopt.h +++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.h @@ -33,10 +33,11 @@ namespace trans { // Andreas Mueller *ECMWF* // void compute_legendre_polynomialsopt( - const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // latitudes in radians (in) - double legpol[] ); // values of associated Legendre functions, size (trc+1)*trc/2*nlats (out) + const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double legendre_sym[], // values of associated Legendre functions, symmetric part + double legendre_asym[] ); // values of associated Legendre functions, asymmetric part // -------------------------------------------------------------------------------------------------------------------- diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index 5b31b75ad..b34b8f837 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -119,40 +119,23 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t } // precomputations for Legendre polynomials: { - std::vector legendre( legendre_size( truncation_ + 1 ) * nlatsNH ); - { - ATLAS_TRACE( "opt precomp Legendre" ); - compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre.data() ); - } - { - ATLAS_TRACE( "opt split Legendre" 
); - int size_sym = 0; - int size_asym = 0; - legendre_sym_begin_.resize( truncation_ + 3 ); - legendre_asym_begin_.resize( truncation_ + 3 ); - legendre_sym_begin_[0] = 0; - legendre_asym_begin_[0] = 0; - for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { - size_sym += num_n( truncation_ + 1, jm, true ); - size_asym += num_n( truncation_ + 1, jm, false ); - legendre_sym_begin_[jm + 1] = size_sym; - legendre_asym_begin_[jm + 1] = size_asym; - } - legendre_sym_.resize( size_sym * nlatsNH ); - legendre_asym_.resize( size_asym * nlatsNH ); - int idx = 0, is = 0, ia = 0; - for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - for ( int jn = 0; jn <= truncation_ - jm + 1; jn++, idx++ ) { - if ( jn % 2 == 0 ) { legendre_sym_[is++] = legendre[idx]; } - else { - legendre_asym_[ia++] = legendre[idx]; - } - } - } - } - ASSERT( ia == size_asym * nlatsNH && is == size_sym * nlatsNH ); + ATLAS_TRACE( "opt precomp Legendre" ); + int size_sym = 0; + int size_asym = 0; + legendre_sym_begin_.resize( truncation_ + 3 ); + legendre_asym_begin_.resize( truncation_ + 3 ); + legendre_sym_begin_[0] = 0; + legendre_asym_begin_[0] = 0; + for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { + size_sym += num_n( truncation_ + 1, jm, true ); + size_asym += num_n( truncation_ + 1, jm, false ); + legendre_sym_begin_[jm + 1] = size_sym; + legendre_asym_begin_[jm + 1] = size_asym; } + legendre_sym_.resize( size_sym * nlatsNH ); + legendre_asym_.resize( size_asym * nlatsNH ); + compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_.data(), + legendre_asym_.data() ); } // precomputations for Fourier transformations: @@ -180,7 +163,17 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t } } } -} +#if ATLAS_HAVE_FFTW + { + ATLAS_TRACE( "opt precomp FFTW" ); + int num_complex = ( nlons / 2 ) + 1; + fft_in_ = fftw_alloc_complex( nlats * num_complex ); + fft_out_ = fftw_alloc_real( nlats * nlons ); + 
plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons, + FFTW_ESTIMATE ); + } +#endif +} // namespace atlas // -------------------------------------------------------------------------------------------------------------------- @@ -189,7 +182,13 @@ TransLocalopt::TransLocalopt( const Grid& grid, const long truncation, const eck // -------------------------------------------------------------------------------------------------------------------- -TransLocalopt::~TransLocalopt() {} +TransLocalopt::~TransLocalopt() { +#if ATLAS_HAVE_FFTW + fftw_destroy_plan( plan_ ); + fftw_free( fft_in_ ); + fftw_free( fft_out_ ); +#endif +} // -------------------------------------------------------------------------------------------------------------------- @@ -253,8 +252,6 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field if ( nb_scalar_fields > 0 ) { int nb_fields = nb_scalar_fields; - //eckit::linalg::LinearAlgebra::backend( "string" ) // might want to choose backend with this command - // Transform if ( grid::StructuredGrid g = grid_ ) { ATLAS_TRACE( "invtrans_uv structured opt" ); @@ -272,17 +269,25 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field // TODO: 0 is currently not working because it requires all latitudes to be included in legendre_ (which is currently not done) int size_sym = num_n( truncation_ + 1, jm, true ); int size_asym = num_n( truncation_ + 1, jm, false ); - std::vector scalar_sym( 2 * nb_fields * size_sym, -1234. ); - std::vector scalar_asym( 2 * nb_fields * size_asym, -1234. 
); + std::vector scalar_sym( 2 * nb_fields * size_sym ); + std::vector scalar_asym( 2 * nb_fields * size_asym ); std::vector scl_fourier_sym( size_fourier ); std::vector scl_fourier_asym( size_fourier ); { //ATLAS_TRACE( "opt Legendre split" ); int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2; - for ( int jn = 0; jn <= truncation_ - jm + 1; jn++ ) { + // the choice between the following two code lines determines whether + // total wavenumbers are summed in an ascending or descending order. + // The trans library in IFS uses descending order because it should + // be more accurate (higher wavenumbers have smaller contributions). + // This also needs to be changed when splitting the spectral data in + // compute_legendre_polynomialsopt! + //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { + for ( int jn = truncation_ + 1; jn >= jm; jn-- ) { for ( int imag = 0; imag < 2; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { - if ( jn % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) ); + if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } else { scalar_asym[ia++] = scalar_spectra[idx + ioff]; } @@ -333,6 +338,44 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field #endif } } +#if ATLAS_HAVE_FFTW + { + auto position = [&]( int jfld, int imag, int jlat, int jm ) { + return jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) ); + }; + auto factor = [&]( int jm ) { + if ( jm > 0 ) { return 2.; } + else { + return 1.; + } + }; + + int num_complex = ( nlons / 2 ) + 1; + { + ATLAS_TRACE( "opt FFTW" ); + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = 0; + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + for ( int jm = 0; jm < num_complex; jm++, idx++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + if ( jm <= truncation_ ) { + 
fft_in_[idx][imag] = + scl_fourier[position( jfld, imag, jlat, jm )] / factor( jm ); + } + else { + fft_in_[idx][imag] = 0.; + } + } + } + } + fftw_execute( plan_ ); + for ( int j = 0; j < nlats * nlons; j++ ) { + gp_fields[j + jfld * nlats * nlons] = fft_out_[j]; + } + } + } + } +#else #if 0 // 1: better for small number of columns, large truncation; 0: better for large number of columns // Transposition in Fourier space: @@ -406,6 +449,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } +#endif #endif // Computing u,v from U,V: { diff --git a/src/atlas/trans/localopt/TransLocalopt.h b/src/atlas/trans/localopt/TransLocalopt.h index dca01cd2f..b9f89ef08 100644 --- a/src/atlas/trans/localopt/TransLocalopt.h +++ b/src/atlas/trans/localopt/TransLocalopt.h @@ -12,8 +12,12 @@ #include +#include "atlas/array.h" #include "atlas/grid/Grid.h" #include "atlas/trans/Trans.h" +#if ATLAS_HAVE_FFTW +#include +#endif //----------------------------------------------------------------------------- // Forward declarations @@ -100,9 +104,6 @@ class TransLocalopt : public trans::TransImpl { double divergence_spectra[], const eckit::Configuration& = util::NoConfig() ) const override; private: - const double* legendre_data( int j ) const { return legendre_.data() + legendre_begin_[j]; } - double* legendre_data( int j ) { return legendre_.data() + legendre_begin_[j]; } - void invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, const double scalar_spectra[], double gp_fields[], const eckit::Configuration& = util::NoConfig() ) const; @@ -111,7 +112,6 @@ class TransLocalopt : public trans::TransImpl { Grid grid_; int truncation_; bool precompute_; - mutable std::vector legendre_; mutable std::vector legendre_sym_; mutable std::vector legendre_asym_; mutable std::vector fourier_; @@ -119,6 +119,11 @@ class TransLocalopt : public trans::TransImpl { std::vector 
legendre_begin_; std::vector legendre_sym_begin_; std::vector legendre_asym_begin_; +#if ATLAS_HAVE_FFTW + fftw_complex* fft_in_; + double* fft_out_; + fftw_plan plan_; +#endif }; //----------------------------------------------------------------------------- diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc index 99f54586a..4f786370d 100644 --- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc +++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc @@ -21,16 +21,14 @@ namespace trans { //----------------------------------------------------------------------------- -size_t legendre_size( const size_t truncation ) { - return ( truncation + 2 ) * ( truncation + 1 ) / 2; -} - -void compute_legendre_polynomialsopt2( const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // latitudes in radians (in) - double leg_sym[], // values of associated Legendre functions, symmetric part - double leg_asym[] ) // values of associated Legendre functions, asymmetric part +void compute_legendre_polynomialsopt2( + const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double __restrict leg_sym[], // values of associated Legendre functions, symmetric part + double __restrict leg_asym[] ) // values of associated Legendre functions, asymmetric part { + auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; }; array::ArrayT zfn_( trc + 1, trc + 1 ); array::ArrayView zfn = array::make_view( zfn_ ); std::vector legpol( legendre_size( trc ) ); diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index 084eca68f..2c2ffdee4 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -167,10 +167,17 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, 
const Grid& grid, const long { ATLAS_TRACE( "opt2 precomp FFTW" ); int num_complex = ( nlons / 2 ) + 1; - fft_in_ = fftw_alloc_complex( nlats * num_complex ); - fft_out_ = fftw_alloc_real( nlats * nlons ); - plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons, - FFTW_ESTIMATE ); + + fftw_complex* tmp_in = (fftw_complex*)malloc( sizeof( fftw_complex ) ); + double* tmp_out = (double*)malloc( sizeof( double ) ); + fft_in_ = fftw_alloc_complex( nlats * num_complex ); + fft_out_ = fftw_alloc_real( nlats * nlons ); + + plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, tmp_in, NULL, 1, num_complex, tmp_out, NULL, 1, nlons, + FFTW_ESTIMATE + FFTW_NO_SIMD ); + + free( tmp_in ); + free( tmp_out ); } #endif } // namespace atlas @@ -270,9 +277,8 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel // TODO: 0 is currently not working because it requires all latitudes to be included in legendre_ (which is currently not done) int size_sym = num_n( truncation_ + 1, jm, true ); int size_asym = num_n( truncation_ + 1, jm, false ); - std::vector scalar_sym( 2 * nb_fields * size_sym, -1234. ); - std::vector scalar_asym( 2 * nb_fields * size_asym, -1234. 
); - std::vector scl_fourier_sym( size_fourier ); + std::vector scalar_sym( 2 * nb_fields * size_sym ); + std::vector scalar_asym( 2 * nb_fields * size_asym ); std::vector scl_fourier_asym( size_fourier ); { //ATLAS_TRACE( "opt2 Legendre split" ); @@ -301,7 +307,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel eckit::linalg::Matrix A( scalar_sym.data(), nb_fields * 2, size_sym ); eckit::linalg::Matrix B( legendre_sym_.data() + legendre_sym_begin_[jm] * nlatsNH, size_sym, nlatsNH ); - eckit::linalg::Matrix C( scl_fourier_sym.data(), nb_fields * 2, nlatsNH ); + eckit::linalg::Matrix C( scl_fourier.data() + jm * size_fourier, nb_fields * 2, nlatsNH ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } if ( size_asym > 0 ) { @@ -312,21 +318,21 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); { //ATLAS_TRACE( "opt2 merge spheres" ); - // northern hemisphere: - int ioff = jm * size_fourier; - for ( int j = 0; j < 2 * nb_fields * nlatsNH; j++ ) { - scl_fourier[j + ioff] = scl_fourier_sym[j] + scl_fourier_asym[j]; - } // southern hemisphere: - int idx = 0; + int ioff = jm * size_fourier; + int idx = 0; for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { for ( int imag = 0; imag < 2; imag++ ) { for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { int pos = jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) ); - scl_fourier[pos + ioff] = scl_fourier_sym[idx] - scl_fourier_asym[idx]; + scl_fourier[pos + ioff] = scl_fourier[idx + ioff] - scl_fourier_asym[idx]; } } } + // northern hemisphere: + for ( int j = 0; j < 2 * nb_fields * nlatsNH; j++ ) { + scl_fourier[j + ioff] += scl_fourier_asym[j]; + } } } #else @@ -369,7 +375,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel } } } - fftw_execute( plan_ ); + fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ ); for ( int j = 0; j < nlats * nlons; j++ ) { gp_fields[j 
+ jfld * nlats * nlons] = fft_out_[j]; } diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 51a0d5796..cca428385 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -704,7 +704,7 @@ CASE( "test_transgeneral_with_translib" ) { #endif #endif //----------------------------------------------------------------------------- -#if 0 +#if 1 CASE( "test_trans_vordiv_with_translib" ) { Log::info() << "test_trans_vordiv_with_translib" << std::endl; // test transgeneral by comparing its result with the trans library @@ -714,7 +714,7 @@ CASE( "test_trans_vordiv_with_translib" ) { double tolerance = 1.e-13; // Grid: (Adjust the following line if the test takes too long!) - Grid g( "F120" ); + Grid g( "F320" ); grid::StructuredGrid gs( g ); int ndgl = gs.ny(); @@ -853,7 +853,7 @@ CASE( "test_trans_vordiv_with_translib" ) { } #endif //----------------------------------------------------------------------------- -#if 1 +#if 0 CASE( "test_trans_hires" ) { Log::info() << "test_trans_hires" << std::endl; // test transgeneral by comparing its result with the trans library @@ -863,10 +863,10 @@ CASE( "test_trans_hires" ) { double tolerance = 1.e-13; // Grid: (Adjust the following line if the test takes too long!) 
- Grid g( "F1280" ); + Grid g( "F640" ); #if ATLAS_HAVE_TRANS - //std::string transTypes[1] = {"localopt2"}; - std::string transTypes[2] = {"localopt2", "ifs"}; + std::string transTypes[1] = {"localopt2"}; + //std::string transTypes[3] = {"localopt", "localopt2", "ifs"}; #else std::string transTypes[1] = {"localopt2"}; #endif @@ -880,14 +880,6 @@ CASE( "test_trans_hires" ) { int nb_scalar = 100, nb_vordiv = 0; int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; - std::vector sp( 2 * N * nb_scalar ); - std::vector vor( 2 * N * nb_vordiv ); - std::vector div( 2 * N * nb_vordiv ); - std::vector rspecg( 2 * N ); - std::vector gp( nb_all * g.size() ); - std::vector rgp1( nb_all * g.size() ); - //std::vector rgp2( nb_all * g.size() ); - std::vector rgp_analytic( g.size() ); for ( auto transType : transTypes ) { int icase = 0; @@ -895,71 +887,33 @@ CASE( "test_trans_hires" ) { for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) { // vorticity, divergence, scalar for ( int ivar_out = 2; ivar_out < 3; ivar_out++ ) { // u, v, scalar int nb_fld = 1; - if ( ivar_out == 2 ) { - tolerance = 1.e-13; - nb_fld = nb_scalar; - } + if ( ivar_out == 2 ) { nb_fld = nb_scalar; } else { - tolerance = 2.e-6; - nb_fld = nb_vordiv; + nb_fld = nb_vordiv; } - for ( int jfld = 0; jfld < nb_fld; jfld++ ) { // multiple fields + for ( int jfld = 0; jfld < 1; jfld++ ) { // multiple fields int k = 0; for ( int m = 0; m <= trc; m++ ) { // zonal wavenumber for ( int n = m; n <= trc; n++ ) { // total wavenumber for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part - if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. 
) { - for ( int j = 0; j < 2 * N * nb_scalar; j++ ) { - sp[j] = 0.; - } - for ( int j = 0; j < 2 * N * nb_vordiv; j++ ) { - vor[j] = 0.; - div[j] = 0.; - } - if ( ivar_in == 0 ) vor[k * nb_vordiv + jfld] = 1.; - if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.; + if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. && + icase < 25 ) { + auto start = std::chrono::system_clock::now(); + std::vector sp( 2 * N * nb_scalar ); + std::vector gp( nb_all * g.size() ); if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.; - - for ( int j = 0; j < nb_all * g.size(); j++ ) { - gp[j] = 0.; - rgp1[j] = 0.; - //rgp2[j] = 0.; - } - for ( int j = 0; j < g.size(); j++ ) { - rgp_analytic[j] = 0.; - } - - spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(), - rgp_analytic.data(), ivar_in, ivar_out ); - - EXPECT_NO_THROW( trans.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), - div.data(), rgp1.data() ) ); - - //EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), - // div.data(), rgp2.data() ) ); - - int pos = ( ivar_out * nb_vordiv + jfld ); - - double rms_gen1 = - compute_rms( g.size(), rgp1.data() + pos * g.size(), rgp_analytic.data() ); - - //double rms_gen2 = - // compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() ); - - if ( !( rms_gen1 < tolerance ) ) { // || !( rms_gen2 < tolerance ) ) { - Log::info() - << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out - << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; - ATLAS_DEBUG_VAR( rms_gen1 ); - //ATLAS_DEBUG_VAR( rms_gen2 ); - ATLAS_DEBUG_VAR( tolerance ); - } - EXPECT( rms_gen1 < tolerance ); - //EXPECT( rms_gen2 < tolerance ); + EXPECT_NO_THROW( trans.invtrans( nb_scalar, sp.data(), nb_vordiv, nullptr, nullptr, + gp.data() ) ); icase++; - Log::info() << transType << ": case " << icase << std::endl; - EXPECT( icase < 25 ); + auto end = std::chrono::system_clock::now(); // + 
std::chrono::duration elapsed_seconds = end - start; + std::time_t end_time = std::chrono::system_clock::to_time_t( end ); + std::string time_str = std::ctime( &end_time ); + Log::info() + << transType << ": case " << icase + << ", elapsed time: " << elapsed_seconds.count() + << "s. Now: " << time_str.substr( 0, time_str.length() - 1 ) << std::endl; } k++; } From 40bae9c645b8244c169865912fb8f3db2666754b Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 15 Mar 2018 17:42:07 +0000 Subject: [PATCH 023/123] not using fft_in_ and fft_out_ when creating the plan is slower; removing scl_fourier_sym doesn't seem to affect the speed --- src/atlas/trans/localopt/TransLocalopt.cc | 17 --------- src/atlas/trans/localopt2/TransLocalopt2.cc | 39 +++++---------------- 2 files changed, 9 insertions(+), 47 deletions(-) diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index b34b8f837..9c4469725 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -54,23 +54,6 @@ int num_n( const int truncation, const int m, const bool symmetric ) { return len; } -std::vector n_indices( const int truncation, const int m, const bool symmetric ) { - int len = num_n( truncation, m, symmetric ), jn0 = 0; - if ( !symmetric ) { jn0 = 1; } - std::vector jns( len ); - int ia = 0, id = len - 1; - for ( int jn = jn0; jn <= truncation - m; jn += 2, ia++, id-- ) { -#if 1 // 1: ascending, 0: descending - int idx = ia; -#else - int idx = id; -#endif - jns[idx] = jn; - ASSERT( idx < len && idx >= 0 ); - } - return jns; -} - } // namespace // -------------------------------------------------------------------------------------------------------------------- diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index 2c2ffdee4..d6d4b4012 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -54,23 +54,6 
@@ int num_n( const int truncation, const int m, const bool symmetric ) { return len; } -std::vector n_indices( const int truncation, const int m, const bool symmetric ) { - int len = num_n( truncation, m, symmetric ), jn0 = 0; - if ( !symmetric ) { jn0 = 1; } - std::vector jns( len ); - int ia = 0, id = len - 1; - for ( int jn = jn0; jn <= truncation - m; jn += 2, ia++, id-- ) { -#if 1 // 1: ascending, 0: descending - int idx = ia; -#else - int idx = id; -#endif - jns[idx] = jn; - ASSERT( idx < len && idx >= 0 ); - } - return jns; -} - } // namespace // -------------------------------------------------------------------------------------------------------------------- @@ -167,17 +150,10 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long { ATLAS_TRACE( "opt2 precomp FFTW" ); int num_complex = ( nlons / 2 ) + 1; - - fftw_complex* tmp_in = (fftw_complex*)malloc( sizeof( fftw_complex ) ); - double* tmp_out = (double*)malloc( sizeof( double ) ); - fft_in_ = fftw_alloc_complex( nlats * num_complex ); - fft_out_ = fftw_alloc_real( nlats * nlons ); - - plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, tmp_in, NULL, 1, num_complex, tmp_out, NULL, 1, nlons, - FFTW_ESTIMATE + FFTW_NO_SIMD ); - - free( tmp_in ); - free( tmp_out ); + fft_in_ = fftw_alloc_complex( nlats * num_complex ); + fft_out_ = fftw_alloc_real( nlats * nlons ); + plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons, + FFTW_ESTIMATE ); } #endif } // namespace atlas @@ -320,12 +296,15 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel //ATLAS_TRACE( "opt2 merge spheres" ); // southern hemisphere: int ioff = jm * size_fourier; + int pos0 = 2 * ( nlats - 1 ) + ioff; int idx = 0; for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + int poslat = pos0 - 2 * jlat; for ( int imag = 0; imag < 2; imag++ ) { + int posimag = nb_fields * ( imag + poslat ); for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ 
) { - int pos = jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) ); - scl_fourier[pos + ioff] = scl_fourier[idx + ioff] - scl_fourier_asym[idx]; + int pos = jfld + posimag; + scl_fourier[pos] = scl_fourier[idx + ioff] - scl_fourier_asym[idx]; } } } From 2ef0a02ca61baaa87c1359d8df74bbd6c1270abb Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 15 Mar 2018 17:48:56 +0000 Subject: [PATCH 024/123] removed restrict again because gcc doesn't allow restrict with double --- src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc index 4f786370d..90e398229 100644 --- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc +++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc @@ -21,12 +21,11 @@ namespace trans { //----------------------------------------------------------------------------- -void compute_legendre_polynomialsopt2( - const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // latitudes in radians (in) - double __restrict leg_sym[], // values of associated Legendre functions, symmetric part - double __restrict leg_asym[] ) // values of associated Legendre functions, asymmetric part +void compute_legendre_polynomialsopt2( const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double leg_sym[], // values of associated Legendre functions, symmetric part + double leg_asym[] ) // values of associated Legendre functions, asymmetric part { auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; }; array::ArrayT zfn_( trc + 1, trc + 1 ); From fc6346213ac3e059c46951ad69a9ff071bbd8ddf Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 16 Mar 2018 11:19:50 +0000 Subject: [PATCH 025/123] removed imaginary 
part from Legendre transform for zonal wavenumber 0; running hires-test with truncation-1 to be more comparable with trans --- src/atlas/trans/localopt2/TransLocalopt2.cc | 92 +++++++++++---------- src/tests/trans/test_transgeneral.cc | 15 ++-- 2 files changed, 58 insertions(+), 49 deletions(-) diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index d6d4b4012..2f29c7acb 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -239,11 +239,11 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel // Transform if ( grid::StructuredGrid g = grid_ ) { ATLAS_TRACE( "invtrans_uv structured opt2" ); - int nlats = g.ny(); - int nlons = g.nxmax(); - int nlatsNH = nlats_northernHemisphere( nlats ); - int size_fourier = nb_fields * 2 * g.ny(); - std::vector scl_fourier( size_fourier * ( truncation + 1 ) ); + int nlats = g.ny(); + int nlons = g.nxmax(); + int nlatsNH = nlats_northernHemisphere( nlats ); + int size_fourier_max = nb_fields * 2 * nlats; + std::vector scl_fourier( size_fourier_max * ( truncation + 1 ) ); // Legendre transform: { @@ -253,9 +253,13 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel // TODO: 0 is currently not working because it requires all latitudes to be included in legendre_ (which is currently not done) int size_sym = num_n( truncation_ + 1, jm, true ); int size_asym = num_n( truncation_ + 1, jm, false ); - std::vector scalar_sym( 2 * nb_fields * size_sym ); - std::vector scalar_asym( 2 * nb_fields * size_asym ); - std::vector scl_fourier_asym( size_fourier ); + int n_imag = 2; + if ( jm == 0 ) { n_imag = 1; } + int size_fourier = nb_fields * n_imag * nlatsNH; + std::vector scalar_sym( n_imag * nb_fields * size_sym ); + std::vector scalar_asym( n_imag * nb_fields * size_asym ); + std::vector scl_fourier_sym( size_fourier ); + std::vector scl_fourier_asym( size_fourier, 0. 
); { //ATLAS_TRACE( "opt2 Legendre split" ); int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2; @@ -267,7 +271,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel // compute_legendre_polynomialsopt2! //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { for ( int jn = truncation_ + 1; jn >= jm; jn-- ) { - for ( int imag = 0; imag < 2; imag++ ) { + for ( int imag = 0; imag < n_imag; imag++ ) { for ( int jfld = 0; jfld < nb_fields; jfld++ ) { idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) ); if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } @@ -277,40 +281,51 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel } } } - ASSERT( ia == 2 * nb_fields * size_asym && is == 2 * nb_fields * size_sym ); + ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym ); } { - eckit::linalg::Matrix A( scalar_sym.data(), nb_fields * 2, size_sym ); + //Log::info() << "jm=" << jm << " symmetric - "; + eckit::linalg::Matrix A( scalar_sym.data(), nb_fields * n_imag, size_sym ); eckit::linalg::Matrix B( legendre_sym_.data() + legendre_sym_begin_[jm] * nlatsNH, size_sym, nlatsNH ); - eckit::linalg::Matrix C( scl_fourier.data() + jm * size_fourier, nb_fields * 2, nlatsNH ); + eckit::linalg::Matrix C( scl_fourier_sym.data(), nb_fields * n_imag, nlatsNH ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } if ( size_asym > 0 ) { - eckit::linalg::Matrix A( scalar_asym.data(), nb_fields * 2, size_asym ); + //Log::info() << "jm=" << jm << " antisymmetric - "; + eckit::linalg::Matrix A( scalar_asym.data(), nb_fields * n_imag, size_asym ); eckit::linalg::Matrix B( legendre_asym_.data() + legendre_asym_begin_[jm] * nlatsNH, size_asym, nlatsNH ); - eckit::linalg::Matrix C( scl_fourier_asym.data(), nb_fields * 2, nlatsNH ); + eckit::linalg::Matrix C( scl_fourier_asym.data(), nb_fields * n_imag, nlatsNH ); 
eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - { - //ATLAS_TRACE( "opt2 merge spheres" ); - // southern hemisphere: - int ioff = jm * size_fourier; - int pos0 = 2 * ( nlats - 1 ) + ioff; - int idx = 0; - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - int poslat = pos0 - 2 * jlat; - for ( int imag = 0; imag < 2; imag++ ) { - int posimag = nb_fields * ( imag + poslat ); - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { - int pos = jfld + posimag; - scl_fourier[pos] = scl_fourier[idx + ioff] - scl_fourier_asym[idx]; - } + } + { + //ATLAS_TRACE( "opt2 merge spheres" ); + // northern hemisphere: + int ioff = jm * size_fourier_max; + int pos0 = ioff; + int idx = 0; + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + int poslat = pos0 + 2 * jlat; + for ( int imag = 0; imag < n_imag; imag++ ) { + int posimag = nb_fields * ( imag + poslat ); + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + int pos = jfld + posimag; + scl_fourier[pos] = scl_fourier_sym[idx] + scl_fourier_asym[idx]; } } - // northern hemisphere: - for ( int j = 0; j < 2 * nb_fields * nlatsNH; j++ ) { - scl_fourier[j + ioff] += scl_fourier_asym[j]; + } + // southern hemisphere: + idx = 0; + pos0 = 2 * ( nlats - 1 ) + ioff; + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + int poslat = pos0 - 2 * jlat; + for ( int imag = 0; imag < n_imag; imag++ ) { + int posimag = nb_fields * ( imag + poslat ); + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + int pos = jfld + posimag; + scl_fourier[pos] = scl_fourier_sym[idx] - scl_fourier_asym[idx]; + } } } } @@ -327,26 +342,19 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel #if ATLAS_HAVE_FFTW { auto position = [&]( int jfld, int imag, int jlat, int jm ) { - return jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) ); + return jfld + nb_fields * ( imag + 2 * ( jlat + nlats * ( jm ) ) ); }; - auto factor = [&]( int jm ) { - if ( jm > 0 ) { return 2.; } - else { - return 1.; - } - }; - int 
num_complex = ( nlons / 2 ) + 1; { ATLAS_TRACE( "opt2 FFTW" ); for ( int jfld = 0; jfld < nb_fields; jfld++ ) { int idx = 0; for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int jm = 0; jm < num_complex; jm++, idx++ ) { + fft_in_[idx++][0] = scl_fourier[position( jfld, 0, jlat, 0 )]; + for ( int jm = 1; jm < num_complex; jm++, idx++ ) { for ( int imag = 0; imag < 2; imag++ ) { if ( jm <= truncation_ ) { - fft_in_[idx][imag] = - scl_fourier[position( jfld, imag, jlat, jm )] / factor( jm ); + fft_in_[idx][imag] = scl_fourier[position( jfld, imag, jlat, jm )] / 2.; } else { fft_in_[idx][imag] = 0.; diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index cca428385..a4ab92c26 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -714,7 +714,7 @@ CASE( "test_trans_vordiv_with_translib" ) { double tolerance = 1.e-13; // Grid: (Adjust the following line if the test takes too long!) - Grid g( "F320" ); + Grid g( "F3" ); grid::StructuredGrid gs( g ); int ndgl = gs.ny(); @@ -865,7 +865,7 @@ CASE( "test_trans_hires" ) { // Grid: (Adjust the following line if the test takes too long!) Grid g( "F640" ); #if ATLAS_HAVE_TRANS - std::string transTypes[1] = {"localopt2"}; + std::string transTypes[2] = {"localopt2", "ifs"}; //std::string transTypes[3] = {"localopt", "localopt2", "ifs"}; #else std::string transTypes[1] = {"localopt2"}; @@ -875,13 +875,14 @@ CASE( "test_trans_hires" ) { //int trc = ndgl - 1; // linear int trc = ndgl / 2. - 1; // cubic - functionspace::Spectral spectral( trc ); - functionspace::StructuredColumns gridpoints( g ); - - int nb_scalar = 100, nb_vordiv = 0; - int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; + int nb_scalar = 1, nb_vordiv = 0; for ( auto transType : transTypes ) { + if ( transType == "localopt2" ) { trc = ndgl / 2. - 2; } + else { + trc = ndgl / 2. 
- 1; + } + int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; int icase = 0; trans::Trans trans( g, trc, util::Config( "type", transType ) ); for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) { // vorticity, divergence, scalar From 5d7d5e3ef510e12213a4525faf1f142d484d3c1e Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Sat, 17 Mar 2018 13:37:06 +0000 Subject: [PATCH 026/123] added alignment and padding for Legendre transformations; works on Mac OS, still need to test on other platforms and with MKL --- CMakeLists.txt | 6 ++ cmake/CompileFlags.cmake | 2 + src/CMakeLists.txt | 6 ++ src/atlas/CMakeLists.txt | 2 + src/atlas/library/defines.h.in | 1 + .../localopt2/LegendrePolynomialsopt2.cc | 20 ++--- .../trans/localopt2/LegendrePolynomialsopt2.h | 12 +-- src/atlas/trans/localopt2/TransLocalopt2.cc | 82 +++++++++++++------ src/atlas/trans/localopt2/TransLocalopt2.h | 8 +- src/tests/trans/test_transgeneral.cc | 2 +- 10 files changed, 98 insertions(+), 43 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 787c96898..e71fcc03f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,6 +105,12 @@ ecbuild_add_option( FEATURE FFTW DESCRIPTION "Support for fftw" REQUIRED_PACKAGES "FFTW COMPONENTS double" ) +### MKL ... + +ecbuild_add_option( FEATURE MKL + DESCRIPTION "MKL linear algebra library" + REQUIRED_PACKAGES MKL ) + ### trans ... 
ecbuild_add_option( FEATURE TRANS diff --git a/cmake/CompileFlags.cmake b/cmake/CompileFlags.cmake index c02df915c..c92b7d581 100644 --- a/cmake/CompileFlags.cmake +++ b/cmake/CompileFlags.cmake @@ -9,6 +9,8 @@ if( CMAKE_CXX_COMPILER_ID MATCHES Cray ) endif() +ecbuild_add_cxx_flags("-Wl,-ydgemm_") +ecbuild_add_fortran_flags("-Wl,-ydgemm_") #ecbuild_add_cxx_flags("-fsanitize=address") #ecbuild_add_cxx_flags("-fsanitize=thread") #ecbuild_add_cxx_flags("-fsanitize=memory") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 98a5e1dd0..48175e157 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -42,6 +42,12 @@ else() set( ATLAS_HAVE_FFTW 0 ) endif() +if( ATLAS_HAVE_MKL ) + set( ATLAS_HAVE_MKL 1 ) +else() + set( ATLAS_HAVE_MKL 0 ) +endif() + if( ATLAS_HAVE_BOUNDSCHECKING ) set( ATLAS_HAVE_BOUNDSCHECKING 1 ) else() diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt index b55db3e40..26915243e 100644 --- a/src/atlas/CMakeLists.txt +++ b/src/atlas/CMakeLists.txt @@ -577,6 +577,7 @@ ecbuild_add_library( TARGET atlas "${TRANSI_INCLUDE_DIRS}" "${MPI_CXX_INCLUDE_DIRS}" "${FFTW_INCLUDES}" + "${MKL_INCLUDE_DIRS}" LIBS eckit_geometry eckit_linalg @@ -586,6 +587,7 @@ ecbuild_add_library( TARGET atlas "${TRANSI_LIBRARIES}" "${FCKIT_LIBRARIES}" "${FFTW_LIBRARIES}" + "${MKL_LIBRARIES}" DEFINITIONS ${ATLAS_DEFINITIONS} ) diff --git a/src/atlas/library/defines.h.in b/src/atlas/library/defines.h.in index e644fc73e..75f91f2b5 100644 --- a/src/atlas/library/defines.h.in +++ b/src/atlas/library/defines.h.in @@ -11,6 +11,7 @@ #define ATLAS_HAVE_FORTRAN @ATLAS_HAVE_FORTRAN@ #define ATLAS_HAVE_EIGEN @ATLAS_HAVE_EIGEN@ #define ATLAS_HAVE_FFTW @ATLAS_HAVE_FFTW@ +#define ATLAS_HAVE_MKL @ATLAS_HAVE_MKL@ #define ATLAS_BITS_GLOBAL @ATLAS_BITS_GLOBAL@ #define ATLAS_ARRAYVIEW_BOUNDS_CHECKING @ATLAS_HAVE_BOUNDSCHECKING@ #define ATLAS_INDEXVIEW_BOUNDS_CHECKING @ATLAS_HAVE_BOUNDSCHECKING@ diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc 
b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc index 90e398229..9d96b1ac0 100644 --- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc +++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc @@ -21,11 +21,14 @@ namespace trans { //----------------------------------------------------------------------------- -void compute_legendre_polynomialsopt2( const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // latitudes in radians (in) - double leg_sym[], // values of associated Legendre functions, symmetric part - double leg_asym[] ) // values of associated Legendre functions, asymmetric part +void compute_legendre_polynomialsopt2( + const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double leg_sym[], // values of associated Legendre functions, symmetric part + double leg_asym[], // values of associated Legendre functions, asymmetric part + size_t leg_start_sym[], // start indices for different zonal wave numbers, symmetric part + size_t leg_start_asym[] ) // start indices for different zonal wave numbers, asymmetric part { auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; }; array::ArrayT zfn_( trc + 1, trc + 1 ); @@ -157,7 +160,6 @@ void compute_legendre_polynomialsopt2( const size_t trc, // truncation (in) legpol[idxmn( jm, jn )] *= 2.; } } - int is0 = 0, ia0 = 0; for ( int jm = 0; jm <= trc; jm++ ) { int is1 = 0, ia1 = 0; for ( int jn = jm; jn <= trc; jn++ ) { @@ -176,16 +178,14 @@ void compute_legendre_polynomialsopt2( const size_t trc, // truncation (in) //for ( int jn = jm; jn <= trc; jn++ ) { for ( int jn = trc; jn >= jm; jn-- ) { if ( ( jn - jm ) % 2 == 0 ) { - int is = is0 * nlats + is1 * jlat + is2++; + int is = leg_start_sym[jm] + is1 * jlat + is2++; leg_sym[is] = legpol[idxmn( jm, jn )]; } else { - int ia = ia0 * nlats + ia1 * jlat + ia2++; + int ia = leg_start_asym[jm] 
+ ia1 * jlat + ia2++; leg_asym[ia] = legpol[idxmn( jm, jn )]; } } - is0 += is2; - ia0 += ia2; } } } diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h index 0de2a5f69..7e97dec7e 100644 --- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h +++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h @@ -33,11 +33,13 @@ namespace trans { // Andreas Mueller *ECMWF* // void compute_legendre_polynomialsopt2( - const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // latitudes in radians (in) - double legendre_sym[], // values of associated Legendre functions, symmetric part - double legendre_asym[] ); // values of associated Legendre functions, asymmetric part + const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double legendre_sym[], // values of associated Legendre functions, symmetric part + double legendre_asym[], // values of associated Legendre functions, asymmetric part + size_t leg_start_sym[], // start indices for different zonal wave numbers, symmetric part + size_t leg_start_asym[] ); // start indices for different zonal wave numbers, asymmetric part // -------------------------------------------------------------------------------------------------------------------- diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index 2f29c7acb..71aeeb6e0 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -23,6 +23,9 @@ #include "atlas/util/Constants.h" #include "eckit/linalg/LinearAlgebra.h" #include "eckit/linalg/Matrix.h" +#ifdef ATLAS_HAVE_MKL +#include "mkl.h" +#endif namespace atlas { namespace trans { @@ -54,6 +57,28 @@ int num_n( const int truncation, const int m, const bool symmetric ) { return len; } +void alloc_aligned( double*& ptr, size_t n ) { +#ifdef 
ATLAS_HAVE_MKL + int al = 64; + ptr = mkl_malloc( sizeof( double ) * n, al ); +#else + posix_memalign( (void**)&ptr, sizeof( double ) * 64, sizeof( double ) * n ); + //ptr = (double*)malloc( sizeof( double ) * n ); + //ptr = new double[n]; +#endif +} + +void free_aligned( double*& ptr ) { +#ifdef ATLAS_HAVE_MKL + mkl_free( ptr ); +#else + free( ptr ); +#endif +} + +int add_padding( int n ) { + return std::ceil( n / 8. ) * 8; +} } // namespace // -------------------------------------------------------------------------------------------------------------------- @@ -110,21 +135,21 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long legendre_sym_begin_[0] = 0; legendre_asym_begin_[0] = 0; for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { - size_sym += num_n( truncation_ + 1, jm, true ); - size_asym += num_n( truncation_ + 1, jm, false ); + size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsNH ); + size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsNH ); legendre_sym_begin_[jm + 1] = size_sym; legendre_asym_begin_[jm + 1] = size_asym; } - legendre_sym_.resize( size_sym * nlatsNH ); - legendre_asym_.resize( size_asym * nlatsNH ); - compute_legendre_polynomialsopt2( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_.data(), - legendre_asym_.data() ); + alloc_aligned( legendre_sym_, size_sym ); + alloc_aligned( legendre_asym_, size_asym ); + compute_legendre_polynomialsopt2( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_, + legendre_sym_begin_.data(), legendre_asym_begin_.data() ); } // precomputations for Fourier transformations: { ATLAS_TRACE( "opt2 precomp Fourier" ); - fourier_.resize( 2 * ( truncation_ + 1 ) * nlons ); + alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons ); int idx = 0; for ( int jlon = 0; jlon < nlons; jlon++ ) { for ( int jm = 0; jm < truncation_ + 1; jm++ ) { @@ -135,7 +160,7 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const 
long } { ATLAS_TRACE( "opt2 precomp Fourier tp" ); - fouriertp_.resize( 2 * ( truncation_ + 1 ) * nlons ); + alloc_aligned( fouriertp_, 2 * ( truncation_ + 1 ) * nlons ); int idx = 0; for ( int jm = 0; jm < truncation_ + 1; jm++ ) { for ( int jlon = 0; jlon < nlons; jlon++ ) { @@ -166,6 +191,10 @@ TransLocalopt2::TransLocalopt2( const Grid& grid, const long truncation, const e // -------------------------------------------------------------------------------------------------------------------- TransLocalopt2::~TransLocalopt2() { + free_aligned( legendre_sym_ ); + free_aligned( legendre_asym_ ); + free_aligned( fourier_ ); + free_aligned( fouriertp_ ); #if ATLAS_HAVE_FFTW fftw_destroy_plan( plan_ ); fftw_free( fft_in_ ); @@ -243,7 +272,8 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel int nlons = g.nxmax(); int nlatsNH = nlats_northernHemisphere( nlats ); int size_fourier_max = nb_fields * 2 * nlats; - std::vector scl_fourier( size_fourier_max * ( truncation + 1 ) ); + double* scl_fourier; + alloc_aligned( scl_fourier, size_fourier_max * ( truncation + 1 ) ); // Legendre transform: { @@ -256,10 +286,14 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel int n_imag = 2; if ( jm == 0 ) { n_imag = 1; } int size_fourier = nb_fields * n_imag * nlatsNH; - std::vector scalar_sym( n_imag * nb_fields * size_sym ); - std::vector scalar_asym( n_imag * nb_fields * size_asym ); - std::vector scl_fourier_sym( size_fourier ); - std::vector scl_fourier_asym( size_fourier, 0. 
); + double* scalar_sym; + double* scalar_asym; + double* scl_fourier_sym; + double* scl_fourier_asym; + alloc_aligned( scalar_sym, n_imag * nb_fields * size_sym ); + alloc_aligned( scalar_asym, n_imag * nb_fields * size_asym ); + alloc_aligned( scl_fourier_sym, size_fourier ); + alloc_aligned( scl_fourier_asym, size_fourier ); { //ATLAS_TRACE( "opt2 Legendre split" ); int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2; @@ -284,19 +318,15 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym ); } { - //Log::info() << "jm=" << jm << " symmetric - "; - eckit::linalg::Matrix A( scalar_sym.data(), nb_fields * n_imag, size_sym ); - eckit::linalg::Matrix B( legendre_sym_.data() + legendre_sym_begin_[jm] * nlatsNH, size_sym, - nlatsNH ); - eckit::linalg::Matrix C( scl_fourier_sym.data(), nb_fields * n_imag, nlatsNH ); + eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym ); + eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsNH ); + eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsNH ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } if ( size_asym > 0 ) { - //Log::info() << "jm=" << jm << " antisymmetric - "; - eckit::linalg::Matrix A( scalar_asym.data(), nb_fields * n_imag, size_asym ); - eckit::linalg::Matrix B( legendre_asym_.data() + legendre_asym_begin_[jm] * nlatsNH, size_asym, - nlatsNH ); - eckit::linalg::Matrix C( scl_fourier_asym.data(), nb_fields * n_imag, nlatsNH ); + eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym ); + eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsNH ); + eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } { @@ -329,6 +359,11 @@ void TransLocalopt2::invtrans_uv( const int truncation, 
const int nb_scalar_fiel } } } + free_aligned( scalar_sym ); + free_aligned( scalar_asym ); + free_aligned( scl_fourier_sym ); + free_aligned( scl_fourier_asym ); + #else int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; eckit::linalg::Matrix A( eckit::linalg::Matrix( @@ -464,6 +499,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel } } } + free_aligned( scl_fourier ); } else { ATLAS_TRACE( "invtrans_uv unstructured opt2" ); diff --git a/src/atlas/trans/localopt2/TransLocalopt2.h b/src/atlas/trans/localopt2/TransLocalopt2.h index 51644430d..c5f5f2aa4 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.h +++ b/src/atlas/trans/localopt2/TransLocalopt2.h @@ -113,10 +113,10 @@ class TransLocalopt2 : public trans::TransImpl { Grid grid_; int truncation_; bool precompute_; - mutable std::vector legendre_sym_; - mutable std::vector legendre_asym_; - mutable std::vector fourier_; - mutable std::vector fouriertp_; + double* legendre_sym_; + double* legendre_asym_; + double* fourier_; + double* fouriertp_; std::vector legendre_begin_; std::vector legendre_sym_begin_; std::vector legendre_asym_begin_; diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index a4ab92c26..7ad11b882 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -714,7 +714,7 @@ CASE( "test_trans_vordiv_with_translib" ) { double tolerance = 1.e-13; // Grid: (Adjust the following line if the test takes too long!) 
- Grid g( "F3" ); + Grid g( "F120" ); grid::StructuredGrid gs( g ); int ndgl = gs.ny(); From a5289bbb7693092d8e30262ffb9cf4e32d012423 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Sat, 17 Mar 2018 13:46:33 +0000 Subject: [PATCH 027/123] small fix for MKL --- src/atlas/trans/localopt2/TransLocalopt2.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index 71aeeb6e0..d06a7d435 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -60,7 +60,7 @@ int num_n( const int truncation, const int m, const bool symmetric ) { void alloc_aligned( double*& ptr, size_t n ) { #ifdef ATLAS_HAVE_MKL int al = 64; - ptr = mkl_malloc( sizeof( double ) * n, al ); + ptr = (double*)mkl_malloc( sizeof( double ) * n, al ); #else posix_memalign( (void**)&ptr, sizeof( double ) * 64, sizeof( double ) * n ); //ptr = (double*)malloc( sizeof( double ) * n ); From b50570dcfe2f8365ada5042289f8d5aa242e281c Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Sat, 17 Mar 2018 16:05:39 +0000 Subject: [PATCH 028/123] added alignment to dgemm-Fourier transformation; three versions of localopt (1: FFTW, 2+3: no FFTW, different transp.) 
--- src/atlas/CMakeLists.txt | 10 + src/atlas/trans/Trans.cc | 2 + src/atlas/trans/VorDivToUV.cc | 2 + .../trans/localopt/LegendrePolynomialsopt.cc | 20 +- .../trans/localopt/LegendrePolynomialsopt.h | 12 +- src/atlas/trans/localopt/TransLocalopt.cc | 552 ++++++++------ src/atlas/trans/localopt/TransLocalopt.h | 8 +- src/atlas/trans/localopt2/TransLocalopt2.cc | 435 ++++++----- .../trans/localopt3/FourierTransformsopt3.cc | 78 ++ .../trans/localopt3/FourierTransformsopt3.h | 38 + .../localopt3/LegendrePolynomialsopt3.cc | 197 +++++ .../trans/localopt3/LegendrePolynomialsopt3.h | 47 ++ .../trans/localopt3/LegendreTransformsopt3.cc | 62 ++ .../trans/localopt3/LegendreTransformsopt3.h | 37 + src/atlas/trans/localopt3/TransLocalopt3.cc | 690 ++++++++++++++++++ src/atlas/trans/localopt3/TransLocalopt3.h | 133 ++++ .../trans/localopt3/VorDivToUVLocalopt3.cc | 184 +++++ .../trans/localopt3/VorDivToUVLocalopt3.h | 67 ++ src/tests/trans/test_transgeneral.cc | 6 +- 19 files changed, 2133 insertions(+), 447 deletions(-) create mode 100644 src/atlas/trans/localopt3/FourierTransformsopt3.cc create mode 100644 src/atlas/trans/localopt3/FourierTransformsopt3.h create mode 100644 src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc create mode 100644 src/atlas/trans/localopt3/LegendrePolynomialsopt3.h create mode 100644 src/atlas/trans/localopt3/LegendreTransformsopt3.cc create mode 100644 src/atlas/trans/localopt3/LegendreTransformsopt3.h create mode 100644 src/atlas/trans/localopt3/TransLocalopt3.cc create mode 100644 src/atlas/trans/localopt3/TransLocalopt3.h create mode 100644 src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc create mode 100644 src/atlas/trans/localopt3/VorDivToUVLocalopt3.h diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt index 26915243e..bf6f686c3 100644 --- a/src/atlas/CMakeLists.txt +++ b/src/atlas/CMakeLists.txt @@ -351,6 +351,16 @@ trans/localopt2/FourierTransformsopt2.h trans/localopt2/FourierTransformsopt2.cc 
trans/localopt2/VorDivToUVLocalopt2.h trans/localopt2/VorDivToUVLocalopt2.cc +trans/localopt3/TransLocalopt3.h +trans/localopt3/TransLocalopt3.cc +trans/localopt3/LegendrePolynomialsopt3.h +trans/localopt3/LegendrePolynomialsopt3.cc +trans/localopt3/LegendreTransformsopt3.h +trans/localopt3/LegendreTransformsopt3.cc +trans/localopt3/FourierTransformsopt3.h +trans/localopt3/FourierTransformsopt3.cc +trans/localopt3/VorDivToUVLocalopt3.h +trans/localopt3/VorDivToUVLocalopt3.cc ) if( ATLAS_HAVE_TRANS ) diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc index efd8eabe9..b264cc7ee 100644 --- a/src/atlas/trans/Trans.cc +++ b/src/atlas/trans/Trans.cc @@ -30,6 +30,7 @@ #include "atlas/trans/local/TransLocal.h" #include "atlas/trans/localopt/TransLocalopt.h" #include "atlas/trans/localopt2/TransLocalopt2.h" +#include "atlas/trans/localopt3/TransLocalopt3.h" namespace atlas { namespace trans { @@ -66,6 +67,7 @@ struct force_link { load_builder_grid(); load_builder_grid(); load_builder_grid(); + load_builder_grid(); } }; diff --git a/src/atlas/trans/VorDivToUV.cc b/src/atlas/trans/VorDivToUV.cc index 566303c52..727ead312 100644 --- a/src/atlas/trans/VorDivToUV.cc +++ b/src/atlas/trans/VorDivToUV.cc @@ -29,6 +29,7 @@ #include "atlas/trans/local/VorDivToUVLocal.h" #include "atlas/trans/localopt/VorDivToUVLocalopt.h" #include "atlas/trans/localopt2/VorDivToUVLocalopt2.h" +#include "atlas/trans/localopt3/VorDivToUVLocalopt3.h" namespace atlas { namespace trans { @@ -59,6 +60,7 @@ struct force_link { load_builder(); load_builder(); load_builder(); + load_builder(); } }; diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc index 3ed9e7544..a31d893c6 100644 --- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc +++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc @@ -21,11 +21,14 @@ namespace trans { //----------------------------------------------------------------------------- -void 
compute_legendre_polynomialsopt( const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // latitudes in radians (in) - double leg_sym[], // values of associated Legendre functions, symmetric part - double leg_asym[] ) // values of associated Legendre functions, asymmetric part +void compute_legendre_polynomialsopt( + const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double leg_sym[], // values of associated Legendre functions, symmetric part + double leg_asym[], // values of associated Legendre functions, asymmetric part + size_t leg_start_sym[], // start indices for different zonal wave numbers, symmetric part + size_t leg_start_asym[] ) // start indices for different zonal wave numbers, asymmetric part { auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; }; array::ArrayT zfn_( trc + 1, trc + 1 ); @@ -157,7 +160,6 @@ void compute_legendre_polynomialsopt( const size_t trc, // truncation (in) legpol[idxmn( jm, jn )] *= 2.; } } - int is0 = 0, ia0 = 0; for ( int jm = 0; jm <= trc; jm++ ) { int is1 = 0, ia1 = 0; for ( int jn = jm; jn <= trc; jn++ ) { @@ -176,16 +178,14 @@ void compute_legendre_polynomialsopt( const size_t trc, // truncation (in) //for ( int jn = jm; jn <= trc; jn++ ) { for ( int jn = trc; jn >= jm; jn-- ) { if ( ( jn - jm ) % 2 == 0 ) { - int is = is0 * nlats + is1 * jlat + is2++; + int is = leg_start_sym[jm] + is1 * jlat + is2++; leg_sym[is] = legpol[idxmn( jm, jn )]; } else { - int ia = ia0 * nlats + ia1 * jlat + ia2++; + int ia = leg_start_asym[jm] + ia1 * jlat + ia2++; leg_asym[ia] = legpol[idxmn( jm, jn )]; } } - is0 += is2; - ia0 += ia2; } } } diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.h b/src/atlas/trans/localopt/LegendrePolynomialsopt.h index 4dc3ce2e5..4a2f004c7 100644 --- a/src/atlas/trans/localopt/LegendrePolynomialsopt.h +++ 
b/src/atlas/trans/localopt/LegendrePolynomialsopt.h @@ -33,11 +33,13 @@ namespace trans { // Andreas Mueller *ECMWF* // void compute_legendre_polynomialsopt( - const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // latitudes in radians (in) - double legendre_sym[], // values of associated Legendre functions, symmetric part - double legendre_asym[] ); // values of associated Legendre functions, asymmetric part + const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double legendre_sym[], // values of associated Legendre functions, symmetric part + double legendre_asym[], // values of associated Legendre functions, asymmetric part + size_t leg_start_sym[], // start indices for different zonal wave numbers, symmetric part + size_t leg_start_asym[] ); // start indices for different zonal wave numbers, asymmetric part // -------------------------------------------------------------------------------------------------------------------- diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index 9c4469725..0b9e1bf5f 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -23,6 +23,9 @@ #include "atlas/util/Constants.h" #include "eckit/linalg/LinearAlgebra.h" #include "eckit/linalg/Matrix.h" +#if ATLAS_HAVE_MKL +#include "mkl.h" +#endif namespace atlas { namespace trans { @@ -54,6 +57,28 @@ int num_n( const int truncation, const int m, const bool symmetric ) { return len; } +void alloc_aligned( double*& ptr, size_t n ) { +#if ATLAS_HAVE_MKL + int al = 64; + ptr = (double*)mkl_malloc( sizeof( double ) * n, al ); +#else + posix_memalign( (void**)&ptr, sizeof( double ) * 64, sizeof( double ) * n ); + //ptr = (double*)malloc( sizeof( double ) * n ); + //ptr = new double[n]; +#endif +} + +void free_aligned( double*& ptr ) { +#if ATLAS_HAVE_MKL + mkl_free( 
ptr ); +#else + free( ptr ); +#endif +} + +int add_padding( int n ) { + return std::ceil( n / 8. ) * 8; +} } // namespace // -------------------------------------------------------------------------------------------------------------------- @@ -66,7 +91,11 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t truncation_( truncation ), precompute_( config.getBool( "precompute", true ) ) { ATLAS_TRACE( "Precompute legendre opt" ); +#if ATLAS_HAVE_MKL + eckit::linalg::LinearAlgebra::backend( "mkl" ); // might want to choose backend with this command +#else eckit::linalg::LinearAlgebra::backend( "generic" ); // might want to choose backend with this command +#endif int nlats = 0; int nlons = 0; int nlatsNH = nlats_northernHemisphere( nlats ); @@ -110,21 +139,21 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t legendre_sym_begin_[0] = 0; legendre_asym_begin_[0] = 0; for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { - size_sym += num_n( truncation_ + 1, jm, true ); - size_asym += num_n( truncation_ + 1, jm, false ); + size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsNH ); + size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsNH ); legendre_sym_begin_[jm + 1] = size_sym; legendre_asym_begin_[jm + 1] = size_asym; } - legendre_sym_.resize( size_sym * nlatsNH ); - legendre_asym_.resize( size_asym * nlatsNH ); - compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_.data(), - legendre_asym_.data() ); + alloc_aligned( legendre_sym_, size_sym ); + alloc_aligned( legendre_asym_, size_asym ); + compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_, + legendre_sym_begin_.data(), legendre_asym_begin_.data() ); } // precomputations for Fourier transformations: { ATLAS_TRACE( "opt precomp Fourier" ); - fourier_.resize( 2 * ( truncation_ + 1 ) * nlons ); + alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * 
nlons ); int idx = 0; for ( int jlon = 0; jlon < nlons; jlon++ ) { for ( int jm = 0; jm < truncation_ + 1; jm++ ) { @@ -135,7 +164,7 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t } { ATLAS_TRACE( "opt precomp Fourier tp" ); - fouriertp_.resize( 2 * ( truncation_ + 1 ) * nlons ); + alloc_aligned( fouriertp_, 2 * ( truncation_ + 1 ) * nlons ); int idx = 0; for ( int jm = 0; jm < truncation_ + 1; jm++ ) { for ( int jlon = 0; jlon < nlons; jlon++ ) { @@ -166,6 +195,10 @@ TransLocalopt::TransLocalopt( const Grid& grid, const long truncation, const eck // -------------------------------------------------------------------------------------------------------------------- TransLocalopt::~TransLocalopt() { + free_aligned( legendre_sym_ ); + free_aligned( legendre_asym_ ); + free_aligned( fourier_ ); + free_aligned( fouriertp_ ); #if ATLAS_HAVE_FFTW fftw_destroy_plan( plan_ ); fftw_free( fft_in_ ); @@ -238,24 +271,42 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field // Transform if ( grid::StructuredGrid g = grid_ ) { ATLAS_TRACE( "invtrans_uv structured opt" ); - int nlats = g.ny(); - int nlons = g.nxmax(); - int nlatsNH = nlats_northernHemisphere( nlats ); - int size_fourier = nb_fields * 2 * g.ny(); - std::vector scl_fourier( size_fourier * ( truncation + 1 ) ); + int nlats = g.ny(); + int nlons = g.nxmax(); + int nlatsNH = nlats_northernHemisphere( nlats ); + auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) { + return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); + }; + /*auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) { + return jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) + jm * nb_fields * 2 * nlats ); + };*/ + auto posGemm1 = [&]( int jfld, int imag, int jlat, int jm ) { + return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) ); + }; + auto posGemm2 = [&]( int jfld, int imag, int jlat, int jm ) { + return imag + 2 * ( jm + ( truncation_ 
+ 1 ) * ( jlat + nlats * jfld ) ); + }; + int size_fourier_max = nb_fields * 2 * nlats; + double* scl_fourier; + alloc_aligned( scl_fourier, size_fourier_max * ( truncation + 1 ) ); // Legendre transform: { ATLAS_TRACE( "opt Legendre dgemm" ); - for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { -#if 1 // 0: no symmetry, 1: use symmetry \ - // TODO: 0 is currently not working because it requires all latitudes to be included in legendre_ (which is currently not done) + for ( int jm = 0; jm <= truncation_; jm++ ) { int size_sym = num_n( truncation_ + 1, jm, true ); int size_asym = num_n( truncation_ + 1, jm, false ); - std::vector scalar_sym( 2 * nb_fields * size_sym ); - std::vector scalar_asym( 2 * nb_fields * size_asym ); - std::vector scl_fourier_sym( size_fourier ); - std::vector scl_fourier_asym( size_fourier ); + int n_imag = 2; + if ( jm == 0 ) { n_imag = 1; } + int size_fourier = nb_fields * n_imag * nlatsNH; + double* scalar_sym; + double* scalar_asym; + double* scl_fourier_sym; + double* scl_fourier_asym; + alloc_aligned( scalar_sym, n_imag * nb_fields * size_sym ); + alloc_aligned( scalar_asym, n_imag * nb_fields * size_asym ); + alloc_aligned( scl_fourier_sym, size_fourier ); + alloc_aligned( scl_fourier_asym, size_fourier ); { //ATLAS_TRACE( "opt Legendre split" ); int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2; @@ -267,7 +318,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field // compute_legendre_polynomialsopt! 
//for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { for ( int jn = truncation_ + 1; jn >= jm; jn-- ) { - for ( int imag = 0; imag < 2; imag++ ) { + for ( int imag = 0; imag < n_imag; imag++ ) { for ( int jfld = 0; jfld < nb_fields; jfld++ ) { idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) ); if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } @@ -277,73 +328,70 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field } } } - ASSERT( ia == 2 * nb_fields * size_asym && is == 2 * nb_fields * size_sym ); + ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym ); } { - eckit::linalg::Matrix A( scalar_sym.data(), nb_fields * 2, size_sym ); - eckit::linalg::Matrix B( legendre_sym_.data() + legendre_sym_begin_[jm] * nlatsNH, size_sym, - nlatsNH ); - eckit::linalg::Matrix C( scl_fourier_sym.data(), nb_fields * 2, nlatsNH ); + eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym ); + eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsNH ); + eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsNH ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } if ( size_asym > 0 ) { - eckit::linalg::Matrix A( scalar_asym.data(), nb_fields * 2, size_asym ); - eckit::linalg::Matrix B( legendre_asym_.data() + legendre_asym_begin_[jm] * nlatsNH, size_asym, - nlatsNH ); - eckit::linalg::Matrix C( scl_fourier_asym.data(), nb_fields * 2, nlatsNH ); + eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym ); + eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsNH ); + eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - { - //ATLAS_TRACE( "opt merge spheres" ); - // northern hemisphere: - int ioff = jm * size_fourier; - for ( int j = 0; j < 2 * nb_fields * nlatsNH; j++ ) { - scl_fourier[j + ioff] = scl_fourier_sym[j] + 
scl_fourier_asym[j]; + } +#if 1 //ATLAS_HAVE_FFTW + { + //ATLAS_TRACE( "opt merge spheres" ); + // northern hemisphere: + int ioff = jm * size_fourier_max; + int pos0 = ioff; + int idx = 0; + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + int poslat = pos0 + 2 * jlat; + for ( int imag = 0; imag < n_imag; imag++ ) { + int posimag = nb_fields * ( imag + poslat ); + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + scl_fourier[posFFTW( jfld, imag, jlat, jm )] = + scl_fourier_sym[idx] + scl_fourier_asym[idx]; + } } - // southern hemisphere: - int idx = 0; - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - for ( int imag = 0; imag < 2; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { - int pos = jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) ); - scl_fourier[pos + ioff] = scl_fourier_sym[idx] - scl_fourier_asym[idx]; - } + } + // southern hemisphere: + idx = 0; + pos0 = 2 * ( nlats - 1 ) + ioff; + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + int poslat = pos0 - 2 * jlat; + for ( int imag = 0; imag < n_imag; imag++ ) { + int posimag = nb_fields * ( imag + poslat ); + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + int jslat = nlats - jlat - 1; + scl_fourier[posFFTW( jfld, imag, jslat, jm )] = + scl_fourier_sym[idx] - scl_fourier_asym[idx]; } } } } -#else - int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; - eckit::linalg::Matrix A( eckit::linalg::Matrix( - const_cast( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) ); - eckit::linalg::Matrix B( legendre_.data() + noff * g.ny(), ns, g.ny() ); - eckit::linalg::Matrix C( scl_fourier.data() + jm * size_fourier, nb_fields * 2, g.ny() ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); -#endif + free_aligned( scalar_sym ); + free_aligned( scalar_asym ); + free_aligned( scl_fourier_sym ); + free_aligned( scl_fourier_asym ); } } -#if ATLAS_HAVE_FFTW { - auto position = [&]( int jfld, int imag, int jlat, int jm ) { - return jfld + 
nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) ); - }; - auto factor = [&]( int jm ) { - if ( jm > 0 ) { return 2.; } - else { - return 1.; - } - }; - int num_complex = ( nlons / 2 ) + 1; { ATLAS_TRACE( "opt FFTW" ); for ( int jfld = 0; jfld < nb_fields; jfld++ ) { int idx = 0; - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int jm = 0; jm < num_complex; jm++, idx++ ) { + for ( int jlat = 0; jlat < nlats; jlat++ ) { + fft_in_[idx++][0] = scl_fourier[posFFTW( jfld, 0, jlat, 0 )]; + for ( int jm = 1; jm < num_complex; jm++, idx++ ) { for ( int imag = 0; imag < 2; imag++ ) { if ( jm <= truncation_ ) { - fft_in_[idx][imag] = - scl_fourier[position( jfld, imag, jlat, jm )] / factor( jm ); + fft_in_[idx][imag] = scl_fourier[posFFTW( jfld, imag, jlat, jm )] / 2.; } else { fft_in_[idx][imag] = 0.; @@ -351,7 +399,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field } } } - fftw_execute( plan_ ); + fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ ); for ( int j = 0; j < nlats * nlons; j++ ) { gp_fields[j + jfld * nlats * nlons] = fft_out_[j]; } @@ -360,32 +408,52 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field } #else #if 0 // 1: better for small number of columns, large truncation; 0: better for large number of columns - - // Transposition in Fourier space: - std::vector scl_fourier_tp( size_fourier * ( truncation + 1 ) ); - { - ATLAS_TRACE( "opt transposition in Fourier" ); - int idx = 0; - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int imag = 0; imag < 2; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int pos_tp = jfld + nb_fields * ( jlat + g.ny() * ( imag + 2 * ( jm ) ) ); - //int pos = jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) ); - scl_fourier_tp[pos_tp] = scl_fourier[idx++]; // = scl_fourier[pos] + { + //ATLAS_TRACE( "opt merge spheres" ); + // northern hemisphere: + int ioff = jm * 
size_fourier_max; + int pos0 = ioff; + int idx = 0; + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + int poslat = pos0 + 2 * jlat; + for ( int imag = 0; imag < n_imag; imag++ ) { + int posimag = nb_fields * ( imag + poslat ); + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + scl_fourier[posGemm1( jfld, imag, jlat, jm )] = + scl_fourier_sym[idx] + scl_fourier_asym[idx]; + } + } + } + // southern hemisphere: + idx = 0; + pos0 = 2 * ( nlats - 1 ) + ioff; + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + int poslat = pos0 - 2 * jlat; + for ( int imag = 0; imag < n_imag; imag++ ) { + int posimag = nb_fields * ( imag + poslat ); + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + int jslat = nlats - jlat - 1; + scl_fourier[posGemm1( jfld, imag, jslat, jm )] = + scl_fourier_sym[idx] - scl_fourier_asym[idx]; + } } } } + free_aligned( scalar_sym ); + free_aligned( scalar_asym ); + free_aligned( scl_fourier_sym ); + free_aligned( scl_fourier_asym ); } } // Fourier transformation: - std::vector gp_opt( nb_fields * grid_.size(), 0. 
); + double* gp_opt; + alloc_aligned( gp_opt, nb_fields * grid_.size() ); { ATLAS_TRACE( "opt Fourier dgemm" ); - eckit::linalg::Matrix A( scl_fourier_tp.data(), nb_fields * g.ny(), ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( fourier_.data(), ( truncation_ + 1 ) * 2, g.nxmax() ); - eckit::linalg::Matrix C( gp_opt.data(), nb_fields * g.ny(), g.nxmax() ); + eckit::linalg::Matrix A( scl_fourier, nb_fields * g.ny(), ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, g.nxmax() ); + eckit::linalg::Matrix C( gp_opt, nb_fields * g.ny(), g.nxmax() ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } @@ -403,194 +471,218 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field } } } + free_aligned( gp_opt ); #else - // Transposition in Fourier space: - std::vector scl_fourier_tp( size_fourier * ( truncation + 1 ) ); - { - ATLAS_TRACE( "opt transposition in Fourier" ); - int idx = 0; - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int imag = 0; imag < 2; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int pos_tp = imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + g.ny() * ( jfld ) ) ); - //int pos = jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) ); - scl_fourier_tp[pos_tp] = scl_fourier[idx++]; // = scl_fourier[pos] + { + //ATLAS_TRACE( "opt merge spheres" ); + // northern hemisphere: + int ioff = jm * size_fourier_max; + int pos0 = ioff; + int idx = 0; + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + int poslat = pos0 + 2 * jlat; + for ( int imag = 0; imag < n_imag; imag++ ) { + int posimag = nb_fields * ( imag + poslat ); + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + scl_fourier[posGemm2( jfld, imag, jlat, jm )] = + scl_fourier_sym[idx] + scl_fourier_asym[idx]; + } + } + } + // southern hemisphere: + idx = 0; + pos0 = 2 * ( nlats - 1 ) + ioff; + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { 
+ int poslat = pos0 - 2 * jlat; + for ( int imag = 0; imag < n_imag; imag++ ) { + int posimag = nb_fields * ( imag + poslat ); + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + int jslat = nlats - jlat - 1; + scl_fourier[posGemm2( jfld, imag, jslat, jm )] = + scl_fourier_sym[idx] - scl_fourier_asym[idx]; + } } } } + free_aligned( scalar_sym ); + free_aligned( scalar_asym ); + free_aligned( scl_fourier_sym ); + free_aligned( scl_fourier_asym ); } } // Fourier transformation: - std::vector gp_opt( nb_fields * grid_.size(), 0. ); { ATLAS_TRACE( "opt Fourier dgemm" ); - eckit::linalg::Matrix A( fouriertp_.data(), g.nxmax(), ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( scl_fourier_tp.data(), ( truncation_ + 1 ) * 2, nb_fields * g.ny() ); + eckit::linalg::Matrix A( fouriertp_, g.nxmax(), ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * g.ny() ); eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } #endif #endif - // Computing u,v from U,V: - { - if ( nb_vordiv_fields > 0 ) { - ATLAS_TRACE( "opt u,v from U,V" ); - std::vector coslats( nlats ); - for ( size_t j = 0; j < nlats; ++j ) { - coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); - } - int idx = 0; - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { - gp_fields[idx] /= coslats[jlat]; - idx++; + // Computing u,v from U,V: + { + if ( nb_vordiv_fields > 0 ) { + ATLAS_TRACE( "opt u,v from U,V" ); + std::vector coslats( nlats ); + for ( size_t j = 0; j < nlats; ++j ) { + coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); + } + int idx = 0; + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { + gp_fields[idx] /= coslats[jlat]; + idx++; + } + 
} } } } + free_aligned( scl_fourier ); } - } - } - else { - ATLAS_TRACE( "invtrans_uv unstructured opt" ); - int idx = 0; - for ( PointXY p : grid_.xy() ) { - double lon = p.x() * util::Constants::degreesToRadians(); - double lat = p.y() * util::Constants::degreesToRadians(); - double trcFT = truncation; - - // Legendre transform: - //invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, - // legReal.data(), legImag.data() ); - - // Fourier transform: - //invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(), - // gp_tmp.data() + ( nb_fields * idx ) ); - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + else { + ATLAS_TRACE( "invtrans_uv unstructured opt" ); + int idx = 0; + for ( PointXY p : grid_.xy() ) { + double lon = p.x() * util::Constants::degreesToRadians(); + double lat = p.y() * util::Constants::degreesToRadians(); + double trcFT = truncation; + + // Legendre transform: + //invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, + // legReal.data(), legImag.data() ); + + // Fourier transform: + //invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(), + // gp_tmp.data() + ( nb_fields * idx ) ); + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + } + ++idx; + } } - ++idx; } - } - } -} // namespace trans + } // namespace trans -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], - const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - invtrans( 0, nullptr, 
nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); -} + void TransLocalopt::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], + const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); + } -void extend_truncationopt( const int old_truncation, const int nb_fields, const double old_spectra[], - double new_spectra[] ) { - int k = 0, k_old = 0; - for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber - for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber - for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { // field - if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; } - else { - new_spectra[k++] = old_spectra[k_old++]; + void extend_truncationopt( const int old_truncation, const int nb_fields, const double old_spectra[], + double new_spectra[] ) { + int k = 0, k_old = 0; + for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber + for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber + for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { // field + if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; } + else { + new_spectra[k++] = old_spectra[k_old++]; + } + } } } } } - } -} -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- + + void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_spectra[], + const int nb_vordiv_fields, const double vorticity_spectra[], + const double divergence_spectra[], double gp_fields[], + const 
eckit::Configuration& config ) const { + ATLAS_TRACE( "TransLocalopt::invtrans" ); + int nb_gp = grid_.size(); + int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; + if ( nb_vordiv_fields > 0 ) { + std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector U_ext( nb_vordiv_spec_ext, 0. ); + std::vector V_ext( nb_vordiv_spec_ext, 0. ); -void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, - const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - ATLAS_TRACE( "TransLocalopt::invtrans" ); - int nb_gp = grid_.size(); - int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; - if ( nb_vordiv_fields > 0 ) { - std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector U_ext( nb_vordiv_spec_ext, 0. ); - std::vector V_ext( nb_vordiv_spec_ext, 0. 
); - - { - ATLAS_TRACE( "opt extend vordiv" ); - // increase truncation in vorticity_spectra and divergence_spectra: - extend_truncationopt( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() ); - extend_truncationopt( truncation_, nb_vordiv_fields, divergence_spectra, - divergence_spectra_extended.data() ); - } + { + ATLAS_TRACE( "opt extend vordiv" ); + // increase truncation in vorticity_spectra and divergence_spectra: + extend_truncationopt( truncation_, nb_vordiv_fields, vorticity_spectra, + vorticity_spectra_extended.data() ); + extend_truncationopt( truncation_, nb_vordiv_fields, divergence_spectra, + divergence_spectra_extended.data() ); + } - { - ATLAS_TRACE( "vordiv to UV opt" ); - // call vd2uv to compute u and v in spectral space - trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) ); - vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), - divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); - } + { + ATLAS_TRACE( "vordiv to UV opt" ); + // call vd2uv to compute u and v in spectral space + trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) ); + vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), + divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); + } - // perform spectral transform to compute all fields in grid point space - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), - gp_fields + nb_gp * nb_vordiv_fields, config ); - } - if ( nb_scalar_fields > 0 ) { - int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; - std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. 
); - extend_truncationopt( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); - invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), - gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); - } -} + // perform spectral transform to compute all fields in grid point space + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), + gp_fields + nb_gp * nb_vordiv_fields, config ); + } + if ( nb_scalar_fields > 0 ) { + int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; + std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. ); + extend_truncationopt( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); + invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), + gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); + } + } -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} + void TransLocalopt::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. 
+ } -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt::dirtrans( const FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} + void TransLocalopt::dirtrans( const FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config ) + const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. + } -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, - const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} + void TransLocalopt::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, + const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. + } -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], - const eckit::Configuration& ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. 
-} + void TransLocalopt::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], + const eckit::Configuration& ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. + } -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], - double divergence_spectra[], const eckit::Configuration& ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} + void TransLocalopt::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], + double divergence_spectra[], const eckit::Configuration& ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. 
+ } -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -} // namespace trans + } // namespace trans } // namespace atlas diff --git a/src/atlas/trans/localopt/TransLocalopt.h b/src/atlas/trans/localopt/TransLocalopt.h index b9f89ef08..fce71261a 100644 --- a/src/atlas/trans/localopt/TransLocalopt.h +++ b/src/atlas/trans/localopt/TransLocalopt.h @@ -112,10 +112,10 @@ class TransLocalopt : public trans::TransImpl { Grid grid_; int truncation_; bool precompute_; - mutable std::vector legendre_sym_; - mutable std::vector legendre_asym_; - mutable std::vector fourier_; - mutable std::vector fouriertp_; + double* legendre_sym_; + double* legendre_asym_; + double* fourier_; + double* fouriertp_; std::vector legendre_begin_; std::vector legendre_sym_begin_; std::vector legendre_asym_begin_; diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index d06a7d435..f4672becc 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -23,7 +23,7 @@ #include "atlas/util/Constants.h" #include "eckit/linalg/LinearAlgebra.h" #include "eckit/linalg/Matrix.h" -#ifdef ATLAS_HAVE_MKL +#if ATLAS_HAVE_MKL #include "mkl.h" #endif @@ -58,7 +58,7 @@ int num_n( const int truncation, const int m, const bool symmetric ) { } void alloc_aligned( double*& ptr, size_t n ) { -#ifdef ATLAS_HAVE_MKL +#if ATLAS_HAVE_MKL int al = 64; ptr = (double*)mkl_malloc( sizeof( double ) * n, al ); #else @@ -69,7 +69,7 @@ void alloc_aligned( double*& ptr, size_t n ) { } void free_aligned( double*& ptr ) { -#ifdef ATLAS_HAVE_MKL +#if ATLAS_HAVE_MKL mkl_free( ptr ); #else free( ptr ); @@ -91,7 +91,11 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long truncation_( truncation ), precompute_( 
config.getBool( "precompute", true ) ) { ATLAS_TRACE( "Precompute legendre opt2" ); +#if ATLAS_HAVE_MKL + eckit::linalg::LinearAlgebra::backend( "mkl" ); // might want to choose backend with this command +#else eckit::linalg::LinearAlgebra::backend( "generic" ); // might want to choose backend with this command +#endif int nlats = 0; int nlons = 0; int nlatsNH = nlats_northernHemisphere( nlats ); @@ -268,9 +272,21 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel // Transform if ( grid::StructuredGrid g = grid_ ) { ATLAS_TRACE( "invtrans_uv structured opt2" ); - int nlats = g.ny(); - int nlons = g.nxmax(); - int nlatsNH = nlats_northernHemisphere( nlats ); + int nlats = g.ny(); + int nlons = g.nxmax(); + int nlatsNH = nlats_northernHemisphere( nlats ); + auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) { + return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); + }; + /*auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) { + return jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) + jm * nb_fields * 2 * nlats ); + };*/ + auto posGemm1 = [&]( int jfld, int imag, int jlat, int jm ) { + return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) ); + }; + auto posGemm2 = [&]( int jfld, int imag, int jlat, int jm ) { + return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); + }; int size_fourier_max = nb_fields * 2 * nlats; double* scl_fourier; alloc_aligned( scl_fourier, size_fourier_max * ( truncation + 1 ) ); @@ -278,9 +294,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel // Legendre transform: { ATLAS_TRACE( "opt2 Legendre dgemm" ); - for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { -#if 1 // 0: no symmetry, 1: use symmetry \ - // TODO: 0 is currently not working because it requires all latitudes to be included in legendre_ (which is currently not done) + for ( int jm = 0; jm <= truncation_; jm++ ) { int size_sym = num_n( truncation_ 
+ 1, jm, true ); int size_asym = num_n( truncation_ + 1, jm, false ); int n_imag = 2; @@ -329,6 +343,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } +#if 0 //ATLAS_HAVE_FFTW { //ATLAS_TRACE( "opt2 merge spheres" ); // northern hemisphere: @@ -340,8 +355,8 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel for ( int imag = 0; imag < n_imag; imag++ ) { int posimag = nb_fields * ( imag + poslat ); for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { - int pos = jfld + posimag; - scl_fourier[pos] = scl_fourier_sym[idx] + scl_fourier_asym[idx]; + scl_fourier[posFFTW( jfld, imag, jlat, jm )] = + scl_fourier_sym[idx] + scl_fourier_asym[idx]; } } } @@ -353,8 +368,9 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel for ( int imag = 0; imag < n_imag; imag++ ) { int posimag = nb_fields * ( imag + poslat ); for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { - int pos = jfld + posimag; - scl_fourier[pos] = scl_fourier_sym[idx] - scl_fourier_asym[idx]; + int jslat = nlats - jlat - 1; + scl_fourier[posFFTW( jfld, imag, jslat, jm )] = + scl_fourier_sym[idx] - scl_fourier_asym[idx]; } } } @@ -363,33 +379,20 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel free_aligned( scalar_asym ); free_aligned( scl_fourier_sym ); free_aligned( scl_fourier_asym ); - -#else - int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; - eckit::linalg::Matrix A( eckit::linalg::Matrix( - const_cast( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) ); - eckit::linalg::Matrix B( legendre_.data() + noff * g.ny(), ns, g.ny() ); - eckit::linalg::Matrix C( scl_fourier.data() + jm * size_fourier, nb_fields * 2, g.ny() ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); -#endif } } -#if 
ATLAS_HAVE_FFTW { - auto position = [&]( int jfld, int imag, int jlat, int jm ) { - return jfld + nb_fields * ( imag + 2 * ( jlat + nlats * ( jm ) ) ); - }; int num_complex = ( nlons / 2 ) + 1; { ATLAS_TRACE( "opt2 FFTW" ); for ( int jfld = 0; jfld < nb_fields; jfld++ ) { int idx = 0; - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - fft_in_[idx++][0] = scl_fourier[position( jfld, 0, jlat, 0 )]; + for ( int jlat = 0; jlat < nlats; jlat++ ) { + fft_in_[idx++][0] = scl_fourier[posFFTW( jfld, 0, jlat, 0 )]; for ( int jm = 1; jm < num_complex; jm++, idx++ ) { for ( int imag = 0; imag < 2; imag++ ) { if ( jm <= truncation_ ) { - fft_in_[idx][imag] = scl_fourier[position( jfld, imag, jlat, jm )] / 2.; + fft_in_[idx][imag] = scl_fourier[posFFTW( jfld, imag, jlat, jm )] / 2.; } else { fft_in_[idx][imag] = 0.; @@ -405,33 +408,53 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel } } #else -#if 0 // 1: better for small number of columns, large truncation; 0: better for large number of columns - - // Transposition in Fourier space: - std::vector scl_fourier_tp( size_fourier * ( truncation + 1 ) ); - { - ATLAS_TRACE( "opt2 transposition in Fourier" ); - int idx = 0; - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int imag = 0; imag < 2; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int pos_tp = jfld + nb_fields * ( jlat + g.ny() * ( imag + 2 * ( jm ) ) ); - //int pos = jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) ); - scl_fourier_tp[pos_tp] = scl_fourier[idx++]; // = scl_fourier[pos] +#if 1 // 1: better for small number of columns, large truncation; 0: better for large number of columns + { + //ATLAS_TRACE( "opt2 merge spheres" ); + // northern hemisphere: + int ioff = jm * size_fourier_max; + int pos0 = ioff; + int idx = 0; + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + int poslat = pos0 + 2 * jlat; + for ( int imag = 0; imag < n_imag; imag++ ) { + 
int posimag = nb_fields * ( imag + poslat ); + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + scl_fourier[posGemm1( jfld, imag, jlat, jm )] = + scl_fourier_sym[idx] + scl_fourier_asym[idx]; + } + } + } + // southern hemisphere: + idx = 0; + pos0 = 2 * ( nlats - 1 ) + ioff; + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + int poslat = pos0 - 2 * jlat; + for ( int imag = 0; imag < n_imag; imag++ ) { + int posimag = nb_fields * ( imag + poslat ); + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + int jslat = nlats - jlat - 1; + scl_fourier[posGemm1( jfld, imag, jslat, jm )] = + scl_fourier_sym[idx] - scl_fourier_asym[idx]; + } } } } + free_aligned( scalar_sym ); + free_aligned( scalar_asym ); + free_aligned( scl_fourier_sym ); + free_aligned( scl_fourier_asym ); } } // Fourier transformation: - std::vector gp_opt2( nb_fields * grid_.size(), 0. ); + double* gp_opt2; + alloc_aligned( gp_opt2, nb_fields * grid_.size() ); { ATLAS_TRACE( "opt2 Fourier dgemm" ); - eckit::linalg::Matrix A( scl_fourier_tp.data(), nb_fields * g.ny(), ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( fourier_.data(), ( truncation_ + 1 ) * 2, g.nxmax() ); - eckit::linalg::Matrix C( gp_opt2.data(), nb_fields * g.ny(), g.nxmax() ); + eckit::linalg::Matrix A( scl_fourier, nb_fields * g.ny(), ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, g.nxmax() ); + eckit::linalg::Matrix C( gp_opt2, nb_fields * g.ny(), g.nxmax() ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } @@ -449,197 +472,219 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel } } } + free_aligned( gp_opt2 ); #else - // Transposition in Fourier space: - std::vector scl_fourier_tp( size_fourier * ( truncation + 1 ) ); - { - ATLAS_TRACE( "opt2 transposition in Fourier" ); - int idx = 0; - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int imag = 0; imag < 2; imag++ ) { - for 
( int jfld = 0; jfld < nb_fields; jfld++ ) { - int pos_tp = imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + g.ny() * ( jfld ) ) ); - //int pos = jfld + nb_fields * ( imag + 2 * ( jlat + g.ny() * ( jm ) ) ); - scl_fourier_tp[pos_tp] = scl_fourier[idx++]; // = scl_fourier[pos] + { + //ATLAS_TRACE( "opt2 merge spheres" ); + // northern hemisphere: + int ioff = jm * size_fourier_max; + int pos0 = ioff; + int idx = 0; + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + int poslat = pos0 + 2 * jlat; + for ( int imag = 0; imag < n_imag; imag++ ) { + int posimag = nb_fields * ( imag + poslat ); + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + scl_fourier[posGemm2( jfld, imag, jlat, jm )] = + scl_fourier_sym[idx] + scl_fourier_asym[idx]; + } + } + } + // southern hemisphere: + idx = 0; + pos0 = 2 * ( nlats - 1 ) + ioff; + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + int poslat = pos0 - 2 * jlat; + for ( int imag = 0; imag < n_imag; imag++ ) { + int posimag = nb_fields * ( imag + poslat ); + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + int jslat = nlats - jlat - 1; + scl_fourier[posGemm2( jfld, imag, jslat, jm )] = + scl_fourier_sym[idx] - scl_fourier_asym[idx]; + } } } } + free_aligned( scalar_sym ); + free_aligned( scalar_asym ); + free_aligned( scl_fourier_sym ); + free_aligned( scl_fourier_asym ); } } // Fourier transformation: - std::vector gp_opt2( nb_fields * grid_.size(), 0. 
); { ATLAS_TRACE( "opt2 Fourier dgemm" ); - eckit::linalg::Matrix A( fouriertp_.data(), g.nxmax(), ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( scl_fourier_tp.data(), ( truncation_ + 1 ) * 2, nb_fields * g.ny() ); + eckit::linalg::Matrix A( fouriertp_, g.nxmax(), ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * g.ny() ); eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } #endif #endif - // Computing u,v from U,V: - { - if ( nb_vordiv_fields > 0 ) { - ATLAS_TRACE( "opt2 u,v from U,V" ); - std::vector coslats( nlats ); - for ( size_t j = 0; j < nlats; ++j ) { - coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); - } - int idx = 0; - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { - gp_fields[idx] /= coslats[jlat]; - idx++; + // Computing u,v from U,V: + { + if ( nb_vordiv_fields > 0 ) { + ATLAS_TRACE( "opt2 u,v from U,V" ); + std::vector coslats( nlats ); + for ( size_t j = 0; j < nlats; ++j ) { + coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); + } + int idx = 0; + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { + gp_fields[idx] /= coslats[jlat]; + idx++; + } + } } } } + free_aligned( scl_fourier ); } - } - free_aligned( scl_fourier ); - } - else { - ATLAS_TRACE( "invtrans_uv unstructured opt2" ); - int idx = 0; - for ( PointXY p : grid_.xy() ) { - double lon = p.x() * util::Constants::degreesToRadians(); - double lat = p.y() * util::Constants::degreesToRadians(); - double trcFT = truncation; - - // Legendre transform: - //invtrans_legendreopt2( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, - // legReal.data(), legImag.data() ); - - // Fourier 
transform: - //invtrans_fourieropt2( trcFT, lon, nb_fields, legReal.data(), legImag.data(), - // gp_tmp.data() + ( nb_fields * idx ) ); - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + else { + ATLAS_TRACE( "invtrans_uv unstructured opt2" ); + int idx = 0; + for ( PointXY p : grid_.xy() ) { + double lon = p.x() * util::Constants::degreesToRadians(); + double lat = p.y() * util::Constants::degreesToRadians(); + double trcFT = truncation; + + // Legendre transform: + //invtrans_legendreopt2( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, + // legReal.data(), legImag.data() ); + + // Fourier transform: + //invtrans_fourieropt2( trcFT, lon, nb_fields, legReal.data(), legImag.data(), + // gp_tmp.data() + ( nb_fields * idx ) ); + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + } + ++idx; + } } - ++idx; } - } - } -} // namespace trans + } // namespace trans -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt2::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], - const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); -} + void TransLocalopt2::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], + const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); + } -void extend_truncationopt2( const int old_truncation, const int nb_fields, const double old_spectra[], - double 
new_spectra[] ) { - int k = 0, k_old = 0; - for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber - for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber - for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { // field - if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; } - else { - new_spectra[k++] = old_spectra[k_old++]; + void extend_truncationopt2( const int old_truncation, const int nb_fields, const double old_spectra[], + double new_spectra[] ) { + int k = 0, k_old = 0; + for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber + for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber + for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { // field + if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; } + else { + new_spectra[k++] = old_spectra[k_old++]; + } + } } } } } - } -} -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- + + void TransLocalopt2::invtrans( const int nb_scalar_fields, const double scalar_spectra[], + const int nb_vordiv_fields, const double vorticity_spectra[], + const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + ATLAS_TRACE( "TransLocalopt2::invtrans" ); + int nb_gp = grid_.size(); + int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; + if ( nb_vordiv_fields > 0 ) { + std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector U_ext( nb_vordiv_spec_ext, 0. ); + std::vector V_ext( nb_vordiv_spec_ext, 0. 
); -void TransLocalopt2::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, - const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - ATLAS_TRACE( "TransLocalopt2::invtrans" ); - int nb_gp = grid_.size(); - int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; - if ( nb_vordiv_fields > 0 ) { - std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector U_ext( nb_vordiv_spec_ext, 0. ); - std::vector V_ext( nb_vordiv_spec_ext, 0. ); - - { - ATLAS_TRACE( "opt2 extend vordiv" ); - // increase truncation in vorticity_spectra and divergence_spectra: - extend_truncationopt2( truncation_, nb_vordiv_fields, vorticity_spectra, - vorticity_spectra_extended.data() ); - extend_truncationopt2( truncation_, nb_vordiv_fields, divergence_spectra, - divergence_spectra_extended.data() ); - } + { + ATLAS_TRACE( "opt2 extend vordiv" ); + // increase truncation in vorticity_spectra and divergence_spectra: + extend_truncationopt2( truncation_, nb_vordiv_fields, vorticity_spectra, + vorticity_spectra_extended.data() ); + extend_truncationopt2( truncation_, nb_vordiv_fields, divergence_spectra, + divergence_spectra_extended.data() ); + } - { - ATLAS_TRACE( "vordiv to UV opt2" ); - // call vd2uv to compute u and v in spectral space - trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt2" ) ); - vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), - divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); - } + { + ATLAS_TRACE( "vordiv to UV opt2" ); + // call vd2uv to compute u and v in spectral space + trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt2" ) ); + vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, 
vorticity_spectra_extended.data(), + divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); + } - // perform spectral transform to compute all fields in grid point space - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), - gp_fields + nb_gp * nb_vordiv_fields, config ); - } - if ( nb_scalar_fields > 0 ) { - int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; - std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. ); - extend_truncationopt2( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); - invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), - gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); - } -} + // perform spectral transform to compute all fields in grid point space + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), + gp_fields + nb_gp * nb_vordiv_fields, config ); + } + if ( nb_scalar_fields > 0 ) { + int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; + std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. 
); + extend_truncationopt2( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); + invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), + gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); + } + } -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt2::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} + void TransLocalopt2::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) + const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. + } -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt2::dirtrans( const FieldSet& gpfields, FieldSet& spfields, - const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} + void TransLocalopt2::dirtrans( const FieldSet& gpfields, FieldSet& spfields, + const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. 
+ } -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt2::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, - const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} + void TransLocalopt2::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, + const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. + } -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt2::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], - const eckit::Configuration& ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} + void TransLocalopt2::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], + const eckit::Configuration& ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. + } -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt2::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], - double divergence_spectra[], const eckit::Configuration& ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. 
-} + void TransLocalopt2::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], + double divergence_spectra[], const eckit::Configuration& ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. + } -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -} // namespace trans + } // namespace trans } // namespace atlas diff --git a/src/atlas/trans/localopt3/FourierTransformsopt3.cc b/src/atlas/trans/localopt3/FourierTransformsopt3.cc new file mode 100644 index 000000000..c02b57ada --- /dev/null +++ b/src/atlas/trans/localopt3/FourierTransformsopt3.cc @@ -0,0 +1,78 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor + * does it submit to any jurisdiction. 
+ */ + +#include +#include +#include + +#include "atlas/trans/localopt3/FourierTransformsopt3.h" + +namespace atlas { +namespace trans { + +//----------------------------------------------------------------------------- + +void invtrans_fourieropt3( const size_t trcFT, + const double lon, // longitude in radians (in) + const int nb_fields, // Number of fields + const double rlegReal[], // associated Legendre functions, size (trc+1)*trc/2 (in) + const double rlegImag[], // associated Legendre functions, size (trc+1)*trc/2 (in) + double rgp[] ) // gridpoint +{ + for ( int jfld = 0; jfld < nb_fields; ++jfld ) { + rgp[jfld] = 0.; + } + // local Fourier transformation: + for ( int jm = 0; jm <= trcFT; ++jm ) { + const double cos = std::cos( jm * lon ); + const double sin = std::sin( jm * lon ); + for ( int jfld = 0; jfld < nb_fields; ++jfld ) { + double real = cos * rlegReal[jm * nb_fields + jfld]; + double imag = sin * rlegImag[jm * nb_fields + jfld]; + rgp[jfld] += real - imag; + } + } +} + +int fourier_truncationopt3( const int truncation, // truncation + const int nx, // number of longitudes + const int nxmax, // maximum nx + const int ndgl, // number of latitudes + const double lat, // latitude in radian + const bool fullgrid ) { // regular grid + int trc = truncation; + int trclin = ndgl - 1; + int trcquad = ndgl * 2 / 3 - 1; + if ( truncation >= trclin || fullgrid ) { + // linear + trc = ( nx - 1 ) / 2; + } + else if ( truncation >= trcquad ) { + // quadratic + double weight = 3 * ( trclin - truncation ) / ndgl; + double sqcos = std::pow( std::cos( lat ), 2 ); + + trc = ( nx - 1 ) / ( 2 + weight * sqcos ); + } + else { + // cubic + double sqcos = std::pow( std::cos( lat ), 2 ); + + trc = ( nx - 1 ) / ( 2 + sqcos ) - 1; + } + trc = std::min( truncation, trc ); + return trc; +} + +// -------------------------------------------------------------------------------------------------------------------- + +} // namespace trans +} // namespace atlas diff --git 
a/src/atlas/trans/localopt3/FourierTransformsopt3.h b/src/atlas/trans/localopt3/FourierTransformsopt3.h new file mode 100644 index 000000000..b735ed145 --- /dev/null +++ b/src/atlas/trans/localopt3/FourierTransformsopt3.h @@ -0,0 +1,38 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#pragma once + +#include + +namespace atlas { +namespace trans { + +//----------------------------------------------------------------------------- +// Routine to compute the local Fourier transformation +// +// Author: +// Andreas Mueller *ECMWF* +// + +void invtrans_fourieropt3( const size_t trcFT, + const double lon, // longitude in radians (in) + const int nb_fields, // Number of fields + const double rlegReal[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) + const double rlegImag[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) + double rgp[] ); // gridpoint + +int fourier_truncationopt3( const int truncation, const int nx, const int nxmax, const int ndgl, const double lat, + const bool fullgrid ); + +// -------------------------------------------------------------------------------------------------------------------- + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc new file mode 100644 index 000000000..c6fbcad68 --- /dev/null +++ b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc @@ -0,0 +1,197 @@ +/* + * (C) Copyright 2013 ECMWF. 
#include <cmath>
#include <cstddef>
#include <limits>
#include <vector>

namespace atlas {
namespace trans {

//-----------------------------------------------------------------------------

/// Compute the normalised associated Legendre functions for a set of latitudes
/// (series expansion and recurrences following Belousov) and store them split
/// into a symmetric part ((n - m) even) and an antisymmetric part ((n - m) odd).
///
/// Storage layout: for zonal wave number `jm` the values of latitude `jlat`
/// start at
///     leg_sym [ leg_start_sym [jm] + nsym (jm) * jlat ]
///     leg_asym[ leg_start_asym[jm] + nasym(jm) * jlat ]
/// where nsym / nasym are the numbers of total wave numbers n in [jm, trc] with
/// (n - jm) even / odd. Within such a group the total wave numbers are stored in
/// DESCENDING order; code that splits spectral data to match must use the same
/// order. All values with m > 0 are multiplied by 2.
///
/// @param trc             truncation
/// @param nlats           number of latitudes
/// @param lats            latitudes in radians, nlats values
/// @param leg_sym         output, symmetric part
/// @param leg_asym        output, antisymmetric part
/// @param leg_start_sym   start offsets into leg_sym per zonal wave number (read only)
/// @param leg_start_asym  start offsets into leg_asym per zonal wave number (read only)
void compute_legendre_polynomialsopt3( const std::size_t trc, const int nlats, const double lats[], double leg_sym[],
                                       double leg_asym[], std::size_t leg_start_sym[],
                                       std::size_t leg_start_asym[] ) {
    const int ntrc          = static_cast<int>( trc );
    const std::size_t ncols = trc + 1;

    // Position of (m, n), n >= m, in the triangular array legpol.
    auto idxmn = [trc]( std::size_t jm, std::size_t jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; };

    // Legendre functions of the latitude currently being processed.
    std::vector<double> legpol( ( trc + 2 ) * ( trc + 1 ) / 2 );

    // Series coefficients, (trc+1) x (trc+1), zero-initialised scratch table.
    std::vector<double> zfn_data( ncols * ncols, 0. );
    auto zfn = [&zfn_data, ncols]( int jn, int jk ) -> double& { return zfn_data[jn * ncols + jk]; };

    // Compute coefficients for Taylor series in Belousov (19) and (21)
    // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.)
    // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1
    zfn( 0, 0 ) = 2.;
    for ( int jn = 1; jn <= ntrc; ++jn ) {
        double zfnn = zfn( 0, 0 );
        for ( int jgl = 1; jgl <= jn; ++jgl ) {
            zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) );
        }
        const int iodd = jn % 2;
        zfn( jn, jn )  = zfnn;
        for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) {
            const double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) );  // new factor numerator
            const double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) );           // new factor denominator

            zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd;
        }
    }

    constexpr double half_pi = 1.57079632679489661923;  // pi / 2
    const double pole_limit  = std::sqrt( std::numeric_limits<double>::epsilon() );

    // Loop over latitudes:
    for ( int jlat = 0; jlat < nlats; ++jlat ) {
        // --------------------
        // 1. First two columns
        // --------------------
        const double zdlx1 = half_pi - lats[jlat];              // theta
        double zdlx        = std::cos( zdlx1 );                 // cos(theta)
        double zdlsita     = std::sqrt( 1. - zdlx * zdlx );     // sin(theta) (this is how trans library does it)

        legpol[idxmn( 0, 0 )] = 1.;

        double zdl1sita = 0.;
        // if we are less than 1 meter from the pole,
        if ( std::abs( zdlsita ) <= pole_limit ) {
            zdlx    = 1.;
            zdlsita = 0.;
        }
        else {
            zdl1sita = 1. / zdlsita;
        }

        // ordinary Legendre polynomials from series expansion
        // ---------------------------------------------------
        // Even n is represented by even k only (the k = 0 term enters with weight
        // 0.5), odd n by odd k only.
        for ( int jn = 1; jn <= ntrc; ++jn ) {
            const bool odd    = ( jn % 2 != 0 );
            double zdlk       = odd ? 0. : 0.5 * zfn( jn, 0 );
            double zdlldn     = 0.0;
            const double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) );
            for ( int jk = odd ? 1 : 2; jk <= jn; jk += 2 ) {
                // normalised ordinary Legendre polynomial == \overbar{P_n}^0
                zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 );
                // normalised associated Legendre polynomial == \overbar{P_n}^1
                zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 );
            }
            legpol[idxmn( 0, jn )] = zdlk;
            legpol[idxmn( 1, jn )] = zdlldn;
        }

        // --------------------------------------------------------------
        // 2. Diagonal (the terms 0,0 and 1,1 have already been computed)
        //    Belousov, equation (23)
        // --------------------------------------------------------------
        const double zdls = zdl1sita * std::numeric_limits<double>::min();
        for ( int jn = 2; jn <= ntrc; ++jn ) {
            const double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) );

            legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq;
            // flush values that would be denormal to zero
            if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0;
        }

        // ---------------------------------------------
        // 3. General recurrence (Belousov, equation 17)
        // ---------------------------------------------
        for ( int jn = 3; jn <= ntrc; ++jn ) {
            for ( int jm = 2; jm < jn; ++jm ) {
                const double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) );  // numerator of c
                const double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of c
                const double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) );  // numerator of d
                const double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) );       // denominator of d
                const double en = ( ( 2. * jn + 1. ) * ( jn - jm ) );                          // numerator of e
                const double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) );                          // denominator of e

                legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] -
                                          std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx +
                                          std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx;
            }
        }

        // ------------------------------
        // 4. Copy into the output arrays
        // ------------------------------

        // take factor 2 for m > 0 into account:
        for ( int jm = 1; jm <= ntrc; ++jm ) {
            for ( int jn = jm; jn <= ntrc; ++jn ) {
                legpol[idxmn( jm, jn )] *= 2.;
            }
        }
        for ( int jm = 0; jm <= ntrc; ++jm ) {
            // number of total wave numbers with (jn - jm) even / odd
            const std::size_t nsym  = ( ntrc - jm + 2 ) / 2;
            const std::size_t nasym = ( ntrc - jm + 1 ) / 2;

            std::size_t isym = leg_start_sym[jm] + nsym * jlat;
            // the antisymmetric start offset is only read when there is something to store
            std::size_t iasym = ( nasym > 0 ) ? leg_start_asym[jm] + nasym * jlat : 0;

            // Total wavenumbers are stored in descending order. The trans library
            // in IFS sums in descending order because it should be more accurate
            // (higher wavenumbers have smaller contributions). The splitting of
            // the spectral data in TransLocalopt3::invtrans_uv has to use the same
            // order!
            for ( int jn = ntrc; jn >= jm; --jn ) {
                if ( ( jn - jm ) % 2 == 0 ) { leg_sym[isym++] = legpol[idxmn( jm, jn )]; }
                else {
                    leg_asym[iasym++] = legpol[idxmn( jm, jn )];
                }
            }
        }
    }
}

// --------------------------------------------------------------------------------------------------------------------

}  // namespace trans
}  // namespace atlas
+ * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#pragma once + +#include + +namespace atlas { +namespace trans { + +//----------------------------------------------------------------------------- +// Routine to compute the Legendre polynomials in serial according to Belousov +// (using correction by Swarztrauber) +// +// Reference: +// S.L. Belousov, Tables of normalized associated Legendre Polynomials, Pergamon +// Press (1962) +// P.N. Swarztrauber, On computing the points and weights for Gauss-Legendre +// quadrature, +// SIAM J. Sci. Comput. Vol. 24 (3) pp. 945-954 (2002) +// +// Author of Fortran version: +// Mats Hamrud, Philippe Courtier, Nils Wedi *ECMWF* +// +// Ported to C++ by: +// Andreas Mueller *ECMWF* +// +void compute_legendre_polynomialsopt3( + const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double legendre_sym[], // values of associated Legendre functions, symmetric part + double legendre_asym[], // values of associated Legendre functions, asymmetric part + size_t leg_start_sym[], // start indices for different zonal wave numbers, symmetric part + size_t leg_start_asym[] ); // start indices for different zonal wave numbers, asymmetric part + +// -------------------------------------------------------------------------------------------------------------------- + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt3/LegendreTransformsopt3.cc b/src/atlas/trans/localopt3/LegendreTransformsopt3.cc new file mode 100644 index 000000000..919e9246c --- /dev/null +++ b/src/atlas/trans/localopt3/LegendreTransformsopt3.cc @@ -0,0 
#include <cstddef>

namespace atlas {
namespace trans {

//-----------------------------------------------------------------------------

/// Inverse Legendre transformation for a single latitude.
///
/// For every zonal wave number jm = 0..trcFT and every field the spectral
/// coefficients are summed over the total wave numbers jn = jm..trc, weighted
/// with the Legendre functions. `legpol` is stored for truncation `trcLP`
/// (>= trc); entries with jn > trc are skipped. `spec` holds, per (jm, jn)
/// pair, nb_fields real parts followed by nb_fields imaginary parts.
///
/// Every contribution is multiplied by 2; for jm == 0 this factor is removed
/// again at the end.
///
/// @param trc        truncation of the spectral data
/// @param trcFT      truncation used for the Fourier transformation
/// @param trcLP      truncation of the Legendre data in legpol
/// @param legpol     associated Legendre functions
/// @param nb_fields  number of fields
/// @param spec       spectral data
/// @param leg_real   output, real parts, (trcFT+1)*nb_fields values
/// @param leg_imag   output, imaginary parts, (trcFT+1)*nb_fields values
void invtrans_legendreopt3( const std::size_t trc, const std::size_t trcFT, const std::size_t trcLP,
                            const double legpol[], const int nb_fields, const double spec[], double leg_real[],
                            double leg_imag[] ) {
    std::size_t ispec = 0;  // running (jm, jn) pair within the spectral data
    std::size_t ileg  = 0;  // running (jm, jn) pair within legpol

    for ( std::size_t jm = 0; jm <= trcFT; ++jm ) {
        double* const out_re = leg_real + jm * nb_fields;
        double* const out_im = leg_imag + jm * nb_fields;
        for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
            out_re[jfld] = 0.;
            out_im[jfld] = 0.;
        }

        for ( std::size_t jn = jm; jn <= trcLP; ++jn, ++ileg ) {
            if ( jn > trc ) {
                continue;  // legpol is stored for a larger truncation than the spectral data
            }
            const double* const in_re = spec + ( 2 * ispec ) * nb_fields;
            const double* const in_im = spec + ( 2 * ispec + 1 ) * nb_fields;
            const double pnm          = legpol[ileg];
            for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
                // factor 2: presumably the normalisation of the trigonometric
                // functions in the spherical harmonics for m > 0
                out_re[jfld] += 2. * in_re[jfld] * pnm;
                out_im[jfld] += 2. * in_im[jfld] * pnm;
            }
            ++ispec;
        }
    }

    // Undo factor 2 for (jm == 0)
    for ( int jfld = 0; jfld < nb_fields; ++jfld ) {
        leg_real[jfld] *= 0.5;
        leg_imag[jfld] *= 0.5;
    }
}

// --------------------------------------------------------------------------------------------------------------------

}  // namespace trans
}  // namespace atlas
Needs to be >= trc (in) + const double legpol[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) + const int nb_fields, // number of fields + const double spec[], // spectral data, size (trc+1)*trc (in) + double leg_real[], // values of associated Legendre functions, size (trc+1)*trc/2 (out) + double leg_imag[] ); // values of associated Legendre functions, size (trc+1)*trc/2 (out) + +// -------------------------------------------------------------------------------------------------------------------- + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc new file mode 100644 index 000000000..e65ac2e63 --- /dev/null +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -0,0 +1,690 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. 
+ */ + +#include "atlas/trans/localopt3/TransLocalopt3.h" +#include +#include "atlas/array.h" +#include "atlas/option.h" +#include "atlas/parallel/mpi/mpi.h" +#include "atlas/runtime/ErrorHandling.h" +#include "atlas/runtime/Log.h" +#include "atlas/trans/VorDivToUV.h" +#include "atlas/trans/local/LegendrePolynomials.h" +#include "atlas/trans/localopt3/FourierTransformsopt3.h" +#include "atlas/trans/localopt3/LegendrePolynomialsopt3.h" +#include "atlas/trans/localopt3/LegendreTransformsopt3.h" +#include "atlas/util/Constants.h" +#include "eckit/linalg/LinearAlgebra.h" +#include "eckit/linalg/Matrix.h" +#if ATLAS_HAVE_MKL +#include "mkl.h" +#endif + +namespace atlas { +namespace trans { + +namespace { +static TransBuilderGrid builder( "localopt3" ); +} + +// -------------------------------------------------------------------------------------------------------------------- +// Helper functions +// -------------------------------------------------------------------------------------------------------------------- +namespace { // anonymous + +size_t legendre_size( const size_t truncation ) { + return ( truncation + 2 ) * ( truncation + 1 ) / 2; +} + +int nlats_northernHemisphere( const int nlats ) { + return ceil( nlats / 2. 
); + // using ceil here should make it possible to have odd number of latitudes (with the centre latitude being the equator) +} + +int num_n( const int truncation, const int m, const bool symmetric ) { + int len = 0; + if ( symmetric ) { len = ( truncation - m + 2 ) / 2; } + else { + len = ( truncation - m + 1 ) / 2; + } + return len; +} + +void alloc_aligned( double*& ptr, size_t n ) { +#if ATLAS_HAVE_MKL + int al = 64; + ptr = (double*)mkl_malloc( sizeof( double ) * n, al ); +#else + posix_memalign( (void**)&ptr, sizeof( double ) * 64, sizeof( double ) * n ); + //ptr = (double*)malloc( sizeof( double ) * n ); + //ptr = new double[n]; +#endif +} + +void free_aligned( double*& ptr ) { +#if ATLAS_HAVE_MKL + mkl_free( ptr ); +#else + free( ptr ); +#endif +} + +int add_padding( int n ) { + return std::ceil( n / 8. ) * 8; +} +} // namespace + +// -------------------------------------------------------------------------------------------------------------------- +// Class TransLocalopt3 +// -------------------------------------------------------------------------------------------------------------------- + +TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long truncation, + const eckit::Configuration& config ) : + grid_( grid ), + truncation_( truncation ), + precompute_( config.getBool( "precompute", true ) ) { + ATLAS_TRACE( "Precompute legendre opt3" ); +#if ATLAS_HAVE_MKL + eckit::linalg::LinearAlgebra::backend( "mkl" ); // might want to choose backend with this command +#else + eckit::linalg::LinearAlgebra::backend( "generic" ); // might want to choose backend with this command +#endif + int nlats = 0; + int nlons = 0; + int nlatsNH = nlats_northernHemisphere( nlats ); + if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { + grid::StructuredGrid g( grid_ ); + nlats = g.ny(); + nlons = g.nxmax(); + nlatsNH = nlats_northernHemisphere( nlats ); + } + else { + nlats = grid_.size(); + nlons = grid_.size(); + nlatsNH = nlats; + } + 
std::vector lats( nlatsNH ); + std::vector lons( nlons ); + if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { + grid::StructuredGrid g( grid_ ); + // TODO: remove legendre_begin and legendre_data (only legendre_ should be needed) + for ( size_t j = 0; j < nlatsNH; ++j ) { + lats[j] = g.y( j ) * util::Constants::degreesToRadians(); + } + for ( size_t j = 0; j < nlons; ++j ) { + lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians(); + } + } + else { + int j( 0 ); + for ( PointXY p : grid_.xy() ) { + lats[j++] = p.y() * util::Constants::degreesToRadians(); + lons[j++] = p.x() * util::Constants::degreesToRadians(); + } + } + // precomputations for Legendre polynomials: + { + ATLAS_TRACE( "opt3 precomp Legendre" ); + int size_sym = 0; + int size_asym = 0; + legendre_sym_begin_.resize( truncation_ + 3 ); + legendre_asym_begin_.resize( truncation_ + 3 ); + legendre_sym_begin_[0] = 0; + legendre_asym_begin_[0] = 0; + for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { + size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsNH ); + size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsNH ); + legendre_sym_begin_[jm + 1] = size_sym; + legendre_asym_begin_[jm + 1] = size_asym; + } + alloc_aligned( legendre_sym_, size_sym ); + alloc_aligned( legendre_asym_, size_asym ); + compute_legendre_polynomialsopt3( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_, + legendre_sym_begin_.data(), legendre_asym_begin_.data() ); + } + + // precomputations for Fourier transformations: + { + ATLAS_TRACE( "opt3 precomp Fourier" ); + alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons ); + int idx = 0; + for ( int jlon = 0; jlon < nlons; jlon++ ) { + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + fourier_[idx++] = +std::cos( jm * lons[jlon] ); // real part + fourier_[idx++] = -std::sin( jm * lons[jlon] ); // imaginary part + } + } + } + { + ATLAS_TRACE( "opt3 precomp Fourier tp" ); + alloc_aligned( fouriertp_, 2 * ( 
truncation_ + 1 ) * nlons ); + int idx = 0; + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + for ( int jlon = 0; jlon < nlons; jlon++ ) { + fouriertp_[idx++] = +std::cos( jm * lons[jlon] ); // real part + } + for ( int jlon = 0; jlon < nlons; jlon++ ) { + fouriertp_[idx++] = -std::sin( jm * lons[jlon] ); // imaginary part + } + } + } +#if ATLAS_HAVE_FFTW + { + ATLAS_TRACE( "opt3 precomp FFTW" ); + int num_complex = ( nlons / 2 ) + 1; + fft_in_ = fftw_alloc_complex( nlats * num_complex ); + fft_out_ = fftw_alloc_real( nlats * nlons ); + plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons, + FFTW_ESTIMATE ); + } +#endif +} // namespace atlas + +// -------------------------------------------------------------------------------------------------------------------- + +TransLocalopt3::TransLocalopt3( const Grid& grid, const long truncation, const eckit::Configuration& config ) : + TransLocalopt3( Cache(), grid, truncation, config ) {} + +// -------------------------------------------------------------------------------------------------------------------- + +TransLocalopt3::~TransLocalopt3() { + free_aligned( legendre_sym_ ); + free_aligned( legendre_asym_ ); + free_aligned( fourier_ ); + free_aligned( fouriertp_ ); +#if ATLAS_HAVE_FFTW + fftw_destroy_plan( plan_ ); + fftw_free( fft_in_ ); + fftw_free( fft_out_ ); +#endif +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt3::invtrans( const Field& spfield, Field& gpfield, const eckit::Configuration& config ) const { + NOTIMP; +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt3::invtrans( const FieldSet& spfields, FieldSet& gpfields, + const eckit::Configuration& config ) const { + NOTIMP; +} + +// 
-------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt3::invtrans_grad( const Field& spfield, Field& gradfield, const eckit::Configuration& config ) const { + NOTIMP; +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt3::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields, + const eckit::Configuration& config ) const { + NOTIMP; +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt3::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind, + const eckit::Configuration& config ) const { + NOTIMP; +} + +void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields, config ); +} + +void gp_transposeopt3( const int nb_size, const int nb_fields, const double gp_tmp[], double gp_fields[] ) { + for ( int jgp = 0; jgp < nb_size; jgp++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + gp_fields[jfld * nb_size + jgp] = gp_tmp[jgp * nb_fields + jfld]; + } + } +} + +//----------------------------------------------------------------------------- +// Routine to compute the spectral transform by using a localopt3 Fourier +// transformation +// for a grid (same latitude for all longitudes, allows to compute Legendre +// functions +// once for all longitudes). U and v components are divided by cos(latitude) for +// nb_vordiv_fields > 0. 
+// +// Author: +// Andreas Mueller *ECMWF* +// +void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, + const double scalar_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + if ( nb_scalar_fields > 0 ) { + int nb_fields = nb_scalar_fields; + + // Transform + if ( grid::StructuredGrid g = grid_ ) { + ATLAS_TRACE( "invtrans_uv structured opt3" ); + int nlats = g.ny(); + int nlons = g.nxmax(); + int nlatsNH = nlats_northernHemisphere( nlats ); + auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) { + return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); + }; + /*auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) { + return jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) + jm * nb_fields * 2 * nlats ); + };*/ + auto posGemm1 = [&]( int jfld, int imag, int jlat, int jm ) { + return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) ); + }; + auto posGemm2 = [&]( int jfld, int imag, int jlat, int jm ) { + return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); + }; + int size_fourier_max = nb_fields * 2 * nlats; + double* scl_fourier; + alloc_aligned( scl_fourier, size_fourier_max * ( truncation + 1 ) ); + + // Legendre transform: + { + ATLAS_TRACE( "opt3 Legendre dgemm" ); + for ( int jm = 0; jm <= truncation_; jm++ ) { + int size_sym = num_n( truncation_ + 1, jm, true ); + int size_asym = num_n( truncation_ + 1, jm, false ); + int n_imag = 2; + if ( jm == 0 ) { n_imag = 1; } + int size_fourier = nb_fields * n_imag * nlatsNH; + double* scalar_sym; + double* scalar_asym; + double* scl_fourier_sym; + double* scl_fourier_asym; + alloc_aligned( scalar_sym, n_imag * nb_fields * size_sym ); + alloc_aligned( scalar_asym, n_imag * nb_fields * size_asym ); + alloc_aligned( scl_fourier_sym, size_fourier ); + alloc_aligned( scl_fourier_asym, size_fourier ); + { + //ATLAS_TRACE( "opt3 Legendre split" ); + int idx = 0, is = 0, ia = 
0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2; + // the choice between the following two code lines determines whether + // total wavenumbers are summed in an ascending or descending order. + // The trans library in IFS uses descending order because it should + // be more accurate (higher wavenumbers have smaller contributions). + // This also needs to be changed when splitting the spectral data in + // compute_legendre_polynomialsopt3! + //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { + for ( int jn = truncation_ + 1; jn >= jm; jn-- ) { + for ( int imag = 0; imag < n_imag; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) ); + if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } + else { + scalar_asym[ia++] = scalar_spectra[idx + ioff]; + } + } + } + } + ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym ); + } + { + eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym ); + eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsNH ); + eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsNH ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + if ( size_asym > 0 ) { + eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym ); + eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsNH ); + eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } +#if 0 //ATLAS_HAVE_FFTW + { + //ATLAS_TRACE( "opt3 merge spheres" ); + // northern hemisphere: + int ioff = jm * size_fourier_max; + int pos0 = ioff; + int idx = 0; + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + int poslat = pos0 + 2 * jlat; + for ( int imag = 0; imag < n_imag; imag++ ) { + int posimag = nb_fields * ( imag + poslat ); + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { 
+ scl_fourier[posFFTW( jfld, imag, jlat, jm )] = + scl_fourier_sym[idx] + scl_fourier_asym[idx]; + } + } + } + // southern hemisphere: + idx = 0; + pos0 = 2 * ( nlats - 1 ) + ioff; + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + int poslat = pos0 - 2 * jlat; + for ( int imag = 0; imag < n_imag; imag++ ) { + int posimag = nb_fields * ( imag + poslat ); + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + int jslat = nlats - jlat - 1; + scl_fourier[posFFTW( jfld, imag, jslat, jm )] = + scl_fourier_sym[idx] - scl_fourier_asym[idx]; + } + } + } + } + free_aligned( scalar_sym ); + free_aligned( scalar_asym ); + free_aligned( scl_fourier_sym ); + free_aligned( scl_fourier_asym ); + } + } + { + int num_complex = ( nlons / 2 ) + 1; + { + ATLAS_TRACE( "opt3 FFTW" ); + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = 0; + for ( int jlat = 0; jlat < nlats; jlat++ ) { + fft_in_[idx++][0] = scl_fourier[posFFTW( jfld, 0, jlat, 0 )]; + for ( int jm = 1; jm < num_complex; jm++, idx++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + if ( jm <= truncation_ ) { + fft_in_[idx][imag] = scl_fourier[posFFTW( jfld, imag, jlat, jm )] / 2.; + } + else { + fft_in_[idx][imag] = 0.; + } + } + } + } + fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ ); + for ( int j = 0; j < nlats * nlons; j++ ) { + gp_fields[j + jfld * nlats * nlons] = fft_out_[j]; + } + } + } + } +#else +#if 0 // 1: better for small number of columns, large truncation; 0: better for large number of columns + { + //ATLAS_TRACE( "opt3 merge spheres" ); + // northern hemisphere: + int ioff = jm * size_fourier_max; + int pos0 = ioff; + int idx = 0; + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + int poslat = pos0 + 2 * jlat; + for ( int imag = 0; imag < n_imag; imag++ ) { + int posimag = nb_fields * ( imag + poslat ); + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + scl_fourier[posGemm1( jfld, imag, jlat, jm )] = + scl_fourier_sym[idx] + scl_fourier_asym[idx]; + } + } + } + // southern 
hemisphere: + idx = 0; + pos0 = 2 * ( nlats - 1 ) + ioff; + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + int poslat = pos0 - 2 * jlat; + for ( int imag = 0; imag < n_imag; imag++ ) { + int posimag = nb_fields * ( imag + poslat ); + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + int jslat = nlats - jlat - 1; + scl_fourier[posGemm1( jfld, imag, jslat, jm )] = + scl_fourier_sym[idx] - scl_fourier_asym[idx]; + } + } + } + } + free_aligned( scalar_sym ); + free_aligned( scalar_asym ); + free_aligned( scl_fourier_sym ); + free_aligned( scl_fourier_asym ); + } + } + + // Fourier transformation: + double* gp_opt3; + alloc_aligned( gp_opt3, nb_fields * grid_.size() ); + { + ATLAS_TRACE( "opt3 Fourier dgemm" ); + eckit::linalg::Matrix A( scl_fourier, nb_fields * g.ny(), ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, g.nxmax() ); + eckit::linalg::Matrix C( gp_opt3, nb_fields * g.ny(), g.nxmax() ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + + // Transposition in grid point space: + { + ATLAS_TRACE( "opt3 transposition in gp-space" ); + int idx = 0; + for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int pos_tp = jlon + g.nxmax() * ( jlat + g.ny() * ( jfld ) ); + //int pos = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) ); + gp_fields[pos_tp] = gp_opt3[idx++]; // = gp_opt3[pos] + } + } + } + } + free_aligned( gp_opt3 ); +#else + { + //ATLAS_TRACE( "opt3 merge spheres" ); + // northern hemisphere: + int ioff = jm * size_fourier_max; + int pos0 = ioff; + int idx = 0; + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + int poslat = pos0 + 2 * jlat; + for ( int imag = 0; imag < n_imag; imag++ ) { + int posimag = nb_fields * ( imag + poslat ); + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + scl_fourier[posGemm2( jfld, imag, jlat, jm )] = + scl_fourier_sym[idx] + scl_fourier_asym[idx]; + } + } + } 
+ // southern hemisphere: + idx = 0; + pos0 = 2 * ( nlats - 1 ) + ioff; + for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + int poslat = pos0 - 2 * jlat; + for ( int imag = 0; imag < n_imag; imag++ ) { + int posimag = nb_fields * ( imag + poslat ); + for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + int jslat = nlats - jlat - 1; + scl_fourier[posGemm2( jfld, imag, jslat, jm )] = + scl_fourier_sym[idx] - scl_fourier_asym[idx]; + } + } + } + } + free_aligned( scalar_sym ); + free_aligned( scalar_asym ); + free_aligned( scl_fourier_sym ); + free_aligned( scl_fourier_asym ); + } + } + + // Fourier transformation: + { + ATLAS_TRACE( "opt3 Fourier dgemm" ); + eckit::linalg::Matrix A( fouriertp_, g.nxmax(), ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * g.ny() ); + eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + +#endif +#endif + // Computing u,v from U,V: + { + if ( nb_vordiv_fields > 0 ) { + ATLAS_TRACE( "opt3 u,v from U,V" ); + std::vector coslats( nlats ); + for ( size_t j = 0; j < nlats; ++j ) { + coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); + } + int idx = 0; + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { + gp_fields[idx] /= coslats[jlat]; + idx++; + } + } + } + } + } + free_aligned( scl_fourier ); + } + else { + ATLAS_TRACE( "invtrans_uv unstructured opt3" ); + int idx = 0; + for ( PointXY p : grid_.xy() ) { + double lon = p.x() * util::Constants::degreesToRadians(); + double lat = p.y() * util::Constants::degreesToRadians(); + double trcFT = truncation; + + // Legendre transform: + //invtrans_legendreopt3( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, + // legReal.data(), legImag.data() ); + + // Fourier transform: + //invtrans_fourieropt3( trcFT, lon, 
nb_fields, legReal.data(), legImag.data(), + // gp_tmp.data() + ( nb_fields * idx ) ); + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + } + ++idx; + } + } + } + } // namespace trans + + // -------------------------------------------------------------------------------------------------------------------- + + void TransLocalopt3::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], + const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); + } + + void extend_truncationopt3( const int old_truncation, const int nb_fields, const double old_spectra[], + double new_spectra[] ) { + int k = 0, k_old = 0; + for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber + for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber + for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { // field + if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; } + else { + new_spectra[k++] = old_spectra[k_old++]; + } + } + } + } + } + } + + // -------------------------------------------------------------------------------------------------------------------- + + void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[], + const int nb_vordiv_fields, const double vorticity_spectra[], + const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + ATLAS_TRACE( "TransLocalopt3::invtrans" ); + int nb_gp = grid_.size(); + int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; + if ( nb_vordiv_fields > 0 ) { + std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. 
); + std::vector U_ext( nb_vordiv_spec_ext, 0. ); + std::vector V_ext( nb_vordiv_spec_ext, 0. ); + + { + ATLAS_TRACE( "opt3 extend vordiv" ); + // increase truncation in vorticity_spectra and divergence_spectra: + extend_truncationopt3( truncation_, nb_vordiv_fields, vorticity_spectra, + vorticity_spectra_extended.data() ); + extend_truncationopt3( truncation_, nb_vordiv_fields, divergence_spectra, + divergence_spectra_extended.data() ); + } + + { + ATLAS_TRACE( "vordiv to UV opt3" ); + // call vd2uv to compute u and v in spectral space + trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt3" ) ); + vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), + divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); + } + + // perform spectral transform to compute all fields in grid point space + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), + gp_fields + nb_gp * nb_vordiv_fields, config ); + } + if ( nb_scalar_fields > 0 ) { + int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; + std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. ); + extend_truncationopt3( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); + invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), + gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); + } + } + + // -------------------------------------------------------------------------------------------------------------------- + + void TransLocalopt3::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) + const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. 
+ } + + // -------------------------------------------------------------------------------------------------------------------- + + void TransLocalopt3::dirtrans( const FieldSet& gpfields, FieldSet& spfields, + const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. + } + + // -------------------------------------------------------------------------------------------------------------------- + + void TransLocalopt3::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, + const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. + } + + // -------------------------------------------------------------------------------------------------------------------- + + void TransLocalopt3::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], + const eckit::Configuration& ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. + } + + // -------------------------------------------------------------------------------------------------------------------- + + void TransLocalopt3::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], + double divergence_spectra[], const eckit::Configuration& ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. + } + + // -------------------------------------------------------------------------------------------------------------------- + + } // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h new file mode 100644 index 000000000..3673cd07c --- /dev/null +++ b/src/atlas/trans/localopt3/TransLocalopt3.h @@ -0,0 +1,133 @@ +/* + * (C) Copyright 2013 ECMWF. 
+ * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#pragma once + +#include + +#include "atlas/array.h" +#include "atlas/grid/Grid.h" +#include "atlas/trans/Trans.h" +#if ATLAS_HAVE_FFTW +#include +#endif + +//----------------------------------------------------------------------------- +// Forward declarations + +namespace atlas { +class Field; +class FieldSet; +} // namespace atlas + +//----------------------------------------------------------------------------- + +namespace atlas { +namespace trans { + +//----------------------------------------------------------------------------- + +/// @class TransLocalopt3 +/// +/// Localopt3 spherical harmonics transformations to any grid +/// Optimisations are present for structured grids +/// For global grids, please consider using TransIFS instead. +/// +/// @todo: +/// - support multiple fields +/// - support atlas::Field and atlas::FieldSet based on function spaces +/// +/// @note: Direct transforms are not implemented and cannot be unless +/// the grid is global. There are no plans to support this at the moment. 
+class TransLocalopt3 : public trans::TransImpl { +public: + TransLocalopt3( const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransLocalopt3( const Cache&, const Grid& g, const long truncation, + const eckit::Configuration& = util::NoConfig() ); + + virtual ~TransLocalopt3(); + + virtual int truncation() const override { return truncation_; } + virtual size_t spectralCoefficients() const override { return ( truncation_ + 1 ) * ( truncation_ + 2 ); } + + virtual const Grid& grid() const override { return grid_; } + + virtual void invtrans( const Field& spfield, Field& gpfield, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void invtrans( const FieldSet& spfields, FieldSet& gpfields, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void invtrans_grad( const Field& spfield, Field& gradfield, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void invtrans_grad( const FieldSet& spfields, FieldSet& gradfields, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind, + const eckit::Configuration& = util::NoConfig() ) const override; + + // -- IFS style API -- + + virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, + const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[], + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], + const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& = util::NoConfig() ) const override; + + // -- NOT SUPPORTED -- // + + 
virtual void dirtrans( const Field& gpfield, Field& spfield, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void dirtrans( const FieldSet& gpfields, FieldSet& spfields, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], + const eckit::Configuration& = util::NoConfig() ) const override; + + virtual void dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], + double divergence_spectra[], const eckit::Configuration& = util::NoConfig() ) const override; + +private: + void invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, + const double scalar_spectra[], double gp_fields[], + const eckit::Configuration& = util::NoConfig() ) const; + +private: + Grid grid_; + int truncation_; + bool precompute_; + double* legendre_sym_; + double* legendre_asym_; + double* fourier_; + double* fouriertp_; + std::vector legendre_begin_; + std::vector legendre_sym_begin_; + std::vector legendre_asym_begin_; +#if ATLAS_HAVE_FFTW + fftw_complex* fft_in_; + double* fft_out_; + fftw_plan plan_; +#endif +}; + +//----------------------------------------------------------------------------- + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc b/src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc new file mode 100644 index 000000000..4e7267748 --- /dev/null +++ b/src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc @@ -0,0 +1,184 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+ * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "atlas/trans/localopt3/VorDivToUVLocalopt3.h" +#include // for std::sqrt +#include "atlas/functionspace/Spectral.h" +#include "atlas/runtime/Log.h" +#include "atlas/util/Earth.h" + +using atlas::FunctionSpace; +using atlas::functionspace::Spectral; + +namespace atlas { +namespace trans { + +namespace { +static VorDivToUVBuilder builder( "localopt3" ); +} + +// -------------------------------------------------------------------------------------------------------------------- +// Routine to copy spectral data into internal storage form of IFS trans +// Ported to C++ by: Andreas Mueller *ECMWF* +void prfi1bopt3( const int truncation, + const int km, // zonal wavenumber + const int nb_fields, // number of fields + const double rspec[], // spectral data + double pia[] ) // spectral components in data layout of trans library +{ + int ilcm = truncation + 1 - km, ioff = ( 2 * truncation - km + 3 ) * km, + nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2; + for ( int j = 1; j <= ilcm; j++ ) { + int inm = ioff + ( ilcm - j ) * 2; + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int ir = 2 * jfld, ii = ir + 1; + pia[ir * nlei1 + j + 1] = rspec[inm * nb_fields + jfld]; + pia[ii * nlei1 + j + 1] = rspec[( inm + 1 ) * nb_fields + jfld]; + } + } + + for ( int jfld = 0; jfld < 2 * nb_fields; jfld++ ) { + pia[jfld * nlei1] = 0.; + pia[jfld * nlei1 + 1] = 0.; + pia[jfld * nlei1 + ilcm + 2] = 0.; + } +} + +// -------------------------------------------------------------------------------------------------------------------- +// Routine to compute spectral velocities (*cos(latitude)) out of spectral +// vorticity and divergence +// Reference: +// ECMWF Research Department documentation of the IFS +// Temperton, 1991, MWR 119 p1303 +// Ported to C++ by: Andreas 
Mueller *ECMWF* +void vd2uvopt3( const int truncation, // truncation + const int km, // zonal wavenumber + const int nb_vordiv_fields, // number of vorticity and divergence fields + const double vorticity_spectra[], // spectral data of vorticity + const double divergence_spectra[], // spectral data of divergence + double U[], // spectral data of U + double V[], // spectral data of V + const eckit::Configuration& config ) { + int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2; + + // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991] + std::vector repsnm( ( truncation + 1 ) * ( truncation + 6 ) / 2 ); + int idx = 0; + for ( int jm = 0; jm <= truncation; ++jm ) { + for ( int jn = jm; jn <= truncation + 2; ++jn, ++idx ) { + repsnm[idx] = std::sqrt( ( jn * jn - jm * jm ) / ( 4. * jn * jn - 1. ) ); + } + } + repsnm[0] = 0.; + + // rlapin: constant factor from eq.(2.2) and (2.3) in [Temperton 1991] + double ra = util::Earth::radius(); + std::vector rlapin( truncation + 3 ); + for ( int jn = 1; jn <= truncation + 2; ++jn ) { + rlapin[jn] = -ra * ra / ( jn * ( jn + 1. 
) ); + } + rlapin[0] = 0.; + + // inverse the order of repsnm and rlapin for improved accuracy + std::vector zepsnm( truncation + 6 ); + std::vector zlapin( truncation + 6 ); + std::vector zn( truncation + 6 ); + for ( int jn = km - 1; jn <= truncation + 2; ++jn ) { + int ij = truncation + 3 - jn; + if ( jn >= 0 ) { + zlapin[ij] = rlapin[jn]; + if ( jn < km ) { zepsnm[ij] = 0.; } + else { + zepsnm[ij] = repsnm[jn + ( 2 * truncation - km + 5 ) * km / 2]; + } + } + else { + zlapin[ij] = 0.; + zepsnm[ij] = 0.; + } + zn[ij] = jn; + } + zn[0] = truncation + 3; + + // copy spectral data into internal trans storage: + std::vector rvor( 2 * nb_vordiv_fields * nlei1 ); + std::vector rdiv( 2 * nb_vordiv_fields * nlei1 ); + std::vector ru( 2 * nb_vordiv_fields * nlei1 ); + std::vector rv( 2 * nb_vordiv_fields * nlei1 ); + prfi1bopt3( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() ); + prfi1bopt3( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() ); + + // compute eq.(2.12) and (2.13) in [Temperton 1991]: + if ( km == 0 ) { + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + int ir = 2 * jfld * nlei1 - 1; + for ( int ji = 2; ji < truncation + 4 - km; ++ji ) { + double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1]; + double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1]; + ru[ir + ji] = +psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1]; + rv[ir + ji] = -psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1]; + } + } + } + else { + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + int ir = 2 * jfld * nlei1 - 1, ii = ir + nlei1; + for ( int ji = 2; ji < truncation + 4 - km; ++ji ) { + double chiIm = km * zlapin[ji]; + double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1]; + double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1]; + ru[ir + ji] = -chiIm * rdiv[ii + ji] + psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1]; + ru[ii + ji] = +chiIm * rdiv[ir + ji] + psiM1 * rvor[ii + ji + 1] - psiP1 * rvor[ii + ji 
- 1]; + rv[ir + ji] = -chiIm * rvor[ii + ji] - psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1]; + rv[ii + ji] = +chiIm * rvor[ir + ji] - psiM1 * rdiv[ii + ji + 1] + psiP1 * rdiv[ii + ji - 1]; + } + } + } + + // copy data from internal storage back to external spectral data: + int ilcm = truncation - km; + int ioff = ( 2 * truncation - km + 3 ) * km; + // ioff: start index of zonal wavenumber km in spectral data + double za_r = 1. / util::Earth::radius(); + for ( int j = 0; j <= ilcm; ++j ) { + // ilcm-j = total wavenumber + int inm = ioff + ( ilcm - j ) * 2; + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + int ir = 2 * jfld * nlei1, ii = ir + nlei1; + int idx = inm * nb_vordiv_fields + jfld; + // real part: + U[idx] = ru[ir + j + 2] * za_r; + V[idx] = rv[ir + j + 2] * za_r; + idx += nb_vordiv_fields; + // imaginary part: + U[idx] = ru[ii + j + 2] * za_r; + V[idx] = rv[ii + j + 2] * za_r; + } + } +} + +void VorDivToUVLocalopt3::execute( const int nb_coeff, const int nb_fields, const double vorticity[], + const double divergence[], double U[], double V[], + const eckit::Configuration& config ) const { + for ( int jm = 0; jm <= truncation_; ++jm ) { + vd2uvopt3( truncation_, jm, nb_fields, vorticity, divergence, U, V, config ); + } +} + +VorDivToUVLocalopt3::VorDivToUVLocalopt3( const int truncation, const eckit::Configuration& config ) : + truncation_( truncation ) {} + +VorDivToUVLocalopt3::VorDivToUVLocalopt3( const FunctionSpace& fs, const eckit::Configuration& config ) : + truncation_( Spectral( fs ).truncation() ) {} + +VorDivToUVLocalopt3::~VorDivToUVLocalopt3() {} + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt3/VorDivToUVLocalopt3.h b/src/atlas/trans/localopt3/VorDivToUVLocalopt3.h new file mode 100644 index 000000000..44fdc98fe --- /dev/null +++ b/src/atlas/trans/localopt3/VorDivToUVLocalopt3.h @@ -0,0 +1,67 @@ +/* + * (C) Copyright 2013 ECMWF. 
+ * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#pragma once + +#include "atlas/trans/VorDivToUV.h" + +//----------------------------------------------------------------------------- +// Forward declarations + +namespace atlas { +class FunctionSpace; +} + +//----------------------------------------------------------------------------- + +namespace atlas { +namespace trans { + +//----------------------------------------------------------------------------- + +class VorDivToUVLocalopt3 : public trans::VorDivToUVImpl { +public: + VorDivToUVLocalopt3( const FunctionSpace&, const eckit::Configuration& = util::NoConfig() ); + VorDivToUVLocalopt3( int truncation, const eckit::Configuration& = util::NoConfig() ); + + virtual ~VorDivToUVLocalopt3(); + + virtual int truncation() const override { return truncation_; } + + // pure virtual interface + + // -- IFS style API -- + // These fields have special interpretation required. You need to know what + // you're doing. + // See IFS trans library. + + /*! 
+ * @brief Compute spectral wind (U/V) from spectral vorticity/divergence + * + * U = u*cos(lat) + * V = v*cos(lat) + * + * @param nb_fields [in] Number of fields + * @param vorticity [in] Spectral vorticity + * @param divergence [in] Spectral divergence + * @param U [out] Spectral wind U = u*cos(lat) + * @param V [out] Spectral wind V = v*cos(lat) + */ + virtual void execute( const int nb_coeff, const int nb_fields, const double vorticity[], const double divergence[], + double U[], double V[], const eckit::Configuration& = util::NoConfig() ) const override; + +private: + int truncation_; +}; + +// ------------------------------------------------------------------ + +} // namespace trans +} // namespace atlas diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 7ad11b882..fd1693cef 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -704,7 +704,7 @@ CASE( "test_transgeneral_with_translib" ) { #endif #endif //----------------------------------------------------------------------------- -#if 1 +#if 0 CASE( "test_trans_vordiv_with_translib" ) { Log::info() << "test_trans_vordiv_with_translib" << std::endl; // test transgeneral by comparing its result with the trans library @@ -853,7 +853,7 @@ CASE( "test_trans_vordiv_with_translib" ) { } #endif //----------------------------------------------------------------------------- -#if 0 +#if 1 CASE( "test_trans_hires" ) { Log::info() << "test_trans_hires" << std::endl; // test transgeneral by comparing its result with the trans library @@ -865,7 +865,7 @@ CASE( "test_trans_hires" ) { // Grid: (Adjust the following line if the test takes too long!) 
Grid g( "F640" ); #if ATLAS_HAVE_TRANS - std::string transTypes[2] = {"localopt2", "ifs"}; + std::string transTypes[4] = {"localopt", "localopt2", "localopt3", "ifs"}; //std::string transTypes[3] = {"localopt", "localopt2", "ifs"}; #else std::string transTypes[1] = {"localopt2"}; From 3c7070308a386c7b3a209c6f3294c70187bff02e Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 20 Mar 2018 11:53:19 +0000 Subject: [PATCH 029/123] got rid of intermediate variable in FFT. Creating the FFTW plan now in invtrans_uv. --- .../trans/localopt/LegendrePolynomialsopt.cc | 2 +- src/atlas/trans/localopt/TransLocalopt.cc | 342 +++++++++--------- src/atlas/trans/localopt/TransLocalopt.h | 8 - src/atlas/trans/localopt2/TransLocalopt2.cc | 270 +++++++------- src/tests/trans/test_transgeneral.cc | 4 +- 5 files changed, 301 insertions(+), 325 deletions(-) diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc index a31d893c6..fc0e3d879 100644 --- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc +++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc @@ -157,7 +157,7 @@ void compute_legendre_polynomialsopt( // take factor 2 for m > 0 into account: for ( int jm = 1; jm <= trc; ++jm ) { for ( int jn = jm; jn <= trc; ++jn ) { - legpol[idxmn( jm, jn )] *= 2.; + //legpol[idxmn( jm, jn )] *= 2.; } } for ( int jm = 0; jm <= trc; jm++ ) { diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index 0b9e1bf5f..0be4d4fe7 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -26,6 +26,9 @@ #if ATLAS_HAVE_MKL #include "mkl.h" #endif +#if ATLAS_HAVE_FFTW +#include +#endif namespace atlas { namespace trans { @@ -175,16 +178,6 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t } } } -#if ATLAS_HAVE_FFTW - { - ATLAS_TRACE( "opt precomp FFTW" ); - int num_complex = ( nlons / 2 ) + 1; - fft_in_ = 
fftw_alloc_complex( nlats * num_complex ); - fft_out_ = fftw_alloc_real( nlats * nlons ); - plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons, - FFTW_ESTIMATE ); - } -#endif } // namespace atlas // -------------------------------------------------------------------------------------------------------------------- @@ -199,11 +192,6 @@ TransLocalopt::~TransLocalopt() { free_aligned( legendre_asym_ ); free_aligned( fourier_ ); free_aligned( fouriertp_ ); -#if ATLAS_HAVE_FFTW - fftw_destroy_plan( plan_ ); - fftw_free( fft_in_ ); - fftw_free( fft_out_ ); -#endif } // -------------------------------------------------------------------------------------------------------------------- @@ -290,6 +278,21 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field double* scl_fourier; alloc_aligned( scl_fourier, size_fourier_max * ( truncation + 1 ) ); +#if ATLAS_HAVE_FFTW + int num_complex = ( nlons / 2 ) + 1; + fftw_complex* fft_in = fftw_alloc_complex( nlats * num_complex * nb_fields ); + double* fft_out = fftw_alloc_real( nlats * nlons * nb_fields ); + fftw_plan plan = fftw_plan_many_dft_c2r( 1, &nlons, nlats * nb_fields, fft_in, NULL, 1, num_complex, + fft_out, NULL, 1, nlons, FFTW_ESTIMATE ); + for ( int j = 0; j < nlats * num_complex * nb_fields; j++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + fft_in[j][imag] = 0.; + } + } + auto posFFTWin = [&]( int jfld, int jlat, int jm ) { + return jm + num_complex * ( jlat + nlats * ( jfld ) ); + }; +#endif // Legendre transform: { ATLAS_TRACE( "opt Legendre dgemm" ); @@ -346,29 +349,22 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field { //ATLAS_TRACE( "opt merge spheres" ); // northern hemisphere: - int ioff = jm * size_fourier_max; - int pos0 = ioff; - int idx = 0; + int idx = 0; for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - int poslat = pos0 + 2 * jlat; for ( int imag = 0; imag < n_imag; imag++ ) { - int 
posimag = nb_fields * ( imag + poslat ); for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { - scl_fourier[posFFTW( jfld, imag, jlat, jm )] = + fft_in[posFFTWin( jfld, jlat, jm )][imag] = scl_fourier_sym[idx] + scl_fourier_asym[idx]; } } } // southern hemisphere: - idx = 0; - pos0 = 2 * ( nlats - 1 ) + ioff; + idx = 0; for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - int poslat = pos0 - 2 * jlat; for ( int imag = 0; imag < n_imag; imag++ ) { - int posimag = nb_fields * ( imag + poslat ); for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { int jslat = nlats - jlat - 1; - scl_fourier[posFFTW( jfld, imag, jslat, jm )] = + fft_in[posFFTWin( jfld, jslat, jm )][imag] = scl_fourier_sym[idx] - scl_fourier_asym[idx]; } } @@ -384,28 +380,21 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field int num_complex = ( nlons / 2 ) + 1; { ATLAS_TRACE( "opt FFTW" ); - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int idx = 0; - for ( int jlat = 0; jlat < nlats; jlat++ ) { - fft_in_[idx++][0] = scl_fourier[posFFTW( jfld, 0, jlat, 0 )]; - for ( int jm = 1; jm < num_complex; jm++, idx++ ) { - for ( int imag = 0; imag < 2; imag++ ) { - if ( jm <= truncation_ ) { - fft_in_[idx][imag] = scl_fourier[posFFTW( jfld, imag, jlat, jm )] / 2.; - } - else { - fft_in_[idx][imag] = 0.; - } - } - } - } - fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ ); - for ( int j = 0; j < nlats * nlons; j++ ) { - gp_fields[j + jfld * nlats * nlons] = fft_out_[j]; + { + ATLAS_TRACE( "fftw_execute" ); + fftw_execute( plan ); + } + { + ATLAS_TRACE( "read fft_out" ); + for ( int j = 0; j < nlats * nlons * nb_fields; j++ ) { + gp_fields[j] = fft_out[j]; } } } } + fftw_destroy_plan( plan ); + fftw_free( fft_in ); + fftw_free( fft_out ); #else #if 0 // 1: better for small number of columns, large truncation; 0: better for large number of columns { @@ -522,167 +511,164 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field #endif #endif - // 
Computing u,v from U,V: - { - if ( nb_vordiv_fields > 0 ) { - ATLAS_TRACE( "opt u,v from U,V" ); - std::vector coslats( nlats ); - for ( size_t j = 0; j < nlats; ++j ) { - coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); - } - int idx = 0; - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { - gp_fields[idx] /= coslats[jlat]; - idx++; - } - } - } - } + // Computing u,v from U,V: + { + if ( nb_vordiv_fields > 0 ) { + ATLAS_TRACE( "opt u,v from U,V" ); + std::vector coslats( nlats ); + for ( size_t j = 0; j < nlats; ++j ) { + coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); } - free_aligned( scl_fourier ); - } - else { - ATLAS_TRACE( "invtrans_uv unstructured opt" ); int idx = 0; - for ( PointXY p : grid_.xy() ) { - double lon = p.x() * util::Constants::degreesToRadians(); - double lat = p.y() * util::Constants::degreesToRadians(); - double trcFT = truncation; - - // Legendre transform: - //invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, - // legReal.data(), legImag.data() ); - - // Fourier transform: - //invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(), - // gp_tmp.data() + ( nb_fields * idx ) ); - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { + gp_fields[idx] /= coslats[jlat]; + idx++; + } } - ++idx; } } } - } // namespace trans - - // -------------------------------------------------------------------------------------------------------------------- - - void TransLocalopt::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], - const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& 
config ) const { - invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); + free_aligned( scl_fourier ); } - - void extend_truncationopt( const int old_truncation, const int nb_fields, const double old_spectra[], - double new_spectra[] ) { - int k = 0, k_old = 0; - for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber - for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber - for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { // field - if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; } - else { - new_spectra[k++] = old_spectra[k_old++]; - } - } - } + else { + ATLAS_TRACE( "invtrans_uv unstructured opt" ); + int idx = 0; + for ( PointXY p : grid_.xy() ) { + double lon = p.x() * util::Constants::degreesToRadians(); + double lat = p.y() * util::Constants::degreesToRadians(); + double trcFT = truncation; + + // Legendre transform: + //invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, + // legReal.data(), legImag.data() ); + + // Fourier transform: + //invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(), + // gp_tmp.data() + ( nb_fields * idx ) ); + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); } + ++idx; } } + } +} // namespace trans - // -------------------------------------------------------------------------------------------------------------------- - - void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_spectra[], - const int nb_vordiv_fields, const double vorticity_spectra[], - const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - ATLAS_TRACE( "TransLocalopt::invtrans" ); - int nb_gp = grid_.size(); - int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * 
nb_vordiv_fields; - if ( nb_vordiv_fields > 0 ) { - std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector U_ext( nb_vordiv_spec_ext, 0. ); - std::vector V_ext( nb_vordiv_spec_ext, 0. ); +// -------------------------------------------------------------------------------------------------------------------- - { - ATLAS_TRACE( "opt extend vordiv" ); - // increase truncation in vorticity_spectra and divergence_spectra: - extend_truncationopt( truncation_, nb_vordiv_fields, vorticity_spectra, - vorticity_spectra_extended.data() ); - extend_truncationopt( truncation_, nb_vordiv_fields, divergence_spectra, - divergence_spectra_extended.data() ); - } +void TransLocalopt::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], + const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); +} - { - ATLAS_TRACE( "vordiv to UV opt" ); - // call vd2uv to compute u and v in spectral space - trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) ); - vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), - divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); +void extend_truncationopt( const int old_truncation, const int nb_fields, const double old_spectra[], + double new_spectra[] ) { + int k = 0, k_old = 0; + for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber + for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber + for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { // field + if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; } + else { + new_spectra[k++] = old_spectra[k_old++]; + } } - - // perform spectral transform 
to compute all fields in grid point space - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), - gp_fields + nb_gp * nb_vordiv_fields, config ); - } - if ( nb_scalar_fields > 0 ) { - int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; - std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. ); - extend_truncationopt( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); - invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), - gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); } } + } +} - // -------------------------------------------------------------------------------------------------------------------- +// -------------------------------------------------------------------------------------------------------------------- - void TransLocalopt::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. +void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, + const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + ATLAS_TRACE( "TransLocalopt::invtrans" ); + int nb_gp = grid_.size(); + int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; + if ( nb_vordiv_fields > 0 ) { + std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector U_ext( nb_vordiv_spec_ext, 0. ); + std::vector V_ext( nb_vordiv_spec_ext, 0. 
); + + { + ATLAS_TRACE( "opt extend vordiv" ); + // increase truncation in vorticity_spectra and divergence_spectra: + extend_truncationopt( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() ); + extend_truncationopt( truncation_, nb_vordiv_fields, divergence_spectra, + divergence_spectra_extended.data() ); } - // -------------------------------------------------------------------------------------------------------------------- - - void TransLocalopt::dirtrans( const FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config ) - const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. + { + ATLAS_TRACE( "vordiv to UV opt" ); + // call vd2uv to compute u and v in spectral space + trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) ); + vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), + divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); } - // -------------------------------------------------------------------------------------------------------------------- + // perform spectral transform to compute all fields in grid point space + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), + gp_fields + nb_gp * nb_vordiv_fields, config ); + } + if ( nb_scalar_fields > 0 ) { + int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; + std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. 
); + extend_truncationopt( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); + invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), + gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); + } +} - void TransLocalopt::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, - const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. - } +// -------------------------------------------------------------------------------------------------------------------- - // -------------------------------------------------------------------------------------------------------------------- +void TransLocalopt::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. +} - void TransLocalopt::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], - const eckit::Configuration& ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. - } +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt::dirtrans( const FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. 
+} - // -------------------------------------------------------------------------------------------------------------------- +// -------------------------------------------------------------------------------------------------------------------- - void TransLocalopt::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], - double divergence_spectra[], const eckit::Configuration& ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. - } +void TransLocalopt::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, + const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. +} - // -------------------------------------------------------------------------------------------------------------------- +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], + const eckit::Configuration& ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], + double divergence_spectra[], const eckit::Configuration& ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. 
+} + +// -------------------------------------------------------------------------------------------------------------------- - } // namespace trans +} // namespace trans } // namespace atlas diff --git a/src/atlas/trans/localopt/TransLocalopt.h b/src/atlas/trans/localopt/TransLocalopt.h index fce71261a..55b7a074e 100644 --- a/src/atlas/trans/localopt/TransLocalopt.h +++ b/src/atlas/trans/localopt/TransLocalopt.h @@ -15,9 +15,6 @@ #include "atlas/array.h" #include "atlas/grid/Grid.h" #include "atlas/trans/Trans.h" -#if ATLAS_HAVE_FFTW -#include -#endif //----------------------------------------------------------------------------- // Forward declarations @@ -119,11 +116,6 @@ class TransLocalopt : public trans::TransImpl { std::vector legendre_begin_; std::vector legendre_sym_begin_; std::vector legendre_asym_begin_; -#if ATLAS_HAVE_FFTW - fftw_complex* fft_in_; - double* fft_out_; - fftw_plan plan_; -#endif }; //----------------------------------------------------------------------------- diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index f4672becc..d3bd6d45b 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -343,7 +343,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } -#if 0 //ATLAS_HAVE_FFTW +#if 1 //ATLAS_HAVE_FFTW { //ATLAS_TRACE( "opt2 merge spheres" ); // northern hemisphere: @@ -523,168 +523,166 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel #endif #endif - // Computing u,v from U,V: - { - if ( nb_vordiv_fields > 0 ) { - ATLAS_TRACE( "opt2 u,v from U,V" ); - std::vector coslats( nlats ); - for ( size_t j = 0; j < nlats; ++j ) { - coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); - } - int idx = 0; - for ( int jfld = 
0; jfld < nb_fields; jfld++ ) { - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { - gp_fields[idx] /= coslats[jlat]; - idx++; - } - } - } - } + // Computing u,v from U,V: + { + if ( nb_vordiv_fields > 0 ) { + ATLAS_TRACE( "opt2 u,v from U,V" ); + std::vector coslats( nlats ); + for ( size_t j = 0; j < nlats; ++j ) { + coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); } - free_aligned( scl_fourier ); - } - else { - ATLAS_TRACE( "invtrans_uv unstructured opt2" ); int idx = 0; - for ( PointXY p : grid_.xy() ) { - double lon = p.x() * util::Constants::degreesToRadians(); - double lat = p.y() * util::Constants::degreesToRadians(); - double trcFT = truncation; - - // Legendre transform: - //invtrans_legendreopt2( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, - // legReal.data(), legImag.data() ); - - // Fourier transform: - //invtrans_fourieropt2( trcFT, lon, nb_fields, legReal.data(), legImag.data(), - // gp_tmp.data() + ( nb_fields * idx ) ); - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { + gp_fields[idx] /= coslats[jlat]; + idx++; + } } - ++idx; } } } - } // namespace trans - - // -------------------------------------------------------------------------------------------------------------------- - - void TransLocalopt2::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], - const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); + free_aligned( scl_fourier ); } - - void extend_truncationopt2( const int old_truncation, const int nb_fields, const double old_spectra[], - double 
new_spectra[] ) { - int k = 0, k_old = 0; - for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber - for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber - for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { // field - if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; } - else { - new_spectra[k++] = old_spectra[k_old++]; - } - } - } + else { + ATLAS_TRACE( "invtrans_uv unstructured opt2" ); + int idx = 0; + for ( PointXY p : grid_.xy() ) { + double lon = p.x() * util::Constants::degreesToRadians(); + double lat = p.y() * util::Constants::degreesToRadians(); + double trcFT = truncation; + + // Legendre transform: + //invtrans_legendreopt2( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, + // legReal.data(), legImag.data() ); + + // Fourier transform: + //invtrans_fourieropt2( trcFT, lon, nb_fields, legReal.data(), legImag.data(), + // gp_tmp.data() + ( nb_fields * idx ) ); + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); } + ++idx; } } + } +} // namespace trans - // -------------------------------------------------------------------------------------------------------------------- - - void TransLocalopt2::invtrans( const int nb_scalar_fields, const double scalar_spectra[], - const int nb_vordiv_fields, const double vorticity_spectra[], - const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - ATLAS_TRACE( "TransLocalopt2::invtrans" ); - int nb_gp = grid_.size(); - int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; - if ( nb_vordiv_fields > 0 ) { - std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector U_ext( nb_vordiv_spec_ext, 0. 
); - std::vector V_ext( nb_vordiv_spec_ext, 0. ); +// -------------------------------------------------------------------------------------------------------------------- - { - ATLAS_TRACE( "opt2 extend vordiv" ); - // increase truncation in vorticity_spectra and divergence_spectra: - extend_truncationopt2( truncation_, nb_vordiv_fields, vorticity_spectra, - vorticity_spectra_extended.data() ); - extend_truncationopt2( truncation_, nb_vordiv_fields, divergence_spectra, - divergence_spectra_extended.data() ); - } +void TransLocalopt2::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], + const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); +} - { - ATLAS_TRACE( "vordiv to UV opt2" ); - // call vd2uv to compute u and v in spectral space - trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt2" ) ); - vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), - divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); +void extend_truncationopt2( const int old_truncation, const int nb_fields, const double old_spectra[], + double new_spectra[] ) { + int k = 0, k_old = 0; + for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber + for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber + for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { // field + if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; } + else { + new_spectra[k++] = old_spectra[k_old++]; + } } - - // perform spectral transform to compute all fields in grid point space - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, 
V_ext.data(), - gp_fields + nb_gp * nb_vordiv_fields, config ); - } - if ( nb_scalar_fields > 0 ) { - int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; - std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. ); - extend_truncationopt2( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); - invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), - gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); } } + } +} - // -------------------------------------------------------------------------------------------------------------------- +// -------------------------------------------------------------------------------------------------------------------- - void TransLocalopt2::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) - const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. +void TransLocalopt2::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, + const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + ATLAS_TRACE( "TransLocalopt2::invtrans" ); + int nb_gp = grid_.size(); + int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; + if ( nb_vordiv_fields > 0 ) { + std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector U_ext( nb_vordiv_spec_ext, 0. ); + std::vector V_ext( nb_vordiv_spec_ext, 0. 
); + + { + ATLAS_TRACE( "opt2 extend vordiv" ); + // increase truncation in vorticity_spectra and divergence_spectra: + extend_truncationopt2( truncation_, nb_vordiv_fields, vorticity_spectra, + vorticity_spectra_extended.data() ); + extend_truncationopt2( truncation_, nb_vordiv_fields, divergence_spectra, + divergence_spectra_extended.data() ); } - // -------------------------------------------------------------------------------------------------------------------- - - void TransLocalopt2::dirtrans( const FieldSet& gpfields, FieldSet& spfields, - const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. + { + ATLAS_TRACE( "vordiv to UV opt2" ); + // call vd2uv to compute u and v in spectral space + trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt2" ) ); + vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), + divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); } - // -------------------------------------------------------------------------------------------------------------------- + // perform spectral transform to compute all fields in grid point space + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), + gp_fields + nb_gp * nb_vordiv_fields, config ); + } + if ( nb_scalar_fields > 0 ) { + int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; + std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. 
); + extend_truncationopt2( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); + invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), + gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); + } +} - void TransLocalopt2::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, - const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. - } +// -------------------------------------------------------------------------------------------------------------------- - // -------------------------------------------------------------------------------------------------------------------- +void TransLocalopt2::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. +} - void TransLocalopt2::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], - const eckit::Configuration& ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. - } +// -------------------------------------------------------------------------------------------------------------------- - // -------------------------------------------------------------------------------------------------------------------- +void TransLocalopt2::dirtrans( const FieldSet& gpfields, FieldSet& spfields, + const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. +} - void TransLocalopt2::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], - double divergence_spectra[], const eckit::Configuration& ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. 
- } +// -------------------------------------------------------------------------------------------------------------------- - // -------------------------------------------------------------------------------------------------------------------- +void TransLocalopt2::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, + const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt2::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], + const eckit::Configuration& ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt2::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], + double divergence_spectra[], const eckit::Configuration& ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. 
+} + +// -------------------------------------------------------------------------------------------------------------------- - } // namespace trans +} // namespace trans } // namespace atlas diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index fd1693cef..9e7540029 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -704,7 +704,7 @@ CASE( "test_transgeneral_with_translib" ) { #endif #endif //----------------------------------------------------------------------------- -#if 0 +#if 1 CASE( "test_trans_vordiv_with_translib" ) { Log::info() << "test_trans_vordiv_with_translib" << std::endl; // test transgeneral by comparing its result with the trans library @@ -853,7 +853,7 @@ CASE( "test_trans_vordiv_with_translib" ) { } #endif //----------------------------------------------------------------------------- -#if 1 +#if 0 CASE( "test_trans_hires" ) { Log::info() << "test_trans_hires" << std::endl; // test transgeneral by comparing its result with the trans library From 40454fe6bcd2f7156592478815fa95b647422a5a Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 20 Mar 2018 17:12:31 +0000 Subject: [PATCH 030/123] TO BE DELETED! 
Commented out Legendre polynomials for testing purposes --- src/atlas/trans/localopt/TransLocalopt.cc | 12 +++++--- src/atlas/trans/localopt2/TransLocalopt2.cc | 32 +++++++++++---------- src/atlas/trans/localopt3/TransLocalopt3.cc | 27 ++++++++--------- src/tests/trans/test_transgeneral.cc | 15 +++++----- 4 files changed, 47 insertions(+), 39 deletions(-) diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index 0be4d4fe7..087dcee41 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -149,11 +149,12 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t } alloc_aligned( legendre_sym_, size_sym ); alloc_aligned( legendre_asym_, size_asym ); - compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_, - legendre_sym_begin_.data(), legendre_asym_begin_.data() ); + //compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_, + // legendre_sym_begin_.data(), legendre_asym_begin_.data() ); } - // precomputations for Fourier transformations: + // precomputations for Fourier transformations: +#if !ATLAS_HAVE_FFTW { ATLAS_TRACE( "opt precomp Fourier" ); alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons ); @@ -178,6 +179,7 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t } } } +#endif } // namespace atlas // -------------------------------------------------------------------------------------------------------------------- @@ -190,8 +192,10 @@ TransLocalopt::TransLocalopt( const Grid& grid, const long truncation, const eck TransLocalopt::~TransLocalopt() { free_aligned( legendre_sym_ ); free_aligned( legendre_asym_ ); +#if !ATLAS_HAVE_FFTW free_aligned( fourier_ ); free_aligned( fouriertp_ ); +#endif } // -------------------------------------------------------------------------------------------------------------------- 
@@ -347,7 +351,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field } #if 1 //ATLAS_HAVE_FFTW { - //ATLAS_TRACE( "opt merge spheres" ); + ATLAS_TRACE( "opt merge spheres" ); // northern hemisphere: int idx = 0; for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index d3bd6d45b..66b64d910 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -146,11 +146,21 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long } alloc_aligned( legendre_sym_, size_sym ); alloc_aligned( legendre_asym_, size_asym ); - compute_legendre_polynomialsopt2( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_, - legendre_sym_begin_.data(), legendre_asym_begin_.data() ); + //compute_legendre_polynomialsopt2( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_, + // legendre_sym_begin_.data(), legendre_asym_begin_.data() ); } - // precomputations for Fourier transformations: + // precomputations for Fourier transformations: +#if ATLAS_HAVE_FFTW + { + ATLAS_TRACE( "opt2 precomp FFTW" ); + int num_complex = ( nlons / 2 ) + 1; + fft_in_ = fftw_alloc_complex( nlats * num_complex ); + fft_out_ = fftw_alloc_real( nlats * nlons ); + plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons, + FFTW_ESTIMATE ); + } +#else { ATLAS_TRACE( "opt2 precomp Fourier" ); alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons ); @@ -175,15 +185,6 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long } } } -#if ATLAS_HAVE_FFTW - { - ATLAS_TRACE( "opt2 precomp FFTW" ); - int num_complex = ( nlons / 2 ) + 1; - fft_in_ = fftw_alloc_complex( nlats * num_complex ); - fft_out_ = fftw_alloc_real( nlats * nlons ); - plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, 
fft_out_, NULL, 1, nlons, - FFTW_ESTIMATE ); - } #endif } // namespace atlas @@ -197,12 +198,13 @@ TransLocalopt2::TransLocalopt2( const Grid& grid, const long truncation, const e TransLocalopt2::~TransLocalopt2() { free_aligned( legendre_sym_ ); free_aligned( legendre_asym_ ); - free_aligned( fourier_ ); - free_aligned( fouriertp_ ); #if ATLAS_HAVE_FFTW fftw_destroy_plan( plan_ ); fftw_free( fft_in_ ); fftw_free( fft_out_ ); +#else + free_aligned( fourier_ ); + free_aligned( fouriertp_ ); #endif } @@ -345,7 +347,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel } #if 1 //ATLAS_HAVE_FFTW { - //ATLAS_TRACE( "opt2 merge spheres" ); + ATLAS_TRACE( "opt2 merge spheres" ); // northern hemisphere: int ioff = jm * size_fourier_max; int pos0 = ioff; diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index e65ac2e63..3cf7299bc 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -146,11 +146,21 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long } alloc_aligned( legendre_sym_, size_sym ); alloc_aligned( legendre_asym_, size_asym ); - compute_legendre_polynomialsopt3( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_, - legendre_sym_begin_.data(), legendre_asym_begin_.data() ); + //compute_legendre_polynomialsopt3( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_, + // legendre_sym_begin_.data(), legendre_asym_begin_.data() ); } - // precomputations for Fourier transformations: + // precomputations for Fourier transformations: +#if 0 //ATLAS_HAVE_FFTW + { + ATLAS_TRACE( "opt3 precomp FFTW" ); + int num_complex = ( nlons / 2 ) + 1; + fft_in_ = fftw_alloc_complex( nlats * num_complex ); + fft_out_ = fftw_alloc_real( nlats * nlons ); + plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons, + FFTW_ESTIMATE ); + } 
+#else { ATLAS_TRACE( "opt3 precomp Fourier" ); alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons ); @@ -175,15 +185,6 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long } } } -#if ATLAS_HAVE_FFTW - { - ATLAS_TRACE( "opt3 precomp FFTW" ); - int num_complex = ( nlons / 2 ) + 1; - fft_in_ = fftw_alloc_complex( nlats * num_complex ); - fft_out_ = fftw_alloc_real( nlats * nlons ); - plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons, - FFTW_ESTIMATE ); - } #endif } // namespace atlas @@ -199,7 +200,7 @@ TransLocalopt3::~TransLocalopt3() { free_aligned( legendre_asym_ ); free_aligned( fourier_ ); free_aligned( fouriertp_ ); -#if ATLAS_HAVE_FFTW +#if 0 //ATLAS_HAVE_FFTW fftw_destroy_plan( plan_ ); fftw_free( fft_in_ ); fftw_free( fft_out_ ); diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 9e7540029..2f17a4cd1 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -704,7 +704,7 @@ CASE( "test_transgeneral_with_translib" ) { #endif #endif //----------------------------------------------------------------------------- -#if 1 +#if 0 CASE( "test_trans_vordiv_with_translib" ) { Log::info() << "test_trans_vordiv_with_translib" << std::endl; // test transgeneral by comparing its result with the trans library @@ -853,7 +853,7 @@ CASE( "test_trans_vordiv_with_translib" ) { } #endif //----------------------------------------------------------------------------- -#if 0 +#if 1 CASE( "test_trans_hires" ) { Log::info() << "test_trans_hires" << std::endl; // test transgeneral by comparing its result with the trans library @@ -863,9 +863,10 @@ CASE( "test_trans_hires" ) { double tolerance = 1.e-13; // Grid: (Adjust the following line if the test takes too long!) 
- Grid g( "F640" ); + Grid g( "F1280" ); #if ATLAS_HAVE_TRANS - std::string transTypes[4] = {"localopt", "localopt2", "localopt3", "ifs"}; + //std::string transTypes[4] = {"localopt", "localopt2", "localopt3", "ifs"}; + std::string transTypes[3] = {"localopt", "localopt2", "localopt3"}; //std::string transTypes[3] = {"localopt", "localopt2", "ifs"}; #else std::string transTypes[1] = {"localopt2"}; @@ -878,9 +879,9 @@ CASE( "test_trans_hires" ) { int nb_scalar = 1, nb_vordiv = 0; for ( auto transType : transTypes ) { - if ( transType == "localopt2" ) { trc = ndgl / 2. - 2; } + if ( transType == "ifs" ) { trc = ndgl / 2. - 1; } else { - trc = ndgl / 2. - 1; + trc = ndgl / 2. - 2; } int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; int icase = 0; @@ -899,7 +900,7 @@ CASE( "test_trans_hires" ) { for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. && - icase < 25 ) { + icase < 1 ) { auto start = std::chrono::system_clock::now(); std::vector sp( 2 * N * nb_scalar ); std::vector gp( nb_all * g.size() ); From c20fca7c55bc0004b1d1f10f5da40faf61ac2830 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Mar 2018 16:01:00 +0000 Subject: [PATCH 031/123] opt and opt2 are now doing the Legendre dgemm in a transposed way; opt: Legendre is writing data streight to fft_in and fftw to gp_fields --- .../trans/localopt/LegendrePolynomialsopt.cc | 4 +- src/atlas/trans/localopt/TransLocalopt.cc | 54 ++-- .../localopt2/LegendrePolynomialsopt2.cc | 4 +- src/atlas/trans/localopt2/TransLocalopt2.cc | 53 ++-- src/atlas/trans/localopt3/TransLocalopt3.cc | 285 +++++++++--------- src/tests/trans/test_transgeneral.cc | 10 +- 6 files changed, 195 insertions(+), 215 deletions(-) diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc index fc0e3d879..993936124 100644 --- 
a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc +++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc @@ -178,11 +178,11 @@ void compute_legendre_polynomialsopt( //for ( int jn = jm; jn <= trc; jn++ ) { for ( int jn = trc; jn >= jm; jn-- ) { if ( ( jn - jm ) % 2 == 0 ) { - int is = leg_start_sym[jm] + is1 * jlat + is2++; + int is = leg_start_sym[jm] + jlat + nlats * is2++; leg_sym[is] = legpol[idxmn( jm, jn )]; } else { - int ia = leg_start_asym[jm] + ia1 * jlat + ia2++; + int ia = leg_start_asym[jm] + jlat + nlats * ia2++; leg_asym[ia] = legpol[idxmn( jm, jn )]; } } diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index 087dcee41..112b70fb9 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -149,8 +149,8 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t } alloc_aligned( legendre_sym_, size_sym ); alloc_aligned( legendre_asym_, size_asym ); - //compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_, - // legendre_sym_begin_.data(), legendre_asym_begin_.data() ); + compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_, + legendre_sym_begin_.data(), legendre_asym_begin_.data() ); } // precomputations for Fourier transformations: @@ -285,9 +285,8 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field #if ATLAS_HAVE_FFTW int num_complex = ( nlons / 2 ) + 1; fftw_complex* fft_in = fftw_alloc_complex( nlats * num_complex * nb_fields ); - double* fft_out = fftw_alloc_real( nlats * nlons * nb_fields ); fftw_plan plan = fftw_plan_many_dft_c2r( 1, &nlons, nlats * nb_fields, fft_in, NULL, 1, num_complex, - fft_out, NULL, 1, nlons, FFTW_ESTIMATE ); + gp_fields, NULL, 1, nlons, FFTW_ESTIMATE ); for ( int j = 0; j < nlats * num_complex * nb_fields; j++ ) { for ( int imag = 0; imag < 2; imag++ ) { fft_in[j][imag] 
= 0.; @@ -317,16 +316,10 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field { //ATLAS_TRACE( "opt Legendre split" ); int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2; - // the choice between the following two code lines determines whether - // total wavenumbers are summed in an ascending or descending order. - // The trans library in IFS uses descending order because it should - // be more accurate (higher wavenumbers have smaller contributions). - // This also needs to be changed when splitting the spectral data in - // compute_legendre_polynomialsopt! - //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { - for ( int jn = truncation_ + 1; jn >= jm; jn-- ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { //ascending + for ( int jn = truncation_ + 1; jn >= jm; jn-- ) { // descending idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) ); if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } else { @@ -338,25 +331,25 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym ); } { - eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym ); - eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsNH ); - eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsNH ); + eckit::linalg::Matrix A( legendre_sym_ + legendre_sym_begin_[jm], nlatsNH, size_sym ); + eckit::linalg::Matrix B( scalar_sym, size_sym, nb_fields * n_imag ); + eckit::linalg::Matrix C( scl_fourier_sym, nlatsNH, nb_fields * n_imag ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } if ( size_asym > 0 ) { - eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym ); - 
eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsNH ); - eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH ); + eckit::linalg::Matrix A( legendre_asym_ + legendre_asym_begin_[jm], nlatsNH, size_asym ); + eckit::linalg::Matrix B( scalar_asym, size_asym, nb_fields * n_imag ); + eckit::linalg::Matrix C( scl_fourier_asym, nlatsNH, nb_fields * n_imag ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } #if 1 //ATLAS_HAVE_FFTW { - ATLAS_TRACE( "opt merge spheres" ); + //ATLAS_TRACE( "opt merge spheres" ); // northern hemisphere: int idx = 0; - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + for ( int jlat = 0; jlat < nlatsNH; jlat++, idx++ ) { fft_in[posFFTWin( jfld, jlat, jm )][imag] = scl_fourier_sym[idx] + scl_fourier_asym[idx]; } @@ -364,9 +357,9 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field } // southern hemisphere: idx = 0; - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + for ( int jlat = 0; jlat < nlatsNH; jlat++, idx++ ) { int jslat = nlats - jlat - 1; fft_in[posFFTWin( jfld, jslat, jm )][imag] = scl_fourier_sym[idx] - scl_fourier_asym[idx]; @@ -381,24 +374,13 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field } } { - int num_complex = ( nlons / 2 ) + 1; { ATLAS_TRACE( "opt FFTW" ); - { - ATLAS_TRACE( "fftw_execute" ); - fftw_execute( plan ); - } - { - ATLAS_TRACE( "read fft_out" ); - for ( int j = 0; j < nlats * nlons * nb_fields; j++ ) { - gp_fields[j] = fft_out[j]; - } - } + { fftw_execute( plan ); } } } fftw_destroy_plan( plan ); fftw_free( fft_in ); - fftw_free( fft_out ); #else #if 0 // 1: better for small number of 
columns, large truncation; 0: better for large number of columns { diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc index 9d96b1ac0..3221c3936 100644 --- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc +++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc @@ -178,11 +178,11 @@ void compute_legendre_polynomialsopt2( //for ( int jn = jm; jn <= trc; jn++ ) { for ( int jn = trc; jn >= jm; jn-- ) { if ( ( jn - jm ) % 2 == 0 ) { - int is = leg_start_sym[jm] + is1 * jlat + is2++; + int is = leg_start_sym[jm] + jlat + nlats * is2++; leg_sym[is] = legpol[idxmn( jm, jn )]; } else { - int ia = leg_start_asym[jm] + ia1 * jlat + ia2++; + int ia = leg_start_asym[jm] + jlat + nlats * ia2++; leg_asym[ia] = legpol[idxmn( jm, jn )]; } } diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index 66b64d910..ac1da815b 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -146,8 +146,8 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long } alloc_aligned( legendre_sym_, size_sym ); alloc_aligned( legendre_asym_, size_asym ); - //compute_legendre_polynomialsopt2( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_, - // legendre_sym_begin_.data(), legendre_asym_begin_.data() ); + compute_legendre_polynomialsopt2( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_, + legendre_sym_begin_.data(), legendre_asym_begin_.data() ); } // precomputations for Fourier transformations: @@ -313,16 +313,10 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel { //ATLAS_TRACE( "opt2 Legendre split" ); int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2; - // the choice between the following two code lines determines whether - // total wavenumbers are summed in an ascending or descending 
order. - // The trans library in IFS uses descending order because it should - // be more accurate (higher wavenumbers have smaller contributions). - // This also needs to be changed when splitting the spectral data in - // compute_legendre_polynomialsopt2! - //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { - for ( int jn = truncation_ + 1; jn >= jm; jn-- ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { //ascending + for ( int jn = truncation_ + 1; jn >= jm; jn-- ) { // descending idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) ); if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } else { @@ -333,7 +327,19 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel } ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym ); } - { + { // transposed + eckit::linalg::Matrix A( legendre_sym_ + legendre_sym_begin_[jm], nlatsNH, size_sym ); + eckit::linalg::Matrix B( scalar_sym, size_sym, nb_fields * n_imag ); + eckit::linalg::Matrix C( scl_fourier_sym, nlatsNH, nb_fields * n_imag ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + if ( size_asym > 0 ) { + eckit::linalg::Matrix A( legendre_asym_ + legendre_asym_begin_[jm], nlatsNH, size_asym ); + eckit::linalg::Matrix B( scalar_asym, size_asym, nb_fields * n_imag ); + eckit::linalg::Matrix C( scl_fourier_asym, nlatsNH, nb_fields * n_imag ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + /*{ // non-transposed eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym ); eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsNH ); eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsNH ); @@ -344,32 +350,25 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel 
eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsNH ); eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } + }*/ #if 1 //ATLAS_HAVE_FFTW { - ATLAS_TRACE( "opt2 merge spheres" ); + //ATLAS_TRACE( "opt2 merge spheres" ); // northern hemisphere: - int ioff = jm * size_fourier_max; - int pos0 = ioff; - int idx = 0; - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - int poslat = pos0 + 2 * jlat; + int idx = 0; + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { for ( int imag = 0; imag < n_imag; imag++ ) { - int posimag = nb_fields * ( imag + poslat ); - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + for ( int jlat = 0; jlat < nlatsNH; jlat++, idx++ ) { scl_fourier[posFFTW( jfld, imag, jlat, jm )] = scl_fourier_sym[idx] + scl_fourier_asym[idx]; } } } // southern hemisphere: - idx = 0; - pos0 = 2 * ( nlats - 1 ) + ioff; - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - int poslat = pos0 - 2 * jlat; + idx = 0; + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { for ( int imag = 0; imag < n_imag; imag++ ) { - int posimag = nb_fields * ( imag + poslat ); - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + for ( int jlat = 0; jlat < nlatsNH; jlat++, idx++ ) { int jslat = nlats - jlat - 1; scl_fourier[posFFTW( jfld, imag, jslat, jm )] = scl_fourier_sym[idx] - scl_fourier_asym[idx]; diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index 3cf7299bc..273af7082 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -146,12 +146,12 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long } alloc_aligned( legendre_sym_, size_sym ); alloc_aligned( legendre_asym_, size_asym ); - //compute_legendre_polynomialsopt3( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_, - // legendre_sym_begin_.data(), 
legendre_asym_begin_.data() ); + compute_legendre_polynomialsopt3( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_, + legendre_sym_begin_.data(), legendre_asym_begin_.data() ); } // precomputations for Fourier transformations: -#if 0 //ATLAS_HAVE_FFTW +#if ATLAS_HAVE_FFTW { ATLAS_TRACE( "opt3 precomp FFTW" ); int num_complex = ( nlons / 2 ) + 1; @@ -198,12 +198,13 @@ TransLocalopt3::TransLocalopt3( const Grid& grid, const long truncation, const e TransLocalopt3::~TransLocalopt3() { free_aligned( legendre_sym_ ); free_aligned( legendre_asym_ ); - free_aligned( fourier_ ); - free_aligned( fouriertp_ ); -#if 0 //ATLAS_HAVE_FFTW +#if ATLAS_HAVE_FFTW fftw_destroy_plan( plan_ ); fftw_free( fft_in_ ); fftw_free( fft_out_ ); +#else + free_aligned( fourier_ ); + free_aligned( fouriertp_ ); #endif } @@ -344,7 +345,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } -#if 0 //ATLAS_HAVE_FFTW +#if 1 //ATLAS_HAVE_FFTW { //ATLAS_TRACE( "opt3 merge spheres" ); // northern hemisphere: @@ -409,7 +410,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel } } #else -#if 0 // 1: better for small number of columns, large truncation; 0: better for large number of columns +#if 1 // 1: better for small number of columns, large truncation; 0: better for large number of columns { //ATLAS_TRACE( "opt3 merge spheres" ); // northern hemisphere: @@ -524,168 +525,166 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel #endif #endif - // Computing u,v from U,V: - { - if ( nb_vordiv_fields > 0 ) { - ATLAS_TRACE( "opt3 u,v from U,V" ); - std::vector coslats( nlats ); - for ( size_t j = 0; j < nlats; ++j ) { - coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); - } - int idx = 0; - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - 
for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { - gp_fields[idx] /= coslats[jlat]; - idx++; - } - } - } - } + // Computing u,v from U,V: + { + if ( nb_vordiv_fields > 0 ) { + ATLAS_TRACE( "opt3 u,v from U,V" ); + std::vector coslats( nlats ); + for ( size_t j = 0; j < nlats; ++j ) { + coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); } - free_aligned( scl_fourier ); - } - else { - ATLAS_TRACE( "invtrans_uv unstructured opt3" ); int idx = 0; - for ( PointXY p : grid_.xy() ) { - double lon = p.x() * util::Constants::degreesToRadians(); - double lat = p.y() * util::Constants::degreesToRadians(); - double trcFT = truncation; - - // Legendre transform: - //invtrans_legendreopt3( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, - // legReal.data(), legImag.data() ); - - // Fourier transform: - //invtrans_fourieropt3( trcFT, lon, nb_fields, legReal.data(), legImag.data(), - // gp_tmp.data() + ( nb_fields * idx ) ); - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { + gp_fields[idx] /= coslats[jlat]; + idx++; + } } - ++idx; } } } - } // namespace trans - - // -------------------------------------------------------------------------------------------------------------------- - - void TransLocalopt3::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], - const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); + free_aligned( scl_fourier ); } - - void extend_truncationopt3( const int old_truncation, const int nb_fields, const double old_spectra[], - double new_spectra[] ) { - int k = 0, k_old = 0; - 
for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber - for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber - for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { // field - if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; } - else { - new_spectra[k++] = old_spectra[k_old++]; - } - } - } + else { + ATLAS_TRACE( "invtrans_uv unstructured opt3" ); + int idx = 0; + for ( PointXY p : grid_.xy() ) { + double lon = p.x() * util::Constants::degreesToRadians(); + double lat = p.y() * util::Constants::degreesToRadians(); + double trcFT = truncation; + + // Legendre transform: + //invtrans_legendreopt3( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, + // legReal.data(), legImag.data() ); + + // Fourier transform: + //invtrans_fourieropt3( trcFT, lon, nb_fields, legReal.data(), legImag.data(), + // gp_tmp.data() + ( nb_fields * idx ) ); + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); } + ++idx; } } + } +} // namespace trans - // -------------------------------------------------------------------------------------------------------------------- - - void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[], - const int nb_vordiv_fields, const double vorticity_spectra[], - const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - ATLAS_TRACE( "TransLocalopt3::invtrans" ); - int nb_gp = grid_.size(); - int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; - if ( nb_vordiv_fields > 0 ) { - std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector U_ext( nb_vordiv_spec_ext, 0. ); - std::vector V_ext( nb_vordiv_spec_ext, 0. 
); +// -------------------------------------------------------------------------------------------------------------------- - { - ATLAS_TRACE( "opt3 extend vordiv" ); - // increase truncation in vorticity_spectra and divergence_spectra: - extend_truncationopt3( truncation_, nb_vordiv_fields, vorticity_spectra, - vorticity_spectra_extended.data() ); - extend_truncationopt3( truncation_, nb_vordiv_fields, divergence_spectra, - divergence_spectra_extended.data() ); - } +void TransLocalopt3::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], + const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); +} - { - ATLAS_TRACE( "vordiv to UV opt3" ); - // call vd2uv to compute u and v in spectral space - trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt3" ) ); - vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), - divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); +void extend_truncationopt3( const int old_truncation, const int nb_fields, const double old_spectra[], + double new_spectra[] ) { + int k = 0, k_old = 0; + for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber + for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber + for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { // field + if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; } + else { + new_spectra[k++] = old_spectra[k_old++]; + } } - - // perform spectral transform to compute all fields in grid point space - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), - gp_fields + nb_gp * nb_vordiv_fields, 
config ); - } - if ( nb_scalar_fields > 0 ) { - int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; - std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. ); - extend_truncationopt3( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); - invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), - gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); } } + } +} - // -------------------------------------------------------------------------------------------------------------------- +// -------------------------------------------------------------------------------------------------------------------- - void TransLocalopt3::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) - const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. +void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, + const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + ATLAS_TRACE( "TransLocalopt3::invtrans" ); + int nb_gp = grid_.size(); + int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; + if ( nb_vordiv_fields > 0 ) { + std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector U_ext( nb_vordiv_spec_ext, 0. ); + std::vector V_ext( nb_vordiv_spec_ext, 0. 
); + + { + ATLAS_TRACE( "opt3 extend vordiv" ); + // increase truncation in vorticity_spectra and divergence_spectra: + extend_truncationopt3( truncation_, nb_vordiv_fields, vorticity_spectra, + vorticity_spectra_extended.data() ); + extend_truncationopt3( truncation_, nb_vordiv_fields, divergence_spectra, + divergence_spectra_extended.data() ); } - // -------------------------------------------------------------------------------------------------------------------- - - void TransLocalopt3::dirtrans( const FieldSet& gpfields, FieldSet& spfields, - const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. + { + ATLAS_TRACE( "vordiv to UV opt3" ); + // call vd2uv to compute u and v in spectral space + trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt3" ) ); + vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), + divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); } - // -------------------------------------------------------------------------------------------------------------------- + // perform spectral transform to compute all fields in grid point space + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), + gp_fields + nb_gp * nb_vordiv_fields, config ); + } + if ( nb_scalar_fields > 0 ) { + int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; + std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. 
); + extend_truncationopt3( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); + invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), + gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); + } +} - void TransLocalopt3::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, - const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. - } +// -------------------------------------------------------------------------------------------------------------------- - // -------------------------------------------------------------------------------------------------------------------- +void TransLocalopt3::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. +} - void TransLocalopt3::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], - const eckit::Configuration& ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. - } +// -------------------------------------------------------------------------------------------------------------------- - // -------------------------------------------------------------------------------------------------------------------- +void TransLocalopt3::dirtrans( const FieldSet& gpfields, FieldSet& spfields, + const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. +} - void TransLocalopt3::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], - double divergence_spectra[], const eckit::Configuration& ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. 
- } +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt3::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, + const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. +} + +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt3::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], + const eckit::Configuration& ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. +} + +// -------------------------------------------------------------------------------------------------------------------- - // -------------------------------------------------------------------------------------------------------------------- +void TransLocalopt3::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], + double divergence_spectra[], const eckit::Configuration& ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. 
+} + +// -------------------------------------------------------------------------------------------------------------------- - } // namespace trans +} // namespace trans } // namespace atlas diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 2f17a4cd1..9aa9eb6a6 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -724,8 +724,8 @@ CASE( "test_trans_vordiv_with_translib" ) { trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) ); double rav = 0.; // compute average rms error of trans library in rav #endif - trans::Trans transLocal1( g, trc, util::Config( "type", "localopt" ) ); - trans::Trans transLocal2( g, trc, util::Config( "type", "localopt2" ) ); + trans::Trans transLocal1( g, trc, util::Config( "type", "localopt2" ) ); + trans::Trans transLocal2( g, trc, util::Config( "type", "localopt3" ) ); double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 functionspace::Spectral spectral( trc ); @@ -863,10 +863,10 @@ CASE( "test_trans_hires" ) { double tolerance = 1.e-13; // Grid: (Adjust the following line if the test takes too long!) 
- Grid g( "F1280" ); + Grid g( "F640" ); #if ATLAS_HAVE_TRANS - //std::string transTypes[4] = {"localopt", "localopt2", "localopt3", "ifs"}; - std::string transTypes[3] = {"localopt", "localopt2", "localopt3"}; + std::string transTypes[4] = {"localopt", "localopt2", "localopt3", "ifs"}; + //std::string transTypes[2] = {"localopt", "localopt2"}; //std::string transTypes[3] = {"localopt", "localopt2", "ifs"}; #else std::string transTypes[1] = {"localopt2"}; From 2a3a977dbc2df310193be4f10df4b847f00042ab Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 21 Mar 2018 19:12:34 +0000 Subject: [PATCH 032/123] optimised computation of Legendre polynomials --- .../trans/localopt/LegendrePolynomialsopt.cc | 15 ++++++++--- .../localopt2/LegendrePolynomialsopt2.cc | 15 ++++++++--- .../localopt3/LegendrePolynomialsopt3.cc | 21 ++++++++-------- src/atlas/trans/localopt3/TransLocalopt3.cc | 25 ++++++++----------- src/tests/trans/test_transgeneral.cc | 7 +++--- 5 files changed, 48 insertions(+), 35 deletions(-) diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc index 993936124..413620301 100644 --- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc +++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc @@ -69,6 +69,13 @@ void compute_legendre_polynomialsopt( double zdlsita = std::sqrt( 1. 
- zdlx * zdlx ); // sin(theta) (this is how trans library does it) legpol[idxmn( 0, 0 )] = 1.; + double vsin[trc + 1], vcos[trc + 1]; + for ( int j = 1; j <= trc; j++ ) { + vsin[j] = std::sin( j * zdlx1 ); + } + for ( int j = 1; j <= trc; j++ ) { + vcos[j] = std::cos( j * zdlx1 ); + } double zdl1sita = 0.; // if we are less than 1 meter from the pole, @@ -91,9 +98,9 @@ void compute_legendre_polynomialsopt( // represented by only even k for ( int jk = 2; jk <= jn; jk += 2 ) { // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); + zdlk = zdlk + zfn( jn, jk ) * vcos[jk]; // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); + zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk]; } legpol[idxmn( 0, jn )] = zdlk; legpol[idxmn( 1, jn )] = zdlldn; @@ -108,9 +115,9 @@ void compute_legendre_polynomialsopt( // represented by only even k for ( int jk = 1; jk <= jn; jk += 2 ) { // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); + zdlk = zdlk + zfn( jn, jk ) * vcos[jk]; // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); + zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk]; } legpol[idxmn( 0, jn )] = zdlk; legpol[idxmn( 1, jn )] = zdlldn; diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc index 3221c3936..1d9f86daa 100644 --- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc +++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc @@ -69,6 +69,13 @@ void compute_legendre_polynomialsopt2( double zdlsita = std::sqrt( 1. 
- zdlx * zdlx ); // sin(theta) (this is how trans library does it) legpol[idxmn( 0, 0 )] = 1.; + double vsin[trc + 1], vcos[trc + 1]; + for ( int j = 1; j <= trc; j++ ) { + vsin[j] = std::sin( j * zdlx1 ); + } + for ( int j = 1; j <= trc; j++ ) { + vcos[j] = std::cos( j * zdlx1 ); + } double zdl1sita = 0.; // if we are less than 1 meter from the pole, @@ -91,9 +98,9 @@ void compute_legendre_polynomialsopt2( // represented by only even k for ( int jk = 2; jk <= jn; jk += 2 ) { // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); + zdlk = zdlk + zfn( jn, jk ) * vcos[jk]; // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); + zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk]; } legpol[idxmn( 0, jn )] = zdlk; legpol[idxmn( 1, jn )] = zdlldn; @@ -108,9 +115,9 @@ void compute_legendre_polynomialsopt2( // represented by only even k for ( int jk = 1; jk <= jn; jk += 2 ) { // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); + zdlk = zdlk + zfn( jn, jk ) * vcos[jk]; // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); + zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk]; } legpol[idxmn( 0, jn )] = zdlk; legpol[idxmn( 1, jn )] = zdlldn; diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc index c6fbcad68..3ea2b41ef 100644 --- a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc +++ b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc @@ -69,6 +69,13 @@ void compute_legendre_polynomialsopt3( double zdlsita = std::sqrt( 1. 
- zdlx * zdlx ); // sin(theta) (this is how trans library does it) legpol[idxmn( 0, 0 )] = 1.; + double vsin[trc + 1], vcos[trc + 1]; + for ( int j = 1; j <= trc; j++ ) { + vsin[j] = std::sin( j * zdlx1 ); + } + for ( int j = 1; j <= trc; j++ ) { + vcos[j] = std::cos( j * zdlx1 ); + } double zdl1sita = 0.; // if we are less than 1 meter from the pole, @@ -91,9 +98,9 @@ void compute_legendre_polynomialsopt3( // represented by only even k for ( int jk = 2; jk <= jn; jk += 2 ) { // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); + zdlk = zdlk + zfn( jn, jk ) * vcos[jk]; // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); + zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk]; } legpol[idxmn( 0, jn )] = zdlk; legpol[idxmn( 1, jn )] = zdlldn; @@ -108,9 +115,9 @@ void compute_legendre_polynomialsopt3( // represented by only even k for ( int jk = 1; jk <= jn; jk += 2 ) { // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); + zdlk = zdlk + zfn( jn, jk ) * vcos[jk]; // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); + zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk]; } legpol[idxmn( 0, jn )] = zdlk; legpol[idxmn( 1, jn )] = zdlldn; @@ -154,12 +161,6 @@ void compute_legendre_polynomialsopt3( { //ATLAS_TRACE( "add to global arrays" ); - // take factor 2 for m > 0 into account: - for ( int jm = 1; jm <= trc; ++jm ) { - for ( int jn = jm; jn <= trc; ++jn ) { - legpol[idxmn( jm, jn )] *= 2.; - } - } for ( int jm = 0; jm <= trc; jm++ ) { int is1 = 0, ia1 = 0; for ( int jn = jm; jn <= trc; jn++ ) { diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index 273af7082..ebab9bf06 100644 --- 
a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -166,9 +166,11 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons ); int idx = 0; for ( int jlon = 0; jlon < nlons; jlon++ ) { + double factor = 1.; + if ( jm > 0 ) { factor = 0.5; } for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - fourier_[idx++] = +std::cos( jm * lons[jlon] ); // real part - fourier_[idx++] = -std::sin( jm * lons[jlon] ); // imaginary part + fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part + fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part } } } @@ -177,11 +179,13 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long alloc_aligned( fouriertp_, 2 * ( truncation_ + 1 ) * nlons ); int idx = 0; for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + double factor = 1.; + if ( jm > 0 ) { factor = 0.5; } for ( int jlon = 0; jlon < nlons; jlon++ ) { - fouriertp_[idx++] = +std::cos( jm * lons[jlon] ); // real part + fouriertp_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part } for ( int jlon = 0; jlon < nlons; jlon++ ) { - fouriertp_[idx++] = -std::sin( jm * lons[jlon] ); // imaginary part + fouriertp_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part } } } @@ -349,13 +353,9 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel { //ATLAS_TRACE( "opt3 merge spheres" ); // northern hemisphere: - int ioff = jm * size_fourier_max; - int pos0 = ioff; - int idx = 0; + int idx = 0; for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - int poslat = pos0 + 2 * jlat; for ( int imag = 0; imag < n_imag; imag++ ) { - int posimag = nb_fields * ( imag + poslat ); for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { scl_fourier[posFFTW( jfld, imag, jlat, jm )] = scl_fourier_sym[idx] + scl_fourier_asym[idx]; @@ -363,12 +363,9 @@ void 
TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel } } // southern hemisphere: - idx = 0; - pos0 = 2 * ( nlats - 1 ) + ioff; + idx = 0; for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - int poslat = pos0 - 2 * jlat; for ( int imag = 0; imag < n_imag; imag++ ) { - int posimag = nb_fields * ( imag + poslat ); for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { int jslat = nlats - jlat - 1; scl_fourier[posFFTW( jfld, imag, jslat, jm )] = @@ -394,7 +391,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel for ( int jm = 1; jm < num_complex; jm++, idx++ ) { for ( int imag = 0; imag < 2; imag++ ) { if ( jm <= truncation_ ) { - fft_in_[idx][imag] = scl_fourier[posFFTW( jfld, imag, jlat, jm )] / 2.; + fft_in_[idx][imag] = scl_fourier[posFFTW( jfld, imag, jlat, jm )]; } else { fft_in_[idx][imag] = 0.; diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 9aa9eb6a6..db9188c5a 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -724,8 +724,8 @@ CASE( "test_trans_vordiv_with_translib" ) { trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) ); double rav = 0.; // compute average rms error of trans library in rav #endif - trans::Trans transLocal1( g, trc, util::Config( "type", "localopt2" ) ); - trans::Trans transLocal2( g, trc, util::Config( "type", "localopt3" ) ); + trans::Trans transLocal1( g, trc, util::Config( "type", "localopt" ) ); + trans::Trans transLocal2( g, trc, util::Config( "type", "localopt2" ) ); double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 functionspace::Spectral spectral( trc ); @@ -863,11 +863,12 @@ CASE( "test_trans_hires" ) { double tolerance = 1.e-13; // Grid: (Adjust the following line if the test takes too long!) 
- Grid g( "F640" ); + Grid g( "F1280" ); #if ATLAS_HAVE_TRANS std::string transTypes[4] = {"localopt", "localopt2", "localopt3", "ifs"}; //std::string transTypes[2] = {"localopt", "localopt2"}; //std::string transTypes[3] = {"localopt", "localopt2", "ifs"}; + //std::string transTypes[1] = {"localopt3"}; #else std::string transTypes[1] = {"localopt2"}; #endif From ba42bb8600c5cd1f2babef3fea47412b34a67a54 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 23 Mar 2018 18:21:25 +0000 Subject: [PATCH 033/123] added test for limited domain; fixed Fourier transformation with dgemm --- src/atlas/trans/localopt3/TransLocalopt3.cc | 289 ++++++++++---------- src/tests/trans/test_transgeneral.cc | 139 +++++++++- 2 files changed, 277 insertions(+), 151 deletions(-) diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index ebab9bf06..d595f500a 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -151,7 +151,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long } // precomputations for Fourier transformations: -#if ATLAS_HAVE_FFTW +#if 0 //ATLAS_HAVE_FFTW { ATLAS_TRACE( "opt3 precomp FFTW" ); int num_complex = ( nlons / 2 ) + 1; @@ -167,8 +167,8 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long int idx = 0; for ( int jlon = 0; jlon < nlons; jlon++ ) { double factor = 1.; - if ( jm > 0 ) { factor = 0.5; } for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + if ( jm > 0 ) { factor = 2.; } fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part } @@ -180,7 +180,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long int idx = 0; for ( int jm = 0; jm < truncation_ + 1; jm++ ) { double factor = 1.; - if ( jm > 0 ) { factor = 0.5; } + if ( jm > 0 ) { factor = 2.; } for ( int jlon = 0; jlon < 
nlons; jlon++ ) { fouriertp_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part } @@ -202,7 +202,7 @@ TransLocalopt3::TransLocalopt3( const Grid& grid, const long truncation, const e TransLocalopt3::~TransLocalopt3() { free_aligned( legendre_sym_ ); free_aligned( legendre_asym_ ); -#if ATLAS_HAVE_FFTW +#if 0 //ATLAS_HAVE_FFTW fftw_destroy_plan( plan_ ); fftw_free( fft_in_ ); fftw_free( fft_out_ ); @@ -349,7 +349,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } -#if 1 //ATLAS_HAVE_FFTW +#if 0 //ATLAS_HAVE_FFTW { //ATLAS_TRACE( "opt3 merge spheres" ); // northern hemisphere: @@ -411,13 +411,9 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel { //ATLAS_TRACE( "opt3 merge spheres" ); // northern hemisphere: - int ioff = jm * size_fourier_max; - int pos0 = ioff; - int idx = 0; + int idx = 0; for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - int poslat = pos0 + 2 * jlat; for ( int imag = 0; imag < n_imag; imag++ ) { - int posimag = nb_fields * ( imag + poslat ); for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { scl_fourier[posGemm1( jfld, imag, jlat, jm )] = scl_fourier_sym[idx] + scl_fourier_asym[idx]; @@ -425,12 +421,9 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel } } // southern hemisphere: - idx = 0; - pos0 = 2 * ( nlats - 1 ) + ioff; + idx = 0; for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - int poslat = pos0 - 2 * jlat; for ( int imag = 0; imag < n_imag; imag++ ) { - int posimag = nb_fields * ( imag + poslat ); for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { int jslat = nlats - jlat - 1; scl_fourier[posGemm1( jfld, imag, jslat, jm )] = @@ -522,166 +515,168 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel #endif #endif - // Computing u,v from U,V: - { - if ( 
nb_vordiv_fields > 0 ) { - ATLAS_TRACE( "opt3 u,v from U,V" ); - std::vector coslats( nlats ); - for ( size_t j = 0; j < nlats; ++j ) { - coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); - } - int idx = 0; - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { - gp_fields[idx] /= coslats[jlat]; - idx++; + // Computing u,v from U,V: + { + if ( nb_vordiv_fields > 0 ) { + ATLAS_TRACE( "opt3 u,v from U,V" ); + std::vector coslats( nlats ); + for ( size_t j = 0; j < nlats; ++j ) { + coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); + } + int idx = 0; + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { + gp_fields[idx] /= coslats[jlat]; + idx++; + } + } } } } + free_aligned( scl_fourier ); } - } - free_aligned( scl_fourier ); - } - else { - ATLAS_TRACE( "invtrans_uv unstructured opt3" ); - int idx = 0; - for ( PointXY p : grid_.xy() ) { - double lon = p.x() * util::Constants::degreesToRadians(); - double lat = p.y() * util::Constants::degreesToRadians(); - double trcFT = truncation; - - // Legendre transform: - //invtrans_legendreopt3( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, - // legReal.data(), legImag.data() ); - - // Fourier transform: - //invtrans_fourieropt3( trcFT, lon, nb_fields, legReal.data(), legImag.data(), - // gp_tmp.data() + ( nb_fields * idx ) ); - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + else { + ATLAS_TRACE( "invtrans_uv unstructured opt3" ); + int idx = 0; + for ( PointXY p : grid_.xy() ) { + double lon = p.x() * util::Constants::degreesToRadians(); + double lat = p.y() * util::Constants::degreesToRadians(); + double trcFT = truncation; + + // Legendre transform: + //invtrans_legendreopt3( truncation, 
trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, + // legReal.data(), legImag.data() ); + + // Fourier transform: + //invtrans_fourieropt3( trcFT, lon, nb_fields, legReal.data(), legImag.data(), + // gp_tmp.data() + ( nb_fields * idx ) ); + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + } + ++idx; + } } - ++idx; } - } - } -} // namespace trans + } // namespace trans -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], - const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); -} + void TransLocalopt3::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], + const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); + } -void extend_truncationopt3( const int old_truncation, const int nb_fields, const double old_spectra[], - double new_spectra[] ) { - int k = 0, k_old = 0; - for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber - for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber - for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { // field - if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; } - else { - new_spectra[k++] = old_spectra[k_old++]; + void extend_truncationopt3( const int old_truncation, const int nb_fields, const double old_spectra[], + double new_spectra[] 
) { + int k = 0, k_old = 0; + for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber + for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber + for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { // field + if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; } + else { + new_spectra[k++] = old_spectra[k_old++]; + } + } } } } } - } -} -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- + + void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[], + const int nb_vordiv_fields, const double vorticity_spectra[], + const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + ATLAS_TRACE( "TransLocalopt3::invtrans" ); + int nb_gp = grid_.size(); + int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; + if ( nb_vordiv_fields > 0 ) { + std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector U_ext( nb_vordiv_spec_ext, 0. ); + std::vector V_ext( nb_vordiv_spec_ext, 0. ); -void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, - const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - ATLAS_TRACE( "TransLocalopt3::invtrans" ); - int nb_gp = grid_.size(); - int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; - if ( nb_vordiv_fields > 0 ) { - std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. 
); - std::vector U_ext( nb_vordiv_spec_ext, 0. ); - std::vector V_ext( nb_vordiv_spec_ext, 0. ); - - { - ATLAS_TRACE( "opt3 extend vordiv" ); - // increase truncation in vorticity_spectra and divergence_spectra: - extend_truncationopt3( truncation_, nb_vordiv_fields, vorticity_spectra, - vorticity_spectra_extended.data() ); - extend_truncationopt3( truncation_, nb_vordiv_fields, divergence_spectra, - divergence_spectra_extended.data() ); - } + { + ATLAS_TRACE( "opt3 extend vordiv" ); + // increase truncation in vorticity_spectra and divergence_spectra: + extend_truncationopt3( truncation_, nb_vordiv_fields, vorticity_spectra, + vorticity_spectra_extended.data() ); + extend_truncationopt3( truncation_, nb_vordiv_fields, divergence_spectra, + divergence_spectra_extended.data() ); + } - { - ATLAS_TRACE( "vordiv to UV opt3" ); - // call vd2uv to compute u and v in spectral space - trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt3" ) ); - vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), - divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); - } + { + ATLAS_TRACE( "vordiv to UV opt3" ); + // call vd2uv to compute u and v in spectral space + trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt3" ) ); + vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), + divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); + } - // perform spectral transform to compute all fields in grid point space - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), - gp_fields + nb_gp * nb_vordiv_fields, config ); - } - if ( nb_scalar_fields > 0 ) { - int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; - std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. 
); - extend_truncationopt3( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); - invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), - gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); - } -} + // perform spectral transform to compute all fields in grid point space + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), + gp_fields + nb_gp * nb_vordiv_fields, config ); + } + if ( nb_scalar_fields > 0 ) { + int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; + std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. ); + extend_truncationopt3( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); + invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), + gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); + } + } -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} + void TransLocalopt3::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) + const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. 
+ } -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::dirtrans( const FieldSet& gpfields, FieldSet& spfields, - const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} + void TransLocalopt3::dirtrans( const FieldSet& gpfields, FieldSet& spfields, + const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. + } -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, - const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} + void TransLocalopt3::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, + const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. + } -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], - const eckit::Configuration& ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. 
-} + void TransLocalopt3::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], + const eckit::Configuration& ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. + } -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], - double divergence_spectra[], const eckit::Configuration& ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} + void TransLocalopt3::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], + double divergence_spectra[], const eckit::Configuration& ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. 
+ } -// -------------------------------------------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------------------------------------------- -} // namespace trans + } // namespace trans } // namespace atlas diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index db9188c5a..8a1034b86 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -704,7 +704,7 @@ CASE( "test_transgeneral_with_translib" ) { #endif #endif //----------------------------------------------------------------------------- -#if 0 +#if 1 CASE( "test_trans_vordiv_with_translib" ) { Log::info() << "test_trans_vordiv_with_translib" << std::endl; // test transgeneral by comparing its result with the trans library @@ -724,8 +724,8 @@ CASE( "test_trans_vordiv_with_translib" ) { trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) ); double rav = 0.; // compute average rms error of trans library in rav #endif - trans::Trans transLocal1( g, trc, util::Config( "type", "localopt" ) ); - trans::Trans transLocal2( g, trc, util::Config( "type", "localopt2" ) ); + trans::Trans transLocal1( g, trc, util::Config( "type", "localopt3" ) ); + trans::Trans transLocal2( g, trc, util::Config( "type", "localopt3" ) ); double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 functionspace::Spectral spectral( trc ); @@ -853,7 +853,7 @@ CASE( "test_trans_vordiv_with_translib" ) { } #endif //----------------------------------------------------------------------------- -#if 1 +#if 0 CASE( "test_trans_hires" ) { Log::info() << "test_trans_hires" << std::endl; // test transgeneral by comparing its result with the trans library @@ -932,6 +932,137 @@ CASE( "test_trans_hires" ) { #endif //----------------------------------------------------------------------------- #if 0 +CASE( "test_trans_domain" ) { + 
Log::info() << "test_trans_domain" << std::endl; + // test transgeneral by comparing with analytic solution on a cropped domain + + std::ostream& out = Log::info(); + double tolerance = 1.e-13; + + //Domain testdomain = ZonalBandDomain( {-90., 90.} ); + //Domain testdomain = ZonalBandDomain( {-.5, .5} ); + Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} ); + // Grid: (Adjust the following line if the test takes too long!) + Grid g( "F8000", testdomain ); + Grid g_global( g.name() ); + + grid::StructuredGrid gs( g ); + grid::StructuredGrid gs_global( g_global ); + int ndgl = gs_global.ny(); + //int trc = ndgl - 1; // linear + int trc = ndgl / 2. - 1; // cubic + trans::Trans transLocal1( g, trc, util::Config( "type", "localopt3" ) ); + trans::Trans transLocal2( g, trc, util::Config( "type", "localopt3" ) ); + double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 + + functionspace::Spectral spectral( trc ); + functionspace::StructuredColumns gridpoints( g ); + + int nb_scalar = 1, nb_vordiv = 0; + int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; + std::vector sp( 2 * N * nb_scalar ); + std::vector vor( 2 * N * nb_vordiv ); + std::vector div( 2 * N * nb_vordiv ); + std::vector rspecg( 2 * N ); + std::vector gp( nb_all * g.size() ); + std::vector rgp1( nb_all * g.size() ); + std::vector rgp2( nb_all * g.size() ); + std::vector rgp_analytic( g.size() ); + + int icase = 0; + for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) { // vorticity, divergence, scalar + for ( int ivar_out = 2; ivar_out < 3; ivar_out++ ) { // u, v, scalar + int nb_fld = 1; + if ( ivar_out == 2 ) { + tolerance = 1.e-13; + nb_fld = nb_scalar; + } + else { + tolerance = 2.e-6; + nb_fld = nb_vordiv; + } + for ( int jfld = 0; jfld < nb_fld; jfld++ ) { // multiple fields + int k = 0; + for ( int m = 0; m <= trc; m++ ) { // zonal wavenumber + for ( int n = m; n <= trc; n++ ) { // total wavenumber + for ( int imag = 0; imag <= 1; 
imag++ ) { // real and imaginary part + + if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. && + icase < 1 ) { + auto start = std::chrono::system_clock::now(); + for ( int j = 0; j < 2 * N * nb_scalar; j++ ) { + sp[j] = 0.; + } + for ( int j = 0; j < 2 * N * nb_vordiv; j++ ) { + vor[j] = 0.; + div[j] = 0.; + } + if ( ivar_in == 0 ) vor[k * nb_vordiv + jfld] = 1.; + if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.; + if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.; + + for ( int j = 0; j < nb_all * g.size(); j++ ) { + gp[j] = 0.; + rgp1[j] = 0.; + rgp2[j] = 0.; + } + for ( int j = 0; j < g.size(); j++ ) { + rgp_analytic[j] = 0.; + } + + spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(), + rgp_analytic.data(), ivar_in, ivar_out ); + + EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), + div.data(), rgp1.data() ) ); + + EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), + div.data(), rgp2.data() ) ); + + int pos = ( ivar_out * nb_vordiv + jfld ); + + double rms_gen1 = + compute_rms( g.size(), rgp1.data() + pos * g.size(), rgp_analytic.data() ); + + double rms_gen2 = + compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() ); + + rav1 += rms_gen1; + rav2 += rms_gen2; + if ( !( rms_gen1 < tolerance ) || !( rms_gen2 < tolerance ) ) { + Log::info() + << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out + << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; + ATLAS_DEBUG_VAR( rms_gen1 ); + ATLAS_DEBUG_VAR( rms_gen2 ); + ATLAS_DEBUG_VAR( tolerance ); + } + EXPECT( rms_gen1 < tolerance ); + EXPECT( rms_gen2 < tolerance ); + icase++; + auto end = std::chrono::system_clock::now(); // + std::chrono::duration elapsed_seconds = end - start; + std::time_t end_time = std::chrono::system_clock::to_time_t( end ); + std::string time_str = std::ctime( &end_time ); + Log::info() << "case " << icase 
<< ", elapsed time: " << elapsed_seconds.count() + << "s. Now: " << time_str.substr( 0, time_str.length() - 1 ) << std::endl; + } + k++; + } + } + } + } + } + } + Log::info() << "Vordiv+scalar comparison with trans: all " << icase << " cases successfully passed!" << std::endl; + rav1 /= icase; + Log::info() << "average RMS error of transLocal1: " << rav1 << std::endl; + rav2 /= icase; + Log::info() << "average RMS error of transLocal2: " << rav2 << std::endl; +} +#endif +//----------------------------------------------------------------------------- +#if 0 CASE( "test_trans_invtrans" ) { trans::Trans trans( Grid( "O64" ), 63, util::Config( "type", "local" ) ); From a82999018a92f0ad498b5450ec7b383f4ef3b2c0 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 26 Mar 2018 18:29:16 +0100 Subject: [PATCH 034/123] localopt3 can now handle arbitrary domains with regular grids; FFT still requires zonal band --- src/atlas/trans/localopt3/TransLocalopt3.cc | 116 ++++++++++++-------- src/atlas/trans/localopt3/TransLocalopt3.h | 3 + src/tests/trans/test_transgeneral.cc | 16 +-- 3 files changed, 82 insertions(+), 53 deletions(-) diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index d595f500a..d57b2810d 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -98,25 +98,52 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long #endif int nlats = 0; int nlons = 0; + int neqtr = 0; int nlatsNH = nlats_northernHemisphere( nlats ); + nlatsNH_ = 0; + nlatsSH_ = 0; + nlatsLeg_ = 0; + double sign = 1.; if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { grid::StructuredGrid g( grid_ ); - nlats = g.ny(); - nlons = g.nxmax(); - nlatsNH = nlats_northernHemisphere( nlats ); + nlats = g.ny(); + nlons = g.nxmax(); + for ( size_t j = 0; j < nlats; ++j ) { + // assumptions: latitudes in g.y(j) are monotone and decreasing + // no assumption 
on whether we have 0, 1 or 2 latitudes at the equator + double lat = g.y( j ); + if ( lat > 0. ) { nlatsNH_++; } + if ( lat == 0. ) { neqtr++; } + if ( lat < 0. ) { nlatsSH_++; } + } + if ( neqtr > 0 ) { + nlatsNH_++; + nlatsSH_++; + } + if ( nlatsNH_ >= nlatsSH_ ) { nlatsLeg_ = nlatsNH_; } + else { + nlatsLeg_ = nlatsSH_; + } } else { - nlats = grid_.size(); - nlons = grid_.size(); - nlatsNH = nlats; + nlats = grid_.size(); + nlons = grid_.size(); + nlatsNH_ = nlats; + nlatsLeg_ = nlats; } - std::vector lats( nlatsNH ); + std::vector lats( nlatsLeg_ ); std::vector lons( nlons ); if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { grid::StructuredGrid g( grid_ ); - // TODO: remove legendre_begin and legendre_data (only legendre_ should be needed) - for ( size_t j = 0; j < nlatsNH; ++j ) { - lats[j] = g.y( j ) * util::Constants::degreesToRadians(); + if ( nlatsNH_ >= nlatsSH_ ) { + for ( size_t j = 0; j < nlatsLeg_; ++j ) { + lats[j] = g.y( j ) * util::Constants::degreesToRadians(); + } + } + else { + for ( size_t j = nlats - 1, idx = 0; idx < nlatsLeg_; --j, ++idx ) { + lats[idx] = -g.y( j ) * util::Constants::degreesToRadians(); + } } for ( size_t j = 0; j < nlons; ++j ) { lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians(); @@ -139,14 +166,14 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long legendre_sym_begin_[0] = 0; legendre_asym_begin_[0] = 0; for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { - size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsNH ); - size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsNH ); + size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ ); + size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ ); legendre_sym_begin_[jm + 1] = size_sym; legendre_asym_begin_[jm + 1] = size_asym; } alloc_aligned( legendre_sym_, size_sym ); alloc_aligned( legendre_asym_, size_asym ); - compute_legendre_polynomialsopt3( 
truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_, + compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_, legendre_sym_begin_.data(), legendre_asym_begin_.data() ); } @@ -162,6 +189,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long } #else { + // todo: only compute fourier_ if needed ATLAS_TRACE( "opt3 precomp Fourier" ); alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons ); int idx = 0; @@ -175,6 +203,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long } } { + // todo: only compute fouriertp_ if needed ATLAS_TRACE( "opt3 precomp Fourier tp" ); alloc_aligned( fouriertp_, 2 * ( truncation_ + 1 ) * nlons ); int idx = 0; @@ -190,7 +219,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long } } #endif -} // namespace atlas +} // namespace trans // -------------------------------------------------------------------------------------------------------------------- @@ -280,7 +309,6 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel ATLAS_TRACE( "invtrans_uv structured opt3" ); int nlats = g.ny(); int nlons = g.nxmax(); - int nlatsNH = nlats_northernHemisphere( nlats ); auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) { return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); }; @@ -305,7 +333,10 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel int size_asym = num_n( truncation_ + 1, jm, false ); int n_imag = 2; if ( jm == 0 ) { n_imag = 1; } - int size_fourier = nb_fields * n_imag * nlatsNH; + int size_fourier = nb_fields * n_imag * nlatsLeg_; + auto posFourier = [&]( int jfld, int imag, int jlat, int jm, int nlatsH ) { + return jfld + nb_fields * ( imag + n_imag * ( nlatsLeg_ - nlatsH + jlat ) ); + }; double* scalar_sym; double* scalar_asym; double* scl_fourier_sym; @@ -339,34 +370,34 @@ void 
TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel } { eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym ); - eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsNH ); - eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsNH ); + eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsLeg_ ); + eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLeg_ ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } if ( size_asym > 0 ) { eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym ); - eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsNH ); - eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH ); + eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsLeg_ ); + eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } #if 0 //ATLAS_HAVE_FFTW { //ATLAS_TRACE( "opt3 merge spheres" ); // northern hemisphere: - int idx = 0; - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) { for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ ); scl_fourier[posFFTW( jfld, imag, jlat, jm )] = scl_fourier_sym[idx] + scl_fourier_asym[idx]; } } } // southern hemisphere: - idx = 0; - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) { for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = posFourier( jfld, imag, jlat, jm, nlatsSH_ ); int jslat = nlats - jlat - 1; scl_fourier[posFFTW( jfld, imag, jslat, jm )] = scl_fourier_sym[idx] - 
scl_fourier_asym[idx]; @@ -407,24 +438,24 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel } } #else -#if 1 // 1: better for small number of columns, large truncation; 0: better for large number of columns +#if 0 // 1: better for small number of columns, large truncation; 0: better for large number of columns { //ATLAS_TRACE( "opt3 merge spheres" ); // northern hemisphere: - int idx = 0; - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) { for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ ); scl_fourier[posGemm1( jfld, imag, jlat, jm )] = scl_fourier_sym[idx] + scl_fourier_asym[idx]; } } } // southern hemisphere: - idx = 0; - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { + for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) { for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = posFourier( jfld, imag, jlat, jm, nlatsSH_ ); int jslat = nlats - jlat - 1; scl_fourier[posGemm1( jfld, imag, jslat, jm )] = scl_fourier_sym[idx] - scl_fourier_asym[idx]; @@ -469,27 +500,20 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel { //ATLAS_TRACE( "opt3 merge spheres" ); // northern hemisphere: - int ioff = jm * size_fourier_max; - int pos0 = ioff; - int idx = 0; - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - int poslat = pos0 + 2 * jlat; + for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) { for ( int imag = 0; imag < n_imag; imag++ ) { - int posimag = nb_fields * ( imag + poslat ); - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ ); scl_fourier[posGemm2( jfld, imag, jlat, jm )] = 
scl_fourier_sym[idx] + scl_fourier_asym[idx]; } } } // southern hemisphere: - idx = 0; - pos0 = 2 * ( nlats - 1 ) + ioff; - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - int poslat = pos0 - 2 * jlat; + for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) { for ( int imag = 0; imag < n_imag; imag++ ) { - int posimag = nb_fields * ( imag + poslat ); - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = posFourier( jfld, imag, jlat, jm, nlatsSH_ ); int jslat = nlats - jlat - 1; scl_fourier[posGemm2( jfld, imag, jslat, jm )] = scl_fourier_sym[idx] - scl_fourier_asym[idx]; diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h index 3673cd07c..286ebc9d5 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.h +++ b/src/atlas/trans/localopt3/TransLocalopt3.h @@ -112,6 +112,9 @@ class TransLocalopt3 : public trans::TransImpl { private: Grid grid_; int truncation_; + int nlatsNH_; + int nlatsSH_; + int nlatsLeg_; bool precompute_; double* legendre_sym_; double* legendre_asym_; diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 8a1034b86..6b556f663 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -704,7 +704,7 @@ CASE( "test_transgeneral_with_translib" ) { #endif #endif //----------------------------------------------------------------------------- -#if 1 +#if 0 CASE( "test_trans_vordiv_with_translib" ) { Log::info() << "test_trans_vordiv_with_translib" << std::endl; // test transgeneral by comparing its result with the trans library @@ -931,7 +931,7 @@ CASE( "test_trans_hires" ) { } #endif //----------------------------------------------------------------------------- -#if 0 +#if 1 CASE( "test_trans_domain" ) { Log::info() << "test_trans_domain" << std::endl; // test transgeneral by comparing with analytic solution on a cropped domain @@ -941,9 +941,11 @@ CASE( 
"test_trans_domain" ) { //Domain testdomain = ZonalBandDomain( {-90., 90.} ); //Domain testdomain = ZonalBandDomain( {-.5, .5} ); - Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} ); + //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} ); + //Domain testdomain = ZonalBandDomain( {-85., -86.} ); + Domain testdomain = RectangularDomain( {-5., 10.}, {5., 6.} ); // Grid: (Adjust the following line if the test takes too long!) - Grid g( "F8000", testdomain ); + Grid g( "F1280", testdomain ); Grid g_global( g.name() ); grid::StructuredGrid gs( g ); @@ -988,7 +990,7 @@ CASE( "test_trans_domain" ) { for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. && - icase < 1 ) { + icase < 1000 ) { auto start = std::chrono::system_clock::now(); for ( int j = 0; j < 2 * N * nb_scalar; j++ ) { sp[j] = 0.; @@ -1044,8 +1046,8 @@ CASE( "test_trans_domain" ) { std::chrono::duration elapsed_seconds = end - start; std::time_t end_time = std::chrono::system_clock::to_time_t( end ); std::string time_str = std::ctime( &end_time ); - Log::info() << "case " << icase << ", elapsed time: " << elapsed_seconds.count() - << "s. Now: " << time_str.substr( 0, time_str.length() - 1 ) << std::endl; + //Log::info() << "case " << icase << ", elapsed time: " << elapsed_seconds.count() + // << "s. 
Now: " << time_str.substr( 0, time_str.length() - 1 ) << std::endl; } k++; } From d253ac929e5630c9d0bf2acda153a503d0fd1c5b Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 26 Mar 2018 19:15:57 +0100 Subject: [PATCH 035/123] FFT is now used for zonal bands and dgemm for smaller domains --- src/atlas/trans/localopt3/TransLocalopt3.cc | 548 +++++++++----------- src/atlas/trans/localopt3/TransLocalopt3.h | 2 + 2 files changed, 254 insertions(+), 296 deletions(-) diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index d57b2810d..796f627ac 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -96,14 +96,15 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long #else eckit::linalg::LinearAlgebra::backend( "generic" ); // might want to choose backend with this command #endif - int nlats = 0; - int nlons = 0; - int neqtr = 0; - int nlatsNH = nlats_northernHemisphere( nlats ); - nlatsNH_ = 0; - nlatsSH_ = 0; - nlatsLeg_ = 0; - double sign = 1.; + int nlats = 0; + int nlons = 0; + int neqtr = 0; + int nlatsNH = nlats_northernHemisphere( nlats ); + useFFT_ = true; + dgemmMethod1_ = false; + nlatsNH_ = 0; + nlatsSH_ = 0; + nlatsLeg_ = 0; if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { grid::StructuredGrid g( grid_ ); nlats = g.ny(); @@ -124,8 +125,12 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long else { nlatsLeg_ = nlatsSH_; } + Grid g_global( grid.name() ); + grid::StructuredGrid gs_global( g_global ); + if ( nlons < 1.0 * gs_global.nxmax() ) { useFFT_ = false; } } else { + useFFT_ = false; nlats = grid_.size(); nlons = grid_.size(); nlatsNH_ = nlats; @@ -177,48 +182,55 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long legendre_sym_begin_.data(), legendre_asym_begin_.data() ); } - // precomputations for Fourier transformations: -#if 0 
//ATLAS_HAVE_FFTW - { - ATLAS_TRACE( "opt3 precomp FFTW" ); - int num_complex = ( nlons / 2 ) + 1; - fft_in_ = fftw_alloc_complex( nlats * num_complex ); - fft_out_ = fftw_alloc_real( nlats * nlons ); - plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons, - FFTW_ESTIMATE ); - } + // precomputations for Fourier transformations: + if ( useFFT_ ) { +#if ATLAS_HAVE_FFTW + { + ATLAS_TRACE( "opt3 precomp FFTW" ); + int num_complex = ( nlons / 2 ) + 1; + fft_in_ = fftw_alloc_complex( nlats * num_complex ); + fft_out_ = fftw_alloc_real( nlats * nlons ); + plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons, + FFTW_ESTIMATE ); + } + // other FFT implementations should be added with #elif statements #else - { - // todo: only compute fourier_ if needed - ATLAS_TRACE( "opt3 precomp Fourier" ); + useFFT_ = false; // no FFT implemented => default to dgemm +#endif + } + if ( !useFFT_ ) { alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons ); - int idx = 0; - for ( int jlon = 0; jlon < nlons; jlon++ ) { - double factor = 1.; - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - if ( jm > 0 ) { factor = 2.; } - fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part - fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part + if ( dgemmMethod1_ ) { + { + ATLAS_TRACE( "opt3 precomp Fourier" ); + int idx = 0; + for ( int jlon = 0; jlon < nlons; jlon++ ) { + double factor = 1.; + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + if ( jm > 0 ) { factor = 2.; } + fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part + fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part + } + } } } - } - { - // todo: only compute fouriertp_ if needed - ATLAS_TRACE( "opt3 precomp Fourier tp" ); - alloc_aligned( fouriertp_, 2 * ( truncation_ + 1 ) * nlons ); - int idx = 0; - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - 
double factor = 1.; - if ( jm > 0 ) { factor = 2.; } - for ( int jlon = 0; jlon < nlons; jlon++ ) { - fouriertp_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part - } - for ( int jlon = 0; jlon < nlons; jlon++ ) { - fouriertp_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part + else { + { + ATLAS_TRACE( "opt3 precomp Fourier tp" ); + int idx = 0; + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + double factor = 1.; + if ( jm > 0 ) { factor = 2.; } + for ( int jlon = 0; jlon < nlons; jlon++ ) { + fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part + } + for ( int jlon = 0; jlon < nlons; jlon++ ) { + fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part + } + } } } } -#endif } // namespace trans // -------------------------------------------------------------------------------------------------------------------- @@ -231,14 +243,16 @@ TransLocalopt3::TransLocalopt3( const Grid& grid, const long truncation, const e TransLocalopt3::~TransLocalopt3() { free_aligned( legendre_sym_ ); free_aligned( legendre_asym_ ); -#if 0 //ATLAS_HAVE_FFTW - fftw_destroy_plan( plan_ ); - fftw_free( fft_in_ ); - fftw_free( fft_out_ ); -#else - free_aligned( fourier_ ); - free_aligned( fouriertp_ ); + if ( useFFT_ ) { +#if ATLAS_HAVE_FFTW + fftw_destroy_plan( plan_ ); + fftw_free( fft_in_ ); + fftw_free( fft_out_ ); #endif + } + else { + free_aligned( fourier_ ); + } } // -------------------------------------------------------------------------------------------------------------------- @@ -321,6 +335,8 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel auto posGemm2 = [&]( int jfld, int imag, int jlat, int jm ) { return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); }; + auto posMethod = posGemm2; + if ( useFFT_ ) { auto posMethod = posFFTW; } int size_fourier_max = nb_fields * 2 * nlats; double* scl_fourier; alloc_aligned( scl_fourier, size_fourier_max * ( truncation 
+ 1 ) ); @@ -380,7 +396,6 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } -#if 0 //ATLAS_HAVE_FFTW { //ATLAS_TRACE( "opt3 merge spheres" ); // northern hemisphere: @@ -388,7 +403,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel for ( int imag = 0; imag < n_imag; imag++ ) { for ( int jfld = 0; jfld < nb_fields; jfld++ ) { int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ ); - scl_fourier[posFFTW( jfld, imag, jlat, jm )] = + scl_fourier[posMethod( jfld, imag, jlat, jm )] = scl_fourier_sym[idx] + scl_fourier_asym[idx]; } } @@ -399,7 +414,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel for ( int jfld = 0; jfld < nb_fields; jfld++ ) { int idx = posFourier( jfld, imag, jlat, jm, nlatsSH_ ); int jslat = nlats - jlat - 1; - scl_fourier[posFFTW( jfld, imag, jslat, jm )] = + scl_fourier[posMethod( jfld, imag, jslat, jm )] = scl_fourier_sym[idx] - scl_fourier_asym[idx]; } } @@ -411,296 +426,237 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel free_aligned( scl_fourier_asym ); } } - { - int num_complex = ( nlons / 2 ) + 1; + // Fourier transformation: + if ( useFFT_ ) { +#if ATLAS_HAVE_FFTW { - ATLAS_TRACE( "opt3 FFTW" ); - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int idx = 0; - for ( int jlat = 0; jlat < nlats; jlat++ ) { - fft_in_[idx++][0] = scl_fourier[posFFTW( jfld, 0, jlat, 0 )]; - for ( int jm = 1; jm < num_complex; jm++, idx++ ) { - for ( int imag = 0; imag < 2; imag++ ) { - if ( jm <= truncation_ ) { - fft_in_[idx][imag] = scl_fourier[posFFTW( jfld, imag, jlat, jm )]; - } - else { - fft_in_[idx][imag] = 0.; + int num_complex = ( nlons / 2 ) + 1; + { + ATLAS_TRACE( "opt3 FFTW" ); + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = 0; + for ( int jlat = 0; jlat < nlats; 
jlat++ ) { + fft_in_[idx++][0] = scl_fourier[posFFTW( jfld, 0, jlat, 0 )]; + for ( int jm = 1; jm < num_complex; jm++, idx++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + if ( jm <= truncation_ ) { + fft_in_[idx][imag] = scl_fourier[posFFTW( jfld, imag, jlat, jm )]; + } + else { + fft_in_[idx][imag] = 0.; + } } } } - } - fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ ); - for ( int j = 0; j < nlats * nlons; j++ ) { - gp_fields[j + jfld * nlats * nlons] = fft_out_[j]; + fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ ); + for ( int j = 0; j < nlats * nlons; j++ ) { + gp_fields[j + jfld * nlats * nlons] = fft_out_[j]; + } } } } +#endif } -#else -#if 0 // 1: better for small number of columns, large truncation; 0: better for large number of columns + else { + if ( dgemmMethod1_ ) { + // dgemm-method 1 + double* gp_opt3; + alloc_aligned( gp_opt3, nb_fields * grid_.size() ); { - //ATLAS_TRACE( "opt3 merge spheres" ); - // northern hemisphere: - for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) { - for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ ); - scl_fourier[posGemm1( jfld, imag, jlat, jm )] = - scl_fourier_sym[idx] + scl_fourier_asym[idx]; - } - } - } - // southern hemisphere: - for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) { - for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int idx = posFourier( jfld, imag, jlat, jm, nlatsSH_ ); - int jslat = nlats - jlat - 1; - scl_fourier[posGemm1( jfld, imag, jslat, jm )] = - scl_fourier_sym[idx] - scl_fourier_asym[idx]; - } - } - } + ATLAS_TRACE( "opt3 Fourier dgemm" ); + eckit::linalg::Matrix A( scl_fourier, nb_fields * g.ny(), ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, g.nxmax() ); + eckit::linalg::Matrix C( gp_opt3, nb_fields * g.ny(), g.nxmax() ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } - free_aligned( 
scalar_sym ); - free_aligned( scalar_asym ); - free_aligned( scl_fourier_sym ); - free_aligned( scl_fourier_asym ); - } - } - // Fourier transformation: - double* gp_opt3; - alloc_aligned( gp_opt3, nb_fields * grid_.size() ); - { - ATLAS_TRACE( "opt3 Fourier dgemm" ); - eckit::linalg::Matrix A( scl_fourier, nb_fields * g.ny(), ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, g.nxmax() ); - eckit::linalg::Matrix C( gp_opt3, nb_fields * g.ny(), g.nxmax() ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - - // Transposition in grid point space: - { - ATLAS_TRACE( "opt3 transposition in gp-space" ); - int idx = 0; - for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int pos_tp = jlon + g.nxmax() * ( jlat + g.ny() * ( jfld ) ); - //int pos = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) ); - gp_fields[pos_tp] = gp_opt3[idx++]; // = gp_opt3[pos] - } - } - } - } - free_aligned( gp_opt3 ); -#else + // Transposition in grid point space: { - //ATLAS_TRACE( "opt3 merge spheres" ); - // northern hemisphere: - for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) { - for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ ); - scl_fourier[posGemm2( jfld, imag, jlat, jm )] = - scl_fourier_sym[idx] + scl_fourier_asym[idx]; - } - } - } - // southern hemisphere: - for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) { - for ( int imag = 0; imag < n_imag; imag++ ) { + ATLAS_TRACE( "opt3 transposition in gp-space" ); + int idx = 0; + for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int idx = posFourier( jfld, imag, jlat, jm, nlatsSH_ ); - int jslat = nlats - jlat - 1; - scl_fourier[posGemm2( jfld, imag, jslat, jm )] = - scl_fourier_sym[idx] - 
scl_fourier_asym[idx]; + int pos_tp = jlon + g.nxmax() * ( jlat + g.ny() * ( jfld ) ); + //int pos = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) ); + gp_fields[pos_tp] = gp_opt3[idx++]; // = gp_opt3[pos] } } } } - free_aligned( scalar_sym ); - free_aligned( scalar_asym ); - free_aligned( scl_fourier_sym ); - free_aligned( scl_fourier_asym ); + free_aligned( gp_opt3 ); } - } - - // Fourier transformation: - { - ATLAS_TRACE( "opt3 Fourier dgemm" ); - eckit::linalg::Matrix A( fouriertp_, g.nxmax(), ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * g.ny() ); - eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - -#endif -#endif - // Computing u,v from U,V: + else { + // dgemm-method 2 { - if ( nb_vordiv_fields > 0 ) { - ATLAS_TRACE( "opt3 u,v from U,V" ); - std::vector coslats( nlats ); - for ( size_t j = 0; j < nlats; ++j ) { - coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); - } - int idx = 0; - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { - gp_fields[idx] /= coslats[jlat]; - idx++; - } - } - } - } + ATLAS_TRACE( "opt3 Fourier dgemm" ); + eckit::linalg::Matrix A( fourier_, g.nxmax(), ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * g.ny() ); + eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } - free_aligned( scl_fourier ); } - else { - ATLAS_TRACE( "invtrans_uv unstructured opt3" ); + } // namespace trans + // Computing u,v from U,V: + { + if ( nb_vordiv_fields > 0 ) { + ATLAS_TRACE( "opt3 u,v from U,V" ); + std::vector coslats( nlats ); + for ( size_t j = 0; j < nlats; ++j ) { + coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); + } int idx = 0; - for ( 
PointXY p : grid_.xy() ) { - double lon = p.x() * util::Constants::degreesToRadians(); - double lat = p.y() * util::Constants::degreesToRadians(); - double trcFT = truncation; - - // Legendre transform: - //invtrans_legendreopt3( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, - // legReal.data(), legImag.data() ); - - // Fourier transform: - //invtrans_fourieropt3( trcFT, lon, nb_fields, legReal.data(), legImag.data(), - // gp_tmp.data() + ( nb_fields * idx ) ); - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { + gp_fields[idx] /= coslats[jlat]; + idx++; + } } - ++idx; } } } - } // namespace trans + free_aligned( scl_fourier ); + } // namespace atlas + else { + ATLAS_TRACE( "invtrans_uv unstructured opt3" ); + int idx = 0; + for ( PointXY p : grid_.xy() ) { + double lon = p.x() * util::Constants::degreesToRadians(); + double lat = p.y() * util::Constants::degreesToRadians(); + double trcFT = truncation; + + // Legendre transform: + //invtrans_legendreopt3( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, + // legReal.data(), legImag.data() ); + + // Fourier transform: + //invtrans_fourieropt3( trcFT, lon, nb_fields, legReal.data(), legImag.data(), + // gp_tmp.data() + ( nb_fields * idx ) ); + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + } + ++idx; + } + } + } // namespace trans +} // namespace atlas - // -------------------------------------------------------------------------------------------------------------------- +// -------------------------------------------------------------------------------------------------------------------- - void TransLocalopt3::invtrans( const int nb_vordiv_fields, const double 
vorticity_spectra[], - const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); - } +void TransLocalopt3::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], + const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); +} - void extend_truncationopt3( const int old_truncation, const int nb_fields, const double old_spectra[], - double new_spectra[] ) { - int k = 0, k_old = 0; - for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber - for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber - for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { // field - if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; } - else { - new_spectra[k++] = old_spectra[k_old++]; - } - } +void extend_truncationopt3( const int old_truncation, const int nb_fields, const double old_spectra[], + double new_spectra[] ) { + int k = 0, k_old = 0; + for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber + for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber + for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { // field + if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; } + else { + new_spectra[k++] = old_spectra[k_old++]; } } } } + } +} - // -------------------------------------------------------------------------------------------------------------------- - - void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[], - const int nb_vordiv_fields, const double vorticity_spectra[], - const double 
divergence_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - ATLAS_TRACE( "TransLocalopt3::invtrans" ); - int nb_gp = grid_.size(); - int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; - if ( nb_vordiv_fields > 0 ) { - std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector U_ext( nb_vordiv_spec_ext, 0. ); - std::vector V_ext( nb_vordiv_spec_ext, 0. ); - - { - ATLAS_TRACE( "opt3 extend vordiv" ); - // increase truncation in vorticity_spectra and divergence_spectra: - extend_truncationopt3( truncation_, nb_vordiv_fields, vorticity_spectra, - vorticity_spectra_extended.data() ); - extend_truncationopt3( truncation_, nb_vordiv_fields, divergence_spectra, - divergence_spectra_extended.data() ); - } +// -------------------------------------------------------------------------------------------------------------------- - { - ATLAS_TRACE( "vordiv to UV opt3" ); - // call vd2uv to compute u and v in spectral space - trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt3" ) ); - vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), - divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); - } +void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, + const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + ATLAS_TRACE( "TransLocalopt3::invtrans" ); + int nb_gp = grid_.size(); + int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; + if ( nb_vordiv_fields > 0 ) { + std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector U_ext( nb_vordiv_spec_ext, 0. 
); + std::vector V_ext( nb_vordiv_spec_ext, 0. ); + + { + ATLAS_TRACE( "opt3 extend vordiv" ); + // increase truncation in vorticity_spectra and divergence_spectra: + extend_truncationopt3( truncation_, nb_vordiv_fields, vorticity_spectra, + vorticity_spectra_extended.data() ); + extend_truncationopt3( truncation_, nb_vordiv_fields, divergence_spectra, + divergence_spectra_extended.data() ); + } - // perform spectral transform to compute all fields in grid point space - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), - gp_fields + nb_gp * nb_vordiv_fields, config ); - } - if ( nb_scalar_fields > 0 ) { - int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; - std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. ); - extend_truncationopt3( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); - invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), - gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); - } + { + ATLAS_TRACE( "vordiv to UV opt3" ); + // call vd2uv to compute u and v in spectral space + trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt3" ) ); + vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), + divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); } - // -------------------------------------------------------------------------------------------------------------------- + // perform spectral transform to compute all fields in grid point space + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), + gp_fields + nb_gp * nb_vordiv_fields, config ); + } + if ( nb_scalar_fields > 0 ) { + int nb_scalar_spec_ext = 2 * 
legendre_size( truncation_ + 1 ) * nb_scalar_fields; + std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. ); + extend_truncationopt3( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); + invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), + gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); + } +} - void TransLocalopt3::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) - const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. - } +// -------------------------------------------------------------------------------------------------------------------- - // -------------------------------------------------------------------------------------------------------------------- +void TransLocalopt3::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. +} - void TransLocalopt3::dirtrans( const FieldSet& gpfields, FieldSet& spfields, - const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. - } +// -------------------------------------------------------------------------------------------------------------------- - // -------------------------------------------------------------------------------------------------------------------- +void TransLocalopt3::dirtrans( const FieldSet& gpfields, FieldSet& spfields, + const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. +} - void TransLocalopt3::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, - const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. 
- } +// -------------------------------------------------------------------------------------------------------------------- - // -------------------------------------------------------------------------------------------------------------------- +void TransLocalopt3::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, + const eckit::Configuration& config ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. +} - void TransLocalopt3::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], - const eckit::Configuration& ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. - } +// -------------------------------------------------------------------------------------------------------------------- - // -------------------------------------------------------------------------------------------------------------------- +void TransLocalopt3::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], + const eckit::Configuration& ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. +} - void TransLocalopt3::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], - double divergence_spectra[], const eckit::Configuration& ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. - } +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt3::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], + double divergence_spectra[], const eckit::Configuration& ) const { + NOTIMP; + // Not implemented and not planned. + // Use the TransIFS implementation instead. 
+} - // -------------------------------------------------------------------------------------------------------------------- +// -------------------------------------------------------------------------------------------------------------------- - } // namespace trans +} // namespace trans } // namespace atlas diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h index 286ebc9d5..54798549a 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.h +++ b/src/atlas/trans/localopt3/TransLocalopt3.h @@ -111,6 +111,8 @@ class TransLocalopt3 : public trans::TransImpl { private: Grid grid_; + bool useFFT_; + bool dgemmMethod1_; int truncation_; int nlatsNH_; int nlatsSH_; From ec1547eef9259836e04cd601b60d37238e9cafdc Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 3 Apr 2018 18:10:27 +0100 Subject: [PATCH 036/123] FFT is now used for cropped grids with large regions. The variable fft_threshold determines how many longitudes are needed to switch to FFT --- src/atlas/trans/localopt3/TransLocalopt3.cc | 44 +++++++++++++++------ src/atlas/trans/localopt3/TransLocalopt3.h | 2 + 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index 796f627ac..556dc438f 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -127,9 +127,26 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long } Grid g_global( grid.name() ); grid::StructuredGrid gs_global( g_global ); - if ( nlons < 1.0 * gs_global.nxmax() ) { useFFT_ = false; } + nlonsGlobal_ = gs_global.nxmax(); + jlonMin_ = 0; + double lonmin = fmod( g.x( 0, 0 ), 360 ); + if ( lonmin < 0. ) { lonmin += 360.; } + if ( nlons < nlonsGlobal_ ) { + double fft_threshold = 0.05; // fraction of latitudes of the full grid up to which FFT is used. 
+ // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine + // on which this code is running! + if ( nlons < fft_threshold * nlonsGlobal_ ) { useFFT_ = false; } + else { + // need to use FFT with cropped grid + for ( size_t j = 0; j < nlonsGlobal_; ++j ) { + if ( gs_global.x( j, 0 ) == lonmin ) { jlonMin_ = j; } + } + } + } + //Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << std::endl; } else { + // unstructured grid useFFT_ = false; nlats = grid_.size(); nlons = grid_.size(); @@ -187,11 +204,11 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long #if ATLAS_HAVE_FFTW { ATLAS_TRACE( "opt3 precomp FFTW" ); - int num_complex = ( nlons / 2 ) + 1; + int num_complex = ( nlonsGlobal_ / 2 ) + 1; fft_in_ = fftw_alloc_complex( nlats * num_complex ); - fft_out_ = fftw_alloc_real( nlats * nlons ); - plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons, - FFTW_ESTIMATE ); + fft_out_ = fftw_alloc_real( nlats * nlonsGlobal_ ); + plan_ = fftw_plan_many_dft_c2r( 1, &nlonsGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, + nlonsGlobal_, FFTW_ESTIMATE ); } // other FFT implementations should be added with #elif statements #else @@ -302,10 +319,8 @@ void gp_transposeopt3( const int nb_size, const int nb_fields, const double gp_t } //----------------------------------------------------------------------------- -// Routine to compute the spectral transform by using a localopt3 Fourier -// transformation -// for a grid (same latitude for all longitudes, allows to compute Legendre -// functions +// Routine to compute the spectral transform by using a localopt3 Fourier transformation +// for a grid (same latitude for all longitudes, allows to compute Legendre functions // once for all longitudes). U and v components are divided by cos(latitude) for // nb_vordiv_fields > 0. 
// @@ -430,7 +445,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel if ( useFFT_ ) { #if ATLAS_HAVE_FFTW { - int num_complex = ( nlons / 2 ) + 1; + int num_complex = ( nlonsGlobal_ / 2 ) + 1; { ATLAS_TRACE( "opt3 FFTW" ); for ( int jfld = 0; jfld < nb_fields; jfld++ ) { @@ -449,8 +464,13 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel } } fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ ); - for ( int j = 0; j < nlats * nlons; j++ ) { - gp_fields[j + jfld * nlats * nlons] = fft_out_[j]; + for ( int jlat = 0; jlat < nlats; jlat++ ) { + for ( int jlon = 0; jlon < nlons; jlon++ ) { + int j = jlon + jlonMin_; + if ( j >= nlonsGlobal_ ) { j -= nlonsGlobal_; } + gp_fields[jlon + nlons * ( jlat + nlats * jfld )] = + fft_out_[j + nlonsGlobal_ * jlat]; + } } } } diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h index 54798549a..ac33f6395 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.h +++ b/src/atlas/trans/localopt3/TransLocalopt3.h @@ -117,6 +117,8 @@ class TransLocalopt3 : public trans::TransImpl { int nlatsNH_; int nlatsSH_; int nlatsLeg_; + int jlonMin_; + int nlonsGlobal_; bool precompute_; double* legendre_sym_; double* legendre_asym_; From ab6214ad0534900744cfda9976be455a5cf3d9d7 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 3 Apr 2018 19:12:59 +0100 Subject: [PATCH 037/123] fixed bug with dgemmMethod1 == true --- src/atlas/trans/localopt3/TransLocalopt3.cc | 64 ++++++++++----------- 1 file changed, 29 insertions(+), 35 deletions(-) diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index 556dc438f..d98bb522f 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -96,12 +96,14 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long #else eckit::linalg::LinearAlgebra::backend( "generic" ); // 
might want to choose backend with this command #endif + double fft_threshold = 2.; // 0.05; // fraction of latitudes of the full grid up to which FFT is used. + // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine + // on which this code is running! int nlats = 0; int nlons = 0; int neqtr = 0; - int nlatsNH = nlats_northernHemisphere( nlats ); useFFT_ = true; - dgemmMethod1_ = false; + dgemmMethod1_ = true; nlatsNH_ = 0; nlatsSH_ = 0; nlatsLeg_ = 0; @@ -131,12 +133,9 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long jlonMin_ = 0; double lonmin = fmod( g.x( 0, 0 ), 360 ); if ( lonmin < 0. ) { lonmin += 360.; } - if ( nlons < nlonsGlobal_ ) { - double fft_threshold = 0.05; // fraction of latitudes of the full grid up to which FFT is used. - // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine - // on which this code is running! - if ( nlons < fft_threshold * nlonsGlobal_ ) { useFFT_ = false; } - else { + if ( nlons < fft_threshold * nlonsGlobal_ ) { useFFT_ = false; } + else { + if ( nlons < nlonsGlobal_ ) { // need to use FFT with cropped grid for ( size_t j = 0; j < nlonsGlobal_; ++j ) { if ( gs_global.x( j, 0 ) == lonmin ) { jlonMin_ = j; } @@ -336,22 +335,16 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel // Transform if ( grid::StructuredGrid g = grid_ ) { ATLAS_TRACE( "invtrans_uv structured opt3" ); - int nlats = g.ny(); - int nlons = g.nxmax(); - auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) { - return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); - }; - /*auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) { - return jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) + jm * nb_fields * 2 * nlats ); - };*/ - auto posGemm1 = [&]( int jfld, int imag, int jlat, int jm ) { - return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) ); - }; - auto posGemm2 = 
[&]( int jfld, int imag, int jlat, int jm ) { - return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); + int nlats = g.ny(); + int nlons = g.nxmax(); + auto posMethod = [&]( int jfld, int imag, int jlat, int jm ) { + if ( useFFT_ || !dgemmMethod1_ ) { + return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); + } + else { + return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) ); + }; }; - auto posMethod = posGemm2; - if ( useFFT_ ) { auto posMethod = posFFTW; } int size_fourier_max = nb_fields * 2 * nlats; double* scl_fourier; alloc_aligned( scl_fourier, size_fourier_max * ( truncation + 1 ) ); @@ -451,11 +444,11 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel for ( int jfld = 0; jfld < nb_fields; jfld++ ) { int idx = 0; for ( int jlat = 0; jlat < nlats; jlat++ ) { - fft_in_[idx++][0] = scl_fourier[posFFTW( jfld, 0, jlat, 0 )]; + fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0 )]; for ( int jm = 1; jm < num_complex; jm++, idx++ ) { for ( int imag = 0; imag < 2; imag++ ) { if ( jm <= truncation_ ) { - fft_in_[idx][imag] = scl_fourier[posFFTW( jfld, imag, jlat, jm )]; + fft_in_[idx][imag] = scl_fourier[posMethod( jfld, imag, jlat, jm )]; } else { fft_in_[idx][imag] = 0.; @@ -480,13 +473,14 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel else { if ( dgemmMethod1_ ) { // dgemm-method 1 + // should be faster for small domains or large truncation double* gp_opt3; alloc_aligned( gp_opt3, nb_fields * grid_.size() ); { ATLAS_TRACE( "opt3 Fourier dgemm" ); - eckit::linalg::Matrix A( scl_fourier, nb_fields * g.ny(), ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, g.nxmax() ); - eckit::linalg::Matrix C( gp_opt3, nb_fields * g.ny(), g.nxmax() ); + eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlons ); + 
eckit::linalg::Matrix C( gp_opt3, nb_fields * nlats, nlons ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } @@ -494,11 +488,11 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel { ATLAS_TRACE( "opt3 transposition in gp-space" ); int idx = 0; - for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { + for ( int jlon = 0; jlon < nlons; jlon++ ) { + for ( int jlat = 0; jlat < nlats; jlat++ ) { for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int pos_tp = jlon + g.nxmax() * ( jlat + g.ny() * ( jfld ) ); - //int pos = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) ); + int pos_tp = jlon + nlons * ( jlat + nlats * ( jfld ) ); + //int pos = jfld + nb_fields * ( jlat + nlats * ( jlon ) ); gp_fields[pos_tp] = gp_opt3[idx++]; // = gp_opt3[pos] } } @@ -510,9 +504,9 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel // dgemm-method 2 { ATLAS_TRACE( "opt3 Fourier dgemm" ); - eckit::linalg::Matrix A( fourier_, g.nxmax(), ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * g.ny() ); - eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() ); + eckit::linalg::Matrix A( fourier_, nlons, ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats ); + eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } } From 23777612fb04f363244b930f256501d5f3125dcf Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 4 Apr 2018 12:23:19 +0100 Subject: [PATCH 038/123] fixed the truncation parameter of invtrans_uv => no extension necessary. 
Comparing performance between FFT and 2 dgemm methods in opt, opt2, opt3 --- .../trans/localopt/LegendrePolynomialsopt.cc | 10 +- src/atlas/trans/localopt/TransLocalopt.cc | 450 +++++++++-------- src/atlas/trans/localopt/TransLocalopt.h | 15 + .../localopt2/LegendrePolynomialsopt2.cc | 10 +- src/atlas/trans/localopt2/TransLocalopt2.cc | 470 +++++++++--------- src/atlas/trans/localopt2/TransLocalopt2.h | 7 + src/atlas/trans/localopt3/TransLocalopt3.cc | 35 +- src/tests/trans/test_transgeneral.cc | 41 +- 8 files changed, 544 insertions(+), 494 deletions(-) diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc index 413620301..9c5562fa2 100644 --- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc +++ b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc @@ -161,12 +161,6 @@ void compute_legendre_polynomialsopt( { //ATLAS_TRACE( "add to global arrays" ); - // take factor 2 for m > 0 into account: - for ( int jm = 1; jm <= trc; ++jm ) { - for ( int jn = jm; jn <= trc; ++jn ) { - //legpol[idxmn( jm, jn )] *= 2.; - } - } for ( int jm = 0; jm <= trc; jm++ ) { int is1 = 0, ia1 = 0; for ( int jn = jm; jn <= trc; jn++ ) { @@ -185,11 +179,11 @@ void compute_legendre_polynomialsopt( //for ( int jn = jm; jn <= trc; jn++ ) { for ( int jn = trc; jn >= jm; jn-- ) { if ( ( jn - jm ) % 2 == 0 ) { - int is = leg_start_sym[jm] + jlat + nlats * is2++; + int is = leg_start_sym[jm] + is1 * jlat + is2++; leg_sym[is] = legpol[idxmn( jm, jn )]; } else { - int ia = leg_start_asym[jm] + jlat + nlats * ia2++; + int ia = leg_start_asym[jm] + ia1 * jlat + ia2++; leg_asym[ia] = legpol[idxmn( jm, jn )]; } } diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index 112b70fb9..48f90d386 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -26,9 +26,6 @@ #if ATLAS_HAVE_MKL #include "mkl.h" #endif -#if ATLAS_HAVE_FFTW -#include -#endif 
namespace atlas { namespace trans { @@ -99,27 +96,75 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t #else eckit::linalg::LinearAlgebra::backend( "generic" ); // might want to choose backend with this command #endif - int nlats = 0; - int nlons = 0; - int nlatsNH = nlats_northernHemisphere( nlats ); + double fft_threshold = 0.; // 0.05; // fraction of latitudes of the full grid up to which FFT is used. + // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine + // on which this code is running! + int nlats = 0; + int nlons = 0; + int neqtr = 0; + useFFT_ = true; + dgemmMethod1_ = true; + nlatsNH_ = 0; + nlatsSH_ = 0; + nlatsLeg_ = 0; if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { grid::StructuredGrid g( grid_ ); - nlats = g.ny(); - nlons = g.nxmax(); - nlatsNH = nlats_northernHemisphere( nlats ); + nlats = g.ny(); + nlons = g.nxmax(); + for ( size_t j = 0; j < nlats; ++j ) { + // assumptions: latitudes in g.y(j) are monotone and decreasing + // no assumption on whether we have 0, 1 or 2 latitudes at the equator + double lat = g.y( j ); + if ( lat > 0. ) { nlatsNH_++; } + if ( lat == 0. ) { neqtr++; } + if ( lat < 0. ) { nlatsSH_++; } + } + if ( neqtr > 0 ) { + nlatsNH_++; + nlatsSH_++; + } + if ( nlatsNH_ >= nlatsSH_ ) { nlatsLeg_ = nlatsNH_; } + else { + nlatsLeg_ = nlatsSH_; + } + Grid g_global( grid.name() ); + grid::StructuredGrid gs_global( g_global ); + nlonsGlobal_ = gs_global.nxmax(); + jlonMin_ = 0; + double lonmin = fmod( g.x( 0, 0 ), 360 ); + if ( lonmin < 0. 
) { lonmin += 360.; } + if ( nlons < fft_threshold * nlonsGlobal_ ) { useFFT_ = false; } + else { + if ( nlons < nlonsGlobal_ ) { + // need to use FFT with cropped grid + for ( size_t j = 0; j < nlonsGlobal_; ++j ) { + if ( gs_global.x( j, 0 ) == lonmin ) { jlonMin_ = j; } + } + } + } + //Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << std::endl; } else { - nlats = grid_.size(); - nlons = grid_.size(); - nlatsNH = nlats; + // unstructured grid + useFFT_ = false; + nlats = grid_.size(); + nlons = grid_.size(); + nlatsNH_ = nlats; + nlatsLeg_ = nlats; } - std::vector lats( nlatsNH ); + std::vector lats( nlatsLeg_ ); std::vector lons( nlons ); if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { grid::StructuredGrid g( grid_ ); - // TODO: remove legendre_begin and legendre_data (only legendre_ should be needed) - for ( size_t j = 0; j < nlatsNH; ++j ) { - lats[j] = g.y( j ) * util::Constants::degreesToRadians(); + if ( nlatsNH_ >= nlatsSH_ ) { + for ( size_t j = 0; j < nlatsLeg_; ++j ) { + lats[j] = g.y( j ) * util::Constants::degreesToRadians(); + } + } + else { + for ( size_t j = nlats - 1, idx = 0; idx < nlatsLeg_; --j, ++idx ) { + lats[idx] = -g.y( j ) * util::Constants::degreesToRadians(); + } } for ( size_t j = 0; j < nlons; ++j ) { lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians(); @@ -142,45 +187,67 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t legendre_sym_begin_[0] = 0; legendre_asym_begin_[0] = 0; for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { - size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsNH ); - size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsNH ); + size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ ); + size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ ); legendre_sym_begin_[jm + 1] = size_sym; legendre_asym_begin_[jm + 1] = size_asym; } alloc_aligned( legendre_sym_, size_sym ); 
alloc_aligned( legendre_asym_, size_asym ); - compute_legendre_polynomialsopt( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_, + compute_legendre_polynomialsopt( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_, legendre_sym_begin_.data(), legendre_asym_begin_.data() ); } - // precomputations for Fourier transformations: -#if !ATLAS_HAVE_FFTW - { - ATLAS_TRACE( "opt precomp Fourier" ); - alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons ); - int idx = 0; - for ( int jlon = 0; jlon < nlons; jlon++ ) { - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - fourier_[idx++] = +std::cos( jm * lons[jlon] ); // real part - fourier_[idx++] = -std::sin( jm * lons[jlon] ); // imaginary part - } + // precomputations for Fourier transformations: + if ( useFFT_ ) { +#if ATLAS_HAVE_FFTW + { + ATLAS_TRACE( "opt precomp FFTW" ); + int num_complex = ( nlonsGlobal_ / 2 ) + 1; + fft_in_ = fftw_alloc_complex( nlats * num_complex ); + fft_out_ = fftw_alloc_real( nlats * nlonsGlobal_ ); + plan_ = fftw_plan_many_dft_c2r( 1, &nlonsGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, + nlonsGlobal_, FFTW_ESTIMATE ); } + // other FFT implementations should be added with #elif statements +#else + useFFT_ = false; // no FFT implemented => default to dgemm +#endif } - { - ATLAS_TRACE( "opt precomp Fourier tp" ); - alloc_aligned( fouriertp_, 2 * ( truncation_ + 1 ) * nlons ); - int idx = 0; - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - for ( int jlon = 0; jlon < nlons; jlon++ ) { - fouriertp_[idx++] = +std::cos( jm * lons[jlon] ); // real part + if ( !useFFT_ ) { + alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons ); + if ( dgemmMethod1_ ) { + { + ATLAS_TRACE( "opt precomp Fourier" ); + int idx = 0; + for ( int jlon = 0; jlon < nlons; jlon++ ) { + double factor = 1.; + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + if ( jm > 0 ) { factor = 2.; } + fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part 
+ fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part + } + } } - for ( int jlon = 0; jlon < nlons; jlon++ ) { - fouriertp_[idx++] = -std::sin( jm * lons[jlon] ); // imaginary part + } + else { + { + ATLAS_TRACE( "opt precomp Fourier tp" ); + int idx = 0; + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + double factor = 1.; + if ( jm > 0 ) { factor = 2.; } + for ( int jlon = 0; jlon < nlons; jlon++ ) { + fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part + } + for ( int jlon = 0; jlon < nlons; jlon++ ) { + fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part + } + } } } } -#endif -} // namespace atlas +} // namespace trans // -------------------------------------------------------------------------------------------------------------------- @@ -192,10 +259,16 @@ TransLocalopt::TransLocalopt( const Grid& grid, const long truncation, const eck TransLocalopt::~TransLocalopt() { free_aligned( legendre_sym_ ); free_aligned( legendre_asym_ ); -#if !ATLAS_HAVE_FFTW - free_aligned( fourier_ ); - free_aligned( fouriertp_ ); + if ( useFFT_ ) { +#if ATLAS_HAVE_FFTW + fftw_destroy_plan( plan_ ); + fftw_free( fft_in_ ); + fftw_free( fft_out_ ); #endif + } + else { + free_aligned( fourier_ ); + } } // -------------------------------------------------------------------------------------------------------------------- @@ -244,13 +317,17 @@ void gp_transposeopt( const int nb_size, const int nb_fields, const double gp_tm } //----------------------------------------------------------------------------- -// Routine to compute the spectral transform by using a localopt Fourier -// transformation -// for a grid (same latitude for all longitudes, allows to compute Legendre -// functions +// Routine to compute the spectral transform by using a localopt Fourier transformation +// for a grid (same latitude for all longitudes, allows to compute Legendre functions // once for all longitudes). 
U and v components are divided by cos(latitude) for // nb_vordiv_fields > 0. // +// Legendre polynomials are computed up to truncation_+1 to be accurate for vorticity and +// divergence computation. The parameter truncation is the truncation used in storing the +// spectral data scalar_spectra and can be different from truncation_. If truncation is +// larger than truncation_+1 the transform will behave as if the spectral data was truncated +// to truncation_+1. +// // Author: // Andreas Mueller *ECMWF* // @@ -263,39 +340,20 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field // Transform if ( grid::StructuredGrid g = grid_ ) { ATLAS_TRACE( "invtrans_uv structured opt" ); - int nlats = g.ny(); - int nlons = g.nxmax(); - int nlatsNH = nlats_northernHemisphere( nlats ); - auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) { - return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); - }; - /*auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) { - return jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) + jm * nb_fields * 2 * nlats ); - };*/ - auto posGemm1 = [&]( int jfld, int imag, int jlat, int jm ) { - return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) ); - }; - auto posGemm2 = [&]( int jfld, int imag, int jlat, int jm ) { - return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); + int nlats = g.ny(); + int nlons = g.nxmax(); + auto posMethod = [&]( int jfld, int imag, int jlat, int jm ) { + if ( useFFT_ || !dgemmMethod1_ ) { + return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); + } + else { + return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) ); + }; }; int size_fourier_max = nb_fields * 2 * nlats; double* scl_fourier; - alloc_aligned( scl_fourier, size_fourier_max * ( truncation + 1 ) ); + alloc_aligned( scl_fourier, size_fourier_max * ( truncation_ + 1 ) ); -#if ATLAS_HAVE_FFTW - int num_complex = ( nlons / 2 ) + 1; - 
fftw_complex* fft_in = fftw_alloc_complex( nlats * num_complex * nb_fields ); - fftw_plan plan = fftw_plan_many_dft_c2r( 1, &nlons, nlats * nb_fields, fft_in, NULL, 1, num_complex, - gp_fields, NULL, 1, nlons, FFTW_ESTIMATE ); - for ( int j = 0; j < nlats * num_complex * nb_fields; j++ ) { - for ( int imag = 0; imag < 2; imag++ ) { - fft_in[j][imag] = 0.; - } - } - auto posFFTWin = [&]( int jfld, int jlat, int jm ) { - return jm + num_complex * ( jlat + nlats * ( jfld ) ); - }; -#endif // Legendre transform: { ATLAS_TRACE( "opt Legendre dgemm" ); @@ -304,7 +362,10 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field int size_asym = num_n( truncation_ + 1, jm, false ); int n_imag = 2; if ( jm == 0 ) { n_imag = 1; } - int size_fourier = nb_fields * n_imag * nlatsNH; + int size_fourier = nb_fields * n_imag * nlatsLeg_; + auto posFourier = [&]( int jfld, int imag, int jlat, int jm, int nlatsH ) { + return jfld + nb_fields * ( imag + n_imag * ( nlatsLeg_ - nlatsH + jlat ) ); + }; double* scalar_sym; double* scalar_asym; double* scl_fourier_sym; @@ -316,14 +377,28 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field { //ATLAS_TRACE( "opt Legendre split" ); int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2; - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + // the choice between the following two code lines determines whether + // total wavenumbers are summed in an ascending or descending order. + // The trans library in IFS uses descending order because it should + // be more accurate (higher wavenumbers have smaller contributions). + // This also needs to be changed when splitting the spectral data in + // compute_legendre_polynomialsopt! 
+ //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { + for ( int jn = truncation_ + 1; jn >= jm; jn-- ) { for ( int imag = 0; imag < n_imag; imag++ ) { - //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { //ascending - for ( int jn = truncation_ + 1; jn >= jm; jn-- ) { // descending + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) ); - if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } + if ( jn <= truncation && jm < truncation ) { + if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } + else { + scalar_asym[ia++] = scalar_spectra[idx + ioff]; + } + } else { - scalar_asym[ia++] = scalar_spectra[idx + ioff]; + if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = 0.; } + else { + scalar_asym[ia++] = 0.; + } } } } @@ -331,37 +406,36 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym ); } { - eckit::linalg::Matrix A( legendre_sym_ + legendre_sym_begin_[jm], nlatsNH, size_sym ); - eckit::linalg::Matrix B( scalar_sym, size_sym, nb_fields * n_imag ); - eckit::linalg::Matrix C( scl_fourier_sym, nlatsNH, nb_fields * n_imag ); + eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym ); + eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsLeg_ ); + eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLeg_ ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } if ( size_asym > 0 ) { - eckit::linalg::Matrix A( legendre_asym_ + legendre_asym_begin_[jm], nlatsNH, size_asym ); - eckit::linalg::Matrix B( scalar_asym, size_asym, nb_fields * n_imag ); - eckit::linalg::Matrix C( scl_fourier_asym, nlatsNH, nb_fields * n_imag ); + eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym ); + eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsLeg_ ); + 
eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } -#if 1 //ATLAS_HAVE_FFTW { //ATLAS_TRACE( "opt merge spheres" ); // northern hemisphere: - int idx = 0; - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) { for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jlat = 0; jlat < nlatsNH; jlat++, idx++ ) { - fft_in[posFFTWin( jfld, jlat, jm )][imag] = + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ ); + scl_fourier[posMethod( jfld, imag, jlat, jm )] = scl_fourier_sym[idx] + scl_fourier_asym[idx]; } } } // southern hemisphere: - idx = 0; - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) { for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jlat = 0; jlat < nlatsNH; jlat++, idx++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = posFourier( jfld, imag, jlat, jm, nlatsSH_ ); int jslat = nlats - jlat - 1; - fft_in[posFFTWin( jfld, jslat, jm )][imag] = + scl_fourier[posMethod( jfld, imag, jslat, jm )] = scl_fourier_sym[idx] - scl_fourier_asym[idx]; } } @@ -373,130 +447,83 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field free_aligned( scl_fourier_asym ); } } - { + // Fourier transformation: + if ( useFFT_ ) { +#if ATLAS_HAVE_FFTW { - ATLAS_TRACE( "opt FFTW" ); - { fftw_execute( plan ); } - } - } - fftw_destroy_plan( plan ); - fftw_free( fft_in ); -#else -#if 0 // 1: better for small number of columns, large truncation; 0: better for large number of columns + int num_complex = ( nlonsGlobal_ / 2 ) + 1; { - //ATLAS_TRACE( "opt merge spheres" ); - // northern hemisphere: - int ioff = jm * size_fourier_max; - int pos0 = ioff; - int idx = 0; - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - int poslat = pos0 + 2 * jlat; - for ( int imag = 0; imag < n_imag; imag++ ) { - int posimag 
= nb_fields * ( imag + poslat ); - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { - scl_fourier[posGemm1( jfld, imag, jlat, jm )] = - scl_fourier_sym[idx] + scl_fourier_asym[idx]; + ATLAS_TRACE( "opt FFTW" ); + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = 0; + for ( int jlat = 0; jlat < nlats; jlat++ ) { + fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0 )]; + for ( int jm = 1; jm < num_complex; jm++, idx++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + if ( jm <= truncation_ ) { + fft_in_[idx][imag] = scl_fourier[posMethod( jfld, imag, jlat, jm )]; + } + else { + fft_in_[idx][imag] = 0.; + } + } } } - } - // southern hemisphere: - idx = 0; - pos0 = 2 * ( nlats - 1 ) + ioff; - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - int poslat = pos0 - 2 * jlat; - for ( int imag = 0; imag < n_imag; imag++ ) { - int posimag = nb_fields * ( imag + poslat ); - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { - int jslat = nlats - jlat - 1; - scl_fourier[posGemm1( jfld, imag, jslat, jm )] = - scl_fourier_sym[idx] - scl_fourier_asym[idx]; + fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ ); + for ( int jlat = 0; jlat < nlats; jlat++ ) { + for ( int jlon = 0; jlon < nlons; jlon++ ) { + int j = jlon + jlonMin_; + if ( j >= nlonsGlobal_ ) { j -= nlonsGlobal_; } + gp_fields[jlon + nlons * ( jlat + nlats * jfld )] = + fft_out_[j + nlonsGlobal_ * jlat]; } } } } - free_aligned( scalar_sym ); - free_aligned( scalar_asym ); - free_aligned( scl_fourier_sym ); - free_aligned( scl_fourier_asym ); } +#endif } - - // Fourier transformation: - double* gp_opt; - alloc_aligned( gp_opt, nb_fields * grid_.size() ); - { - ATLAS_TRACE( "opt Fourier dgemm" ); - eckit::linalg::Matrix A( scl_fourier, nb_fields * g.ny(), ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, g.nxmax() ); - eckit::linalg::Matrix C( gp_opt, nb_fields * g.ny(), g.nxmax() ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - - // 
Transposition in grid point space: - { - ATLAS_TRACE( "opt transposition in gp-space" ); - int idx = 0; - for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int pos_tp = jlon + g.nxmax() * ( jlat + g.ny() * ( jfld ) ); - //int pos = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) ); - gp_fields[pos_tp] = gp_opt[idx++]; // = gp_opt[pos] - } + else { + if ( dgemmMethod1_ ) { + // dgemm-method 1 + // should be faster for small domains or large truncation + double* gp_opt; + alloc_aligned( gp_opt, nb_fields * grid_.size() ); + { + ATLAS_TRACE( "opt Fourier dgemm method 1" ); + eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlons ); + eckit::linalg::Matrix C( gp_opt, nb_fields * nlats, nlons ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } - } - } - free_aligned( gp_opt ); -#else + + // Transposition in grid point space: { - //ATLAS_TRACE( "opt merge spheres" ); - // northern hemisphere: - int ioff = jm * size_fourier_max; - int pos0 = ioff; - int idx = 0; - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - int poslat = pos0 + 2 * jlat; - for ( int imag = 0; imag < n_imag; imag++ ) { - int posimag = nb_fields * ( imag + poslat ); - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { - scl_fourier[posGemm2( jfld, imag, jlat, jm )] = - scl_fourier_sym[idx] + scl_fourier_asym[idx]; - } - } - } - // southern hemisphere: - idx = 0; - pos0 = 2 * ( nlats - 1 ) + ioff; - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - int poslat = pos0 - 2 * jlat; - for ( int imag = 0; imag < n_imag; imag++ ) { - int posimag = nb_fields * ( imag + poslat ); - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { - int jslat = nlats - jlat - 1; - scl_fourier[posGemm2( jfld, imag, jslat, jm )] = - scl_fourier_sym[idx] - scl_fourier_asym[idx]; + ATLAS_TRACE( "opt transposition in 
gp-space" ); + int idx = 0; + for ( int jlon = 0; jlon < nlons; jlon++ ) { + for ( int jlat = 0; jlat < nlats; jlat++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int pos_tp = jlon + nlons * ( jlat + nlats * ( jfld ) ); + //int pos = jfld + nb_fields * ( jlat + nlats * ( jlon ) ); + gp_fields[pos_tp] = gp_opt[idx++]; // = gp_opt[pos] } } } } - free_aligned( scalar_sym ); - free_aligned( scalar_asym ); - free_aligned( scl_fourier_sym ); - free_aligned( scl_fourier_asym ); + free_aligned( gp_opt ); } - } - - // Fourier transformation: - { - ATLAS_TRACE( "opt Fourier dgemm" ); - eckit::linalg::Matrix A( fouriertp_, g.nxmax(), ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * g.ny() ); - eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - -#endif -#endif + else { + // dgemm-method 2 + { + ATLAS_TRACE( "opt Fourier dgemm method 2" ); + eckit::linalg::Matrix A( fourier_, nlons, ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats ); + eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + } + } // namespace trans // Computing u,v from U,V: { if ( nb_vordiv_fields > 0 ) { @@ -517,7 +544,7 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field } } free_aligned( scl_fourier ); - } + } // namespace atlas else { ATLAS_TRACE( "invtrans_uv unstructured opt" ); int idx = 0; @@ -539,8 +566,8 @@ void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_field ++idx; } } - } -} // namespace trans + } // namespace trans +} // namespace atlas // -------------------------------------------------------------------------------------------------------------------- @@ -603,11 +630,8 @@ void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_sp 
gp_fields + nb_gp * nb_vordiv_fields, config ); } if ( nb_scalar_fields > 0 ) { - int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; - std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. ); - extend_truncationopt( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); - invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), - gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); + invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields, + config ); } } diff --git a/src/atlas/trans/localopt/TransLocalopt.h b/src/atlas/trans/localopt/TransLocalopt.h index 55b7a074e..d81445f5b 100644 --- a/src/atlas/trans/localopt/TransLocalopt.h +++ b/src/atlas/trans/localopt/TransLocalopt.h @@ -15,6 +15,9 @@ #include "atlas/array.h" #include "atlas/grid/Grid.h" #include "atlas/trans/Trans.h" +#if ATLAS_HAVE_FFTW +#include +#endif //----------------------------------------------------------------------------- // Forward declarations @@ -107,7 +110,14 @@ class TransLocalopt : public trans::TransImpl { private: Grid grid_; + bool useFFT_; + bool dgemmMethod1_; int truncation_; + int nlatsNH_; + int nlatsSH_; + int nlatsLeg_; + int jlonMin_; + int nlonsGlobal_; bool precompute_; double* legendre_sym_; double* legendre_asym_; @@ -116,6 +126,11 @@ class TransLocalopt : public trans::TransImpl { std::vector legendre_begin_; std::vector legendre_sym_begin_; std::vector legendre_asym_begin_; +#if ATLAS_HAVE_FFTW + fftw_complex* fft_in_; + double* fft_out_; + fftw_plan plan_; +#endif }; //----------------------------------------------------------------------------- diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc index 1d9f86daa..9d11ea850 100644 --- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc +++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc @@ -161,12 +161,6 @@ 
void compute_legendre_polynomialsopt2( { //ATLAS_TRACE( "add to global arrays" ); - // take factor 2 for m > 0 into account: - for ( int jm = 1; jm <= trc; ++jm ) { - for ( int jn = jm; jn <= trc; ++jn ) { - legpol[idxmn( jm, jn )] *= 2.; - } - } for ( int jm = 0; jm <= trc; jm++ ) { int is1 = 0, ia1 = 0; for ( int jn = jm; jn <= trc; jn++ ) { @@ -185,11 +179,11 @@ void compute_legendre_polynomialsopt2( //for ( int jn = jm; jn <= trc; jn++ ) { for ( int jn = trc; jn >= jm; jn-- ) { if ( ( jn - jm ) % 2 == 0 ) { - int is = leg_start_sym[jm] + jlat + nlats * is2++; + int is = leg_start_sym[jm] + is1 * jlat + is2++; leg_sym[is] = legpol[idxmn( jm, jn )]; } else { - int ia = leg_start_asym[jm] + jlat + nlats * ia2++; + int ia = leg_start_asym[jm] + ia1 * jlat + ia2++; leg_asym[ia] = legpol[idxmn( jm, jn )]; } } diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index ac1da815b..2091f57b9 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -96,27 +96,75 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long #else eckit::linalg::LinearAlgebra::backend( "generic" ); // might want to choose backend with this command #endif - int nlats = 0; - int nlons = 0; - int nlatsNH = nlats_northernHemisphere( nlats ); + double fft_threshold = 0.05; // fraction of latitudes of the full grid up to which FFT is used. + // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine + // on which this code is running! 
+ int nlats = 0; + int nlons = 0; + int neqtr = 0; + useFFT_ = true; + dgemmMethod1_ = true; + nlatsNH_ = 0; + nlatsSH_ = 0; + nlatsLeg_ = 0; if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { grid::StructuredGrid g( grid_ ); - nlats = g.ny(); - nlons = g.nxmax(); - nlatsNH = nlats_northernHemisphere( nlats ); + nlats = g.ny(); + nlons = g.nxmax(); + for ( size_t j = 0; j < nlats; ++j ) { + // assumptions: latitudes in g.y(j) are monotone and decreasing + // no assumption on whether we have 0, 1 or 2 latitudes at the equator + double lat = g.y( j ); + if ( lat > 0. ) { nlatsNH_++; } + if ( lat == 0. ) { neqtr++; } + if ( lat < 0. ) { nlatsSH_++; } + } + if ( neqtr > 0 ) { + nlatsNH_++; + nlatsSH_++; + } + if ( nlatsNH_ >= nlatsSH_ ) { nlatsLeg_ = nlatsNH_; } + else { + nlatsLeg_ = nlatsSH_; + } + Grid g_global( grid.name() ); + grid::StructuredGrid gs_global( g_global ); + nlonsGlobal_ = gs_global.nxmax(); + jlonMin_ = 0; + double lonmin = fmod( g.x( 0, 0 ), 360 ); + if ( lonmin < 0. 
) { lonmin += 360.; } + if ( nlons < fft_threshold * nlonsGlobal_ ) { useFFT_ = false; } + else { + if ( nlons < nlonsGlobal_ ) { + // need to use FFT with cropped grid + for ( size_t j = 0; j < nlonsGlobal_; ++j ) { + if ( gs_global.x( j, 0 ) == lonmin ) { jlonMin_ = j; } + } + } + } + //Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << std::endl; } else { - nlats = grid_.size(); - nlons = grid_.size(); - nlatsNH = nlats; + // unstructured grid + useFFT_ = false; + nlats = grid_.size(); + nlons = grid_.size(); + nlatsNH_ = nlats; + nlatsLeg_ = nlats; } - std::vector lats( nlatsNH ); + std::vector lats( nlatsLeg_ ); std::vector lons( nlons ); if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { grid::StructuredGrid g( grid_ ); - // TODO: remove legendre_begin and legendre_data (only legendre_ should be needed) - for ( size_t j = 0; j < nlatsNH; ++j ) { - lats[j] = g.y( j ) * util::Constants::degreesToRadians(); + if ( nlatsNH_ >= nlatsSH_ ) { + for ( size_t j = 0; j < nlatsLeg_; ++j ) { + lats[j] = g.y( j ) * util::Constants::degreesToRadians(); + } + } + else { + for ( size_t j = nlats - 1, idx = 0; idx < nlatsLeg_; --j, ++idx ) { + lats[idx] = -g.y( j ) * util::Constants::degreesToRadians(); + } } for ( size_t j = 0; j < nlons; ++j ) { lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians(); @@ -139,54 +187,67 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long legendre_sym_begin_[0] = 0; legendre_asym_begin_[0] = 0; for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { - size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsNH ); - size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsNH ); + size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ ); + size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ ); legendre_sym_begin_[jm + 1] = size_sym; legendre_asym_begin_[jm + 1] = size_asym; } alloc_aligned( legendre_sym_, size_sym ); 
alloc_aligned( legendre_asym_, size_asym ); - compute_legendre_polynomialsopt2( truncation_ + 1, nlatsNH, lats.data(), legendre_sym_, legendre_asym_, + compute_legendre_polynomialsopt2( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_, legendre_sym_begin_.data(), legendre_asym_begin_.data() ); } - // precomputations for Fourier transformations: + // precomputations for Fourier transformations: + if ( useFFT_ ) { #if ATLAS_HAVE_FFTW - { - ATLAS_TRACE( "opt2 precomp FFTW" ); - int num_complex = ( nlons / 2 ) + 1; - fft_in_ = fftw_alloc_complex( nlats * num_complex ); - fft_out_ = fftw_alloc_real( nlats * nlons ); - plan_ = fftw_plan_many_dft_c2r( 1, &nlons, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlons, - FFTW_ESTIMATE ); - } + { + ATLAS_TRACE( "opt2 precomp FFTW" ); + int num_complex = ( nlonsGlobal_ / 2 ) + 1; + fft_in_ = fftw_alloc_complex( nlats * num_complex ); + fft_out_ = fftw_alloc_real( nlats * nlonsGlobal_ ); + plan_ = fftw_plan_many_dft_c2r( 1, &nlonsGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, + nlonsGlobal_, FFTW_ESTIMATE ); + } + // other FFT implementations should be added with #elif statements #else - { - ATLAS_TRACE( "opt2 precomp Fourier" ); + useFFT_ = false; // no FFT implemented => default to dgemm +#endif + } + if ( !useFFT_ ) { alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons ); - int idx = 0; - for ( int jlon = 0; jlon < nlons; jlon++ ) { - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - fourier_[idx++] = +std::cos( jm * lons[jlon] ); // real part - fourier_[idx++] = -std::sin( jm * lons[jlon] ); // imaginary part + if ( dgemmMethod1_ ) { + { + ATLAS_TRACE( "opt2 precomp Fourier" ); + int idx = 0; + for ( int jlon = 0; jlon < nlons; jlon++ ) { + double factor = 1.; + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + if ( jm > 0 ) { factor = 2.; } + fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part + fourier_[idx++] = -std::sin( jm * lons[jlon] ) * 
factor; // imaginary part + } + } } } - } - { - ATLAS_TRACE( "opt2 precomp Fourier tp" ); - alloc_aligned( fouriertp_, 2 * ( truncation_ + 1 ) * nlons ); - int idx = 0; - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - for ( int jlon = 0; jlon < nlons; jlon++ ) { - fouriertp_[idx++] = +std::cos( jm * lons[jlon] ); // real part - } - for ( int jlon = 0; jlon < nlons; jlon++ ) { - fouriertp_[idx++] = -std::sin( jm * lons[jlon] ); // imaginary part + else { + { + ATLAS_TRACE( "opt2 precomp Fourier tp" ); + int idx = 0; + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + double factor = 1.; + if ( jm > 0 ) { factor = 2.; } + for ( int jlon = 0; jlon < nlons; jlon++ ) { + fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part + } + for ( int jlon = 0; jlon < nlons; jlon++ ) { + fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part + } + } } } } -#endif -} // namespace atlas +} // namespace trans // -------------------------------------------------------------------------------------------------------------------- @@ -198,14 +259,16 @@ TransLocalopt2::TransLocalopt2( const Grid& grid, const long truncation, const e TransLocalopt2::~TransLocalopt2() { free_aligned( legendre_sym_ ); free_aligned( legendre_asym_ ); + if ( useFFT_ ) { #if ATLAS_HAVE_FFTW - fftw_destroy_plan( plan_ ); - fftw_free( fft_in_ ); - fftw_free( fft_out_ ); -#else - free_aligned( fourier_ ); - free_aligned( fouriertp_ ); + fftw_destroy_plan( plan_ ); + fftw_free( fft_in_ ); + fftw_free( fft_out_ ); #endif + } + else { + free_aligned( fourier_ ); + } } // -------------------------------------------------------------------------------------------------------------------- @@ -255,13 +318,17 @@ void gp_transposeopt2( const int nb_size, const int nb_fields, const double gp_t } //----------------------------------------------------------------------------- -// Routine to compute the spectral transform by using a localopt2 Fourier -// transformation -// for a grid 
(same latitude for all longitudes, allows to compute Legendre -// functions +// Routine to compute the spectral transform by using a localopt2 Fourier transformation +// for a grid (same latitude for all longitudes, allows to compute Legendre functions // once for all longitudes). U and v components are divided by cos(latitude) for // nb_vordiv_fields > 0. // +// Legendre polynomials are computed up to truncation_+1 to be accurate for vorticity and +// divergence computation. The parameter truncation is the truncation used in storing the +// spectral data scalar_spectra and can be different from truncation_. If truncation is +// larger than truncation_+1 the transform will behave as if the spectral data was truncated +// to truncation_+1. +// // Author: // Andreas Mueller *ECMWF* // @@ -274,24 +341,19 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel // Transform if ( grid::StructuredGrid g = grid_ ) { ATLAS_TRACE( "invtrans_uv structured opt2" ); - int nlats = g.ny(); - int nlons = g.nxmax(); - int nlatsNH = nlats_northernHemisphere( nlats ); - auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) { - return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); - }; - /*auto posFFTW = [&]( int jfld, int imag, int jlat, int jm ) { - return jfld + nb_fields * ( imag + 2 * ( nlats - jlat - 1 ) + jm * nb_fields * 2 * nlats ); - };*/ - auto posGemm1 = [&]( int jfld, int imag, int jlat, int jm ) { - return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) ); - }; - auto posGemm2 = [&]( int jfld, int imag, int jlat, int jm ) { - return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); + int nlats = g.ny(); + int nlons = g.nxmax(); + auto posMethod = [&]( int jfld, int imag, int jlat, int jm ) { + if ( useFFT_ || !dgemmMethod1_ ) { + return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); + } + else { + return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) ); + }; }; 
int size_fourier_max = nb_fields * 2 * nlats; double* scl_fourier; - alloc_aligned( scl_fourier, size_fourier_max * ( truncation + 1 ) ); + alloc_aligned( scl_fourier, size_fourier_max * ( truncation_ + 1 ) ); // Legendre transform: { @@ -301,7 +363,10 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel int size_asym = num_n( truncation_ + 1, jm, false ); int n_imag = 2; if ( jm == 0 ) { n_imag = 1; } - int size_fourier = nb_fields * n_imag * nlatsNH; + int size_fourier = nb_fields * n_imag * nlatsLeg_; + auto posFourier = [&]( int jfld, int imag, int jlat, int jm, int nlatsH ) { + return jfld + nb_fields * ( imag + n_imag * ( nlatsLeg_ - nlatsH + jlat ) ); + }; double* scalar_sym; double* scalar_asym; double* scl_fourier_sym; @@ -313,64 +378,65 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel { //ATLAS_TRACE( "opt2 Legendre split" ); int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2; - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + // the choice between the following two code lines determines whether + // total wavenumbers are summed in an ascending or descending order. + // The trans library in IFS uses descending order because it should + // be more accurate (higher wavenumbers have smaller contributions). + // This also needs to be changed when splitting the spectral data in + // compute_legendre_polynomialsopt2! 
+ //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { + for ( int jn = truncation_ + 1; jn >= jm; jn-- ) { for ( int imag = 0; imag < n_imag; imag++ ) { - //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { //ascending - for ( int jn = truncation_ + 1; jn >= jm; jn-- ) { // descending + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) ); - if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } + if ( jn <= truncation && jm < truncation ) { + if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } + else { + scalar_asym[ia++] = scalar_spectra[idx + ioff]; + } + } else { - scalar_asym[ia++] = scalar_spectra[idx + ioff]; + if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = 0.; } + else { + scalar_asym[ia++] = 0.; + } } } } } ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym ); } - { // transposed - eckit::linalg::Matrix A( legendre_sym_ + legendre_sym_begin_[jm], nlatsNH, size_sym ); - eckit::linalg::Matrix B( scalar_sym, size_sym, nb_fields * n_imag ); - eckit::linalg::Matrix C( scl_fourier_sym, nlatsNH, nb_fields * n_imag ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - if ( size_asym > 0 ) { - eckit::linalg::Matrix A( legendre_asym_ + legendre_asym_begin_[jm], nlatsNH, size_asym ); - eckit::linalg::Matrix B( scalar_asym, size_asym, nb_fields * n_imag ); - eckit::linalg::Matrix C( scl_fourier_asym, nlatsNH, nb_fields * n_imag ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - /*{ // non-transposed + { eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym ); - eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsNH ); - eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsNH ); + eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsLeg_ ); + eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLeg_ ); 
eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } if ( size_asym > 0 ) { eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym ); - eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsNH ); - eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsNH ); + eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsLeg_ ); + eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ ); eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - }*/ -#if 1 //ATLAS_HAVE_FFTW + } { //ATLAS_TRACE( "opt2 merge spheres" ); // northern hemisphere: - int idx = 0; - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) { for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jlat = 0; jlat < nlatsNH; jlat++, idx++ ) { - scl_fourier[posFFTW( jfld, imag, jlat, jm )] = + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ ); + scl_fourier[posMethod( jfld, imag, jlat, jm )] = scl_fourier_sym[idx] + scl_fourier_asym[idx]; } } } // southern hemisphere: - idx = 0; - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) { for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jlat = 0; jlat < nlatsNH; jlat++, idx++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = posFourier( jfld, imag, jlat, jm, nlatsSH_ ); int jslat = nlats - jlat - 1; - scl_fourier[posFFTW( jfld, imag, jslat, jm )] = + scl_fourier[posMethod( jfld, imag, jslat, jm )] = scl_fourier_sym[idx] - scl_fourier_asym[idx]; } } @@ -382,148 +448,83 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel free_aligned( scl_fourier_asym ); } } - { - int num_complex = ( nlons / 2 ) + 1; + // Fourier transformation: + if ( useFFT_ ) { +#if ATLAS_HAVE_FFTW { - ATLAS_TRACE( "opt2 FFTW" ); - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - 
int idx = 0; - for ( int jlat = 0; jlat < nlats; jlat++ ) { - fft_in_[idx++][0] = scl_fourier[posFFTW( jfld, 0, jlat, 0 )]; - for ( int jm = 1; jm < num_complex; jm++, idx++ ) { - for ( int imag = 0; imag < 2; imag++ ) { - if ( jm <= truncation_ ) { - fft_in_[idx][imag] = scl_fourier[posFFTW( jfld, imag, jlat, jm )] / 2.; - } - else { - fft_in_[idx][imag] = 0.; - } - } - } - } - fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ ); - for ( int j = 0; j < nlats * nlons; j++ ) { - gp_fields[j + jfld * nlats * nlons] = fft_out_[j]; - } - } - } - } -#else -#if 1 // 1: better for small number of columns, large truncation; 0: better for large number of columns + int num_complex = ( nlonsGlobal_ / 2 ) + 1; { - //ATLAS_TRACE( "opt2 merge spheres" ); - // northern hemisphere: - int ioff = jm * size_fourier_max; - int pos0 = ioff; - int idx = 0; - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - int poslat = pos0 + 2 * jlat; - for ( int imag = 0; imag < n_imag; imag++ ) { - int posimag = nb_fields * ( imag + poslat ); - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { - scl_fourier[posGemm1( jfld, imag, jlat, jm )] = - scl_fourier_sym[idx] + scl_fourier_asym[idx]; + ATLAS_TRACE( "opt2 FFTW" ); + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = 0; + for ( int jlat = 0; jlat < nlats; jlat++ ) { + fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0 )]; + for ( int jm = 1; jm < num_complex; jm++, idx++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + if ( jm <= truncation_ ) { + fft_in_[idx][imag] = scl_fourier[posMethod( jfld, imag, jlat, jm )]; + } + else { + fft_in_[idx][imag] = 0.; + } + } } } - } - // southern hemisphere: - idx = 0; - pos0 = 2 * ( nlats - 1 ) + ioff; - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - int poslat = pos0 - 2 * jlat; - for ( int imag = 0; imag < n_imag; imag++ ) { - int posimag = nb_fields * ( imag + poslat ); - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { - int jslat = nlats - jlat - 1; - scl_fourier[posGemm1( 
jfld, imag, jslat, jm )] = - scl_fourier_sym[idx] - scl_fourier_asym[idx]; + fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ ); + for ( int jlat = 0; jlat < nlats; jlat++ ) { + for ( int jlon = 0; jlon < nlons; jlon++ ) { + int j = jlon + jlonMin_; + if ( j >= nlonsGlobal_ ) { j -= nlonsGlobal_; } + gp_fields[jlon + nlons * ( jlat + nlats * jfld )] = + fft_out_[j + nlonsGlobal_ * jlat]; } } } } - free_aligned( scalar_sym ); - free_aligned( scalar_asym ); - free_aligned( scl_fourier_sym ); - free_aligned( scl_fourier_asym ); } +#endif } - - // Fourier transformation: - double* gp_opt2; - alloc_aligned( gp_opt2, nb_fields * grid_.size() ); - { - ATLAS_TRACE( "opt2 Fourier dgemm" ); - eckit::linalg::Matrix A( scl_fourier, nb_fields * g.ny(), ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, g.nxmax() ); - eckit::linalg::Matrix C( gp_opt2, nb_fields * g.ny(), g.nxmax() ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - - // Transposition in grid point space: - { - ATLAS_TRACE( "opt2 transposition in gp-space" ); - int idx = 0; - for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int pos_tp = jlon + g.nxmax() * ( jlat + g.ny() * ( jfld ) ); - //int pos = jfld + nb_fields * ( jlat + g.ny() * ( jlon ) ); - gp_fields[pos_tp] = gp_opt2[idx++]; // = gp_opt2[pos] - } + else { + if ( dgemmMethod1_ ) { + // dgemm-method 1 + // should be faster for small domains or large truncation + double* gp_opt2; + alloc_aligned( gp_opt2, nb_fields * grid_.size() ); + { + ATLAS_TRACE( "opt2 Fourier dgemm method 1" ); + eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlons ); + eckit::linalg::Matrix C( gp_opt2, nb_fields * nlats, nlons ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); } - } - } - free_aligned( gp_opt2 ); -#else + + 
// Transposition in grid point space: { - //ATLAS_TRACE( "opt2 merge spheres" ); - // northern hemisphere: - int ioff = jm * size_fourier_max; - int pos0 = ioff; - int idx = 0; - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - int poslat = pos0 + 2 * jlat; - for ( int imag = 0; imag < n_imag; imag++ ) { - int posimag = nb_fields * ( imag + poslat ); - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { - scl_fourier[posGemm2( jfld, imag, jlat, jm )] = - scl_fourier_sym[idx] + scl_fourier_asym[idx]; - } - } - } - // southern hemisphere: - idx = 0; - pos0 = 2 * ( nlats - 1 ) + ioff; - for ( int jlat = 0; jlat < nlatsNH; jlat++ ) { - int poslat = pos0 - 2 * jlat; - for ( int imag = 0; imag < n_imag; imag++ ) { - int posimag = nb_fields * ( imag + poslat ); - for ( int jfld = 0; jfld < nb_fields; jfld++, idx++ ) { - int jslat = nlats - jlat - 1; - scl_fourier[posGemm2( jfld, imag, jslat, jm )] = - scl_fourier_sym[idx] - scl_fourier_asym[idx]; + ATLAS_TRACE( "opt2 transposition in gp-space" ); + int idx = 0; + for ( int jlon = 0; jlon < nlons; jlon++ ) { + for ( int jlat = 0; jlat < nlats; jlat++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int pos_tp = jlon + nlons * ( jlat + nlats * ( jfld ) ); + //int pos = jfld + nb_fields * ( jlat + nlats * ( jlon ) ); + gp_fields[pos_tp] = gp_opt2[idx++]; // = gp_opt2[pos] } } } } - free_aligned( scalar_sym ); - free_aligned( scalar_asym ); - free_aligned( scl_fourier_sym ); - free_aligned( scl_fourier_asym ); + free_aligned( gp_opt2 ); } - } - - // Fourier transformation: - { - ATLAS_TRACE( "opt2 Fourier dgemm" ); - eckit::linalg::Matrix A( fouriertp_, g.nxmax(), ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * g.ny() ); - eckit::linalg::Matrix C( gp_fields, g.nxmax(), nb_fields * g.ny() ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - -#endif -#endif + else { + // dgemm-method 2 + { + ATLAS_TRACE( "opt2 Fourier dgemm method 2" ); + 
eckit::linalg::Matrix A( fourier_, nlons, ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats ); + eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + } + } // namespace trans // Computing u,v from U,V: { if ( nb_vordiv_fields > 0 ) { @@ -544,7 +545,7 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel } } free_aligned( scl_fourier ); - } + } // namespace atlas else { ATLAS_TRACE( "invtrans_uv unstructured opt2" ); int idx = 0; @@ -566,8 +567,8 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel ++idx; } } - } -} // namespace trans + } // namespace trans +} // namespace atlas // -------------------------------------------------------------------------------------------------------------------- @@ -631,11 +632,8 @@ void TransLocalopt2::invtrans( const int nb_scalar_fields, const double scalar_s gp_fields + nb_gp * nb_vordiv_fields, config ); } if ( nb_scalar_fields > 0 ) { - int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; - std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. 
); - extend_truncationopt2( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); - invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), - gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); + invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields, + config ); } } diff --git a/src/atlas/trans/localopt2/TransLocalopt2.h b/src/atlas/trans/localopt2/TransLocalopt2.h index c5f5f2aa4..2bc500ea6 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.h +++ b/src/atlas/trans/localopt2/TransLocalopt2.h @@ -111,7 +111,14 @@ class TransLocalopt2 : public trans::TransImpl { private: Grid grid_; + bool useFFT_; + bool dgemmMethod1_; int truncation_; + int nlatsNH_; + int nlatsSH_; + int nlatsLeg_; + int jlonMin_; + int nlonsGlobal_; bool precompute_; double* legendre_sym_; double* legendre_asym_; diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index d98bb522f..8f5f9c2d3 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -96,14 +96,14 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long #else eckit::linalg::LinearAlgebra::backend( "generic" ); // might want to choose backend with this command #endif - double fft_threshold = 2.; // 0.05; // fraction of latitudes of the full grid up to which FFT is used. + double fft_threshold = 0.05; // fraction of latitudes of the full grid up to which FFT is used. // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine // on which this code is running! int nlats = 0; int nlons = 0; int neqtr = 0; useFFT_ = true; - dgemmMethod1_ = true; + dgemmMethod1_ = false; nlatsNH_ = 0; nlatsSH_ = 0; nlatsLeg_ = 0; @@ -323,6 +323,12 @@ void gp_transposeopt3( const int nb_size, const int nb_fields, const double gp_t // once for all longitudes). 
U and v components are divided by cos(latitude) for // nb_vordiv_fields > 0. // +// Legendre polynomials are computed up to truncation_+1 to be accurate for vorticity and +// divergence computation. The parameter truncation is the truncation used in storing the +// spectral data scalar_spectra and can be different from truncation_. If truncation is +// larger than truncation_+1 the transform will behave as if the spectral data was truncated +// to truncation_+1. +// // Author: // Andreas Mueller *ECMWF* // @@ -347,7 +353,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel }; int size_fourier_max = nb_fields * 2 * nlats; double* scl_fourier; - alloc_aligned( scl_fourier, size_fourier_max * ( truncation + 1 ) ); + alloc_aligned( scl_fourier, size_fourier_max * ( truncation_ + 1 ) ); // Legendre transform: { @@ -383,9 +389,17 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel for ( int imag = 0; imag < n_imag; imag++ ) { for ( int jfld = 0; jfld < nb_fields; jfld++ ) { idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) ); - if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } + if ( jn <= truncation && jm < truncation ) { + if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } + else { + scalar_asym[ia++] = scalar_spectra[idx + ioff]; + } + } else { - scalar_asym[ia++] = scalar_spectra[idx + ioff]; + if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = 0.; } + else { + scalar_asym[ia++] = 0.; + } } } } @@ -477,7 +491,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel double* gp_opt3; alloc_aligned( gp_opt3, nb_fields * grid_.size() ); { - ATLAS_TRACE( "opt3 Fourier dgemm" ); + ATLAS_TRACE( "opt3 Fourier dgemm method 1" ); eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 ); eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlons ); eckit::linalg::Matrix C( gp_opt3, nb_fields * 
nlats, nlons ); @@ -503,7 +517,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel else { // dgemm-method 2 { - ATLAS_TRACE( "opt3 Fourier dgemm" ); + ATLAS_TRACE( "opt3 Fourier dgemm method 2" ); eckit::linalg::Matrix A( fourier_, nlons, ( truncation_ + 1 ) * 2 ); eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats ); eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats ); @@ -618,11 +632,8 @@ void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_s gp_fields + nb_gp * nb_vordiv_fields, config ); } if ( nb_scalar_fields > 0 ) { - int nb_scalar_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; - std::vector scalar_spectra_extended( nb_scalar_spec_ext, 0. ); - extend_truncationopt3( truncation_, nb_scalar_fields, scalar_spectra, scalar_spectra_extended.data() ); - invtrans_uv( truncation_ + 1, nb_scalar_fields, 0, scalar_spectra_extended.data(), - gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); + invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields, + config ); } } diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 6b556f663..6774d9a29 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -853,7 +853,7 @@ CASE( "test_trans_vordiv_with_translib" ) { } #endif //----------------------------------------------------------------------------- -#if 0 +#if 1 CASE( "test_trans_hires" ) { Log::info() << "test_trans_hires" << std::endl; // test transgeneral by comparing its result with the trans library @@ -862,28 +862,34 @@ CASE( "test_trans_hires" ) { std::ostream& out = Log::info(); double tolerance = 1.e-13; - // Grid: (Adjust the following line if the test takes too long!) 
- Grid g( "F1280" ); #if ATLAS_HAVE_TRANS - std::string transTypes[4] = {"localopt", "localopt2", "localopt3", "ifs"}; - //std::string transTypes[2] = {"localopt", "localopt2"}; - //std::string transTypes[3] = {"localopt", "localopt2", "ifs"}; + //std::string transTypes[4] = {"localopt", "localopt2", "localopt3", "ifs"}; + //std::string transTypes[2] = {"localopt2", "localopt3"}; + std::string transTypes[3] = {"localopt", "localopt2", "localopt3"}; //std::string transTypes[1] = {"localopt3"}; #else std::string transTypes[1] = {"localopt2"}; #endif + + //Domain testdomain = ZonalBandDomain( {-90., 90.} ); + //Domain testdomain = ZonalBandDomain( {-.5, .5} ); + //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} ); + //Domain testdomain = ZonalBandDomain( {-85., -86.} ); + Domain testdomain = RectangularDomain( {-1., 1.}, {5., 5.5} ); + // Grid: (Adjust the following line if the test takes too long!) + Grid g( "F1280", testdomain ); + Grid g_global( g.name() ); + grid::StructuredGrid gs( g ); - int ndgl = gs.ny(); + grid::StructuredGrid gs_global( g_global ); + Log::info() << "nlats: " << gs.ny() << " nlons:" << gs.nxmax() << std::endl; + int ndgl = gs_global.ny(); //int trc = ndgl - 1; // linear int trc = ndgl / 2. - 1; // cubic - int nb_scalar = 1, nb_vordiv = 0; + int nb_scalar = 1000, nb_vordiv = 0; for ( auto transType : transTypes ) { - if ( transType == "ifs" ) { trc = ndgl / 2. - 1; } - else { - trc = ndgl / 2. 
- 2; - } int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; int icase = 0; trans::Trans trans( g, trc, util::Config( "type", transType ) ); @@ -931,7 +937,7 @@ CASE( "test_trans_hires" ) { } #endif //----------------------------------------------------------------------------- -#if 1 +#if 0 CASE( "test_trans_domain" ) { Log::info() << "test_trans_domain" << std::endl; // test transgeneral by comparing with analytic solution on a cropped domain @@ -943,24 +949,25 @@ CASE( "test_trans_domain" ) { //Domain testdomain = ZonalBandDomain( {-.5, .5} ); //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} ); //Domain testdomain = ZonalBandDomain( {-85., -86.} ); - Domain testdomain = RectangularDomain( {-5., 10.}, {5., 6.} ); + Domain testdomain = RectangularDomain( {-1., 1.}, {5., 5.5} ); // Grid: (Adjust the following line if the test takes too long!) Grid g( "F1280", testdomain ); Grid g_global( g.name() ); grid::StructuredGrid gs( g ); grid::StructuredGrid gs_global( g_global ); + Log::info() << "nlats: " << gs.ny() << " nlons:" << gs.nxmax() << std::endl; int ndgl = gs_global.ny(); //int trc = ndgl - 1; // linear int trc = ndgl / 2. 
- 1; // cubic - trans::Trans transLocal1( g, trc, util::Config( "type", "localopt3" ) ); + trans::Trans transLocal1( g, trc, util::Config( "type", "localopt2" ) ); trans::Trans transLocal2( g, trc, util::Config( "type", "localopt3" ) ); double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 functionspace::Spectral spectral( trc ); functionspace::StructuredColumns gridpoints( g ); - int nb_scalar = 1, nb_vordiv = 0; + int nb_scalar = 1000, nb_vordiv = 0; int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; std::vector sp( 2 * N * nb_scalar ); std::vector vor( 2 * N * nb_vordiv ); @@ -990,7 +997,7 @@ CASE( "test_trans_domain" ) { for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. && - icase < 1000 ) { + icase < 1 ) { auto start = std::chrono::system_clock::now(); for ( int j = 0; j < 2 * N * nb_scalar; j++ ) { sp[j] = 0.; From 993347b990a30fbd7b4d30fe5d1ac1404e2b290e Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 4 Apr 2018 15:44:17 +0100 Subject: [PATCH 039/123] writing and reading Legendre polynomials from file to speed up testing --- src/atlas/trans/localopt/TransLocalopt.cc | 17 +++++++++++++++-- src/atlas/trans/localopt2/TransLocalopt2.cc | 17 +++++++++++++++-- src/atlas/trans/localopt3/TransLocalopt3.cc | 17 +++++++++++++++-- src/tests/trans/test_transgeneral.cc | 8 ++++---- 4 files changed, 49 insertions(+), 10 deletions(-) diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index 48f90d386..89434ff6b 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -194,8 +194,21 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t } alloc_aligned( legendre_sym_, size_sym ); alloc_aligned( legendre_asym_, size_asym ); - compute_legendre_polynomialsopt( truncation_ + 1, 
nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_, - legendre_sym_begin_.data(), legendre_asym_begin_.data() ); + FILE* file_leg; + file_leg = fopen( "legendre.bin", "r" ); + if ( file_leg ) { + fread( legendre_sym_, sizeof( double ), size_sym, file_leg ); + fread( legendre_asym_, sizeof( double ), size_asym, file_leg ); + fclose( file_leg ); + } + else { + compute_legendre_polynomialsopt( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_, + legendre_sym_begin_.data(), legendre_asym_begin_.data() ); + file_leg = fopen( "legendre.bin", "wb" ); + fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg ); + fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg ); + fclose( file_leg ); + } } // precomputations for Fourier transformations: diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index 2091f57b9..7eea4f9d3 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -194,8 +194,21 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long } alloc_aligned( legendre_sym_, size_sym ); alloc_aligned( legendre_asym_, size_asym ); - compute_legendre_polynomialsopt2( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_, - legendre_sym_begin_.data(), legendre_asym_begin_.data() ); + FILE* file_leg; + file_leg = fopen( "legendre.bin", "r" ); + if ( file_leg ) { + fread( legendre_sym_, sizeof( double ), size_sym, file_leg ); + fread( legendre_asym_, sizeof( double ), size_asym, file_leg ); + fclose( file_leg ); + } + else { + compute_legendre_polynomialsopt2( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_, + legendre_sym_begin_.data(), legendre_asym_begin_.data() ); + file_leg = fopen( "legendre.bin", "wb" ); + fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg ); + fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg ); + fclose( file_leg ); + } } // 
precomputations for Fourier transformations: diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index 8f5f9c2d3..36104005c 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -194,8 +194,21 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long } alloc_aligned( legendre_sym_, size_sym ); alloc_aligned( legendre_asym_, size_asym ); - compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_, - legendre_sym_begin_.data(), legendre_asym_begin_.data() ); + FILE* file_leg; + file_leg = fopen( "legendre.bin", "r" ); + if ( file_leg ) { + fread( legendre_sym_, sizeof( double ), size_sym, file_leg ); + fread( legendre_asym_, sizeof( double ), size_asym, file_leg ); + fclose( file_leg ); + } + else { + compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_, + legendre_sym_begin_.data(), legendre_asym_begin_.data() ); + file_leg = fopen( "legendre.bin", "wb" ); + fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg ); + fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg ); + fclose( file_leg ); + } } // precomputations for Fourier transformations: diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 6774d9a29..421dbdeaf 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -877,7 +877,7 @@ CASE( "test_trans_hires" ) { //Domain testdomain = ZonalBandDomain( {-85., -86.} ); Domain testdomain = RectangularDomain( {-1., 1.}, {5., 5.5} ); // Grid: (Adjust the following line if the test takes too long!) - Grid g( "F1280", testdomain ); + Grid g( "F5000", testdomain ); Grid g_global( g.name() ); grid::StructuredGrid gs( g ); @@ -887,7 +887,7 @@ CASE( "test_trans_hires" ) { //int trc = ndgl - 1; // linear int trc = ndgl / 2. 
- 1; // cubic - int nb_scalar = 1000, nb_vordiv = 0; + int nb_scalar = 1, nb_vordiv = 0; for ( auto transType : transTypes ) { int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; @@ -967,7 +967,7 @@ CASE( "test_trans_domain" ) { functionspace::Spectral spectral( trc ); functionspace::StructuredColumns gridpoints( g ); - int nb_scalar = 1000, nb_vordiv = 0; + int nb_scalar = 1, nb_vordiv = 0; int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; std::vector sp( 2 * N * nb_scalar ); std::vector vor( 2 * N * nb_vordiv ); @@ -997,7 +997,7 @@ CASE( "test_trans_domain" ) { for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. && - icase < 1 ) { + icase < 1000 ) { auto start = std::chrono::system_clock::now(); for ( int j = 0; j < 2 * N * nb_scalar; j++ ) { sp[j] = 0.; From 7ab54d8fd53f70c50971e78519490242dc33f538 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 5 Apr 2018 14:40:13 +0100 Subject: [PATCH 040/123] unstructured meshes are working in opt3 for scalar fields. computing Legendre-polynomials for every point. 
--- .../localopt3/LegendrePolynomialsopt3.cc | 236 +++++++-------- .../trans/localopt3/LegendrePolynomialsopt3.h | 7 + src/atlas/trans/localopt3/TransLocalopt3.cc | 269 ++++++++++-------- src/tests/trans/test_transgeneral.cc | 147 +++++++++- 4 files changed, 427 insertions(+), 232 deletions(-) diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc index 3ea2b41ef..c285d32b2 100644 --- a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc +++ b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc @@ -21,143 +21,155 @@ namespace trans { //----------------------------------------------------------------------------- -void compute_legendre_polynomialsopt3( - const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // latitudes in radians (in) - double leg_sym[], // values of associated Legendre functions, symmetric part - double leg_asym[], // values of associated Legendre functions, asymmetric part - size_t leg_start_sym[], // start indices for different zonal wave numbers, symmetric part - size_t leg_start_asym[] ) // start indices for different zonal wave numbers, asymmetric part -{ - auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; }; - array::ArrayT zfn_( trc + 1, trc + 1 ); - array::ArrayView zfn = array::make_view( zfn_ ); - std::vector legpol( legendre_size( trc ) ); - auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; }; - int iodd; - +void compute_zfnopt3( const size_t trc, double zfn[] ) { + auto idxzfn = [&]( int jn, int jk ) { return jk + ( trc + 1 ) * jn; }; + int iodd = 0; // Compute coefficients for Taylor series in Belousov (19) and (21) - // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.) + // Belousov, Swarztrauber use zfn[0]=std::sqrt(2.) 
// IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1 - zfn( 0, 0 ) = 2.; + zfn[idxzfn( 0, 0 )] = 2.; for ( int jn = 1; jn <= trc; ++jn ) { - double zfnn = zfn( 0, 0 ); + double zfnn = zfn[idxzfn( 0, 0 )]; for ( int jgl = 1; jgl <= jn; ++jgl ) { zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) ); } - iodd = jn % 2; - zfn( jn, jn ) = zfnn; + iodd = jn % 2; + zfn[idxzfn( jn, jn )] = zfnn; for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) { double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) ); // new factor numerator double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) ); // new factor denominator - zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd; + zfn[idxzfn( jn, jn - jgl )] = zfn[idxzfn( jn, jn - jgl + 2 )] * zfjn / zfjd; } } +} - // Loop over latitudes: - for ( int jlat = 0; jlat < nlats; ++jlat ) { - { - //ATLAS_TRACE( "compute Legendre polynomials" ); - // -------------------- - // 1. First two columns - // -------------------- - double lat = lats[jlat]; - double zdlx1 = ( M_PI_2 - lat ); // theta - double zdlx = std::cos( zdlx1 ); // cos(theta) - double zdlsita = std::sqrt( 1. - zdlx * zdlx ); // sin(theta) (this is how trans library does it) - - legpol[idxmn( 0, 0 )] = 1.; - double vsin[trc + 1], vcos[trc + 1]; - for ( int j = 1; j <= trc; j++ ) { - vsin[j] = std::sin( j * zdlx1 ); - } - for ( int j = 1; j <= trc; j++ ) { - vcos[j] = std::cos( j * zdlx1 ); - } - double zdl1sita = 0.; - // if we are less than 1 meter from the pole, - if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits::epsilon() ) ) { - zdlx = 1.; - zdlsita = 0.; - } - else { - zdl1sita = 1. 
/ zdlsita; - } +void compute_legendre_polynomials_latopt3( const size_t trc, // truncation (in) + const double lat, // latitude in radians (in) + double legpol[], // legendre polynomials + double zfn[] ) { + auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; }; + auto idxzfn = [&]( int jn, int jk ) { return jk + ( trc + 1 ) * jn; }; + { //ATLAS_TRACE( "compute Legendre polynomials" ); + // -------------------- + // 1. First two columns + // -------------------- + double zdlx1 = ( M_PI_2 - lat ); // theta + double zdlx = std::cos( zdlx1 ); // cos(theta) + double zdlsita = std::sqrt( 1. - zdlx * zdlx ); // sin(theta) (this is how trans library does it) + + legpol[idxmn( 0, 0 )] = 1.; + double vsin[trc + 1], vcos[trc + 1]; + for ( int j = 1; j <= trc; j++ ) { + vsin[j] = std::sin( j * zdlx1 ); + } + for ( int j = 1; j <= trc; j++ ) { + vcos[j] = std::cos( j * zdlx1 ); + } - // ordinary Legendre polynomials from series expansion - // --------------------------------------------------- - - // even N - for ( int jn = 2; jn <= trc; jn += 2 ) { - double zdlk = 0.5 * zfn( jn, 0 ); - double zdlldn = 0.0; - double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); - // represented by only even k - for ( int jk = 2; jk <= jn; jk += 2 ) { - // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn( jn, jk ) * vcos[jk]; - // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk]; - } - legpol[idxmn( 0, jn )] = zdlk; - legpol[idxmn( 1, jn )] = zdlldn; + double zdl1sita = 0.; + // if we are less than 1 meter from the pole, + if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits::epsilon() ) ) { + zdlx = 1.; + zdlsita = 0.; + } + else { + zdl1sita = 1. 
/ zdlsita; + } + + // ordinary Legendre polynomials from series expansion + // --------------------------------------------------- + + // even N + for ( int jn = 2; jn <= trc; jn += 2 ) { + double zdlk = 0.5 * zfn[idxzfn( jn, 0 )]; + double zdlldn = 0.0; + double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); + // represented by only even k + for ( int jk = 2; jk <= jn; jk += 2 ) { + // normalised ordinary Legendre polynomial == \overbar{P_n}^0 + zdlk = zdlk + zfn[idxzfn( jn, jk )] * vcos[jk]; + // normalised associated Legendre polynomial == \overbar{P_n}^1 + zdlldn = zdlldn + zdsq * zfn[idxzfn( jn, jk )] * jk * vsin[jk]; } + legpol[idxmn( 0, jn )] = zdlk; + legpol[idxmn( 1, jn )] = zdlldn; + } - // odd N - for ( int jn = 1; jn <= trc; jn += 2 ) { - zfn( jn, 0 ) = 0.; - double zdlk = 0.; - double zdlldn = 0.0; - double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); - // represented by only even k - for ( int jk = 1; jk <= jn; jk += 2 ) { - // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn( jn, jk ) * vcos[jk]; - // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk]; - } - legpol[idxmn( 0, jn )] = zdlk; - legpol[idxmn( 1, jn )] = zdlldn; + // odd N + for ( int jn = 1; jn <= trc; jn += 2 ) { + zfn[idxzfn( jn, 0 )] = 0.; + double zdlk = 0.; + double zdlldn = 0.0; + double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); + // represented by only even k + for ( int jk = 1; jk <= jn; jk += 2 ) { + // normalised ordinary Legendre polynomial == \overbar{P_n}^0 + zdlk = zdlk + zfn[idxzfn( jn, jk )] * vcos[jk]; + // normalised associated Legendre polynomial == \overbar{P_n}^1 + zdlldn = zdlldn + zdsq * zfn[idxzfn( jn, jk )] * jk * vsin[jk]; } + legpol[idxmn( 0, jn )] = zdlk; + legpol[idxmn( 1, jn )] = zdlldn; + } - // -------------------------------------------------------------- - // 2. 
Diagonal (the terms 0,0 and 1,1 have already been computed) - // Belousov, equation (23) - // -------------------------------------------------------------- + // -------------------------------------------------------------- + // 2. Diagonal (the terms 0,0 and 1,1 have already been computed) + // Belousov, equation (23) + // -------------------------------------------------------------- - double zdls = zdl1sita * std::numeric_limits::min(); - for ( int jn = 2; jn <= trc; ++jn ) { - double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) ); + double zdls = zdl1sita * std::numeric_limits::min(); + for ( int jn = 2; jn <= trc; ++jn ) { + double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) ); - legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq; - if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0; - } + legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq; + if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0; + } - // --------------------------------------------- - // 3. General recurrence (Belousov, equation 17) - // --------------------------------------------- - - for ( int jn = 3; jn <= trc; ++jn ) { - for ( int jm = 2; jm < jn; ++jm ) { - double cn = - ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) ); // numerator of c in Belousov - double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of c in Belousov - double dn = - ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) ); // numerator of d in Belousov - double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of d in Belousov - double en = ( ( 2. * jn + 1. ) * ( jn - jm ) ); // numerator of e in Belousov - double ed = ( ( 2. * jn - 1. 
) * ( jn + jm ) ); // denominator of e in Belousov - - legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] - - std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx + - std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx; - } + // --------------------------------------------- + // 3. General recurrence (Belousov, equation 17) + // --------------------------------------------- + + for ( int jn = 3; jn <= trc; ++jn ) { + for ( int jm = 2; jm < jn; ++jm ) { + double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) ); // numerator of c in Belousov + double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of c in Belousov + double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) ); // numerator of d in Belousov + double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of d in Belousov + double en = ( ( 2. * jn + 1. ) * ( jn - jm ) ); // numerator of e in Belousov + double ed = ( ( 2. * jn - 1. 
) * ( jn + jm ) ); // denominator of e in Belousov + + legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] - + std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx + + std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx; } } + } +} + + +void compute_legendre_polynomialsopt3( + const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double leg_sym[], // values of associated Legendre functions, symmetric part + double leg_asym[], // values of associated Legendre functions, asymmetric part + size_t leg_start_sym[], // start indices for different zonal wave numbers, symmetric part + size_t leg_start_asym[] ) // start indices for different zonal wave numbers, asymmetric part +{ + auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; }; + std::vector legpol( legendre_size( trc ) ); + std::vector zfn( ( trc + 1 ) * ( trc + 1 ) ); + auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; }; + compute_zfnopt3( trc, zfn.data() ); + + // Loop over latitudes: + for ( int jlat = 0; jlat < nlats; ++jlat ) { + // compute legendre polynomials for current latitude: + compute_legendre_polynomials_latopt3( trc, lats[jlat], legpol.data(), zfn.data() ); + // split polynomials into symmetric and antisymmetric parts: { //ATLAS_TRACE( "add to global arrays" ); diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h index 1698fb80c..f5dbd7aa3 100644 --- a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h +++ b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h @@ -32,6 +32,13 @@ namespace trans { // Ported to C++ by: // Andreas Mueller *ECMWF* // +void compute_zfnopt3( const size_t trc, double zfn[] ); + +void compute_legendre_polynomials_latopt3( const size_t trc, // truncation (in) + const double lat, // latitude in radians (in) + 
double legpol[], // legendre polynomials + double zfn[] ); + void compute_legendre_polynomialsopt3( const size_t trc, // truncation (in) const int nlats, // number of latitudes diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index 36104005c..f214c6309 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -143,19 +143,8 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long } } //Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << std::endl; - } - else { - // unstructured grid - useFFT_ = false; - nlats = grid_.size(); - nlons = grid_.size(); - nlatsNH_ = nlats; - nlatsLeg_ = nlats; - } - std::vector lats( nlatsLeg_ ); - std::vector lons( nlons ); - if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { - grid::StructuredGrid g( grid_ ); + std::vector lats( nlatsLeg_ ); + std::vector lons( nlons ); if ( nlatsNH_ >= nlatsSH_ ) { for ( size_t j = 0; j < nlatsLeg_; ++j ) { lats[j] = g.y( j ) * util::Constants::degreesToRadians(); @@ -169,92 +158,86 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long for ( size_t j = 0; j < nlons; ++j ) { lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians(); } - } - else { - int j( 0 ); - for ( PointXY p : grid_.xy() ) { - lats[j++] = p.y() * util::Constants::degreesToRadians(); - lons[j++] = p.x() * util::Constants::degreesToRadians(); - } - } - // precomputations for Legendre polynomials: - { - ATLAS_TRACE( "opt3 precomp Legendre" ); - int size_sym = 0; - int size_asym = 0; - legendre_sym_begin_.resize( truncation_ + 3 ); - legendre_asym_begin_.resize( truncation_ + 3 ); - legendre_sym_begin_[0] = 0; - legendre_asym_begin_[0] = 0; - for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { - size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ ); - size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * 
nlatsLeg_ ); - legendre_sym_begin_[jm + 1] = size_sym; - legendre_asym_begin_[jm + 1] = size_asym; - } - alloc_aligned( legendre_sym_, size_sym ); - alloc_aligned( legendre_asym_, size_asym ); - FILE* file_leg; - file_leg = fopen( "legendre.bin", "r" ); - if ( file_leg ) { - fread( legendre_sym_, sizeof( double ), size_sym, file_leg ); - fread( legendre_asym_, sizeof( double ), size_asym, file_leg ); - fclose( file_leg ); - } - else { - compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_, - legendre_sym_begin_.data(), legendre_asym_begin_.data() ); - file_leg = fopen( "legendre.bin", "wb" ); - fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg ); - fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg ); - fclose( file_leg ); + // precomputations for Legendre polynomials: + { + ATLAS_TRACE( "opt3 precomp Legendre" ); + int size_sym = 0; + int size_asym = 0; + legendre_sym_begin_.resize( truncation_ + 3 ); + legendre_asym_begin_.resize( truncation_ + 3 ); + legendre_sym_begin_[0] = 0; + legendre_asym_begin_[0] = 0; + for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { + size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ ); + size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ ); + legendre_sym_begin_[jm + 1] = size_sym; + legendre_asym_begin_[jm + 1] = size_asym; + } + alloc_aligned( legendre_sym_, size_sym ); + alloc_aligned( legendre_asym_, size_asym ); + FILE* file_leg; + file_leg = fopen( "legendre.bin", "r" ); + if ( false ) { //if ( file_leg ) { + fread( legendre_sym_, sizeof( double ), size_sym, file_leg ); + fread( legendre_asym_, sizeof( double ), size_asym, file_leg ); + fclose( file_leg ); + } + else { + compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, + legendre_asym_, legendre_sym_begin_.data(), + legendre_asym_begin_.data() ); + /*file_leg = fopen( "legendre.bin", "wb" ); + fwrite( legendre_sym_, 
sizeof( double ), size_sym, file_leg ); + fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg ); + fclose( file_leg );*/ + } } - } - // precomputations for Fourier transformations: - if ( useFFT_ ) { + // precomputations for Fourier transformations: + if ( useFFT_ ) { #if ATLAS_HAVE_FFTW - { - ATLAS_TRACE( "opt3 precomp FFTW" ); - int num_complex = ( nlonsGlobal_ / 2 ) + 1; - fft_in_ = fftw_alloc_complex( nlats * num_complex ); - fft_out_ = fftw_alloc_real( nlats * nlonsGlobal_ ); - plan_ = fftw_plan_many_dft_c2r( 1, &nlonsGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, - nlonsGlobal_, FFTW_ESTIMATE ); - } - // other FFT implementations should be added with #elif statements -#else - useFFT_ = false; // no FFT implemented => default to dgemm -#endif - } - if ( !useFFT_ ) { - alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons ); - if ( dgemmMethod1_ ) { { - ATLAS_TRACE( "opt3 precomp Fourier" ); - int idx = 0; - for ( int jlon = 0; jlon < nlons; jlon++ ) { - double factor = 1.; - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - if ( jm > 0 ) { factor = 2.; } - fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part - fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part - } - } + ATLAS_TRACE( "opt3 precomp FFTW" ); + int num_complex = ( nlonsGlobal_ / 2 ) + 1; + fft_in_ = fftw_alloc_complex( nlats * num_complex ); + fft_out_ = fftw_alloc_real( nlats * nlonsGlobal_ ); + plan_ = fftw_plan_many_dft_c2r( 1, &nlonsGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, + 1, nlonsGlobal_, FFTW_ESTIMATE ); } + // other FFT implementations should be added with #elif statements +#else + useFFT_ = false; // no FFT implemented => default to dgemm +#endif } - else { - { - ATLAS_TRACE( "opt3 precomp Fourier tp" ); - int idx = 0; - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - double factor = 1.; - if ( jm > 0 ) { factor = 2.; } + if ( !useFFT_ ) { + alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) 
* nlons ); + if ( dgemmMethod1_ ) { + { + ATLAS_TRACE( "opt3 precomp Fourier" ); + int idx = 0; for ( int jlon = 0; jlon < nlons; jlon++ ) { - fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part + double factor = 1.; + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + if ( jm > 0 ) { factor = 2.; } + fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part + fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part + } } - for ( int jlon = 0; jlon < nlons; jlon++ ) { - fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part + } + } + else { + { + ATLAS_TRACE( "opt3 precomp Fourier tp" ); + int idx = 0; + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + double factor = 1.; + if ( jm > 0 ) { factor = 2.; } + for ( int jlon = 0; jlon < nlons; jlon++ ) { + fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part + } + for ( int jlon = 0; jlon < nlons; jlon++ ) { + fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part + } } } } @@ -270,17 +253,19 @@ TransLocalopt3::TransLocalopt3( const Grid& grid, const long truncation, const e // -------------------------------------------------------------------------------------------------------------------- TransLocalopt3::~TransLocalopt3() { - free_aligned( legendre_sym_ ); - free_aligned( legendre_asym_ ); - if ( useFFT_ ) { + if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { + free_aligned( legendre_sym_ ); + free_aligned( legendre_asym_ ); + if ( useFFT_ ) { #if ATLAS_HAVE_FFTW - fftw_destroy_plan( plan_ ); - fftw_free( fft_in_ ); - fftw_free( fft_out_ ); + fftw_destroy_plan( plan_ ); + fftw_free( fft_in_ ); + fftw_free( fft_out_ ); #endif - } - else { - free_aligned( fourier_ ); + } + else { + free_aligned( fourier_ ); + } } } @@ -561,24 +546,78 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel } // namespace atlas else { ATLAS_TRACE( "invtrans_uv unstructured opt3" ); - int 
idx = 0; - for ( PointXY p : grid_.xy() ) { - double lon = p.x() * util::Constants::degreesToRadians(); - double lat = p.y() * util::Constants::degreesToRadians(); - double trcFT = truncation; - + grid::UnstructuredGrid gu = grid_; + double* zfn; + alloc_aligned( zfn, ( truncation + 1 ) * ( truncation + 1 ) ); + compute_zfnopt3( truncation, zfn ); + int size_fourier = nb_fields * 2; + double* legendre; + double* scl_fourier; + double* scl_fourier_tp; + double* fouriertp; + double* gp_opt; + alloc_aligned( legendre, legendre_size( truncation + 1 ) ); + alloc_aligned( scl_fourier, size_fourier * ( truncation + 1 ) ); + alloc_aligned( scl_fourier_tp, size_fourier * ( truncation + 1 ) ); + alloc_aligned( fouriertp, 2 * ( truncation + 1 ) ); + alloc_aligned( gp_opt, nb_fields ); + + // loop over all points: + for ( int ip = 0; ip < grid_.size(); ip++ ) { + PointXY p = gu.xy( ip ); + double lon = p.x() * util::Constants::degreesToRadians(); + double lat = p.y() * util::Constants::degreesToRadians(); + compute_legendre_polynomials_latopt3( truncation, lat, legendre, zfn ); // Legendre transform: - //invtrans_legendreopt3( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, - // legReal.data(), legImag.data() ); - - // Fourier transform: - //invtrans_fourieropt3( trcFT, lon, nb_fields, legReal.data(), legImag.data(), - // gp_tmp.data() + ( nb_fields * idx ) ); - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + { + //ATLAS_TRACE( "opt Legendre dgemm" ); + for ( int jm = 0; jm <= truncation; jm++ ) { + int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; + eckit::linalg::Matrix A( eckit::linalg::Matrix( + const_cast( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) ); + eckit::linalg::Matrix B( legendre + noff, ns, 1 ); + eckit::linalg::Matrix C( scl_fourier + jm * size_fourier, nb_fields * 2, 1 ); + eckit::linalg::LinearAlgebra::backend().gemm( A, 
B, C ); + } + } + { + //ATLAS_TRACE( "opt transposition in Fourier" ); + int idx = 0; + for ( int jm = 0; jm < truncation + 1; jm++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int pos_tp = imag + 2 * ( jm + ( truncation + 1 ) * ( jfld ) ); + //int pos = jfld + nb_fields * ( imag + 2 * ( jm ) ); + scl_fourier_tp[pos_tp] = scl_fourier[idx++]; // = scl_fourier[pos] + } + } + } + } + + // Fourier transformation: + int idx = 0; + fouriertp[idx++] = 1.; // real part + fouriertp[idx++] = 0.; // imaginary part + for ( int jm = 1; jm < truncation + 1; jm++ ) { + fouriertp[idx++] = +2. * std::cos( jm * lon ); // real part + fouriertp[idx++] = -2. * std::sin( jm * lon ); // imaginary part + } + { + //ATLAS_TRACE( "opt Fourier dgemm" ); + eckit::linalg::Matrix A( fouriertp, 1, ( truncation + 1 ) * 2 ); + eckit::linalg::Matrix B( scl_fourier_tp, ( truncation + 1 ) * 2, nb_fields ); + eckit::linalg::Matrix C( gp_opt, 1, nb_fields ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + for ( int j = 0; j < nb_fields; j++ ) { + gp_fields[ip + j * grid_.size()] = gp_opt[j]; + } } - ++idx; } + free_aligned( legendre ); + free_aligned( scl_fourier ); + free_aligned( scl_fourier_tp ); + free_aligned( fouriertp ); + free_aligned( gp_opt ); } } // namespace trans } // namespace atlas diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 421dbdeaf..95a49eeb3 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -724,7 +724,7 @@ CASE( "test_trans_vordiv_with_translib" ) { trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) ); double rav = 0.; // compute average rms error of trans library in rav #endif - trans::Trans transLocal1( g, trc, util::Config( "type", "localopt3" ) ); + trans::Trans transLocal1( g, trc, util::Config( "type", "localopt2" ) ); trans::Trans transLocal2( g, trc, util::Config( "type", "localopt3" ) ); double rav1 = 
0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 @@ -853,7 +853,7 @@ CASE( "test_trans_vordiv_with_translib" ) { } #endif //----------------------------------------------------------------------------- -#if 1 +#if 0 CASE( "test_trans_hires" ) { Log::info() << "test_trans_hires" << std::endl; // test transgeneral by comparing its result with the trans library @@ -865,7 +865,7 @@ CASE( "test_trans_hires" ) { #if ATLAS_HAVE_TRANS //std::string transTypes[4] = {"localopt", "localopt2", "localopt3", "ifs"}; //std::string transTypes[2] = {"localopt2", "localopt3"}; - std::string transTypes[3] = {"localopt", "localopt2", "localopt3"}; + std::string transTypes[3] = {"localopt3", "localopt2", "localopt"}; //std::string transTypes[1] = {"localopt3"}; #else std::string transTypes[1] = {"localopt2"}; @@ -875,9 +875,10 @@ CASE( "test_trans_hires" ) { //Domain testdomain = ZonalBandDomain( {-.5, .5} ); //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} ); //Domain testdomain = ZonalBandDomain( {-85., -86.} ); - Domain testdomain = RectangularDomain( {-1., 1.}, {5., 5.5} ); + ///Domain testdomain = RectangularDomain( {-.01, .01}, {-.01, .01} ); + Domain testdomain = RectangularDomain( {-1, 1}, {-1, 1} ); // Grid: (Adjust the following line if the test takes too long!) 
- Grid g( "F5000", testdomain ); + Grid g( "F22000", testdomain ); Grid g_global( g.name() ); grid::StructuredGrid gs( g ); @@ -1071,6 +1072,142 @@ CASE( "test_trans_domain" ) { } #endif //----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +#if 1 +CASE( "test_trans_unstructured" ) { + Log::info() << "test_trans_unstructured" << std::endl; + // test transgeneral by comparing with analytic solution on an unstructured grid + + std::ostream& out = Log::info(); + double tolerance = 1.e-13; + + //Domain testdomain = RectangularDomain( {20., 25.}, {40., 60.} ); + Domain testdomain = RectangularDomain( {20., 25.}, {45., 50.} ); + // Grid: (Adjust the following line if the test takes too long!) + Grid g( "F12", testdomain ); + grid::StructuredGrid gs( g ); + std::vector pts( g.size() ); + int idx( 0 ); + for ( size_t j = 0; j < gs.ny(); ++j ) { + double lat = gs.y( j ); + for ( size_t i = 0; i < gs.nx( j ); ++i ) { + double lon = gs.x( i, j ); + Log::info() << "idx=" << idx << " lon=" << lon << " lat=" << lat << std::endl; + pts[idx++].assign( lon, lat ); + } + } + Grid gu = grid::UnstructuredGrid( new std::vector( pts ) ); + + int trc = 120; + double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 + + int nb_scalar = 100, nb_vordiv = 0; + int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; + std::vector sp( 2 * N * nb_scalar ); + std::vector vor( 2 * N * nb_vordiv ); + std::vector div( 2 * N * nb_vordiv ); + std::vector rspecg( 2 * N ); + std::vector gp( nb_all * g.size() ); + std::vector rgp1( nb_all * g.size() ); + std::vector rgp2( nb_all * g.size() ); + std::vector rgp_analytic( g.size() ); + + int icase = 0; + for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) { // vorticity, divergence, scalar + for ( int ivar_out = 2; ivar_out < 3; ivar_out++ ) { // u, v, scalar + int nb_fld = 1; + if ( ivar_out == 2 ) { + 
tolerance = 1.e-13; + nb_fld = nb_scalar; + } + else { + tolerance = 2.e-6; + nb_fld = nb_vordiv; + } + for ( int jfld = 0; jfld < nb_fld; jfld++ ) { // multiple fields + int k = 0; + for ( int m = 0; m <= trc; m++ ) { // zonal wavenumber + for ( int n = m; n <= trc; n++ ) { // total wavenumber + for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part + + if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. && + icase < 1000 ) { + auto start = std::chrono::system_clock::now(); + trans::Trans transLocal1( g, trc, util::Config( "type", "localopt3" ) ); + trans::Trans transLocal2( gu, trc, util::Config( "type", "localopt3" ) ); + for ( int j = 0; j < 2 * N * nb_scalar; j++ ) { + sp[j] = 0.; + } + for ( int j = 0; j < 2 * N * nb_vordiv; j++ ) { + vor[j] = 0.; + div[j] = 0.; + } + if ( ivar_in == 0 ) vor[k * nb_vordiv + jfld] = 1.; + if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.; + if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.; + + for ( int j = 0; j < nb_all * g.size(); j++ ) { + gp[j] = 0.; + rgp1[j] = 0.; + rgp2[j] = 0.; + } + for ( int j = 0; j < g.size(); j++ ) { + rgp_analytic[j] = 0.; + } + + spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(), + rgp_analytic.data(), ivar_in, ivar_out ); + + //Log::info() << icase << " m=" << m << " n=" << n << " imag=" << imag << " structured: "; + EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), + div.data(), rgp1.data() ) ); + + //Log::info() << icase << " m=" << m << " n=" << n << " imag=" << imag << " unstructured: "; + EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), + div.data(), rgp2.data() ) ); + + int pos = ( ivar_out * nb_vordiv + jfld ); + + double rms_gen1 = + compute_rms( g.size(), rgp1.data() + pos * g.size(), rgp_analytic.data() ); + + double rms_gen2 = + compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() ); + + rav1 += rms_gen1; + rav2 += rms_gen2; 
+ if ( !( rms_gen1 < tolerance ) || !( rms_gen2 < tolerance ) ) { + Log::info() + << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out + << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl; + ATLAS_DEBUG_VAR( rms_gen1 ); + ATLAS_DEBUG_VAR( rms_gen2 ); + ATLAS_DEBUG_VAR( tolerance ); + } + EXPECT( rms_gen1 < tolerance ); + EXPECT( rms_gen2 < tolerance ); + icase++; + auto end = std::chrono::system_clock::now(); // + std::chrono::duration elapsed_seconds = end - start; + std::time_t end_time = std::chrono::system_clock::to_time_t( end ); + std::string time_str = std::ctime( &end_time ); + //Log::info() << "case " << icase << ", elapsed time: " << elapsed_seconds.count() + // << "s. Now: " << time_str.substr( 0, time_str.length() - 1 ) << std::endl; + } + k++; + } + } + } + } + } + } + Log::info() << "Vordiv+scalar comparison with trans: all " << icase << " cases successfully passed!" << std::endl; + rav1 /= icase; + Log::info() << "average RMS error of transLocal1: " << rav1 << std::endl; + rav2 /= icase; + Log::info() << "average RMS error of transLocal2: " << rav2 << std::endl; +} +#endif #if 0 CASE( "test_trans_invtrans" ) { trans::Trans trans( Grid( "O64" ), 63, util::Config( "type", "local" ) ); From be0ad59f365b12dc4427df03c022439e9ef42842 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 5 Apr 2018 16:23:47 +0100 Subject: [PATCH 041/123] not working --- .../localopt2/LegendrePolynomialsopt2.cc | 263 +++++++++-------- .../trans/localopt2/LegendrePolynomialsopt2.h | 12 + src/atlas/trans/localopt2/TransLocalopt2.cc | 271 ++++++++++-------- .../localopt3/LegendrePolynomialsopt3.cc | 28 ++ .../trans/localopt3/LegendrePolynomialsopt3.h | 5 + src/atlas/trans/localopt3/TransLocalopt3.cc | 49 +++- src/atlas/trans/localopt3/TransLocalopt3.h | 1 + src/tests/trans/test_transgeneral.cc | 11 +- 8 files changed, 392 insertions(+), 248 deletions(-) diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc 
b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc index 9d11ea850..c3e3fe2f6 100644 --- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc +++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc @@ -21,143 +21,155 @@ namespace trans { //----------------------------------------------------------------------------- -void compute_legendre_polynomialsopt2( - const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // latitudes in radians (in) - double leg_sym[], // values of associated Legendre functions, symmetric part - double leg_asym[], // values of associated Legendre functions, asymmetric part - size_t leg_start_sym[], // start indices for different zonal wave numbers, symmetric part - size_t leg_start_asym[] ) // start indices for different zonal wave numbers, asymmetric part -{ - auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; }; - array::ArrayT zfn_( trc + 1, trc + 1 ); - array::ArrayView zfn = array::make_view( zfn_ ); - std::vector legpol( legendre_size( trc ) ); - auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; }; - int iodd; - +void compute_zfnopt2( const size_t trc, double zfn[] ) { + auto idxzfn = [&]( int jn, int jk ) { return jk + ( trc + 1 ) * jn; }; + int iodd = 0; // Compute coefficients for Taylor series in Belousov (19) and (21) - // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.) + // Belousov, Swarztrauber use zfn[0]=std::sqrt(2.) // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1 - zfn( 0, 0 ) = 2.; + zfn[idxzfn( 0, 0 )] = 2.; for ( int jn = 1; jn <= trc; ++jn ) { - double zfnn = zfn( 0, 0 ); + double zfnn = zfn[idxzfn( 0, 0 )]; for ( int jgl = 1; jgl <= jn; ++jgl ) { zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) ); } - iodd = jn % 2; - zfn( jn, jn ) = zfnn; + iodd = jn % 2; + zfn[idxzfn( jn, jn )] = zfnn; for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) { double zfjn = ( ( jgl - 1. ) * ( 2. 
* jn - jgl + 2. ) ); // new factor numerator double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) ); // new factor denominator - zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd; + zfn[idxzfn( jn, jn - jgl )] = zfn[idxzfn( jn, jn - jgl + 2 )] * zfjn / zfjd; } } +} - // Loop over latitudes: - for ( int jlat = 0; jlat < nlats; ++jlat ) { - { - //ATLAS_TRACE( "compute Legendre polynomials" ); - // -------------------- - // 1. First two columns - // -------------------- - double lat = lats[jlat]; - double zdlx1 = ( M_PI_2 - lat ); // theta - double zdlx = std::cos( zdlx1 ); // cos(theta) - double zdlsita = std::sqrt( 1. - zdlx * zdlx ); // sin(theta) (this is how trans library does it) - - legpol[idxmn( 0, 0 )] = 1.; - double vsin[trc + 1], vcos[trc + 1]; - for ( int j = 1; j <= trc; j++ ) { - vsin[j] = std::sin( j * zdlx1 ); - } - for ( int j = 1; j <= trc; j++ ) { - vcos[j] = std::cos( j * zdlx1 ); - } - double zdl1sita = 0.; - // if we are less than 1 meter from the pole, - if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits::epsilon() ) ) { - zdlx = 1.; - zdlsita = 0.; - } - else { - zdl1sita = 1. / zdlsita; - } +void compute_legendre_polynomials_latopt2( const size_t trc, // truncation (in) + const double lat, // latitude in radians (in) + double legpol[], // legendre polynomials + double zfn[] ) { + auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; }; + auto idxzfn = [&]( int jn, int jk ) { return jk + ( trc + 1 ) * jn; }; + { //ATLAS_TRACE( "compute Legendre polynomials" ); + // -------------------- + // 1. First two columns + // -------------------- + double zdlx1 = ( M_PI_2 - lat ); // theta + double zdlx = std::cos( zdlx1 ); // cos(theta) + double zdlsita = std::sqrt( 1. 
- zdlx * zdlx ); // sin(theta) (this is how trans library does it) + + legpol[idxmn( 0, 0 )] = 1.; + double vsin[trc + 1], vcos[trc + 1]; + for ( int j = 1; j <= trc; j++ ) { + vsin[j] = std::sin( j * zdlx1 ); + } + for ( int j = 1; j <= trc; j++ ) { + vcos[j] = std::cos( j * zdlx1 ); + } - // ordinary Legendre polynomials from series expansion - // --------------------------------------------------- - - // even N - for ( int jn = 2; jn <= trc; jn += 2 ) { - double zdlk = 0.5 * zfn( jn, 0 ); - double zdlldn = 0.0; - double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); - // represented by only even k - for ( int jk = 2; jk <= jn; jk += 2 ) { - // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn( jn, jk ) * vcos[jk]; - // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk]; - } - legpol[idxmn( 0, jn )] = zdlk; - legpol[idxmn( 1, jn )] = zdlldn; + double zdl1sita = 0.; + // if we are less than 1 meter from the pole, + if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits::epsilon() ) ) { + zdlx = 1.; + zdlsita = 0.; + } + else { + zdl1sita = 1. / zdlsita; + } + + // ordinary Legendre polynomials from series expansion + // --------------------------------------------------- + + // even N + for ( int jn = 2; jn <= trc; jn += 2 ) { + double zdlk = 0.5 * zfn[idxzfn( jn, 0 )]; + double zdlldn = 0.0; + double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); + // represented by only even k + for ( int jk = 2; jk <= jn; jk += 2 ) { + // normalised ordinary Legendre polynomial == \overbar{P_n}^0 + zdlk = zdlk + zfn[idxzfn( jn, jk )] * vcos[jk]; + // normalised associated Legendre polynomial == \overbar{P_n}^1 + zdlldn = zdlldn + zdsq * zfn[idxzfn( jn, jk )] * jk * vsin[jk]; } + legpol[idxmn( 0, jn )] = zdlk; + legpol[idxmn( 1, jn )] = zdlldn; + } - // odd N - for ( int jn = 1; jn <= trc; jn += 2 ) { - zfn( jn, 0 ) = 0.; - double zdlk = 0.; - double zdlldn = 0.0; - double zdsq = 1. 
/ std::sqrt( jn * ( jn + 1. ) ); - // represented by only even k - for ( int jk = 1; jk <= jn; jk += 2 ) { - // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn( jn, jk ) * vcos[jk]; - // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk]; - } - legpol[idxmn( 0, jn )] = zdlk; - legpol[idxmn( 1, jn )] = zdlldn; + // odd N + for ( int jn = 1; jn <= trc; jn += 2 ) { + zfn[idxzfn( jn, 0 )] = 0.; + double zdlk = 0.; + double zdlldn = 0.0; + double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); + // represented by only even k + for ( int jk = 1; jk <= jn; jk += 2 ) { + // normalised ordinary Legendre polynomial == \overbar{P_n}^0 + zdlk = zdlk + zfn[idxzfn( jn, jk )] * vcos[jk]; + // normalised associated Legendre polynomial == \overbar{P_n}^1 + zdlldn = zdlldn + zdsq * zfn[idxzfn( jn, jk )] * jk * vsin[jk]; } + legpol[idxmn( 0, jn )] = zdlk; + legpol[idxmn( 1, jn )] = zdlldn; + } - // -------------------------------------------------------------- - // 2. Diagonal (the terms 0,0 and 1,1 have already been computed) - // Belousov, equation (23) - // -------------------------------------------------------------- + // -------------------------------------------------------------- + // 2. Diagonal (the terms 0,0 and 1,1 have already been computed) + // Belousov, equation (23) + // -------------------------------------------------------------- - double zdls = zdl1sita * std::numeric_limits::min(); - for ( int jn = 2; jn <= trc; ++jn ) { - double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) ); + double zdls = zdl1sita * std::numeric_limits::min(); + for ( int jn = 2; jn <= trc; ++jn ) { + double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. 
* jn ) ); - legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq; - if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0; - } + legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq; + if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0; + } - // --------------------------------------------- - // 3. General recurrence (Belousov, equation 17) - // --------------------------------------------- - - for ( int jn = 3; jn <= trc; ++jn ) { - for ( int jm = 2; jm < jn; ++jm ) { - double cn = - ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) ); // numerator of c in Belousov - double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of c in Belousov - double dn = - ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) ); // numerator of d in Belousov - double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of d in Belousov - double en = ( ( 2. * jn + 1. ) * ( jn - jm ) ); // numerator of e in Belousov - double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) ); // denominator of e in Belousov - - legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] - - std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx + - std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx; - } + // --------------------------------------------- + // 3. General recurrence (Belousov, equation 17) + // --------------------------------------------- + + for ( int jn = 3; jn <= trc; ++jn ) { + for ( int jm = 2; jm < jn; ++jm ) { + double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) ); // numerator of c in Belousov + double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of c in Belousov + double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) ); // numerator of d in Belousov + double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. 
) * ( jn + jm ) ); // denominator of d in Belousov + double en = ( ( 2. * jn + 1. ) * ( jn - jm ) ); // numerator of e in Belousov + double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) ); // denominator of e in Belousov + + legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] - + std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx + + std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx; } } + } +} + + +void compute_legendre_polynomialsopt2( + const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double leg_sym[], // values of associated Legendre functions, symmetric part + double leg_asym[], // values of associated Legendre functions, asymmetric part + size_t leg_start_sym[], // start indices for different zonal wave numbers, symmetric part + size_t leg_start_asym[] ) // start indices for different zonal wave numbers, asymmetric part +{ + auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; }; + std::vector legpol( legendre_size( trc ) ); + std::vector zfn( ( trc + 1 ) * ( trc + 1 ) ); + auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; }; + compute_zfnopt2( trc, zfn.data() ); + + // Loop over latitudes: + for ( int jlat = 0; jlat < nlats; ++jlat ) { + // compute legendre polynomials for current latitude: + compute_legendre_polynomials_latopt2( trc, lats[jlat], legpol.data(), zfn.data() ); + // split polynomials into symmetric and antisymmetric parts: { //ATLAS_TRACE( "add to global arrays" ); @@ -192,6 +204,33 @@ void compute_legendre_polynomialsopt2( } } +void compute_legendre_polynomials_allopt2( const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double legendre[] ) // legendre polynomials for all latitudes +{ + auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( 
truncation + 1 ) / 2; }; + std::vector legpol( legendre_size( trc ) ); + std::vector zfn( ( trc + 1 ) * ( trc + 1 ) ); + auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; }; + auto idxmnl = [&]( int jm, int jn, int jlat ) { + return ( 2 * trc + 3 - jm ) * jm / 2 * nlats + jlat * ( trc - jm + 1 ) + jn - jm; + }; + compute_zfnopt2( trc, zfn.data() ); + + // Loop over latitudes: + for ( int jlat = 0; jlat < nlats; ++jlat ) { + // compute legendre polynomials for current latitude: + compute_legendre_polynomials_latopt2( trc, lats[jlat], legpol.data(), zfn.data() ); + + for ( int jm = 1; jm <= trc; ++jm ) { + for ( int jn = jm; jn <= trc; ++jn ) { + legendre[idxmnl( jm, jn, jlat )] = legpol[idxmn( jm, jn )]; + } + } + } +} // namespace trans + // -------------------------------------------------------------------------------------------------------------------- } // namespace trans diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h index 7e97dec7e..f79438aea 100644 --- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h +++ b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h @@ -32,6 +32,13 @@ namespace trans { // Ported to C++ by: // Andreas Mueller *ECMWF* // +void compute_zfnopt2( const size_t trc, double zfn[] ); + +void compute_legendre_polynomials_latopt2( const size_t trc, // truncation (in) + const double lat, // latitude in radians (in) + double legpol[], // legendre polynomials + double zfn[] ); + void compute_legendre_polynomialsopt2( const size_t trc, // truncation (in) const int nlats, // number of latitudes @@ -41,6 +48,11 @@ void compute_legendre_polynomialsopt2( size_t leg_start_sym[], // start indices for different zonal wave numbers, symmetric part size_t leg_start_asym[] ); // start indices for different zonal wave numbers, asymmetric part +void compute_legendre_polynomials_allopt2( const size_t trc, // truncation (in) + const int nlats, // 
number of latitudes + const double lats[], // latitudes in radians (in) + double legendre[] ); // legendre polynomials for all latitudes + // -------------------------------------------------------------------------------------------------------------------- } // namespace trans diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index 7eea4f9d3..16062989d 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -103,7 +103,7 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long int nlons = 0; int neqtr = 0; useFFT_ = true; - dgemmMethod1_ = true; + dgemmMethod1_ = false; nlatsNH_ = 0; nlatsSH_ = 0; nlatsLeg_ = 0; @@ -143,19 +143,8 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long } } //Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << std::endl; - } - else { - // unstructured grid - useFFT_ = false; - nlats = grid_.size(); - nlons = grid_.size(); - nlatsNH_ = nlats; - nlatsLeg_ = nlats; - } - std::vector lats( nlatsLeg_ ); - std::vector lons( nlons ); - if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { - grid::StructuredGrid g( grid_ ); + std::vector lats( nlatsLeg_ ); + std::vector lons( nlons ); if ( nlatsNH_ >= nlatsSH_ ) { for ( size_t j = 0; j < nlatsLeg_; ++j ) { lats[j] = g.y( j ) * util::Constants::degreesToRadians(); @@ -169,92 +158,86 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long for ( size_t j = 0; j < nlons; ++j ) { lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians(); } - } - else { - int j( 0 ); - for ( PointXY p : grid_.xy() ) { - lats[j++] = p.y() * util::Constants::degreesToRadians(); - lons[j++] = p.x() * util::Constants::degreesToRadians(); - } - } - // precomputations for Legendre polynomials: - { - ATLAS_TRACE( "opt2 precomp Legendre" ); - int size_sym = 0; - int size_asym = 0; - 
legendre_sym_begin_.resize( truncation_ + 3 ); - legendre_asym_begin_.resize( truncation_ + 3 ); - legendre_sym_begin_[0] = 0; - legendre_asym_begin_[0] = 0; - for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { - size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ ); - size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ ); - legendre_sym_begin_[jm + 1] = size_sym; - legendre_asym_begin_[jm + 1] = size_asym; - } - alloc_aligned( legendre_sym_, size_sym ); - alloc_aligned( legendre_asym_, size_asym ); - FILE* file_leg; - file_leg = fopen( "legendre.bin", "r" ); - if ( file_leg ) { - fread( legendre_sym_, sizeof( double ), size_sym, file_leg ); - fread( legendre_asym_, sizeof( double ), size_asym, file_leg ); - fclose( file_leg ); - } - else { - compute_legendre_polynomialsopt2( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_, - legendre_sym_begin_.data(), legendre_asym_begin_.data() ); - file_leg = fopen( "legendre.bin", "wb" ); - fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg ); - fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg ); - fclose( file_leg ); + // precomputations for Legendre polynomials: + { + ATLAS_TRACE( "opt2 precomp Legendre" ); + int size_sym = 0; + int size_asym = 0; + legendre_sym_begin_.resize( truncation_ + 3 ); + legendre_asym_begin_.resize( truncation_ + 3 ); + legendre_sym_begin_[0] = 0; + legendre_asym_begin_[0] = 0; + for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { + size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ ); + size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ ); + legendre_sym_begin_[jm + 1] = size_sym; + legendre_asym_begin_[jm + 1] = size_asym; + } + alloc_aligned( legendre_sym_, size_sym ); + alloc_aligned( legendre_asym_, size_asym ); + FILE* file_leg; + file_leg = fopen( "legendre.bin", "r" ); + if ( false ) { //if ( file_leg ) { + fread( legendre_sym_, sizeof( double ), size_sym, file_leg 
); + fread( legendre_asym_, sizeof( double ), size_asym, file_leg ); + fclose( file_leg ); + } + else { + compute_legendre_polynomialsopt2( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, + legendre_asym_, legendre_sym_begin_.data(), + legendre_asym_begin_.data() ); + /*file_leg = fopen( "legendre.bin", "wb" ); + fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg ); + fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg ); + fclose( file_leg );*/ + } } - } - // precomputations for Fourier transformations: - if ( useFFT_ ) { + // precomputations for Fourier transformations: + if ( useFFT_ ) { #if ATLAS_HAVE_FFTW - { - ATLAS_TRACE( "opt2 precomp FFTW" ); - int num_complex = ( nlonsGlobal_ / 2 ) + 1; - fft_in_ = fftw_alloc_complex( nlats * num_complex ); - fft_out_ = fftw_alloc_real( nlats * nlonsGlobal_ ); - plan_ = fftw_plan_many_dft_c2r( 1, &nlonsGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, - nlonsGlobal_, FFTW_ESTIMATE ); - } - // other FFT implementations should be added with #elif statements -#else - useFFT_ = false; // no FFT implemented => default to dgemm -#endif - } - if ( !useFFT_ ) { - alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons ); - if ( dgemmMethod1_ ) { { - ATLAS_TRACE( "opt2 precomp Fourier" ); - int idx = 0; - for ( int jlon = 0; jlon < nlons; jlon++ ) { - double factor = 1.; - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - if ( jm > 0 ) { factor = 2.; } - fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part - fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part - } - } + ATLAS_TRACE( "opt2 precomp FFTW" ); + int num_complex = ( nlonsGlobal_ / 2 ) + 1; + fft_in_ = fftw_alloc_complex( nlats * num_complex ); + fft_out_ = fftw_alloc_real( nlats * nlonsGlobal_ ); + plan_ = fftw_plan_many_dft_c2r( 1, &nlonsGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, + 1, nlonsGlobal_, FFTW_ESTIMATE ); } + // other FFT implementations should be added 
with #elif statements +#else + useFFT_ = false; // no FFT implemented => default to dgemm +#endif } - else { - { - ATLAS_TRACE( "opt2 precomp Fourier tp" ); - int idx = 0; - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - double factor = 1.; - if ( jm > 0 ) { factor = 2.; } + if ( !useFFT_ ) { + alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons ); + if ( dgemmMethod1_ ) { + { + ATLAS_TRACE( "opt2 precomp Fourier" ); + int idx = 0; for ( int jlon = 0; jlon < nlons; jlon++ ) { - fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part + double factor = 1.; + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + if ( jm > 0 ) { factor = 2.; } + fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part + fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part + } } - for ( int jlon = 0; jlon < nlons; jlon++ ) { - fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part + } + } + else { + { + ATLAS_TRACE( "opt2 precomp Fourier tp" ); + int idx = 0; + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + double factor = 1.; + if ( jm > 0 ) { factor = 2.; } + for ( int jlon = 0; jlon < nlons; jlon++ ) { + fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part + } + for ( int jlon = 0; jlon < nlons; jlon++ ) { + fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part + } } } } @@ -270,17 +253,19 @@ TransLocalopt2::TransLocalopt2( const Grid& grid, const long truncation, const e // -------------------------------------------------------------------------------------------------------------------- TransLocalopt2::~TransLocalopt2() { - free_aligned( legendre_sym_ ); - free_aligned( legendre_asym_ ); - if ( useFFT_ ) { + if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { + free_aligned( legendre_sym_ ); + free_aligned( legendre_asym_ ); + if ( useFFT_ ) { #if ATLAS_HAVE_FFTW - fftw_destroy_plan( plan_ ); - fftw_free( fft_in_ ); - fftw_free( fft_out_ ); + 
fftw_destroy_plan( plan_ ); + fftw_free( fft_in_ ); + fftw_free( fft_out_ ); #endif - } - else { - free_aligned( fourier_ ); + } + else { + free_aligned( fourier_ ); + } } } @@ -561,24 +546,78 @@ void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fiel } // namespace atlas else { ATLAS_TRACE( "invtrans_uv unstructured opt2" ); - int idx = 0; - for ( PointXY p : grid_.xy() ) { - double lon = p.x() * util::Constants::degreesToRadians(); - double lat = p.y() * util::Constants::degreesToRadians(); - double trcFT = truncation; - + grid::UnstructuredGrid gu = grid_; + double* zfn; + alloc_aligned( zfn, ( truncation + 1 ) * ( truncation + 1 ) ); + compute_zfnopt2( truncation, zfn ); + int size_fourier = nb_fields * 2; + double* legendre; + double* scl_fourier; + double* scl_fourier_tp; + double* fouriertp; + double* gp_opt; + alloc_aligned( legendre, legendre_size( truncation + 1 ) ); + alloc_aligned( scl_fourier, size_fourier * ( truncation + 1 ) ); + alloc_aligned( scl_fourier_tp, size_fourier * ( truncation + 1 ) ); + alloc_aligned( fouriertp, 2 * ( truncation + 1 ) ); + alloc_aligned( gp_opt, nb_fields ); + + // loop over all points: + for ( int ip = 0; ip < grid_.size(); ip++ ) { + PointXY p = gu.xy( ip ); + double lon = p.x() * util::Constants::degreesToRadians(); + double lat = p.y() * util::Constants::degreesToRadians(); + compute_legendre_polynomials_latopt2( truncation, lat, legendre, zfn ); // Legendre transform: - //invtrans_legendreopt2( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, - // legReal.data(), legImag.data() ); - - // Fourier transform: - //invtrans_fourieropt2( trcFT, lon, nb_fields, legReal.data(), legImag.data(), - // gp_tmp.data() + ( nb_fields * idx ) ); - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); + { + //ATLAS_TRACE( "opt Legendre dgemm" ); + for ( int jm = 0; jm <= truncation; jm++ ) { + int noff = ( 2 * truncation 
+ 3 - jm ) * jm / 2, ns = truncation - jm + 1; + eckit::linalg::Matrix A( eckit::linalg::Matrix( + const_cast( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) ); + eckit::linalg::Matrix B( legendre + noff, ns, 1 ); + eckit::linalg::Matrix C( scl_fourier + jm * size_fourier, nb_fields * 2, 1 ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + } + { + //ATLAS_TRACE( "opt transposition in Fourier" ); + int idx = 0; + for ( int jm = 0; jm < truncation + 1; jm++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int pos_tp = imag + 2 * ( jm + ( truncation + 1 ) * ( jfld ) ); + //int pos = jfld + nb_fields * ( imag + 2 * ( jm ) ); + scl_fourier_tp[pos_tp] = scl_fourier[idx++]; // = scl_fourier[pos] + } + } + } + } + + // Fourier transformation: + int idx = 0; + fouriertp[idx++] = 1.; // real part + fouriertp[idx++] = 0.; // imaginary part + for ( int jm = 1; jm < truncation + 1; jm++ ) { + fouriertp[idx++] = +2. * std::cos( jm * lon ); // real part + fouriertp[idx++] = -2. 
* std::sin( jm * lon ); // imaginary part + } + { + //ATLAS_TRACE( "opt Fourier dgemm" ); + eckit::linalg::Matrix A( fouriertp, 1, ( truncation + 1 ) * 2 ); + eckit::linalg::Matrix B( scl_fourier_tp, ( truncation + 1 ) * 2, nb_fields ); + eckit::linalg::Matrix C( gp_opt, 1, nb_fields ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + for ( int j = 0; j < nb_fields; j++ ) { + gp_fields[ip + j * grid_.size()] = gp_opt[j]; + } } - ++idx; } + free_aligned( legendre ); + free_aligned( scl_fourier ); + free_aligned( scl_fourier_tp ); + free_aligned( fouriertp ); + free_aligned( gp_opt ); } } // namespace trans } // namespace atlas diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc index c285d32b2..0d440bffd 100644 --- a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc +++ b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc @@ -204,6 +204,34 @@ void compute_legendre_polynomialsopt3( } } +void compute_legendre_polynomials_allopt3( + const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double legendre[] ) // start indices for different zonal wave numbers, asymmetric part +{ + auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; }; + std::vector legpol( legendre_size( trc ) ); + std::vector zfn( ( trc + 1 ) * ( trc + 1 ) ); + auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; }; + auto idxmnl = [&]( int jm, int jn, int jlat ) { + return ( 2 * trc + 3 - jm ) * jm / 2 * nlats + jlat * ( trc - jm + 1 ) + jn - jm; + }; + compute_zfnopt3( trc, zfn.data() ); + + // Loop over latitudes: + for ( int jlat = 0; jlat < nlats; ++jlat ) { + // compute legendre polynomials for current latitude: + compute_legendre_polynomials_latopt3( trc, lats[jlat], legpol.data(), zfn.data() ); + + for ( int jm = 1; jm <= trc; ++jm ) { + for ( int jn = jm; jn 
<= trc; ++jn ) { + legendre[idxmnl( jm, jn, jlat )] = legpol[idxmn( jm, jn )]; + } + } + } +} // namespace trans + // -------------------------------------------------------------------------------------------------------------------- } // namespace trans diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h index f5dbd7aa3..93ebb49f7 100644 --- a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h +++ b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h @@ -48,6 +48,11 @@ void compute_legendre_polynomialsopt3( size_t leg_start_sym[], // start indices for different zonal wave numbers, symmetric part size_t leg_start_asym[] ); // start indices for different zonal wave numbers, asymmetric part +void compute_legendre_polynomials_allopt3( const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double legendre[] ); // legendre polynomials for all latitudes + // -------------------------------------------------------------------------------------------------------------------- } // namespace trans diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index f214c6309..45f0245c4 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -243,6 +243,17 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long } } } + else { + // unstructured grid + ATLAS_TRACE( "opt2 precomp unstructured" ); + std::vector lats( grid_.size() ); + alloc_aligned( legendre_, legendre_size( truncation_ ) * grid_.size() ); + int j( 0 ); + for ( PointXY p : grid_.xy() ) { + lats[j++] = p.y() * util::Constants::degreesToRadians(); + } + compute_legendre_polynomials_allopt3( truncation_, grid_.size(), lats.data(), legendre_ ); + } } // namespace trans // 
-------------------------------------------------------------------------------------------------------------------- @@ -267,6 +278,9 @@ TransLocalopt3::~TransLocalopt3() { free_aligned( fourier_ ); } } + else { + free_aligned( legendre_ ); + } } // -------------------------------------------------------------------------------------------------------------------- @@ -550,6 +564,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel double* zfn; alloc_aligned( zfn, ( truncation + 1 ) * ( truncation + 1 ) ); compute_zfnopt3( truncation, zfn ); + int nlats = grid_.size(); int size_fourier = nb_fields * 2; double* legendre; double* scl_fourier; @@ -557,33 +572,32 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel double* fouriertp; double* gp_opt; alloc_aligned( legendre, legendre_size( truncation + 1 ) ); - alloc_aligned( scl_fourier, size_fourier * ( truncation + 1 ) ); + alloc_aligned( scl_fourier, size_fourier * ( truncation + 1 ) * nlats ); alloc_aligned( scl_fourier_tp, size_fourier * ( truncation + 1 ) ); alloc_aligned( fouriertp, 2 * ( truncation + 1 ) ); alloc_aligned( gp_opt, nb_fields ); + { + ATLAS_TRACE( "opt Legendre dgemm" ); + for ( int jm = 0; jm <= truncation; jm++ ) { + int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; + eckit::linalg::Matrix A( eckit::linalg::Matrix( + const_cast( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) ); + eckit::linalg::Matrix B( legendre_ + noff * nlats, ns, nlats ); + eckit::linalg::Matrix C( scl_fourier + jm * size_fourier, nb_fields * 2, nlats ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + } + // loop over all points: for ( int ip = 0; ip < grid_.size(); ip++ ) { PointXY p = gu.xy( ip ); double lon = p.x() * util::Constants::degreesToRadians(); double lat = p.y() * util::Constants::degreesToRadians(); - compute_legendre_polynomials_latopt3( truncation, lat, legendre, zfn ); - // Legendre 
transform: - { - //ATLAS_TRACE( "opt Legendre dgemm" ); - for ( int jm = 0; jm <= truncation; jm++ ) { - int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; - eckit::linalg::Matrix A( eckit::linalg::Matrix( - const_cast( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) ); - eckit::linalg::Matrix B( legendre + noff, ns, 1 ); - eckit::linalg::Matrix C( scl_fourier + jm * size_fourier, nb_fields * 2, 1 ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - } { //ATLAS_TRACE( "opt transposition in Fourier" ); - int idx = 0; for ( int jm = 0; jm < truncation + 1; jm++ ) { + int idx = nb_fields * 2 * ( ip + nlats * jm ); for ( int imag = 0; imag < 2; imag++ ) { for ( int jfld = 0; jfld < nb_fields; jfld++ ) { int pos_tp = imag + 2 * ( jm + ( truncation + 1 ) * ( jfld ) ); @@ -619,6 +633,11 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel free_aligned( fouriertp ); free_aligned( gp_opt ); } + for ( int j = 0; j < nb_fields * grid_.size(); j++ ) { + Log::info() << gp_fields[j] << " "; + } + Log::info() << std::endl; + } // namespace trans } // namespace atlas diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h index ac33f6395..c97bb1b7f 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.h +++ b/src/atlas/trans/localopt3/TransLocalopt3.h @@ -120,6 +120,7 @@ class TransLocalopt3 : public trans::TransImpl { int jlonMin_; int nlonsGlobal_; bool precompute_; + double* legendre_; double* legendre_sym_; double* legendre_asym_; double* fourier_; diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 95a49eeb3..a41a954c4 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -1098,10 +1098,10 @@ CASE( "test_trans_unstructured" ) { } Grid gu = grid::UnstructuredGrid( new std::vector( pts ) ); - int trc = 120; + int trc = 12; double rav1 = 0., rav2 = 0.; // compute 
average rms errors of transLocal1 and transLocal2 - int nb_scalar = 100, nb_vordiv = 0; + int nb_scalar = 1, nb_vordiv = 0; int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; std::vector sp( 2 * N * nb_scalar ); std::vector vor( 2 * N * nb_vordiv ); @@ -1133,7 +1133,7 @@ CASE( "test_trans_unstructured" ) { if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. && icase < 1000 ) { auto start = std::chrono::system_clock::now(); - trans::Trans transLocal1( g, trc, util::Config( "type", "localopt3" ) ); + trans::Trans transLocal1( gu, trc, util::Config( "type", "localopt3" ) ); trans::Trans transLocal2( gu, trc, util::Config( "type", "localopt3" ) ); for ( int j = 0; j < 2 * N * nb_scalar; j++ ) { sp[j] = 0.; @@ -1158,11 +1158,12 @@ CASE( "test_trans_unstructured" ) { spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(), rgp_analytic.data(), ivar_in, ivar_out ); - //Log::info() << icase << " m=" << m << " n=" << n << " imag=" << imag << " structured: "; + Log::info() + << icase << " m=" << m << " n=" << n << " imag=" << imag << " unstructured: "; EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), div.data(), rgp1.data() ) ); - //Log::info() << icase << " m=" << m << " n=" << n << " imag=" << imag << " unstructured: "; + Log::info() << icase << " m=" << m << " n=" << n << " imag=" << imag << " structured: "; EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), div.data(), rgp2.data() ) ); From 71be7170a91df73fd44d606302cb9fade2018620 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 5 Apr 2018 19:13:31 +0100 Subject: [PATCH 042/123] two options for unstructured grids are working now in opt3. One option precomputes Legendre-polynomials to create bigger dgemm calls (unstruct_precomp_==true), the other computes them on the fly. Also started some cleanup. 
--- src/atlas/CMakeLists.txt | 4 - .../trans/localopt3/FourierTransformsopt3.cc | 78 --- .../trans/localopt3/FourierTransformsopt3.h | 38 -- .../localopt3/LegendrePolynomialsopt3.cc | 11 +- .../trans/localopt3/LegendreTransformsopt3.cc | 62 --- .../trans/localopt3/LegendreTransformsopt3.h | 37 -- src/atlas/trans/localopt3/TransLocalopt3.cc | 486 +++++++++++------- src/atlas/trans/localopt3/TransLocalopt3.h | 16 + src/tests/trans/test_transgeneral.cc | 17 +- 9 files changed, 317 insertions(+), 432 deletions(-) delete mode 100644 src/atlas/trans/localopt3/FourierTransformsopt3.cc delete mode 100644 src/atlas/trans/localopt3/FourierTransformsopt3.h delete mode 100644 src/atlas/trans/localopt3/LegendreTransformsopt3.cc delete mode 100644 src/atlas/trans/localopt3/LegendreTransformsopt3.h diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt index bf6f686c3..b018107ec 100644 --- a/src/atlas/CMakeLists.txt +++ b/src/atlas/CMakeLists.txt @@ -355,10 +355,6 @@ trans/localopt3/TransLocalopt3.h trans/localopt3/TransLocalopt3.cc trans/localopt3/LegendrePolynomialsopt3.h trans/localopt3/LegendrePolynomialsopt3.cc -trans/localopt3/LegendreTransformsopt3.h -trans/localopt3/LegendreTransformsopt3.cc -trans/localopt3/FourierTransformsopt3.h -trans/localopt3/FourierTransformsopt3.cc trans/localopt3/VorDivToUVLocalopt3.h trans/localopt3/VorDivToUVLocalopt3.cc diff --git a/src/atlas/trans/localopt3/FourierTransformsopt3.cc b/src/atlas/trans/localopt3/FourierTransformsopt3.cc deleted file mode 100644 index c02b57ada..000000000 --- a/src/atlas/trans/localopt3/FourierTransformsopt3.cc +++ /dev/null @@ -1,78 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
- * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor - * does it submit to any jurisdiction. - */ - -#include -#include -#include - -#include "atlas/trans/localopt3/FourierTransformsopt3.h" - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- - -void invtrans_fourieropt3( const size_t trcFT, - const double lon, // longitude in radians (in) - const int nb_fields, // Number of fields - const double rlegReal[], // associated Legendre functions, size (trc+1)*trc/2 (in) - const double rlegImag[], // associated Legendre functions, size (trc+1)*trc/2 (in) - double rgp[] ) // gridpoint -{ - for ( int jfld = 0; jfld < nb_fields; ++jfld ) { - rgp[jfld] = 0.; - } - // local Fourier transformation: - for ( int jm = 0; jm <= trcFT; ++jm ) { - const double cos = std::cos( jm * lon ); - const double sin = std::sin( jm * lon ); - for ( int jfld = 0; jfld < nb_fields; ++jfld ) { - double real = cos * rlegReal[jm * nb_fields + jfld]; - double imag = sin * rlegImag[jm * nb_fields + jfld]; - rgp[jfld] += real - imag; - } - } -} - -int fourier_truncationopt3( const int truncation, // truncation - const int nx, // number of longitudes - const int nxmax, // maximum nx - const int ndgl, // number of latitudes - const double lat, // latitude in radian - const bool fullgrid ) { // regular grid - int trc = truncation; - int trclin = ndgl - 1; - int trcquad = ndgl * 2 / 3 - 1; - if ( truncation >= trclin || fullgrid ) { - // linear - trc = ( nx - 1 ) / 2; - } - else if ( truncation >= trcquad ) { - // quadratic - double weight = 3 * ( trclin - truncation ) / ndgl; - double sqcos = std::pow( std::cos( lat ), 2 ); - - trc = ( nx - 1 ) / ( 2 + weight * sqcos ); - } - else { - // cubic - double sqcos = std::pow( std::cos( lat ), 2 ); - - trc = ( nx - 1 ) / ( 2 + sqcos ) - 1; - } - trc = std::min( truncation, trc ); 
- return trc; -} - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt3/FourierTransformsopt3.h b/src/atlas/trans/localopt3/FourierTransformsopt3.h deleted file mode 100644 index b735ed145..000000000 --- a/src/atlas/trans/localopt3/FourierTransformsopt3.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#pragma once - -#include - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- -// Routine to compute the local Fourier transformation -// -// Author: -// Andreas Mueller *ECMWF* -// - -void invtrans_fourieropt3( const size_t trcFT, - const double lon, // longitude in radians (in) - const int nb_fields, // Number of fields - const double rlegReal[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) - const double rlegImag[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) - double rgp[] ); // gridpoint - -int fourier_truncationopt3( const int truncation, const int nx, const int nxmax, const int ndgl, const double lat, - const bool fullgrid ); - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc index 0d440bffd..1cddbc18b 100644 --- a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc +++ 
b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc @@ -204,11 +204,10 @@ void compute_legendre_polynomialsopt3( } } -void compute_legendre_polynomials_allopt3( - const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // latitudes in radians (in) - double legendre[] ) // start indices for different zonal wave numbers, asymmetric part +void compute_legendre_polynomials_allopt3( const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double legendre[] ) // legendre polynomials for all latitudes { auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; }; std::vector legpol( legendre_size( trc ) ); @@ -224,7 +223,7 @@ void compute_legendre_polynomials_allopt3( // compute legendre polynomials for current latitude: compute_legendre_polynomials_latopt3( trc, lats[jlat], legpol.data(), zfn.data() ); - for ( int jm = 1; jm <= trc; ++jm ) { + for ( int jm = 0; jm <= trc; ++jm ) { for ( int jn = jm; jn <= trc; ++jn ) { legendre[idxmnl( jm, jn, jlat )] = legpol[idxmn( jm, jn )]; } diff --git a/src/atlas/trans/localopt3/LegendreTransformsopt3.cc b/src/atlas/trans/localopt3/LegendreTransformsopt3.cc deleted file mode 100644 index 919e9246c..000000000 --- a/src/atlas/trans/localopt3/LegendreTransformsopt3.cc +++ /dev/null @@ -1,62 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. 
- */ - -#include - -#include "atlas/trans/localopt3/LegendreTransformsopt3.h" - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- - -void invtrans_legendreopt3( - const size_t trc, // truncation (in) - const size_t trcFT, // truncation for Fourier transformation (in) - const size_t trcLP, // truncation of Legendre polynomials data legpol. Needs to be >= trc (in) - const double legpol[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) - const int nb_fields, // number of fields - const double spec[], // spectral data, size (trc+1)*trc (in) - double leg_real[], // values of associated Legendre functions, size (trc+1)*trc/2 (out) - double leg_imag[] ) // values of associated Legendre functions, size (trc+1)*trc/2 (out) -{ - // Legendre transformation: - int k = 0, klp = 0; - for ( int jm = 0; jm <= trcFT; ++jm ) { - for ( int jfld = 0; jfld < nb_fields; ++jfld ) { - leg_real[jm * nb_fields + jfld] = 0.; - leg_imag[jm * nb_fields + jfld] = 0.; - } - for ( int jn = jm; jn <= trcLP; ++jn, ++klp ) { - if ( jn <= trc ) { - for ( int jfld = 0; jfld < nb_fields; ++jfld ) { - // not completely sure where this factor 2 comes from. One possible - // explanation: - // normalization of trigonometric functions in the spherical harmonics - // integral over square of trig function is 1 for m=0 and 0.5 (?) for - // m>0 - leg_real[jm * nb_fields + jfld] += 2. * spec[( 2 * k ) * nb_fields + jfld] * legpol[klp]; - leg_imag[jm * nb_fields + jfld] += 2. 
* spec[( 2 * k + 1 ) * nb_fields + jfld] * legpol[klp]; - } - ++k; - } - } - } - // Undo factor 2 for (jm == 0) - for ( int jfld = 0; jfld < nb_fields; ++jfld ) { - leg_real[jfld] /= 2.; - leg_imag[jfld] /= 2.; - } -} - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt3/LegendreTransformsopt3.h b/src/atlas/trans/localopt3/LegendreTransformsopt3.h deleted file mode 100644 index 7205b3bf0..000000000 --- a/src/atlas/trans/localopt3/LegendreTransformsopt3.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#pragma once - -#include - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- -// Routine to compute the Legendre transformation -// -// Author: -// Andreas Mueller *ECMWF* -// -void invtrans_legendreopt3( - const size_t trc, // truncation (in) - const size_t trcFT, // truncation for Fourier transformation (in) - const size_t trcLP, // truncation of Legendre polynomials data legpol. 
Needs to be >= trc (in) - const double legpol[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) - const int nb_fields, // number of fields - const double spec[], // spectral data, size (trc+1)*trc (in) - double leg_real[], // values of associated Legendre functions, size (trc+1)*trc/2 (out) - double leg_imag[] ); // values of associated Legendre functions, size (trc+1)*trc/2 (out) - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index 45f0245c4..d7184cb78 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -17,9 +17,7 @@ #include "atlas/runtime/Log.h" #include "atlas/trans/VorDivToUV.h" #include "atlas/trans/local/LegendrePolynomials.h" -#include "atlas/trans/localopt3/FourierTransformsopt3.h" #include "atlas/trans/localopt3/LegendrePolynomialsopt3.h" -#include "atlas/trans/localopt3/LegendreTransformsopt3.h" #include "atlas/util/Constants.h" #include "eckit/linalg/LinearAlgebra.h" #include "eckit/linalg/Matrix.h" @@ -99,14 +97,15 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long double fft_threshold = 0.05; // fraction of latitudes of the full grid up to which FFT is used. // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine // on which this code is running! 
- int nlats = 0; - int nlons = 0; - int neqtr = 0; - useFFT_ = true; - dgemmMethod1_ = false; - nlatsNH_ = 0; - nlatsSH_ = 0; - nlatsLeg_ = 0; + int nlats = 0; + int nlons = 0; + int neqtr = 0; + useFFT_ = true; + dgemmMethod1_ = false; + unstruct_precomp_ = true; + nlatsNH_ = 0; + nlatsSH_ = 0; + nlatsLeg_ = 0; if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { grid::StructuredGrid g( grid_ ); nlats = g.ny(); @@ -245,14 +244,16 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long } else { // unstructured grid - ATLAS_TRACE( "opt2 precomp unstructured" ); - std::vector lats( grid_.size() ); - alloc_aligned( legendre_, legendre_size( truncation_ ) * grid_.size() ); - int j( 0 ); - for ( PointXY p : grid_.xy() ) { - lats[j++] = p.y() * util::Constants::degreesToRadians(); + if ( unstruct_precomp_ ) { + ATLAS_TRACE( "opt3 precomp unstructured" ); + std::vector lats( grid_.size() ); + alloc_aligned( legendre_, legendre_size( truncation_ ) * grid_.size() ); + int j( 0 ); + for ( PointXY p : grid_.xy() ) { + lats[j++] = p.y() * util::Constants::degreesToRadians(); + } + compute_legendre_polynomials_allopt3( truncation_, grid_.size(), lats.data(), legendre_ ); } - compute_legendre_polynomials_allopt3( truncation_, grid_.size(), lats.data(), legendre_ ); } } // namespace trans @@ -279,7 +280,7 @@ TransLocalopt3::~TransLocalopt3() { } } else { - free_aligned( legendre_ ); + if ( unstruct_precomp_ ) { free_aligned( legendre_ ); } } } @@ -329,6 +330,263 @@ void gp_transposeopt3( const int nb_size, const int nb_fields, const double gp_t } } +int TransLocalopt3::posMethod( const int jfld, const int imag, const int jlat, const int jm, const int nb_fields, + const int nlats ) const { + if ( useFFT_ || !dgemmMethod1_ ) { return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); } + else { + return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) ); + }; +}; + +void TransLocalopt3::invtrans_legendreopt3( const 
int truncation, const int nlats, const int nb_fields, + const double scalar_spectra[], double scl_fourier[], + const eckit::Configuration& config ) const { + // Legendre transform: + { + ATLAS_TRACE( "opt3 Legendre dgemm" ); + for ( int jm = 0; jm <= truncation_; jm++ ) { + int size_sym = num_n( truncation_ + 1, jm, true ); + int size_asym = num_n( truncation_ + 1, jm, false ); + int n_imag = 2; + if ( jm == 0 ) { n_imag = 1; } + int size_fourier = nb_fields * n_imag * nlatsLeg_; + auto posFourier = [&]( int jfld, int imag, int jlat, int jm, int nlatsH ) { + return jfld + nb_fields * ( imag + n_imag * ( nlatsLeg_ - nlatsH + jlat ) ); + }; + double* scalar_sym; + double* scalar_asym; + double* scl_fourier_sym; + double* scl_fourier_asym; + alloc_aligned( scalar_sym, n_imag * nb_fields * size_sym ); + alloc_aligned( scalar_asym, n_imag * nb_fields * size_asym ); + alloc_aligned( scl_fourier_sym, size_fourier ); + alloc_aligned( scl_fourier_asym, size_fourier ); + { + //ATLAS_TRACE( "opt3 Legendre split" ); + int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2; + // the choice between the following two code lines determines whether + // total wavenumbers are summed in an ascending or descending order. + // The trans library in IFS uses descending order because it should + // be more accurate (higher wavenumbers have smaller contributions). + // This also needs to be changed when splitting the spectral data in + // compute_legendre_polynomialsopt3! 
+ //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { + for ( int jn = truncation_ + 1; jn >= jm; jn-- ) { + for ( int imag = 0; imag < n_imag; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) ); + if ( jn <= truncation && jm < truncation ) { + if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } + else { + scalar_asym[ia++] = scalar_spectra[idx + ioff]; + } + } + else { + if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = 0.; } + else { + scalar_asym[ia++] = 0.; + } + } + } + } + } + ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym ); + } + { + eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym ); + eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsLeg_ ); + eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLeg_ ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + if ( size_asym > 0 ) { + eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym ); + eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsLeg_ ); + eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + { + //ATLAS_TRACE( "opt3 merge spheres" ); + // northern hemisphere: + for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) { + for ( int imag = 0; imag < n_imag; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ ); + scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] = + scl_fourier_sym[idx] + scl_fourier_asym[idx]; + } + } + } + // southern hemisphere: + for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) { + for ( int imag = 0; imag < n_imag; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = posFourier( jfld, imag, jlat, jm, nlatsSH_ ); + int jslat = nlats - jlat - 1; + 
scl_fourier[posMethod( jfld, imag, jslat, jm, nb_fields, nlats )] = + scl_fourier_sym[idx] - scl_fourier_asym[idx]; + } + } + } + } + free_aligned( scalar_sym ); + free_aligned( scalar_asym ); + free_aligned( scl_fourier_sym ); + free_aligned( scl_fourier_asym ); + } + } +} + +void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const int nb_fields, + const int nb_vordiv_fields, const double scalar_spectra[], + double gp_fields[], const eckit::Configuration& config ) const { + ATLAS_TRACE( "invtrans_uv unstructured opt3" ); + grid::UnstructuredGrid gu = grid_; + int nlats = grid_.size(); + int size_fourier = nb_fields * 2; + double* legendre; + double* scl_fourier; + double* scl_fourier_tp; + double* fouriertp; + double* gp_opt; + alloc_aligned( scl_fourier, size_fourier * (truncation)*nlats ); + alloc_aligned( scl_fourier_tp, size_fourier * ( truncation ) ); + alloc_aligned( fouriertp, 2 * ( truncation ) ); + alloc_aligned( gp_opt, nb_fields ); + + { + ATLAS_TRACE( "opt Legendre dgemm" ); + for ( int jm = 0; jm < truncation; jm++ ) { + int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; + eckit::linalg::Matrix A( eckit::linalg::Matrix( + const_cast( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) ); + eckit::linalg::Matrix B( legendre_ + noff * nlats, ns, nlats ); + eckit::linalg::Matrix C( scl_fourier + jm * size_fourier * nlats, nb_fields * 2, nlats ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + } + + // loop over all points: + for ( int ip = 0; ip < grid_.size(); ip++ ) { + PointXY p = gu.xy( ip ); + double lon = p.x() * util::Constants::degreesToRadians(); + double lat = p.y() * util::Constants::degreesToRadians(); + { + //ATLAS_TRACE( "opt transposition in Fourier" ); + for ( int jm = 0; jm < truncation; jm++ ) { + int idx = nb_fields * 2 * ( ip + nlats * jm ); + for ( int imag = 0; imag < 2; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int pos_tp = imag + 2 * ( 
jm + ( truncation ) * ( jfld ) ); + //int pos = jfld + nb_fields * ( imag + 2 * ( jm ) ); + scl_fourier_tp[pos_tp] = scl_fourier[idx++]; // = scl_fourier[pos] + } + } + } + } + + // Fourier transformation: + int idx = 0; + fouriertp[idx++] = 1.; // real part + fouriertp[idx++] = 0.; // imaginary part + for ( int jm = 1; jm < truncation; jm++ ) { + fouriertp[idx++] = +2. * std::cos( jm * lon ); // real part + fouriertp[idx++] = -2. * std::sin( jm * lon ); // imaginary part + } + { + //ATLAS_TRACE( "opt Fourier dgemm" ); + eckit::linalg::Matrix A( fouriertp, 1, (truncation)*2 ); + eckit::linalg::Matrix B( scl_fourier_tp, (truncation)*2, nb_fields ); + eckit::linalg::Matrix C( gp_opt, 1, nb_fields ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + for ( int j = 0; j < nb_fields; j++ ) { + gp_fields[ip + j * grid_.size()] = gp_opt[j]; + } + } + } + free_aligned( scl_fourier ); + free_aligned( scl_fourier_tp ); + free_aligned( fouriertp ); + free_aligned( gp_opt ); +} + +void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_fields, const int nb_vordiv_fields, + const double scalar_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { + ATLAS_TRACE( "invtrans_uv unstructured opt3" ); + grid::UnstructuredGrid gu = grid_; + double* zfn; + alloc_aligned( zfn, ( truncation + 1 ) * ( truncation + 1 ) ); + compute_zfnopt3( truncation, zfn ); + int size_fourier = nb_fields * 2; + double* legendre; + double* scl_fourier; + double* scl_fourier_tp; + double* fouriertp; + double* gp_opt; + alloc_aligned( legendre, legendre_size( truncation + 1 ) ); + alloc_aligned( scl_fourier, size_fourier * ( truncation + 1 ) ); + alloc_aligned( scl_fourier_tp, size_fourier * ( truncation + 1 ) ); + alloc_aligned( fouriertp, 2 * ( truncation + 1 ) ); + alloc_aligned( gp_opt, nb_fields ); + + // loop over all points: + for ( int ip = 0; ip < grid_.size(); ip++ ) { + PointXY p = gu.xy( ip ); + double lon = p.x() * 
util::Constants::degreesToRadians(); + double lat = p.y() * util::Constants::degreesToRadians(); + compute_legendre_polynomials_latopt3( truncation, lat, legendre, zfn ); + // Legendre transform: + { + //ATLAS_TRACE( "opt Legendre dgemm" ); + for ( int jm = 0; jm <= truncation; jm++ ) { + int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; + eckit::linalg::Matrix A( eckit::linalg::Matrix( + const_cast( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) ); + eckit::linalg::Matrix B( legendre + noff, ns, 1 ); + eckit::linalg::Matrix C( scl_fourier + jm * size_fourier, nb_fields * 2, 1 ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + } + { + //ATLAS_TRACE( "opt transposition in Fourier" ); + int idx = 0; + for ( int jm = 0; jm < truncation + 1; jm++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int pos_tp = imag + 2 * ( jm + ( truncation + 1 ) * ( jfld ) ); + //int pos = jfld + nb_fields * ( imag + 2 * ( jm ) ); + scl_fourier_tp[pos_tp] = scl_fourier[idx++]; // = scl_fourier[pos] + } + } + } + } + + // Fourier transformation: + int idx = 0; + fouriertp[idx++] = 1.; // real part + fouriertp[idx++] = 0.; // imaginary part + for ( int jm = 1; jm < truncation + 1; jm++ ) { + fouriertp[idx++] = +2. * std::cos( jm * lon ); // real part + fouriertp[idx++] = -2. 
* std::sin( jm * lon ); // imaginary part + } + { + //ATLAS_TRACE( "opt Fourier dgemm" ); + eckit::linalg::Matrix A( fouriertp, 1, ( truncation + 1 ) * 2 ); + eckit::linalg::Matrix B( scl_fourier_tp, ( truncation + 1 ) * 2, nb_fields ); + eckit::linalg::Matrix C( gp_opt, 1, nb_fields ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + for ( int j = 0; j < nb_fields; j++ ) { + gp_fields[ip + j * grid_.size()] = gp_opt[j]; + } + } + } + free_aligned( legendre ); + free_aligned( scl_fourier ); + free_aligned( scl_fourier_tp ); + free_aligned( fouriertp ); + free_aligned( gp_opt ); +} + //----------------------------------------------------------------------------- // Routine to compute the spectral transform by using a localopt3 Fourier transformation // for a grid (same latitude for all longitudes, allows to compute Legendre functions @@ -353,113 +611,13 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel // Transform if ( grid::StructuredGrid g = grid_ ) { ATLAS_TRACE( "invtrans_uv structured opt3" ); - int nlats = g.ny(); - int nlons = g.nxmax(); - auto posMethod = [&]( int jfld, int imag, int jlat, int jm ) { - if ( useFFT_ || !dgemmMethod1_ ) { - return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); - } - else { - return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) ); - }; - }; + int nlats = g.ny(); + int nlons = g.nxmax(); int size_fourier_max = nb_fields * 2 * nlats; double* scl_fourier; alloc_aligned( scl_fourier, size_fourier_max * ( truncation_ + 1 ) ); + invtrans_legendreopt3( truncation, nlats, nb_scalar_fields, scalar_spectra, scl_fourier, config ); - // Legendre transform: - { - ATLAS_TRACE( "opt3 Legendre dgemm" ); - for ( int jm = 0; jm <= truncation_; jm++ ) { - int size_sym = num_n( truncation_ + 1, jm, true ); - int size_asym = num_n( truncation_ + 1, jm, false ); - int n_imag = 2; - if ( jm == 0 ) { n_imag = 1; } - int size_fourier = nb_fields * n_imag * nlatsLeg_; - auto 
posFourier = [&]( int jfld, int imag, int jlat, int jm, int nlatsH ) { - return jfld + nb_fields * ( imag + n_imag * ( nlatsLeg_ - nlatsH + jlat ) ); - }; - double* scalar_sym; - double* scalar_asym; - double* scl_fourier_sym; - double* scl_fourier_asym; - alloc_aligned( scalar_sym, n_imag * nb_fields * size_sym ); - alloc_aligned( scalar_asym, n_imag * nb_fields * size_asym ); - alloc_aligned( scl_fourier_sym, size_fourier ); - alloc_aligned( scl_fourier_asym, size_fourier ); - { - //ATLAS_TRACE( "opt3 Legendre split" ); - int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2; - // the choice between the following two code lines determines whether - // total wavenumbers are summed in an ascending or descending order. - // The trans library in IFS uses descending order because it should - // be more accurate (higher wavenumbers have smaller contributions). - // This also needs to be changed when splitting the spectral data in - // compute_legendre_polynomialsopt3! 
- //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { - for ( int jn = truncation_ + 1; jn >= jm; jn-- ) { - for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) ); - if ( jn <= truncation && jm < truncation ) { - if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } - else { - scalar_asym[ia++] = scalar_spectra[idx + ioff]; - } - } - else { - if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = 0.; } - else { - scalar_asym[ia++] = 0.; - } - } - } - } - } - ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym ); - } - { - eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym ); - eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsLeg_ ); - eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLeg_ ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - if ( size_asym > 0 ) { - eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym ); - eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsLeg_ ); - eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - { - //ATLAS_TRACE( "opt3 merge spheres" ); - // northern hemisphere: - for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) { - for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ ); - scl_fourier[posMethod( jfld, imag, jlat, jm )] = - scl_fourier_sym[idx] + scl_fourier_asym[idx]; - } - } - } - // southern hemisphere: - for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) { - for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int idx = posFourier( jfld, imag, jlat, jm, nlatsSH_ ); - int jslat = nlats - jlat - 1; - scl_fourier[posMethod( jfld, 
imag, jslat, jm )] = - scl_fourier_sym[idx] - scl_fourier_asym[idx]; - } - } - } - } - free_aligned( scalar_sym ); - free_aligned( scalar_asym ); - free_aligned( scl_fourier_sym ); - free_aligned( scl_fourier_asym ); - } - } // Fourier transformation: if ( useFFT_ ) { #if ATLAS_HAVE_FFTW @@ -470,11 +628,12 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel for ( int jfld = 0; jfld < nb_fields; jfld++ ) { int idx = 0; for ( int jlat = 0; jlat < nlats; jlat++ ) { - fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0 )]; + fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0, nb_fields, nlats )]; for ( int jm = 1; jm < num_complex; jm++, idx++ ) { for ( int imag = 0; imag < 2; imag++ ) { if ( jm <= truncation_ ) { - fft_in_[idx][imag] = scl_fourier[posMethod( jfld, imag, jlat, jm )]; + fft_in_[idx][imag] = + scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )]; } else { fft_in_[idx][imag] = 0.; @@ -559,84 +718,15 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel free_aligned( scl_fourier ); } // namespace atlas else { - ATLAS_TRACE( "invtrans_uv unstructured opt3" ); - grid::UnstructuredGrid gu = grid_; - double* zfn; - alloc_aligned( zfn, ( truncation + 1 ) * ( truncation + 1 ) ); - compute_zfnopt3( truncation, zfn ); - int nlats = grid_.size(); - int size_fourier = nb_fields * 2; - double* legendre; - double* scl_fourier; - double* scl_fourier_tp; - double* fouriertp; - double* gp_opt; - alloc_aligned( legendre, legendre_size( truncation + 1 ) ); - alloc_aligned( scl_fourier, size_fourier * ( truncation + 1 ) * nlats ); - alloc_aligned( scl_fourier_tp, size_fourier * ( truncation + 1 ) ); - alloc_aligned( fouriertp, 2 * ( truncation + 1 ) ); - alloc_aligned( gp_opt, nb_fields ); - - { - ATLAS_TRACE( "opt Legendre dgemm" ); - for ( int jm = 0; jm <= truncation; jm++ ) { - int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; - eckit::linalg::Matrix A( 
eckit::linalg::Matrix( - const_cast( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) ); - eckit::linalg::Matrix B( legendre_ + noff * nlats, ns, nlats ); - eckit::linalg::Matrix C( scl_fourier + jm * size_fourier, nb_fields * 2, nlats ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } + if ( unstruct_precomp_ ) { + invtrans_unstructured_precomp( truncation, nb_scalar_fields, nb_vordiv_fields, scalar_spectra, + gp_fields, config ); } - - // loop over all points: - for ( int ip = 0; ip < grid_.size(); ip++ ) { - PointXY p = gu.xy( ip ); - double lon = p.x() * util::Constants::degreesToRadians(); - double lat = p.y() * util::Constants::degreesToRadians(); - { - //ATLAS_TRACE( "opt transposition in Fourier" ); - for ( int jm = 0; jm < truncation + 1; jm++ ) { - int idx = nb_fields * 2 * ( ip + nlats * jm ); - for ( int imag = 0; imag < 2; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int pos_tp = imag + 2 * ( jm + ( truncation + 1 ) * ( jfld ) ); - //int pos = jfld + nb_fields * ( imag + 2 * ( jm ) ); - scl_fourier_tp[pos_tp] = scl_fourier[idx++]; // = scl_fourier[pos] - } - } - } - } - - // Fourier transformation: - int idx = 0; - fouriertp[idx++] = 1.; // real part - fouriertp[idx++] = 0.; // imaginary part - for ( int jm = 1; jm < truncation + 1; jm++ ) { - fouriertp[idx++] = +2. * std::cos( jm * lon ); // real part - fouriertp[idx++] = -2. 
* std::sin( jm * lon ); // imaginary part - } - { - //ATLAS_TRACE( "opt Fourier dgemm" ); - eckit::linalg::Matrix A( fouriertp, 1, ( truncation + 1 ) * 2 ); - eckit::linalg::Matrix B( scl_fourier_tp, ( truncation + 1 ) * 2, nb_fields ); - eckit::linalg::Matrix C( gp_opt, 1, nb_fields ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - for ( int j = 0; j < nb_fields; j++ ) { - gp_fields[ip + j * grid_.size()] = gp_opt[j]; - } - } + else { + invtrans_unstructured( truncation, nb_scalar_fields, nb_vordiv_fields, scalar_spectra, gp_fields, + config ); } - free_aligned( legendre ); - free_aligned( scl_fourier ); - free_aligned( scl_fourier_tp ); - free_aligned( fouriertp ); - free_aligned( gp_opt ); - } - for ( int j = 0; j < nb_fields * grid_.size(); j++ ) { - Log::info() << gp_fields[j] << " "; } - Log::info() << std::endl; } // namespace trans } // namespace atlas diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h index c97bb1b7f..44be5a848 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.h +++ b/src/atlas/trans/localopt3/TransLocalopt3.h @@ -105,6 +105,21 @@ class TransLocalopt3 : public trans::TransImpl { double divergence_spectra[], const eckit::Configuration& = util::NoConfig() ) const override; private: + int posMethod( const int jfld, const int imag, const int jlat, const int jm, const int nb_fields, + const int nlats ) const; + + void invtrans_legendreopt3( const int truncation, const int nlats, const int nb_fields, + const double scalar_spectra[], double scl_fourier[], + const eckit::Configuration& config ) const; + + void invtrans_unstructured_precomp( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, + const double scalar_spectra[], double gp_fields[], + const eckit::Configuration& = util::NoConfig() ) const; + + void invtrans_unstructured( const int truncation, const int nb_fields, const int nb_vordiv_fields, + const double scalar_spectra[], double gp_fields[], + 
const eckit::Configuration& config ) const; + void invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, const double scalar_spectra[], double gp_fields[], const eckit::Configuration& = util::NoConfig() ) const; @@ -113,6 +128,7 @@ class TransLocalopt3 : public trans::TransImpl { Grid grid_; bool useFFT_; bool dgemmMethod1_; + bool unstruct_precomp_; int truncation_; int nlatsNH_; int nlatsSH_; diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index a41a954c4..8b8a4af26 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -1082,9 +1082,9 @@ CASE( "test_trans_unstructured" ) { double tolerance = 1.e-13; //Domain testdomain = RectangularDomain( {20., 25.}, {40., 60.} ); - Domain testdomain = RectangularDomain( {20., 25.}, {45., 50.} ); + //Domain testdomain = RectangularDomain( {10., 25.}, {35., 50.} ); // Grid: (Adjust the following line if the test takes too long!) - Grid g( "F12", testdomain ); + Grid g( "F120" ); grid::StructuredGrid gs( g ); std::vector pts( g.size() ); int idx( 0 ); @@ -1092,13 +1092,13 @@ CASE( "test_trans_unstructured" ) { double lat = gs.y( j ); for ( size_t i = 0; i < gs.nx( j ); ++i ) { double lon = gs.x( i, j ); - Log::info() << "idx=" << idx << " lon=" << lon << " lat=" << lat << std::endl; + //Log::info() << "idx=" << idx << " lon=" << lon << " lat=" << lat << std::endl; pts[idx++].assign( lon, lat ); } } Grid gu = grid::UnstructuredGrid( new std::vector( pts ) ); - int trc = 12; + int trc = 120; double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 int nb_scalar = 1, nb_vordiv = 0; @@ -1131,9 +1131,9 @@ CASE( "test_trans_unstructured" ) { for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. 
&& - icase < 1000 ) { + icase < 1 ) { auto start = std::chrono::system_clock::now(); - trans::Trans transLocal1( gu, trc, util::Config( "type", "localopt3" ) ); + trans::Trans transLocal1( g, trc, util::Config( "type", "localopt3" ) ); trans::Trans transLocal2( gu, trc, util::Config( "type", "localopt3" ) ); for ( int j = 0; j < 2 * N * nb_scalar; j++ ) { sp[j] = 0.; @@ -1158,12 +1158,11 @@ CASE( "test_trans_unstructured" ) { spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(), rgp_analytic.data(), ivar_in, ivar_out ); - Log::info() - << icase << " m=" << m << " n=" << n << " imag=" << imag << " unstructured: "; + //Log::info() << icase << " m=" << m << " n=" << n << " imag=" << imag << " structured: "; EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), div.data(), rgp1.data() ) ); - Log::info() << icase << " m=" << m << " n=" << n << " imag=" << imag << " structured: "; + //Log::info() << icase << " m=" << m << " n=" << n << " imag=" << imag << " unstructured: "; EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), div.data(), rgp2.data() ) ); From 01bc8dca392412a5b8fa3bc6624f8781ad1a8266 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 6 Apr 2018 12:12:08 +0100 Subject: [PATCH 043/123] more cleanup. Switched names of the two dgemm-methods. dgemm-method is now chosen via preprocessor variable TRANSLOCAL_DGEMM2. Current active test compares regular and unstructured grid with same number of different latitudes and longitudes. 
--- src/atlas/trans/localopt3/TransLocalopt3.cc | 298 ++++++++++---------- src/atlas/trans/localopt3/TransLocalopt3.h | 13 +- src/tests/trans/test_transgeneral.cc | 41 ++- 3 files changed, 194 insertions(+), 158 deletions(-) diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index d7184cb78..983a2ac0d 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -9,7 +9,7 @@ */ #include "atlas/trans/localopt3/TransLocalopt3.h" -#include +#include #include "atlas/array.h" #include "atlas/option.h" #include "atlas/parallel/mpi/mpi.h" @@ -101,7 +101,6 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long int nlons = 0; int neqtr = 0; useFFT_ = true; - dgemmMethod1_ = false; unstruct_precomp_ = true; nlatsNH_ = 0; nlatsSH_ = 0; @@ -194,7 +193,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long // precomputations for Fourier transformations: if ( useFFT_ ) { -#if ATLAS_HAVE_FFTW +#if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2 { ATLAS_TRACE( "opt3 precomp FFTW" ); int num_complex = ( nlonsGlobal_ / 2 ) + 1; @@ -210,36 +209,35 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long } if ( !useFFT_ ) { alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons ); - if ( dgemmMethod1_ ) { - { - ATLAS_TRACE( "opt3 precomp Fourier" ); - int idx = 0; +#if !TRANSLOCAL_DGEMM2 + { + ATLAS_TRACE( "opt3 precomp Fourier tp" ); + int idx = 0; + for ( int jm = 0; jm < truncation_ + 1; jm++ ) { + double factor = 1.; + if ( jm > 0 ) { factor = 2.; } for ( int jlon = 0; jlon < nlons; jlon++ ) { - double factor = 1.; - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - if ( jm > 0 ) { factor = 2.; } - fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part - fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part - } + fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; 
// real part + } + for ( int jlon = 0; jlon < nlons; jlon++ ) { + fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part } } } - else { - { - ATLAS_TRACE( "opt3 precomp Fourier tp" ); - int idx = 0; +#else + { + ATLAS_TRACE( "opt3 precomp Fourier" ); + int idx = 0; + for ( int jlon = 0; jlon < nlons; jlon++ ) { + double factor = 1.; for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - double factor = 1.; if ( jm > 0 ) { factor = 2.; } - for ( int jlon = 0; jlon < nlons; jlon++ ) { - fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part - } - for ( int jlon = 0; jlon < nlons; jlon++ ) { - fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part - } + fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part + fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part } } } +#endif } } else { @@ -269,7 +267,7 @@ TransLocalopt3::~TransLocalopt3() { free_aligned( legendre_sym_ ); free_aligned( legendre_asym_ ); if ( useFFT_ ) { -#if ATLAS_HAVE_FFTW +#if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2 fftw_destroy_plan( plan_ ); fftw_free( fft_in_ ); fftw_free( fft_out_ ); @@ -317,11 +315,15 @@ void TransLocalopt3::invtrans_vordiv2wind( const Field& spvor, const Field& spdi NOTIMP; } +// -------------------------------------------------------------------------------------------------------------------- + void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[], const eckit::Configuration& config ) const { invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields, config ); } +// -------------------------------------------------------------------------------------------------------------------- + void gp_transposeopt3( const int nb_size, const int nb_fields, const double gp_tmp[], double gp_fields[] ) { for ( int jgp = 0; jgp < nb_size; jgp++ ) { for ( int jfld = 0; jfld < nb_fields; jfld++ ) { @@ -330,13 +332,7 @@ void 
gp_transposeopt3( const int nb_size, const int nb_fields, const double gp_t } } -int TransLocalopt3::posMethod( const int jfld, const int imag, const int jlat, const int jm, const int nb_fields, - const int nlats ) const { - if ( useFFT_ || !dgemmMethod1_ ) { return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); } - else { - return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) ); - }; -}; +// -------------------------------------------------------------------------------------------------------------------- void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlats, const int nb_fields, const double scalar_spectra[], double scl_fourier[], @@ -436,6 +432,91 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat } } +// -------------------------------------------------------------------------------------------------------------------- + +void TransLocalopt3::invtrans_fourieropt3( const int nlats, const int nlons, const int nb_fields, double scl_fourier[], + double gp_fields[], const eckit::Configuration& config ) const { + // Fourier transformation: + if ( useFFT_ ) { +#if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2 + { + int num_complex = ( nlonsGlobal_ / 2 ) + 1; + { + ATLAS_TRACE( "opt3 FFTW" ); + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = 0; + for ( int jlat = 0; jlat < nlats; jlat++ ) { + fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0, nb_fields, nlats )]; + for ( int jm = 1; jm < num_complex; jm++, idx++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + if ( jm <= truncation_ ) { + fft_in_[idx][imag] = + scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )]; + } + else { + fft_in_[idx][imag] = 0.; + } + } + } + } + fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ ); + for ( int jlat = 0; jlat < nlats; jlat++ ) { + for ( int jlon = 0; jlon < nlons; jlon++ ) { + int j = jlon + jlonMin_; + if ( j >= nlonsGlobal_ ) { j -= nlonsGlobal_; } + 
gp_fields[jlon + nlons * ( jlat + nlats * jfld )] = fft_out_[j + nlonsGlobal_ * jlat]; + } + } + } + } + } +#endif + } + else { +#if !TRANSLOCAL_DGEMM2 + // dgemm-method 1 + { + ATLAS_TRACE( "opt3 Fourier dgemm method 1" ); + eckit::linalg::Matrix A( fourier_, nlons, ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats ); + eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } +#else + // dgemm-method 2 + // should be faster for small domains or large truncation + // but have not found any significant speedup so far + double* gp_opt3; + alloc_aligned( gp_opt3, nb_fields * grid_.size() ); + { + ATLAS_TRACE( "opt3 Fourier dgemm method 2" ); + eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlons ); + eckit::linalg::Matrix C( gp_opt3, nb_fields * nlats, nlons ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + + // Transposition in grid point space: + { + ATLAS_TRACE( "opt3 transposition in gp-space" ); + int idx = 0; + for ( int jlon = 0; jlon < nlons; jlon++ ) { + for ( int jlat = 0; jlat < nlats; jlat++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int pos_tp = jlon + nlons * ( jlat + nlats * ( jfld ) ); + //int pos = jfld + nb_fields * ( jlat + nlats * ( jlon ) ); + gp_fields[pos_tp] = gp_opt3[idx++]; // = gp_opt3[pos] + } + } + } + } + free_aligned( gp_opt3 ); +#endif + } +} + +// -------------------------------------------------------------------------------------------------------------------- + void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const int nb_fields, const int nb_vordiv_fields, const double scalar_spectra[], double gp_fields[], const eckit::Configuration& config ) const { @@ -466,40 +547,43 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const 
} // loop over all points: - for ( int ip = 0; ip < grid_.size(); ip++ ) { - PointXY p = gu.xy( ip ); - double lon = p.x() * util::Constants::degreesToRadians(); - double lat = p.y() * util::Constants::degreesToRadians(); - { - //ATLAS_TRACE( "opt transposition in Fourier" ); - for ( int jm = 0; jm < truncation; jm++ ) { - int idx = nb_fields * 2 * ( ip + nlats * jm ); - for ( int imag = 0; imag < 2; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int pos_tp = imag + 2 * ( jm + ( truncation ) * ( jfld ) ); - //int pos = jfld + nb_fields * ( imag + 2 * ( jm ) ); - scl_fourier_tp[pos_tp] = scl_fourier[idx++]; // = scl_fourier[pos] + { + ATLAS_TRACE( "opt Fourier dgemm" ); + + for ( int ip = 0; ip < grid_.size(); ip++ ) { + PointXY p = gu.xy( ip ); + double lon = p.x() * util::Constants::degreesToRadians(); + double lat = p.y() * util::Constants::degreesToRadians(); + { + //ATLAS_TRACE( "opt transposition in Fourier" ); + for ( int jm = 0; jm < truncation; jm++ ) { + int idx = nb_fields * 2 * ( ip + nlats * jm ); + for ( int imag = 0; imag < 2; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int pos_tp = imag + 2 * ( jm + ( truncation ) * ( jfld ) ); + //int pos = jfld + nb_fields * ( imag + 2 * ( jm ) ); + scl_fourier_tp[pos_tp] = scl_fourier[idx++]; // = scl_fourier[pos] + } } } } - } - // Fourier transformation: - int idx = 0; - fouriertp[idx++] = 1.; // real part - fouriertp[idx++] = 0.; // imaginary part - for ( int jm = 1; jm < truncation; jm++ ) { - fouriertp[idx++] = +2. * std::cos( jm * lon ); // real part - fouriertp[idx++] = -2. 
* std::sin( jm * lon ); // imaginary part - } - { - //ATLAS_TRACE( "opt Fourier dgemm" ); - eckit::linalg::Matrix A( fouriertp, 1, (truncation)*2 ); - eckit::linalg::Matrix B( scl_fourier_tp, (truncation)*2, nb_fields ); - eckit::linalg::Matrix C( gp_opt, 1, nb_fields ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - for ( int j = 0; j < nb_fields; j++ ) { - gp_fields[ip + j * grid_.size()] = gp_opt[j]; + // Fourier transformation: + int idx = 0; + fouriertp[idx++] = 1.; // real part + fouriertp[idx++] = 0.; // imaginary part + for ( int jm = 1; jm < truncation; jm++ ) { + fouriertp[idx++] = +2. * std::cos( jm * lon ); // real part + fouriertp[idx++] = -2. * std::sin( jm * lon ); // imaginary part + } + { + eckit::linalg::Matrix A( fouriertp, 1, (truncation)*2 ); + eckit::linalg::Matrix B( scl_fourier_tp, (truncation)*2, nb_fields ); + eckit::linalg::Matrix C( gp_opt, 1, nb_fields ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + for ( int j = 0; j < nb_fields; j++ ) { + gp_fields[ip + j * grid_.size()] = gp_opt[j]; + } } } } @@ -509,6 +593,8 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const free_aligned( gp_opt ); } +// -------------------------------------------------------------------------------------------------------------------- + void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_fields, const int nb_vordiv_fields, const double scalar_spectra[], double gp_fields[], const eckit::Configuration& config ) const { @@ -616,86 +702,13 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel int size_fourier_max = nb_fields * 2 * nlats; double* scl_fourier; alloc_aligned( scl_fourier, size_fourier_max * ( truncation_ + 1 ) ); + + // Legendre transformation: invtrans_legendreopt3( truncation, nlats, nb_scalar_fields, scalar_spectra, scl_fourier, config ); // Fourier transformation: - if ( useFFT_ ) { -#if ATLAS_HAVE_FFTW - { - int num_complex = ( 
nlonsGlobal_ / 2 ) + 1; - { - ATLAS_TRACE( "opt3 FFTW" ); - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int idx = 0; - for ( int jlat = 0; jlat < nlats; jlat++ ) { - fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0, nb_fields, nlats )]; - for ( int jm = 1; jm < num_complex; jm++, idx++ ) { - for ( int imag = 0; imag < 2; imag++ ) { - if ( jm <= truncation_ ) { - fft_in_[idx][imag] = - scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )]; - } - else { - fft_in_[idx][imag] = 0.; - } - } - } - } - fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ ); - for ( int jlat = 0; jlat < nlats; jlat++ ) { - for ( int jlon = 0; jlon < nlons; jlon++ ) { - int j = jlon + jlonMin_; - if ( j >= nlonsGlobal_ ) { j -= nlonsGlobal_; } - gp_fields[jlon + nlons * ( jlat + nlats * jfld )] = - fft_out_[j + nlonsGlobal_ * jlat]; - } - } - } - } - } -#endif - } - else { - if ( dgemmMethod1_ ) { - // dgemm-method 1 - // should be faster for small domains or large truncation - double* gp_opt3; - alloc_aligned( gp_opt3, nb_fields * grid_.size() ); - { - ATLAS_TRACE( "opt3 Fourier dgemm method 1" ); - eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlons ); - eckit::linalg::Matrix C( gp_opt3, nb_fields * nlats, nlons ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } + invtrans_fourieropt3( nlats, nlons, nb_fields, scl_fourier, gp_fields, config ); - // Transposition in grid point space: - { - ATLAS_TRACE( "opt3 transposition in gp-space" ); - int idx = 0; - for ( int jlon = 0; jlon < nlons; jlon++ ) { - for ( int jlat = 0; jlat < nlats; jlat++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int pos_tp = jlon + nlons * ( jlat + nlats * ( jfld ) ); - //int pos = jfld + nb_fields * ( jlat + nlats * ( jlon ) ); - gp_fields[pos_tp] = gp_opt3[idx++]; // = gp_opt3[pos] - } - } - } - } - free_aligned( gp_opt3 ); - } - else { - // dgemm-method 2 - { - 
ATLAS_TRACE( "opt3 Fourier dgemm method 2" ); - eckit::linalg::Matrix A( fourier_, nlons, ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats ); - eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - } - } // namespace trans // Computing u,v from U,V: { if ( nb_vordiv_fields > 0 ) { @@ -716,7 +729,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel } } free_aligned( scl_fourier ); - } // namespace atlas + } else { if ( unstruct_precomp_ ) { invtrans_unstructured_precomp( truncation, nb_scalar_fields, nb_vordiv_fields, scalar_spectra, @@ -727,9 +740,8 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel config ); } } - - } // namespace trans -} // namespace atlas + } +} // -------------------------------------------------------------------------------------------------------------------- @@ -739,6 +751,8 @@ void TransLocalopt3::invtrans( const int nb_vordiv_fields, const double vorticit invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); } +// -------------------------------------------------------------------------------------------------------------------- + void extend_truncationopt3( const int old_truncation, const int nb_fields, const double old_spectra[], double new_spectra[] ) { int k = 0, k_old = 0; diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h index 44be5a848..af8360801 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.h +++ b/src/atlas/trans/localopt3/TransLocalopt3.h @@ -19,6 +19,8 @@ #include #endif +#define TRANSLOCAL_DGEMM2 0 + //----------------------------------------------------------------------------- // Forward declarations @@ -106,12 +108,21 @@ class TransLocalopt3 : public trans::TransImpl { private: int posMethod( const int jfld, const int 
imag, const int jlat, const int jm, const int nb_fields, - const int nlats ) const; + const int nlats ) const { +#if !TRANSLOCAL_DGEMM2 + return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); +#else + return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) ); +#endif + }; void invtrans_legendreopt3( const int truncation, const int nlats, const int nb_fields, const double scalar_spectra[], double scl_fourier[], const eckit::Configuration& config ) const; + void invtrans_fourieropt3( const int nlats, const int nlons, const int nb_fields, double scl_fourier[], + double gp_fields[], const eckit::Configuration& config ) const; + void invtrans_unstructured_precomp( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, const double scalar_spectra[], double gp_fields[], const eckit::Configuration& = util::NoConfig() ) const; diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 8b8a4af26..3d023ebfd 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -1082,9 +1082,9 @@ CASE( "test_trans_unstructured" ) { double tolerance = 1.e-13; //Domain testdomain = RectangularDomain( {20., 25.}, {40., 60.} ); - //Domain testdomain = RectangularDomain( {10., 25.}, {35., 50.} ); + Domain testdomain = RectangularDomain( {0., 90.}, {0., 90.} ); // Grid: (Adjust the following line if the test takes too long!) 
- Grid g( "F120" ); + Grid g( "F120", testdomain ); grid::StructuredGrid gs( g ); std::vector pts( g.size() ); int idx( 0 ); @@ -1092,13 +1092,15 @@ CASE( "test_trans_unstructured" ) { double lat = gs.y( j ); for ( size_t i = 0; i < gs.nx( j ); ++i ) { double lon = gs.x( i, j ); - //Log::info() << "idx=" << idx << " lon=" << lon << " lat=" << lat << std::endl; - pts[idx++].assign( lon, lat ); + if ( i == j && lat > 0 ) { + //Log::info() << "idx=" << idx << " lon=" << lon << " lat=" << lat << std::endl; + pts[idx++].assign( lon, lat ); + } } } - Grid gu = grid::UnstructuredGrid( new std::vector( pts ) ); - - int trc = 120; + Grid gu = grid::UnstructuredGrid( new std::vector( &pts[0], &pts[idx] ) ); + Log::info() << "gu: size=" << gu.size() << std::endl; + int trc = 1280; double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 int nb_scalar = 1, nb_vordiv = 0; @@ -1110,7 +1112,11 @@ CASE( "test_trans_unstructured" ) { std::vector gp( nb_all * g.size() ); std::vector rgp1( nb_all * g.size() ); std::vector rgp2( nb_all * g.size() ); - std::vector rgp_analytic( g.size() ); + std::vector rgp_analytic1( g.size() ); + std::vector rgp_analytic2( gu.size() ); + + trans::Trans transLocal1( g, trc, util::Config( "type", "localopt3" ) ); + trans::Trans transLocal2( gu, trc, util::Config( "type", "localopt3" ) ); int icase = 0; for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) { // vorticity, divergence, scalar @@ -1131,10 +1137,8 @@ CASE( "test_trans_unstructured" ) { for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. 
&& - icase < 1 ) { + icase < 100 ) { auto start = std::chrono::system_clock::now(); - trans::Trans transLocal1( g, trc, util::Config( "type", "localopt3" ) ); - trans::Trans transLocal2( gu, trc, util::Config( "type", "localopt3" ) ); for ( int j = 0; j < 2 * N * nb_scalar; j++ ) { sp[j] = 0.; } @@ -1152,16 +1156,23 @@ CASE( "test_trans_unstructured" ) { rgp2[j] = 0.; } for ( int j = 0; j < g.size(); j++ ) { - rgp_analytic[j] = 0.; + rgp_analytic1[j] = 0.; + } + + for ( int j = 0; j < gu.size(); j++ ) { + rgp_analytic2[j] = 0.; } spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(), - rgp_analytic.data(), ivar_in, ivar_out ); + rgp_analytic1.data(), ivar_in, ivar_out ); //Log::info() << icase << " m=" << m << " n=" << n << " imag=" << imag << " structured: "; EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), div.data(), rgp1.data() ) ); + spectral_transform_grid_analytic( trc, trc, n, m, imag, gu, rspecg.data(), + rgp_analytic2.data(), ivar_in, ivar_out ); + //Log::info() << icase << " m=" << m << " n=" << n << " imag=" << imag << " unstructured: "; EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), div.data(), rgp2.data() ) ); @@ -1169,10 +1180,10 @@ CASE( "test_trans_unstructured" ) { int pos = ( ivar_out * nb_vordiv + jfld ); double rms_gen1 = - compute_rms( g.size(), rgp1.data() + pos * g.size(), rgp_analytic.data() ); + compute_rms( g.size(), rgp1.data() + pos * g.size(), rgp_analytic1.data() ); double rms_gen2 = - compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() ); + compute_rms( gu.size(), rgp2.data() + pos * gu.size(), rgp_analytic2.data() ); rav1 += rms_gen1; rav2 += rms_gen2; From 42c2adc95851cf399ba38bdd83aa4269534004e6 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 10 Apr 2018 15:59:20 +0100 Subject: [PATCH 044/123] reduced meshes are working now with FFTW and added storing wisdom; still need to add reduced truncation towards the 
poles --- src/atlas/trans/localopt3/TransLocalopt3.cc | 236 ++++++++++++++++---- src/atlas/trans/localopt3/TransLocalopt3.h | 19 +- src/tests/trans/test_transgeneral.cc | 26 ++- 3 files changed, 228 insertions(+), 53 deletions(-) diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index 983a2ac0d..6ea7093f6 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -94,11 +94,11 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long #else eckit::linalg::LinearAlgebra::backend( "generic" ); // might want to choose backend with this command #endif - double fft_threshold = 0.05; // fraction of latitudes of the full grid up to which FFT is used. + double fft_threshold = 0.0; // fraction of latitudes of the full grid up to which FFT is used. // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine // on which this code is running! int nlats = 0; - int nlons = 0; + int nlonsMax = 0; int neqtr = 0; useFFT_ = true; unstruct_precomp_ = true; @@ -107,8 +107,8 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long nlatsLeg_ = 0; if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { grid::StructuredGrid g( grid_ ); - nlats = g.ny(); - nlons = g.nxmax(); + nlats = g.ny(); + nlonsMax = g.nxmax(); for ( size_t j = 0; j < nlats; ++j ) { // assumptions: latitudes in g.y(j) are monotone and decreasing // no assumption on whether we have 0, 1 or 2 latitudes at the equator @@ -125,24 +125,48 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long else { nlatsLeg_ = nlatsSH_; } - Grid g_global( grid.name() ); - grid::StructuredGrid gs_global( g_global ); - nlonsGlobal_ = gs_global.nxmax(); - jlonMin_ = 0; - double lonmin = fmod( g.x( 0, 0 ), 360 ); - if ( lonmin < 0. 
) { lonmin += 360.; } - if ( nlons < fft_threshold * nlonsGlobal_ ) { useFFT_ = false; } + gridGlobal_ = Grid( grid.name() ); + grid::StructuredGrid gs_global( gridGlobal_ ); + nlonsMaxGlobal_ = gs_global.nxmax(); + jlonMin_.resize( 1 ); + jlonMin_[0] = 0; + jlatMin_ = 0; + nlatsGlobal_ = gs_global.ny(); + for ( int jlat = 0; jlat < nlatsGlobal_; jlat++ ) { + if ( gs_global.y( jlat ) > g.y( 0 ) ) { jlatMin_++; }; + } + int jlatMinLeg_ = jlatMin_; + if ( nlatsNH_ < nlatsSH_ ) { jlatMinLeg_ += nlatsNH_ - nlatsSH_; }; + auto wrapAngle = [&]( double angle ) { + double result = fmod( angle, 360 ); + if ( result < 0. ) { result += 360.; } + return result; + }; + double lonmin = wrapAngle( g.x( 0, 0 ) ); + if ( nlonsMax < fft_threshold * nlonsMaxGlobal_ ) { useFFT_ = false; } else { - if ( nlons < nlonsGlobal_ ) { - // need to use FFT with cropped grid - for ( size_t j = 0; j < nlonsGlobal_; ++j ) { - if ( gs_global.x( j, 0 ) == lonmin ) { jlonMin_ = j; } + // need to use FFT with cropped grid + if ( grid::RegularGrid( gridGlobal_ ) ) { + for ( size_t jlon = 0; jlon < nlonsMaxGlobal_; ++jlon ) { + if ( gs_global.x( jlon, 0 ) < lonmin ) { jlonMin_[0]++; } + } + } + else { + nlonsGlobal_.resize( nlats ); + jlonMin_.resize( nlats ); + for ( size_t jlat = 0; jlat < nlats; jlat++ ) { + double lonmin = wrapAngle( g.x( 0, jlat ) ); + nlonsGlobal_[jlat] = gs_global.nx( jlat + jlatMin_ ); + jlonMin_[jlat] = 0; + for ( size_t jlon = 0; jlon < nlonsGlobal_[jlat]; ++jlon ) { + if ( gs_global.x( jlon, jlat + jlatMin_ ) < lonmin ) { jlonMin_[jlat]++; } + } } } } //Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << std::endl; std::vector lats( nlatsLeg_ ); - std::vector lons( nlons ); + std::vector lons( nlonsMax ); if ( nlatsNH_ >= nlatsSH_ ) { for ( size_t j = 0; j < nlatsLeg_; ++j ) { lats[j] = g.y( j ) * util::Constants::degreesToRadians(); @@ -153,7 +177,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long lats[idx] = -g.y( j 
) * util::Constants::degreesToRadians(); } } - for ( size_t j = 0; j < nlons; ++j ) { + for ( size_t j = 0; j < nlonsMax; ++j ) { lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians(); } // precomputations for Legendre polynomials: @@ -196,11 +220,37 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long #if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2 { ATLAS_TRACE( "opt3 precomp FFTW" ); - int num_complex = ( nlonsGlobal_ / 2 ) + 1; + int num_complex = ( nlonsMaxGlobal_ / 2 ) + 1; fft_in_ = fftw_alloc_complex( nlats * num_complex ); - fft_out_ = fftw_alloc_real( nlats * nlonsGlobal_ ); - plan_ = fftw_plan_many_dft_c2r( 1, &nlonsGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, - 1, nlonsGlobal_, FFTW_ESTIMATE ); + fft_out_ = fftw_alloc_real( nlats * nlonsMaxGlobal_ ); + if ( grid::RegularGrid( gridGlobal_ ) ) { + plans_.resize( 1 ); + FILE* file_fftw; + file_fftw = fopen( "wisdom.bin", "r" ); + if ( file_fftw ) { + fftw_import_wisdom_from_file( file_fftw ); + fclose( file_fftw ); + } + plans_[0] = fftw_plan_many_dft_c2r( 1, &nlonsMaxGlobal_, nlats, fft_in_, NULL, 1, num_complex, + fft_out_, NULL, 1, nlonsMaxGlobal_, FFTW_ESTIMATE ); + } + else { + plans_.resize( nlatsLeg_ ); + FILE* file_fftw; + file_fftw = fopen( "wisdom.bin", "r" ); + if ( file_fftw ) { + fftw_import_wisdom_from_file( file_fftw ); + fclose( file_fftw ); + } + for ( int j = 0; j < nlatsLeg_; j++ ) { + int nlonsGlobalj = gs_global.nx( jlatMinLeg_ + j ); + //ASSERT( nlonsGlobalj > 0 && nlonsGlobalj <= nlonsMaxGlobal_ ); + plans_[j] = fftw_plan_dft_c2r_1d( nlonsGlobalj, fft_in_, fft_out_, FFTW_ESTIMATE ); + } + file_fftw = fopen( "wisdom.bin", "wb" ); + fftw_export_wisdom_to_file( file_fftw ); + fclose( file_fftw ); + } } // other FFT implementations should be added with #elif statements #else @@ -208,7 +258,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long #endif } if ( !useFFT_ ) { - alloc_aligned( fourier_, 2 * ( 
truncation_ + 1 ) * nlons ); + alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlonsMax ); #if !TRANSLOCAL_DGEMM2 { ATLAS_TRACE( "opt3 precomp Fourier tp" ); @@ -216,10 +266,10 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long for ( int jm = 0; jm < truncation_ + 1; jm++ ) { double factor = 1.; if ( jm > 0 ) { factor = 2.; } - for ( int jlon = 0; jlon < nlons; jlon++ ) { + for ( int jlon = 0; jlon < nlonsMax; jlon++ ) { fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part } - for ( int jlon = 0; jlon < nlons; jlon++ ) { + for ( int jlon = 0; jlon < nlonsMax; jlon++ ) { fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part } } @@ -228,7 +278,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long { ATLAS_TRACE( "opt3 precomp Fourier" ); int idx = 0; - for ( int jlon = 0; jlon < nlons; jlon++ ) { + for ( int jlon = 0; jlon < nlonsMax; jlon++ ) { double factor = 1.; for ( int jm = 0; jm < truncation_ + 1; jm++ ) { if ( jm > 0 ) { factor = 2.; } @@ -268,7 +318,9 @@ TransLocalopt3::~TransLocalopt3() { free_aligned( legendre_asym_ ); if ( useFFT_ ) { #if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2 - fftw_destroy_plan( plan_ ); + for ( int j = 0; j < plans_.size(); j++ ) { + fftw_destroy_plan( plans_[j] ); + } fftw_free( fft_in_ ); fftw_free( fft_out_ ); #endif @@ -434,15 +486,16 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::invtrans_fourieropt3( const int nlats, const int nlons, const int nb_fields, double scl_fourier[], - double gp_fields[], const eckit::Configuration& config ) const { +void TransLocalopt3::invtrans_fourier_regularopt3( const int nlats, const int nlons, const int nb_fields, + double scl_fourier[], double gp_fields[], + const eckit::Configuration& config ) const { // Fourier 
transformation: if ( useFFT_ ) { #if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2 { - int num_complex = ( nlonsGlobal_ / 2 ) + 1; + int num_complex = ( nlonsMaxGlobal_ / 2 ) + 1; { - ATLAS_TRACE( "opt3 FFTW" ); + ATLAS_TRACE( "opt3 FFTW regular" ); for ( int jfld = 0; jfld < nb_fields; jfld++ ) { int idx = 0; for ( int jlat = 0; jlat < nlats; jlat++ ) { @@ -459,12 +512,12 @@ void TransLocalopt3::invtrans_fourieropt3( const int nlats, const int nlons, con } } } - fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ ); + fftw_execute_dft_c2r( plans_[0], fft_in_, fft_out_ ); for ( int jlat = 0; jlat < nlats; jlat++ ) { for ( int jlon = 0; jlon < nlons; jlon++ ) { - int j = jlon + jlonMin_; - if ( j >= nlonsGlobal_ ) { j -= nlonsGlobal_; } - gp_fields[jlon + nlons * ( jlat + nlats * jfld )] = fft_out_[j + nlonsGlobal_ * jlat]; + int j = jlon + jlonMin_[0]; + if ( j >= nlonsMaxGlobal_ ) { j -= nlonsMaxGlobal_; } + gp_fields[jlon + nlons * ( jlat + nlats * jfld )] = fft_out_[j + nlonsMaxGlobal_ * jlat]; } } } @@ -517,6 +570,98 @@ void TransLocalopt3::invtrans_fourieropt3( const int nlats, const int nlons, con // -------------------------------------------------------------------------------------------------------------------- +void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid::StructuredGrid g, const int nb_fields, + double scl_fourier[], double gp_fields[], + const eckit::Configuration& config ) const { + // Fourier transformation: + int nlonsMax = g.nxmax(); + if ( useFFT_ ) { +#if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2 + { + { + ATLAS_TRACE( "opt3 FFTW reduced" ); + int jgp = 0; + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + for ( int jlat = 0; jlat < nlats; jlat++ ) { + int idx = 0; + int num_complex = ( nlonsGlobal_[jlat] / 2 ) + 1; + fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0, nb_fields, nlats )]; + for ( int jm = 1; jm < num_complex; jm++, idx++ ) { + for ( int imag = 0; imag < 2; imag++ ) { + if ( jm <= truncation_ ) { + 
fft_in_[idx][imag] = + scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )]; + } + else { + fft_in_[idx][imag] = 0.; + } + } + } + //Log::info() << std::endl; + //Log::info() << jlat << "out:" << std::endl; + int jplan = nlatsLeg_ - nlatsNH_ + jlat; + if ( jplan >= nlatsLeg_ ) { jplan = nlats - 1 + nlatsLeg_ - nlatsSH_ - jlat; }; + //ASSERT( jplan < nlatsLeg_ && jplan >= 0 ); + fftw_execute_dft_c2r( plans_[jplan], fft_in_, fft_out_ ); + for ( int jlon = 0; jlon < g.nx( jlat ); jlon++ ) { + int j = jlon + jlonMin_[jlat]; + if ( j >= nlonsGlobal_[jlat] ) { j -= nlonsGlobal_[jlat]; } + //Log::info() << fft_out_[j] << " "; + ASSERT( j < nlonsMaxGlobal_ ); + gp_fields[jgp++] = fft_out_[j]; + } + //Log::info() << std::endl; + } + } + } + } +#endif + } + else { +#if !TRANSLOCAL_DGEMM2 + // dgemm-method 1 + { + ATLAS_TRACE( "opt3 Fourier dgemm method 1" ); + eckit::linalg::Matrix A( fourier_, nlonsMax, ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats ); + eckit::linalg::Matrix C( gp_fields, nlonsMax, nb_fields * nlats ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } +#else + // dgemm-method 2 + // should be faster for small domains or large truncation + // but have not found any significant speedup so far + double* gp_opt3; + alloc_aligned( gp_opt3, nb_fields * grid_.size() ); + { + ATLAS_TRACE( "opt3 Fourier dgemm method 2" ); + eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 ); + eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlonsMax ); + eckit::linalg::Matrix C( gp_opt3, nb_fields * nlats, nlonsMax ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + + // Transposition in grid point space: + { + ATLAS_TRACE( "opt3 transposition in gp-space" ); + int idx = 0; + for ( int jlon = 0; jlon < nlonsMax; jlon++ ) { + for ( int jlat = 0; jlat < nlats; jlat++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int pos_tp = 
jlon + nlonsMax * ( jlat + nlats * ( jfld ) ); + //int pos = jfld + nb_fields * ( jlat + nlats * ( jlon ) ); + gp_fields[pos_tp] = gp_opt3[idx++]; // = gp_opt3[pos] + } + } + } + } + free_aligned( gp_opt3 ); +#endif + } +} + +// -------------------------------------------------------------------------------------------------------------------- + void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const int nb_fields, const int nb_vordiv_fields, const double scalar_spectra[], double gp_fields[], const eckit::Configuration& config ) const { @@ -569,14 +714,18 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const } // Fourier transformation: - int idx = 0; - fouriertp[idx++] = 1.; // real part - fouriertp[idx++] = 0.; // imaginary part - for ( int jm = 1; jm < truncation; jm++ ) { - fouriertp[idx++] = +2. * std::cos( jm * lon ); // real part - fouriertp[idx++] = -2. * std::sin( jm * lon ); // imaginary part + { + //ATLAS_TRACE( "opt compute fouriertp" ); + int idx = 0; + fouriertp[idx++] = 1.; // real part + fouriertp[idx++] = 0.; // imaginary part + for ( int jm = 1; jm < truncation; jm++ ) { + fouriertp[idx++] = +2. * std::cos( jm * lon ); // real part + fouriertp[idx++] = -2. 
* std::sin( jm * lon ); // imaginary part + } } { + //ATLAS_TRACE( "opt Fourier dgemm" ); eckit::linalg::Matrix A( fouriertp, 1, (truncation)*2 ); eckit::linalg::Matrix B( scl_fourier_tp, (truncation)*2, nb_fields ); eckit::linalg::Matrix C( gp_opt, 1, nb_fields ); @@ -707,7 +856,12 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel invtrans_legendreopt3( truncation, nlats, nb_scalar_fields, scalar_spectra, scl_fourier, config ); // Fourier transformation: - invtrans_fourieropt3( nlats, nlons, nb_fields, scl_fourier, gp_fields, config ); + if ( grid::RegularGrid( gridGlobal_ ) ) { + invtrans_fourier_regularopt3( nlats, nlons, nb_fields, scl_fourier, gp_fields, config ); + } + else { + invtrans_fourier_reducedopt3( nlats, g, nb_fields, scl_fourier, gp_fields, config ); + } // Computing u,v from U,V: { diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h index af8360801..099f4b0e1 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.h +++ b/src/atlas/trans/localopt3/TransLocalopt3.h @@ -120,8 +120,12 @@ class TransLocalopt3 : public trans::TransImpl { const double scalar_spectra[], double scl_fourier[], const eckit::Configuration& config ) const; - void invtrans_fourieropt3( const int nlats, const int nlons, const int nb_fields, double scl_fourier[], - double gp_fields[], const eckit::Configuration& config ) const; + void invtrans_fourier_regularopt3( const int nlats, const int nlons, const int nb_fields, double scl_fourier[], + double gp_fields[], const eckit::Configuration& config ) const; + + void invtrans_fourier_reducedopt3( const int nlats, const grid::StructuredGrid g, const int nb_fields, + double scl_fourier[], double gp_fields[], + const eckit::Configuration& config ) const; void invtrans_unstructured_precomp( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, const double scalar_spectra[], double gp_fields[], @@ -137,6 +141,7 @@ class TransLocalopt3 
: public trans::TransImpl { private: Grid grid_; + Grid gridGlobal_; bool useFFT_; bool dgemmMethod1_; bool unstruct_precomp_; @@ -144,8 +149,12 @@ class TransLocalopt3 : public trans::TransImpl { int nlatsNH_; int nlatsSH_; int nlatsLeg_; - int jlonMin_; - int nlonsGlobal_; + std::vector jlonMin_; + int jlatMin_; + int jlatMinLeg_; + int nlonsMaxGlobal_; + std::vector nlonsGlobal_; + int nlatsGlobal_; bool precompute_; double* legendre_; double* legendre_sym_; @@ -158,7 +167,7 @@ class TransLocalopt3 : public trans::TransImpl { #if ATLAS_HAVE_FFTW fftw_complex* fft_in_; double* fft_out_; - fftw_plan plan_; + std::vector plans_; #endif }; diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 3d023ebfd..f67c8704c 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -938,7 +938,7 @@ CASE( "test_trans_hires" ) { } #endif //----------------------------------------------------------------------------- -#if 0 +#if 1 CASE( "test_trans_domain" ) { Log::info() << "test_trans_domain" << std::endl; // test transgeneral by comparing with analytic solution on a cropped domain @@ -950,9 +950,9 @@ CASE( "test_trans_domain" ) { //Domain testdomain = ZonalBandDomain( {-.5, .5} ); //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} ); //Domain testdomain = ZonalBandDomain( {-85., -86.} ); - Domain testdomain = RectangularDomain( {-1., 1.}, {5., 5.5} ); + Domain testdomain = RectangularDomain( {-2., 2.}, {20., 30.} ); // Grid: (Adjust the following line if the test takes too long!) - Grid g( "F1280", testdomain ); + Grid g( "O1280" ); Grid g_global( g.name() ); grid::StructuredGrid gs( g ); @@ -960,8 +960,9 @@ CASE( "test_trans_domain" ) { Log::info() << "nlats: " << gs.ny() << " nlons:" << gs.nxmax() << std::endl; int ndgl = gs_global.ny(); //int trc = ndgl - 1; // linear - int trc = ndgl / 2. 
- 1; // cubic - trans::Trans transLocal1( g, trc, util::Config( "type", "localopt2" ) ); + //int trc = ndgl / 2. - 1; // cubic + int trc = 120; + trans::Trans transLocal1( g, trc, util::Config( "type", "ifs" ) ); trans::Trans transLocal2( g, trc, util::Config( "type", "localopt3" ) ); double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 @@ -998,7 +999,7 @@ CASE( "test_trans_domain" ) { for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. && - icase < 1000 ) { + icase < 1 ) { auto start = std::chrono::system_clock::now(); for ( int j = 0; j < 2 * N * nb_scalar; j++ ) { sp[j] = 0.; @@ -1037,6 +1038,17 @@ CASE( "test_trans_domain" ) { double rms_gen2 = compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() ); + //Log::info() << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out + // << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl + // << "rgp2:"; + //for ( int j = 0; j < g.size(); j++ ) { + // Log::info() << rgp2[pos * g.size() + j] << " "; + //}; + //Log::info() << std::endl << "analytic:"; + //for ( int j = 0; j < g.size(); j++ ) { + // Log::info() << rgp_analytic[j] << " "; + //}; + //Log::info() << std::endl; rav1 += rms_gen1; rav2 += rms_gen2; if ( !( rms_gen1 < tolerance ) || !( rms_gen2 < tolerance ) ) { @@ -1073,7 +1085,7 @@ CASE( "test_trans_domain" ) { #endif //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -#if 1 +#if 0 CASE( "test_trans_unstructured" ) { Log::info() << "test_trans_unstructured" << std::endl; // test transgeneral by comparing with analytic solution on an unstructured grid From 4a3e7a60873d1f255cc3a67ab7536b17190825ae Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 11 Apr 2018 18:11:31 +0100 Subject: [PATCH 045/123] reduced 
truncation towards the poles is now working --- src/atlas/trans/localopt3/TransLocalopt3.cc | 146 ++++++++++++++++---- src/atlas/trans/localopt3/TransLocalopt3.h | 1 + src/tests/trans/test_transgeneral.cc | 22 ++- 3 files changed, 140 insertions(+), 29 deletions(-) diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index 6ea7093f6..e38defb8d 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -77,6 +77,37 @@ void free_aligned( double*& ptr ) { int add_padding( int n ) { return std::ceil( n / 8. ) * 8; } + +int fourier_truncation( const int truncation, // truncation + const int nx, // number of longitudes + const int nxmax, // maximum nx + const int ndgl, // number of latitudes + const double lat, // latitude in radian + const bool fullgrid ) { // regular grid + int trc = truncation; + int trclin = ndgl - 1; + int trcquad = ndgl * 2 / 3 - 1; + if ( truncation >= trclin || fullgrid ) { + // linear + trc = ( nx - 1 ) / 2; + } + else if ( truncation >= trcquad ) { + // quadratic + double weight = 3 * ( trclin - truncation ) / ndgl; + double sqcos = std::pow( std::cos( lat ), 2 ); + + trc = ( nx - 1 ) / ( 2 + weight * sqcos ); + } + else { + // cubic + double sqcos = std::pow( std::cos( lat ), 2 ); + + trc = ( nx - 1 ) / ( 2 + sqcos ) - 1; + } + trc = std::min( truncation, trc ); + return trc; +} + } // namespace // -------------------------------------------------------------------------------------------------------------------- @@ -109,6 +140,8 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long grid::StructuredGrid g( grid_ ); nlats = g.ny(); nlonsMax = g.nxmax(); + + // check location of domain relative to the equator: for ( size_t j = 0; j < nlats; ++j ) { // assumptions: latitudes in g.y(j) are monotone and decreasing // no assumption on whether we have 0, 1 or 2 latitudes at the equator @@ -125,6 +158,8 @@ 
TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long else { nlatsLeg_ = nlatsSH_; } + + // compute latitudinal location of domain relative to global grid: gridGlobal_ = Grid( grid.name() ); grid::StructuredGrid gs_global( gridGlobal_ ); nlonsMaxGlobal_ = gs_global.nxmax(); @@ -137,6 +172,35 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long } int jlatMinLeg_ = jlatMin_; if ( nlatsNH_ < nlatsSH_ ) { jlatMinLeg_ += nlatsNH_ - nlatsSH_; }; + if ( jlatMin_ > nlatsGlobal_ / 2 ) { jlatMinLeg_ -= jlatMin_ - nlatsGlobal_ / 2 + 1; }; + //Log::info() << "jlatMinLeg:" << jlatMinLeg_ << std::endl; + // reduce truncation towards the pole for reduced meshes: + nlat0_.resize( truncation_ + 1 ); + int nmen0 = -1; + for ( int jlat = 0; jlat < nlatsGlobal_ / 2; jlat++ ) { + double lat = gs_global.y( jlat ) * util::Constants::degreesToRadians(); + int nmen = fourier_truncation( truncation_, gs_global.nx( jlat ), gs_global.nxmax(), nlatsGlobal_, lat, + grid::RegularGrid( gs_global ) ); + /*Log::info() << "jlat=" << jlat << " nmen=" << nmen << " trc=" << truncation_ + << " nx=" << gs_global.nx( jlat ) << " nxmax=" << gs_global.nxmax() << " nlats=" << nlatsGlobal_ + << " lat=" << lat << std::endl;*/ + nmen = std::max( nmen0, nmen ); + int ndgluj = nlatsLeg_ - std::min( nlatsLeg_, nlatsLeg_ + jlatMinLeg_ - jlat ); + for ( int j = nmen0 + 1; j <= nmen; j++ ) { + nlat0_[j] = ndgluj; + } + nmen0 = nmen; + } + for ( int j = nmen0 + 1; j <= truncation_; j++ ) { + nlat0_[j] = nlatsLeg_; + } + /*Log::info() << "localopt:" << std::endl; + for ( int j = 0; j <= truncation_; j++ ) { + Log::info() << nlatsLeg_ - nlat0_[j] << " "; + } + Log::info() << std::endl;*/ + + // compute longitudinal location of domain within global grid for using FFT: auto wrapAngle = [&]( double angle ) { double result = fmod( angle, 360 ); if ( result < 0. 
) { result += 360.; } @@ -180,6 +244,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long for ( size_t j = 0; j < nlonsMax; ++j ) { lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians(); } + // precomputations for Legendre polynomials: { ATLAS_TRACE( "opt3 precomp Legendre" ); @@ -399,7 +464,7 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat if ( jm == 0 ) { n_imag = 1; } int size_fourier = nb_fields * n_imag * nlatsLeg_; auto posFourier = [&]( int jfld, int imag, int jlat, int jm, int nlatsH ) { - return jfld + nb_fields * ( imag + n_imag * ( nlatsLeg_ - nlatsH + jlat ) ); + return jfld + nb_fields * ( imag + n_imag * ( nlatsLeg_ - nlat0_[jm] - nlatsH + jlat ) ); }; double* scalar_sym; double* scalar_asym; @@ -440,38 +505,68 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat } ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym ); } - { - eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym ); - eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsLeg_ ); - eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLeg_ ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - if ( size_asym > 0 ) { - eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym ); - eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsLeg_ ); - eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + if ( nlatsLeg_ - nlat0_[jm] > 0 ) { + { + eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym ); + eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm] + nlat0_[jm] * size_sym, size_sym, + nlatsLeg_ - nlat0_[jm] ); + eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLeg_ - nlat0_[jm] ); + 
eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } + if ( size_asym > 0 ) { + eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym ); + eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm] + nlat0_[jm] * size_sym, + size_asym, nlatsLeg_ - nlat0_[jm] ); + eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ - nlat0_[jm] ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + } } { //ATLAS_TRACE( "opt3 merge spheres" ); // northern hemisphere: for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) { - for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ ); - scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] = - scl_fourier_sym[idx] + scl_fourier_asym[idx]; + if ( nlatsLeg_ - nlat0_[jm] - nlatsNH_ + jlat >= 0 ) { + for ( int imag = 0; imag < n_imag; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ ); + scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] = + scl_fourier_sym[idx] + scl_fourier_asym[idx]; + } + } + } + else { + for ( int imag = 0; imag < n_imag; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] = 0.; + } } } + /*for ( int imag = 0; imag < n_imag; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + if ( scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] > 0. 
) { + Log::info() << "jm=" << jm << " jlat=" << jlat << " nlatsLeg_=" << nlatsLeg_ + << " nlat0=" << nlat0_[jm] << " nlatsNH=" << nlatsNH_ << std::endl; + } + } + }*/ } // southern hemisphere: for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) { - for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int idx = posFourier( jfld, imag, jlat, jm, nlatsSH_ ); - int jslat = nlats - jlat - 1; - scl_fourier[posMethod( jfld, imag, jslat, jm, nb_fields, nlats )] = - scl_fourier_sym[idx] - scl_fourier_asym[idx]; + int jslat = nlats - jlat - 1; + if ( nlatsLeg_ - nlat0_[jm] - nlatsSH_ + jlat >= 0 ) { + for ( int imag = 0; imag < n_imag; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = posFourier( jfld, imag, jlat, jm, nlatsSH_ ); + scl_fourier[posMethod( jfld, imag, jslat, jm, nb_fields, nlats )] = + scl_fourier_sym[idx] - scl_fourier_asym[idx]; + } + } + } + else { + for ( int imag = 0; imag < n_imag; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + scl_fourier[posMethod( jfld, imag, jslat, jm, nb_fields, nlats )] = 0.; + } } } } @@ -583,9 +678,11 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid:: int jgp = 0; for ( int jfld = 0; jfld < nb_fields; jfld++ ) { for ( int jlat = 0; jlat < nlats; jlat++ ) { - int idx = 0; + int idx = 0; + //Log::info() << jlat << "in:" << std::endl; int num_complex = ( nlonsGlobal_[jlat] / 2 ) + 1; fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0, nb_fields, nlats )]; + //Log::info() << fft_in_[0][0] << " "; for ( int jm = 1; jm < num_complex; jm++, idx++ ) { for ( int imag = 0; imag < 2; imag++ ) { if ( jm <= truncation_ ) { @@ -595,6 +692,7 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid:: else { fft_in_[idx][imag] = 0.; } + //Log::info() << fft_in_[idx][imag] << " "; } } //Log::info() << std::endl; diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h 
b/src/atlas/trans/localopt3/TransLocalopt3.h index 099f4b0e1..1ca002436 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.h +++ b/src/atlas/trans/localopt3/TransLocalopt3.h @@ -154,6 +154,7 @@ class TransLocalopt3 : public trans::TransImpl { int jlatMinLeg_; int nlonsMaxGlobal_; std::vector nlonsGlobal_; + std::vector nlat0_; int nlatsGlobal_; bool precompute_; double* legendre_; diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index f67c8704c..8b1a78a54 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -457,16 +457,28 @@ void spectral_transform_grid_analytic( if ( grid::StructuredGrid( grid ) ) { grid::StructuredGrid g( grid ); + Grid gridGlobal( grid.name() ); + grid::StructuredGrid gs_global( gridGlobal ); + int nlatsGlobal = gs_global.ny(); + int jlatMin = 0; + for ( int jlat = 0; jlat < nlatsGlobal; jlat++ ) { + if ( gs_global.y( jlat ) > g.y( 0 ) ) { jlatMin++; }; + } + int idx = 0; for ( size_t j = 0; j < g.ny(); ++j ) { double lat = g.y( j ) * util::Constants::degreesToRadians(); + int ftrc = trans::fourier_truncation( trc, gs_global.nx( jlatMin + j ), gs_global.nxmax(), gs_global.ny(), + lat, grid::RegularGrid( gs_global ) ); + /*Log::info() << "j=" << j << " ftrc=" << ftrc << " trc=" << trc << " nx=" << gs_global.nx( jlatMin + j ) + << " nxmax=" << gs_global.nxmax() << " nlats=" << gs_global.ny() << " lat=" << g.y( j ) + << " jlatMin=" << jlatMin << std::endl;*/ for ( size_t i = 0; i < g.nx( j ); ++i ) { double lon = g.x( i, j ) * util::Constants::degreesToRadians(); // compute spherical harmonics: - if ( trans::fourier_truncation( trc, g.nx( j ), g.nxmax(), g.ny(), lat, grid::RegularGrid( g ) ) >= - m ) { + if ( ftrc >= m ) { rgp[idx++] = sphericalharmonics_analytic_point( n, m, imag, lon, lat, ivar_in, ivar_out ); } else { @@ -950,9 +962,9 @@ CASE( "test_trans_domain" ) { //Domain testdomain = ZonalBandDomain( {-.5, .5} ); //Domain testdomain = RectangularDomain( 
{0., 30.}, {-.05, .05} ); //Domain testdomain = ZonalBandDomain( {-85., -86.} ); - Domain testdomain = RectangularDomain( {-2., 2.}, {20., 30.} ); + Domain testdomain = RectangularDomain( {15., 20.}, {10., 20.} ); // Grid: (Adjust the following line if the test takes too long!) - Grid g( "O1280" ); + Grid g( "O120" ); Grid g_global( g.name() ); grid::StructuredGrid gs( g ); @@ -999,7 +1011,7 @@ CASE( "test_trans_domain" ) { for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. && - icase < 1 ) { + icase < 1000 ) { auto start = std::chrono::system_clock::now(); for ( int j = 0; j < 2 * N * nb_scalar; j++ ) { sp[j] = 0.; From fbb51a60f54176aacfe576851f4bde1c98ad34eb Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 11 Apr 2018 18:34:28 +0100 Subject: [PATCH 046/123] fixed vordiv in localopt3 (VorDivToUV is not yet optimised) --- src/atlas/trans/localopt3/TransLocalopt3.cc | 22 ++++++++++++++++++++- src/tests/trans/test_transgeneral.cc | 6 +++--- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index e38defb8d..09b84fc34 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -125,7 +125,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long #else eckit::linalg::LinearAlgebra::backend( "generic" ); // might want to choose backend with this command #endif - double fft_threshold = 0.0; // fraction of latitudes of the full grid up to which FFT is used. + double fft_threshold = 0.0; // fraction of latitudes of the full grid down to which FFT is used. // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine // on which this code is running! 
int nlats = 0; @@ -832,6 +832,16 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const gp_fields[ip + j * grid_.size()] = gp_opt[j]; } } + // Computing u,v from U,V: + { + if ( nb_vordiv_fields > 0 ) { + //ATLAS_TRACE( "opt3 u,v from U,V" ); + double coslat = std::cos( lat ); + for ( int j = 0; j < nb_fields; j++ ) { + gp_fields[ip + j * grid_.size()] /= coslat; + } + } + } } } free_aligned( scl_fourier ); @@ -912,6 +922,16 @@ void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_f gp_fields[ip + j * grid_.size()] = gp_opt[j]; } } + // Computing u,v from U,V: + { + if ( nb_vordiv_fields > 0 ) { + //ATLAS_TRACE( "opt3 u,v from U,V" ); + double coslat = std::cos( lat ); + for ( int j = 0; j < nb_fields; j++ ) { + gp_fields[ip + j * grid_.size()] /= coslat; + } + } + } } free_aligned( legendre ); free_aligned( scl_fourier ); diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 8b1a78a54..2f2c2705e 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -981,7 +981,7 @@ CASE( "test_trans_domain" ) { functionspace::Spectral spectral( trc ); functionspace::StructuredColumns gridpoints( g ); - int nb_scalar = 1, nb_vordiv = 0; + int nb_scalar = 2, nb_vordiv = 2; int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; std::vector sp( 2 * N * nb_scalar ); std::vector vor( 2 * N * nb_vordiv ); @@ -1124,10 +1124,10 @@ CASE( "test_trans_unstructured" ) { } Grid gu = grid::UnstructuredGrid( new std::vector( &pts[0], &pts[idx] ) ); Log::info() << "gu: size=" << gu.size() << std::endl; - int trc = 1280; + int trc = 120; double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 - int nb_scalar = 1, nb_vordiv = 0; + int nb_scalar = 1, nb_vordiv = 1; int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; std::vector sp( 2 * N * nb_scalar ); std::vector vor( 2 * N * nb_vordiv ); 
From 9f3cc6334f5721a32ef0ebe246196ca8e37ab958 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 19 Apr 2018 10:57:05 +0100 Subject: [PATCH 047/123] No need to look for MKL when eckit already does --- CMakeLists.txt | 6 ------ src/CMakeLists.txt | 6 ------ src/atlas/CMakeLists.txt | 2 -- src/atlas/library/defines.h.in | 1 - src/atlas/trans/localopt/TransLocalopt.cc | 9 +++++---- src/atlas/trans/localopt2/TransLocalopt2.cc | 9 +++++---- src/atlas/trans/localopt3/TransLocalopt3.cc | 9 +++++---- 7 files changed, 15 insertions(+), 27 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e71fcc03f..787c96898 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,12 +105,6 @@ ecbuild_add_option( FEATURE FFTW DESCRIPTION "Support for fftw" REQUIRED_PACKAGES "FFTW COMPONENTS double" ) -### MKL ... - -ecbuild_add_option( FEATURE MKL - DESCRIPTION "MKL linear algebra library" - REQUIRED_PACKAGES MKL ) - ### trans ... ecbuild_add_option( FEATURE TRANS diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 48175e157..98a5e1dd0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -42,12 +42,6 @@ else() set( ATLAS_HAVE_FFTW 0 ) endif() -if( ATLAS_HAVE_MKL ) - set( ATLAS_HAVE_MKL 1 ) -else() - set( ATLAS_HAVE_MKL 0 ) -endif() - if( ATLAS_HAVE_BOUNDSCHECKING ) set( ATLAS_HAVE_BOUNDSCHECKING 1 ) else() diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt index b018107ec..a8ddfe4b4 100644 --- a/src/atlas/CMakeLists.txt +++ b/src/atlas/CMakeLists.txt @@ -583,7 +583,6 @@ ecbuild_add_library( TARGET atlas "${TRANSI_INCLUDE_DIRS}" "${MPI_CXX_INCLUDE_DIRS}" "${FFTW_INCLUDES}" - "${MKL_INCLUDE_DIRS}" LIBS eckit_geometry eckit_linalg @@ -593,7 +592,6 @@ ecbuild_add_library( TARGET atlas "${TRANSI_LIBRARIES}" "${FCKIT_LIBRARIES}" "${FFTW_LIBRARIES}" - "${MKL_LIBRARIES}" DEFINITIONS ${ATLAS_DEFINITIONS} ) diff --git a/src/atlas/library/defines.h.in b/src/atlas/library/defines.h.in index 75f91f2b5..e644fc73e 100644 --- 
a/src/atlas/library/defines.h.in +++ b/src/atlas/library/defines.h.in @@ -11,7 +11,6 @@ #define ATLAS_HAVE_FORTRAN @ATLAS_HAVE_FORTRAN@ #define ATLAS_HAVE_EIGEN @ATLAS_HAVE_EIGEN@ #define ATLAS_HAVE_FFTW @ATLAS_HAVE_FFTW@ -#define ATLAS_HAVE_MKL @ATLAS_HAVE_MKL@ #define ATLAS_BITS_GLOBAL @ATLAS_BITS_GLOBAL@ #define ATLAS_ARRAYVIEW_BOUNDS_CHECKING @ATLAS_HAVE_BOUNDSCHECKING@ #define ATLAS_INDEXVIEW_BOUNDS_CHECKING @ATLAS_HAVE_BOUNDSCHECKING@ diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index 89434ff6b..d0dfbfe4a 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -23,7 +23,8 @@ #include "atlas/util/Constants.h" #include "eckit/linalg/LinearAlgebra.h" #include "eckit/linalg/Matrix.h" -#if ATLAS_HAVE_MKL +#include "eckit/eckit_config.h" +#ifdef ECKIT_HAVE_MKL #include "mkl.h" #endif @@ -58,7 +59,7 @@ int num_n( const int truncation, const int m, const bool symmetric ) { } void alloc_aligned( double*& ptr, size_t n ) { -#if ATLAS_HAVE_MKL +#ifdef ECKIT_HAVE_MKL int al = 64; ptr = (double*)mkl_malloc( sizeof( double ) * n, al ); #else @@ -69,7 +70,7 @@ void alloc_aligned( double*& ptr, size_t n ) { } void free_aligned( double*& ptr ) { -#if ATLAS_HAVE_MKL +#ifdef ECKIT_HAVE_MKL mkl_free( ptr ); #else free( ptr ); @@ -91,7 +92,7 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t truncation_( truncation ), precompute_( config.getBool( "precompute", true ) ) { ATLAS_TRACE( "Precompute legendre opt" ); -#if ATLAS_HAVE_MKL +#ifdef ECKIT_HAVE_MKL eckit::linalg::LinearAlgebra::backend( "mkl" ); // might want to choose backend with this command #else eckit::linalg::LinearAlgebra::backend( "generic" ); // might want to choose backend with this command diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index 16062989d..6b51c2dc5 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ 
b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -23,7 +23,8 @@ #include "atlas/util/Constants.h" #include "eckit/linalg/LinearAlgebra.h" #include "eckit/linalg/Matrix.h" -#if ATLAS_HAVE_MKL +#include "eckit/eckit_config.h" +#ifdef ECKIT_HAVE_MKL #include "mkl.h" #endif @@ -58,7 +59,7 @@ int num_n( const int truncation, const int m, const bool symmetric ) { } void alloc_aligned( double*& ptr, size_t n ) { -#if ATLAS_HAVE_MKL +#ifdef ECKIT_HAVE_MKL int al = 64; ptr = (double*)mkl_malloc( sizeof( double ) * n, al ); #else @@ -69,7 +70,7 @@ void alloc_aligned( double*& ptr, size_t n ) { } void free_aligned( double*& ptr ) { -#if ATLAS_HAVE_MKL +#ifdef ECKIT_HAVE_MKL mkl_free( ptr ); #else free( ptr ); @@ -91,7 +92,7 @@ TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long truncation_( truncation ), precompute_( config.getBool( "precompute", true ) ) { ATLAS_TRACE( "Precompute legendre opt2" ); -#if ATLAS_HAVE_MKL +#ifdef ECKIT_HAVE_MKL eckit::linalg::LinearAlgebra::backend( "mkl" ); // might want to choose backend with this command #else eckit::linalg::LinearAlgebra::backend( "generic" ); // might want to choose backend with this command diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index 09b84fc34..abf36baf0 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -21,7 +21,8 @@ #include "atlas/util/Constants.h" #include "eckit/linalg/LinearAlgebra.h" #include "eckit/linalg/Matrix.h" -#if ATLAS_HAVE_MKL +#include "eckit/eckit_config.h" +#ifdef ECKIT_HAVE_MKL #include "mkl.h" #endif @@ -56,7 +57,7 @@ int num_n( const int truncation, const int m, const bool symmetric ) { } void alloc_aligned( double*& ptr, size_t n ) { -#if ATLAS_HAVE_MKL +#ifdef ECKIT_HAVE_MKL int al = 64; ptr = (double*)mkl_malloc( sizeof( double ) * n, al ); #else @@ -67,7 +68,7 @@ void alloc_aligned( double*& ptr, size_t n ) { } void free_aligned( double*& ptr ) { 
-#if ATLAS_HAVE_MKL +#ifdef ECKIT_HAVE_MKL mkl_free( ptr ); #else free( ptr ); @@ -120,7 +121,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long truncation_( truncation ), precompute_( config.getBool( "precompute", true ) ) { ATLAS_TRACE( "Precompute legendre opt3" ); -#if ATLAS_HAVE_MKL +#ifdef ECKIT_HAVE_MKL eckit::linalg::LinearAlgebra::backend( "mkl" ); // might want to choose backend with this command #else eckit::linalg::LinearAlgebra::backend( "generic" ); // might want to choose backend with this command From 821bf1302b07b76290e806a76dc6e2f4c622315b Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Sun, 22 Apr 2018 00:43:02 +0100 Subject: [PATCH 048/123] using global Legendre matrices independent of the domain works --- src/atlas/trans/localopt3/TransLocalopt3.cc | 271 +++++++++++--------- src/atlas/trans/localopt3/TransLocalopt3.h | 2 + src/tests/trans/test_transgeneral.cc | 80 +++--- 3 files changed, 202 insertions(+), 151 deletions(-) diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index abf36baf0..1b9fbc39b 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -19,9 +19,9 @@ #include "atlas/trans/local/LegendrePolynomials.h" #include "atlas/trans/localopt3/LegendrePolynomialsopt3.h" #include "atlas/util/Constants.h" +#include "eckit/eckit_config.h" #include "eckit/linalg/LinearAlgebra.h" #include "eckit/linalg/Matrix.h" -#include "eckit/eckit_config.h" #ifdef ECKIT_HAVE_MKL #include "mkl.h" #endif @@ -137,6 +137,9 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long nlatsNH_ = 0; nlatsSH_ = 0; nlatsLeg_ = 0; + nlatsLegDomain_ = 0; + nlatsLegReduced_ = 0; + bool useGlobalLeg = true; if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { grid::StructuredGrid g( grid_ ); nlats = g.ny(); @@ -155,26 +158,38 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const 
Grid& grid, const long nlatsNH_++; nlatsSH_++; } - if ( nlatsNH_ >= nlatsSH_ ) { nlatsLeg_ = nlatsNH_; } + if ( nlatsNH_ >= nlatsSH_ ) { nlatsLegDomain_ = nlatsNH_; } else { - nlatsLeg_ = nlatsSH_; + nlatsLegDomain_ = nlatsSH_; } // compute latitudinal location of domain relative to global grid: gridGlobal_ = Grid( grid.name() ); grid::StructuredGrid gs_global( gridGlobal_ ); + grid::StructuredGrid* gsLeg = &g; + if ( useGlobalLeg ) { gsLeg = &gs_global; }; nlonsMaxGlobal_ = gs_global.nxmax(); jlonMin_.resize( 1 ); jlonMin_[0] = 0; jlatMin_ = 0; nlatsGlobal_ = gs_global.ny(); + if ( useGlobalLeg ) { nlatsLeg_ = nlatsGlobal_ / 2; } + else { + nlatsLeg_ = nlatsLegDomain_; + nlatsLegReduced_ = nlatsLeg_; + } for ( int jlat = 0; jlat < nlatsGlobal_; jlat++ ) { - if ( gs_global.y( jlat ) > g.y( 0 ) ) { jlatMin_++; }; + if ( gs_global.y( jlat ) > g.y( 0 ) ) { + //Log::info() << gs_global.y( jlat ) << ">" << g.y( 0 ) << " "; + jlatMin_++; + }; } + //Log::info() << std::endl; int jlatMinLeg_ = jlatMin_; if ( nlatsNH_ < nlatsSH_ ) { jlatMinLeg_ += nlatsNH_ - nlatsSH_; }; - if ( jlatMin_ > nlatsGlobal_ / 2 ) { jlatMinLeg_ -= jlatMin_ - nlatsGlobal_ / 2 + 1; }; - //Log::info() << "jlatMinLeg:" << jlatMinLeg_ << std::endl; + if ( jlatMin_ > nlatsGlobal_ / 2 ) { jlatMinLeg_ -= 2 * ( jlatMin_ - nlatsGlobal_ / 2 ); }; + if ( useGlobalLeg ) { nlatsLegReduced_ = jlatMinLeg_ + nlatsLegDomain_; } + // reduce truncation towards the pole for reduced meshes: nlat0_.resize( truncation_ + 1 ); int nmen0 = -1; @@ -182,11 +197,9 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long double lat = gs_global.y( jlat ) * util::Constants::degreesToRadians(); int nmen = fourier_truncation( truncation_, gs_global.nx( jlat ), gs_global.nxmax(), nlatsGlobal_, lat, grid::RegularGrid( gs_global ) ); - /*Log::info() << "jlat=" << jlat << " nmen=" << nmen << " trc=" << truncation_ - << " nx=" << gs_global.nx( jlat ) << " nxmax=" << gs_global.nxmax() << " nlats=" << 
nlatsGlobal_ - << " lat=" << lat << std::endl;*/ nmen = std::max( nmen0, nmen ); int ndgluj = nlatsLeg_ - std::min( nlatsLeg_, nlatsLeg_ + jlatMinLeg_ - jlat ); + if ( useGlobalLeg ) { ndgluj = std::max( jlatMinLeg_, jlat ); } for ( int j = nmen0 + 1; j <= nmen; j++ ) { nlat0_[j] = ndgluj; } @@ -195,11 +208,9 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long for ( int j = nmen0 + 1; j <= truncation_; j++ ) { nlat0_[j] = nlatsLeg_; } - /*Log::info() << "localopt:" << std::endl; - for ( int j = 0; j <= truncation_; j++ ) { - Log::info() << nlatsLeg_ - nlat0_[j] << " "; - } - Log::info() << std::endl;*/ + /*Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << " jlatMin=" << jlatMin_ + << " jlatMinLeg=" << jlatMinLeg_ << " nlatsGlobal/2-nlatsLeg=" << nlatsGlobal_ / 2 - nlatsLeg_ + << " nlatsLeg_=" << nlatsLeg_ << " nlatsLegDomain_=" << nlatsLegDomain_ << std::endl;*/ // compute longitudinal location of domain within global grid for using FFT: auto wrapAngle = [&]( double angle ) { @@ -232,19 +243,24 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long //Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << std::endl; std::vector lats( nlatsLeg_ ); std::vector lons( nlonsMax ); - if ( nlatsNH_ >= nlatsSH_ ) { + if ( nlatsNH_ >= nlatsSH_ || useGlobalLeg ) { for ( size_t j = 0; j < nlatsLeg_; ++j ) { - lats[j] = g.y( j ) * util::Constants::degreesToRadians(); + lats[j] = gsLeg->y( j ) * util::Constants::degreesToRadians(); } } else { for ( size_t j = nlats - 1, idx = 0; idx < nlatsLeg_; --j, ++idx ) { - lats[idx] = -g.y( j ) * util::Constants::degreesToRadians(); + lats[idx] = -gsLeg->y( j ) * util::Constants::degreesToRadians(); } } for ( size_t j = 0; j < nlonsMax; ++j ) { lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians(); } + /*Log::info() << "lats: "; + for ( int j = 0; j < nlatsLeg_; j++ ) { + Log::info() << lats[j] << " "; + } + Log::info() << 
std::endl;*/ // precomputations for Legendre polynomials: { @@ -265,7 +281,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long alloc_aligned( legendre_asym_, size_asym ); FILE* file_leg; file_leg = fopen( "legendre.bin", "r" ); - if ( false ) { //if ( file_leg ) { + if ( file_leg ) { fread( legendre_sym_, sizeof( double ), size_sym, file_leg ); fread( legendre_asym_, sizeof( double ), size_asym, file_leg ); fclose( file_leg ); @@ -274,10 +290,10 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_, legendre_sym_begin_.data(), legendre_asym_begin_.data() ); - /*file_leg = fopen( "legendre.bin", "wb" ); + file_leg = fopen( "legendre.bin", "wb" ); fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg ); fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg ); - fclose( file_leg );*/ + fclose( file_leg ); } } @@ -301,21 +317,23 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long fft_out_, NULL, 1, nlonsMaxGlobal_, FFTW_ESTIMATE ); } else { - plans_.resize( nlatsLeg_ ); + plans_.resize( nlatsLegDomain_ ); FILE* file_fftw; file_fftw = fopen( "wisdom.bin", "r" ); if ( file_fftw ) { fftw_import_wisdom_from_file( file_fftw ); fclose( file_fftw ); } - for ( int j = 0; j < nlatsLeg_; j++ ) { + for ( int j = 0; j < nlatsLegDomain_; j++ ) { int nlonsGlobalj = gs_global.nx( jlatMinLeg_ + j ); //ASSERT( nlonsGlobalj > 0 && nlonsGlobalj <= nlonsMaxGlobal_ ); plans_[j] = fftw_plan_dft_c2r_1d( nlonsGlobalj, fft_in_, fft_out_, FFTW_ESTIMATE ); } - file_fftw = fopen( "wisdom.bin", "wb" ); - fftw_export_wisdom_to_file( file_fftw ); - fclose( file_fftw ); + if ( !file_fftw ) { + file_fftw = fopen( "wisdom.bin", "wb" ); + fftw_export_wisdom_to_file( file_fftw ); + fclose( file_fftw ); + } } } // other FFT implementations should be added with #elif statements @@ -463,86 
+481,97 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat int size_asym = num_n( truncation_ + 1, jm, false ); int n_imag = 2; if ( jm == 0 ) { n_imag = 1; } - int size_fourier = nb_fields * n_imag * nlatsLeg_; - auto posFourier = [&]( int jfld, int imag, int jlat, int jm, int nlatsH ) { - return jfld + nb_fields * ( imag + n_imag * ( nlatsLeg_ - nlat0_[jm] - nlatsH + jlat ) ); - }; - double* scalar_sym; - double* scalar_asym; - double* scl_fourier_sym; - double* scl_fourier_asym; - alloc_aligned( scalar_sym, n_imag * nb_fields * size_sym ); - alloc_aligned( scalar_asym, n_imag * nb_fields * size_asym ); - alloc_aligned( scl_fourier_sym, size_fourier ); - alloc_aligned( scl_fourier_asym, size_fourier ); - { - //ATLAS_TRACE( "opt3 Legendre split" ); - int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2; - // the choice between the following two code lines determines whether - // total wavenumbers are summed in an ascending or descending order. - // The trans library in IFS uses descending order because it should - // be more accurate (higher wavenumbers have smaller contributions). - // This also needs to be changed when splitting the spectral data in - // compute_legendre_polynomialsopt3! 
- //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { - for ( int jn = truncation_ + 1; jn >= jm; jn-- ) { - for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) ); - if ( jn <= truncation && jm < truncation ) { - if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } - else { - scalar_asym[ia++] = scalar_spectra[idx + ioff]; + int size_fourier = nb_fields * n_imag * ( nlatsLegReduced_ - nlat0_[jm] ); + if ( size_fourier > 0 ) { + auto posFourier = [&]( int jfld, int imag, int jlat, int jm, int nlatsH ) { + return jfld + nb_fields * ( imag + n_imag * ( nlatsLegReduced_ - nlat0_[jm] - nlatsH + jlat ) ); + }; + double* scalar_sym; + double* scalar_asym; + double* scl_fourier_sym; + double* scl_fourier_asym; + alloc_aligned( scalar_sym, n_imag * nb_fields * size_sym ); + alloc_aligned( scalar_asym, n_imag * nb_fields * size_asym ); + alloc_aligned( scl_fourier_sym, size_fourier ); + alloc_aligned( scl_fourier_asym, size_fourier ); + { + //ATLAS_TRACE( "opt3 Legendre split" ); + int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2; + // the choice between the following two code lines determines whether + // total wavenumbers are summed in an ascending or descending order. + // The trans library in IFS uses descending order because it should + // be more accurate (higher wavenumbers have smaller contributions). + // This also needs to be changed when splitting the spectral data in + // compute_legendre_polynomialsopt3! 
+ //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { + for ( int jn = truncation_ + 1; jn >= jm; jn-- ) { + for ( int imag = 0; imag < n_imag; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) ); + if ( jn <= truncation && jm < truncation ) { + if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } + else { + scalar_asym[ia++] = scalar_spectra[idx + ioff]; + } } - } - else { - if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = 0.; } else { - scalar_asym[ia++] = 0.; + if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = 0.; } + else { + scalar_asym[ia++] = 0.; + } } } } } + ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym ); } - ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym ); - } - if ( nlatsLeg_ - nlat0_[jm] > 0 ) { - { - eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym ); - eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm] + nlat0_[jm] * size_sym, size_sym, - nlatsLeg_ - nlat0_[jm] ); - eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLeg_ - nlat0_[jm] ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - if ( size_asym > 0 ) { - eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym ); - eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm] + nlat0_[jm] * size_sym, - size_asym, nlatsLeg_ - nlat0_[jm] ); - eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ - nlat0_[jm] ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + if ( nlatsLegReduced_ - nlat0_[jm] > 0 ) { + { + eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym ); + eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm] + nlat0_[jm] * size_sym, + size_sym, nlatsLegReduced_ - nlat0_[jm] ); + eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLegReduced_ - nlat0_[jm] ); + 
eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + /*Log::info() << "sym: "; + for ( int j = 0; j < size_sym * ( nlatsLegReduced_ - nlat0_[jm] ); j++ ) { + Log::info() << legendre_sym_[j + legendre_sym_begin_[jm] + nlat0_[jm] * size_sym] << " "; + } + Log::info() << std::endl;*/ + } + if ( size_asym > 0 ) { + eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym ); + eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm] + nlat0_[jm] * size_asym, + size_asym, nlatsLegReduced_ - nlat0_[jm] ); + eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLegReduced_ - nlat0_[jm] ); + eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + /*Log::info() << "asym: "; + for ( int j = 0; j < size_asym * ( nlatsLegReduced_ - nlat0_[jm] ); j++ ) { + Log::info() << legendre_asym_[j + legendre_asym_begin_[jm] + nlat0_[jm] * size_asym] << " "; + } + Log::info() << std::endl;*/ + } } - } - { - //ATLAS_TRACE( "opt3 merge spheres" ); - // northern hemisphere: - for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) { - if ( nlatsLeg_ - nlat0_[jm] - nlatsNH_ + jlat >= 0 ) { - for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ ); - scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] = - scl_fourier_sym[idx] + scl_fourier_asym[idx]; + { + //ATLAS_TRACE( "opt3 merge spheres" ); + // northern hemisphere: + for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) { + if ( nlatsLegReduced_ - nlat0_[jm] - nlatsNH_ + jlat >= 0 ) { + for ( int imag = 0; imag < n_imag; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ ); + scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] = + scl_fourier_sym[idx] + scl_fourier_asym[idx]; + } } } - } - else { - for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - scl_fourier[posMethod( jfld, 
imag, jlat, jm, nb_fields, nlats )] = 0.; + else { + for ( int imag = 0; imag < n_imag; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] = 0.; + } } } - } - /*for ( int imag = 0; imag < n_imag; imag++ ) { + /*for ( int imag = 0; imag < n_imag; imag++ ) { for ( int jfld = 0; jfld < nb_fields; jfld++ ) { if ( scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] > 0. ) { Log::info() << "jm=" << jm << " jlat=" << jlat << " nlatsLeg_=" << nlatsLeg_ @@ -550,32 +579,42 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat } } }*/ - } - // southern hemisphere: - for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) { - int jslat = nlats - jlat - 1; - if ( nlatsLeg_ - nlat0_[jm] - nlatsSH_ + jlat >= 0 ) { - for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int idx = posFourier( jfld, imag, jlat, jm, nlatsSH_ ); - scl_fourier[posMethod( jfld, imag, jslat, jm, nb_fields, nlats )] = - scl_fourier_sym[idx] - scl_fourier_asym[idx]; + } + // southern hemisphere: + for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) { + int jslat = nlats - jlat - 1; + if ( nlatsLegReduced_ - nlat0_[jm] - nlatsSH_ + jlat >= 0 ) { + for ( int imag = 0; imag < n_imag; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + int idx = posFourier( jfld, imag, jlat, jm, nlatsSH_ ); + scl_fourier[posMethod( jfld, imag, jslat, jm, nb_fields, nlats )] = + scl_fourier_sym[idx] - scl_fourier_asym[idx]; + } } } - } - else { - for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - scl_fourier[posMethod( jfld, imag, jslat, jm, nb_fields, nlats )] = 0.; + else { + for ( int imag = 0; imag < n_imag; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + scl_fourier[posMethod( jfld, imag, jslat, jm, nb_fields, nlats )] = 0.; + } } } } } + free_aligned( scalar_sym ); + free_aligned( scalar_asym ); + 
free_aligned( scl_fourier_sym ); + free_aligned( scl_fourier_asym ); + } + else { + for ( int jlat = 0; jlat < nlats; jlat++ ) { + for ( int imag = 0; imag < n_imag; imag++ ) { + for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )] = 0.; + } + } + } } - free_aligned( scalar_sym ); - free_aligned( scalar_asym ); - free_aligned( scl_fourier_sym ); - free_aligned( scl_fourier_asym ); } } } @@ -698,8 +737,8 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid:: } //Log::info() << std::endl; //Log::info() << jlat << "out:" << std::endl; - int jplan = nlatsLeg_ - nlatsNH_ + jlat; - if ( jplan >= nlatsLeg_ ) { jplan = nlats - 1 + nlatsLeg_ - nlatsSH_ - jlat; }; + int jplan = nlatsLegDomain_ - nlatsNH_ + jlat; + if ( jplan >= nlatsLegDomain_ ) { jplan = nlats - 1 + nlatsLegDomain_ - nlatsSH_ - jlat; }; //ASSERT( jplan < nlatsLeg_ && jplan >= 0 ); fftw_execute_dft_c2r( plans_[jplan], fft_in_, fft_out_ ); for ( int jlon = 0; jlon < g.nx( jlat ); jlon++ ) { @@ -779,7 +818,7 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const alloc_aligned( gp_opt, nb_fields ); { - ATLAS_TRACE( "opt Legendre dgemm" ); + ATLAS_TRACE( "opt3 Legendre dgemm" ); for ( int jm = 0; jm < truncation; jm++ ) { int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; eckit::linalg::Matrix A( eckit::linalg::Matrix( @@ -792,7 +831,7 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const // loop over all points: { - ATLAS_TRACE( "opt Fourier dgemm" ); + ATLAS_TRACE( "opt3 Fourier dgemm" ); for ( int ip = 0; ip < grid_.size(); ip++ ) { PointXY p = gu.xy( ip ); diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h index 1ca002436..056d4e304 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.h +++ b/src/atlas/trans/localopt3/TransLocalopt3.h @@ -149,6 +149,8 @@ class TransLocalopt3 : public 
trans::TransImpl { int nlatsNH_; int nlatsSH_; int nlatsLeg_; + int nlatsLegReduced_; + int nlatsLegDomain_; std::vector jlonMin_; int jlatMin_; int jlatMinLeg_; diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 2f2c2705e..1a411227a 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -478,7 +478,7 @@ void spectral_transform_grid_analytic( double lon = g.x( i, j ) * util::Constants::degreesToRadians(); // compute spherical harmonics: - if ( ftrc >= m ) { + if ( ftrc > m ) { rgp[idx++] = sphericalharmonics_analytic_point( n, m, imag, lon, lat, ivar_in, ivar_out ); } else { @@ -961,36 +961,33 @@ CASE( "test_trans_domain" ) { //Domain testdomain = ZonalBandDomain( {-90., 90.} ); //Domain testdomain = ZonalBandDomain( {-.5, .5} ); //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} ); - //Domain testdomain = ZonalBandDomain( {-85., -86.} ); - Domain testdomain = RectangularDomain( {15., 20.}, {10., 20.} ); + Domain testdomain1 = ZonalBandDomain( {-10., 5.} ); + //Domain testdomain1 = RectangularDomain( {-1., 1.}, {50., 55.} ); + Domain testdomain2 = RectangularDomain( {-1., 1.}, {-5., 40.} ); // Grid: (Adjust the following line if the test takes too long!) - Grid g( "O120" ); - Grid g_global( g.name() ); - - grid::StructuredGrid gs( g ); - grid::StructuredGrid gs_global( g_global ); - Log::info() << "nlats: " << gs.ny() << " nlons:" << gs.nxmax() << std::endl; - int ndgl = gs_global.ny(); - //int trc = ndgl - 1; // linear - //int trc = ndgl / 2. 
- 1; // cubic - int trc = 120; - trans::Trans transLocal1( g, trc, util::Config( "type", "ifs" ) ); - trans::Trans transLocal2( g, trc, util::Config( "type", "localopt3" ) ); + std::string gridString = "O640"; + Grid g1( gridString, testdomain1 ); + Grid g2( gridString, testdomain2 ); + + int trc = 640; + //Log::info() << "rgp1:" << std::endl; + trans::Trans transLocal1( g1, trc, util::Config( "type", "localopt3" ) ); + //Log::info() << "rgp2:" << std::endl; + trans::Trans transLocal2( g2, trc, util::Config( "type", "localopt3" ) ); double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 functionspace::Spectral spectral( trc ); - functionspace::StructuredColumns gridpoints( g ); - int nb_scalar = 2, nb_vordiv = 2; + int nb_scalar = 1, nb_vordiv = 0; int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; std::vector sp( 2 * N * nb_scalar ); std::vector vor( 2 * N * nb_vordiv ); std::vector div( 2 * N * nb_vordiv ); std::vector rspecg( 2 * N ); - std::vector gp( nb_all * g.size() ); - std::vector rgp1( nb_all * g.size() ); - std::vector rgp2( nb_all * g.size() ); - std::vector rgp_analytic( g.size() ); + std::vector rgp1( nb_all * g1.size() ); + std::vector rgp2( nb_all * g2.size() ); + std::vector rgp1_analytic( g1.size() ); + std::vector rgp2_analytic( g2.size() ); int icase = 0; for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) { // vorticity, divergence, scalar @@ -1024,41 +1021,54 @@ CASE( "test_trans_domain" ) { if ( ivar_in == 1 ) div[k * nb_vordiv + jfld] = 1.; if ( ivar_in == 2 ) sp[k * nb_scalar + jfld] = 1.; - for ( int j = 0; j < nb_all * g.size(); j++ ) { - gp[j] = 0.; + for ( int j = 0; j < nb_all * g1.size(); j++ ) { rgp1[j] = 0.; + } + for ( int j = 0; j < nb_all * g2.size(); j++ ) { rgp2[j] = 0.; } - for ( int j = 0; j < g.size(); j++ ) { - rgp_analytic[j] = 0.; + for ( int j = 0; j < g1.size(); j++ ) { + rgp1_analytic[j] = 0.; + } + for ( int j = 0; j < g2.size(); j++ ) { + rgp2_analytic[j] = 0.; } 
- spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg.data(), - rgp_analytic.data(), ivar_in, ivar_out ); + spectral_transform_grid_analytic( trc, trc, n, m, imag, g1, rspecg.data(), + rgp1_analytic.data(), ivar_in, ivar_out ); + + spectral_transform_grid_analytic( trc, trc, n, m, imag, g2, rspecg.data(), + rgp2_analytic.data(), ivar_in, ivar_out ); + //Log::info() << std::endl << "rgp1:"; EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), div.data(), rgp1.data() ) ); + //Log::info() << std::endl << "rgp2:"; EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), div.data(), rgp2.data() ) ); int pos = ( ivar_out * nb_vordiv + jfld ); double rms_gen1 = - compute_rms( g.size(), rgp1.data() + pos * g.size(), rgp_analytic.data() ); + compute_rms( g1.size(), rgp1.data() + pos * g1.size(), rgp1_analytic.data() ); double rms_gen2 = - compute_rms( g.size(), rgp2.data() + pos * g.size(), rgp_analytic.data() ); + compute_rms( g2.size(), rgp2.data() + pos * g2.size(), rgp2_analytic.data() ); //Log::info() << "Case " << icase << " ivar_in=" << ivar_in << " ivar_out=" << ivar_out // << " m=" << m << " n=" << n << " imag=" << imag << " k=" << k << std::endl - // << "rgp2:"; - //for ( int j = 0; j < g.size(); j++ ) { - // Log::info() << rgp2[pos * g.size() + j] << " "; + // << "rgp1:"; + //for ( int j = 0; j < g1.size(); j++ ) { + // Log::info() << rgp1[pos * g1.size() + j] << " "; + //}; + //Log::info() << std::endl << "rgp2:"; + //for ( int j = 0; j < g2.size(); j++ ) { + // Log::info() << rgp2[pos * g2.size() + j] << " "; //}; - //Log::info() << std::endl << "analytic:"; - //for ( int j = 0; j < g.size(); j++ ) { - // Log::info() << rgp_analytic[j] << " "; + //Log::info() << std::endl << "analytic1:"; + //for ( int j = 0; j < g1.size(); j++ ) { + // Log::info() << rgp1_analytic[j] << " "; //}; //Log::info() << std::endl; rav1 += rms_gen1; From 69ab84d5ac040bb30e77cfd3c9702de9afda21c9 Mon Sep 17 00:00:00 
2001 From: Andreas Mueller Date: Sun, 22 Apr 2018 02:02:04 +0100 Subject: [PATCH 049/123] reading and writing fftw wisdom now via strings and streams --- src/atlas/trans/localopt3/TransLocalopt3.cc | 35 +++++++++++---------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index 1b9fbc39b..f47c3a68b 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -305,35 +305,36 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long int num_complex = ( nlonsMaxGlobal_ / 2 ) + 1; fft_in_ = fftw_alloc_complex( nlats * num_complex ); fft_out_ = fftw_alloc_real( nlats * nlonsMaxGlobal_ ); + std::string wisdomString( "" ); + std::ifstream read( "wisdom.bin" ); + if ( read.is_open() ) { + std::getline( read, wisdomString ); + while ( read ) { + std::string line; + std::getline( read, line ); + wisdomString += line; + } + } + read.close(); + if ( wisdomString.length() > 0 ) { fftw_import_wisdom_from_string( &wisdomString[0u] ); } if ( grid::RegularGrid( gridGlobal_ ) ) { plans_.resize( 1 ); - FILE* file_fftw; - file_fftw = fopen( "wisdom.bin", "r" ); - if ( file_fftw ) { - fftw_import_wisdom_from_file( file_fftw ); - fclose( file_fftw ); - } plans_[0] = fftw_plan_many_dft_c2r( 1, &nlonsMaxGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, nlonsMaxGlobal_, FFTW_ESTIMATE ); } else { plans_.resize( nlatsLegDomain_ ); - FILE* file_fftw; - file_fftw = fopen( "wisdom.bin", "r" ); - if ( file_fftw ) { - fftw_import_wisdom_from_file( file_fftw ); - fclose( file_fftw ); - } for ( int j = 0; j < nlatsLegDomain_; j++ ) { int nlonsGlobalj = gs_global.nx( jlatMinLeg_ + j ); //ASSERT( nlonsGlobalj > 0 && nlonsGlobalj <= nlonsMaxGlobal_ ); plans_[j] = fftw_plan_dft_c2r_1d( nlonsGlobalj, fft_in_, fft_out_, FFTW_ESTIMATE ); } - if ( !file_fftw ) { - file_fftw = fopen( "wisdom.bin", "wb" ); - 
fftw_export_wisdom_to_file( file_fftw ); - fclose( file_fftw ); - } + } + std::string newWisdom( fftw_export_wisdom_to_string() ); + if ( 1.1 * wisdomString.length() < newWisdom.length() ) { + std::ofstream write( "wisdom.bin" ); + write << newWisdom; + write.close(); } } // other FFT implementations should be added with #elif statements From 0cb6bfb98bb672a21c2eb4f38b79163e76e42140 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 25 Apr 2018 12:04:07 +0100 Subject: [PATCH 050/123] Trans type=local preliminary LegendreCache support --- VERSION.cmake | 2 +- cmake/CompileFlags.cmake | 4 +- src/atlas/CMakeLists.txt | 20 +- src/atlas/option/TransOptions.cc | 8 + src/atlas/option/TransOptions.h | 14 ++ src/atlas/trans/Trans.cc | 4 +- src/atlas/trans/Trans.h | 22 +- src/atlas/trans/VorDivToUV.cc | 4 +- .../FourierTransforms.cc | 2 +- .../FourierTransforms.h | 0 .../LegendrePolynomials.cc | 2 +- .../LegendrePolynomials.h | 0 .../LegendreTransforms.cc | 2 +- .../LegendreTransforms.h | 0 .../{local => local_noopt}/TransLocal.cc | 10 +- .../trans/{local => local_noopt}/TransLocal.h | 0 .../{local => local_noopt}/VorDivToUVLocal.cc | 4 +- .../{local => local_noopt}/VorDivToUVLocal.h | 0 src/atlas/trans/localopt/TransLocalopt.cc | 2 +- src/atlas/trans/localopt2/TransLocalopt2.cc | 2 +- .../localopt3/LegendrePolynomialsopt3.cc | 1 + src/atlas/trans/localopt3/TransLocalopt3.cc | 203 ++++++++++++++---- src/atlas/trans/localopt3/TransLocalopt3.h | 6 + .../trans/localopt3/VorDivToUVLocalopt3.cc | 3 +- src/tests/trans/test_transgeneral.cc | 18 +- 25 files changed, 260 insertions(+), 73 deletions(-) rename src/atlas/trans/{local => local_noopt}/FourierTransforms.cc (98%) rename src/atlas/trans/{local => local_noopt}/FourierTransforms.h (100%) rename src/atlas/trans/{local => local_noopt}/LegendrePolynomials.cc (99%) rename src/atlas/trans/{local => local_noopt}/LegendrePolynomials.h (100%) rename src/atlas/trans/{local => local_noopt}/LegendreTransforms.cc (97%) rename 
src/atlas/trans/{local => local_noopt}/LegendreTransforms.h (100%) rename src/atlas/trans/{local => local_noopt}/TransLocal.cc (98%) rename src/atlas/trans/{local => local_noopt}/TransLocal.h (100%) rename src/atlas/trans/{local => local_noopt}/VorDivToUVLocal.cc (98%) rename src/atlas/trans/{local => local_noopt}/VorDivToUVLocal.h (100%) diff --git a/VERSION.cmake b/VERSION.cmake index 756656b6a..62025fc5d 100644 --- a/VERSION.cmake +++ b/VERSION.cmake @@ -6,5 +6,5 @@ # granted to it by virtue of its status as an intergovernmental organisation nor # does it submit to any jurisdiction. -set ( ${PROJECT_NAME}_VERSION_STR "0.14.0" ) +set ( ${PROJECT_NAME}_VERSION_STR "0.14.0-opt-translocal" ) diff --git a/cmake/CompileFlags.cmake b/cmake/CompileFlags.cmake index c92b7d581..ef238a21e 100644 --- a/cmake/CompileFlags.cmake +++ b/cmake/CompileFlags.cmake @@ -9,8 +9,8 @@ if( CMAKE_CXX_COMPILER_ID MATCHES Cray ) endif() -ecbuild_add_cxx_flags("-Wl,-ydgemm_") -ecbuild_add_fortran_flags("-Wl,-ydgemm_") +#ecbuild_add_cxx_flags("-Wl,-ydgemm_") +#ecbuild_add_fortran_flags("-Wl,-ydgemm_") #ecbuild_add_cxx_flags("-fsanitize=address") #ecbuild_add_cxx_flags("-fsanitize=thread") #ecbuild_add_cxx_flags("-fsanitize=memory") diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt index a8ddfe4b4..6a10f2ff1 100644 --- a/src/atlas/CMakeLists.txt +++ b/src/atlas/CMakeLists.txt @@ -321,16 +321,16 @@ trans/Trans.h trans/Trans.cc trans/VorDivToUV.h trans/VorDivToUV.cc -trans/local/TransLocal.h -trans/local/TransLocal.cc -trans/local/LegendrePolynomials.h -trans/local/LegendrePolynomials.cc -trans/local/LegendreTransforms.h -trans/local/LegendreTransforms.cc -trans/local/FourierTransforms.h -trans/local/FourierTransforms.cc -trans/local/VorDivToUVLocal.h -trans/local/VorDivToUVLocal.cc +trans/local_noopt/TransLocal.h +trans/local_noopt/TransLocal.cc +trans/local_noopt/LegendrePolynomials.h +trans/local_noopt/LegendrePolynomials.cc +trans/local_noopt/LegendreTransforms.h 
+trans/local_noopt/LegendreTransforms.cc +trans/local_noopt/FourierTransforms.h +trans/local_noopt/FourierTransforms.cc +trans/local_noopt/VorDivToUVLocal.h +trans/local_noopt/VorDivToUVLocal.cc trans/localopt/TransLocalopt.h trans/localopt/TransLocalopt.cc trans/localopt/LegendrePolynomialsopt.h diff --git a/src/atlas/option/TransOptions.cc b/src/atlas/option/TransOptions.cc index 0f00dcd0f..b321838fd 100644 --- a/src/atlas/option/TransOptions.cc +++ b/src/atlas/option/TransOptions.cc @@ -52,6 +52,14 @@ read_legendre::read_legendre( const eckit::PathName& filepath ) { set( "read_legendre", filepath ); } +write_fft::write_fft( const eckit::PathName& filepath ) { + set( "write_fft", filepath ); +} + +read_fft::read_fft( const eckit::PathName& filepath ) { + set( "read_fft", filepath ); +} + nproma::nproma( int nproma ) { set( "nproma", nproma ); } diff --git a/src/atlas/option/TransOptions.h b/src/atlas/option/TransOptions.h index 3e548eb2c..952ea7465 100644 --- a/src/atlas/option/TransOptions.h +++ b/src/atlas/option/TransOptions.h @@ -84,6 +84,20 @@ class read_legendre : public util::Config { // ---------------------------------------------------------------------------- +class write_fft : public util::Config { +public: + write_fft( const eckit::PathName& ); +}; + +// ---------------------------------------------------------------------------- + +class read_fft : public util::Config { +public: + read_fft( const eckit::PathName& ); +}; + +// ---------------------------------------------------------------------------- + class nproma : public util::Config { nproma( int ); }; diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc index b264cc7ee..af6b325eb 100644 --- a/src/atlas/trans/Trans.cc +++ b/src/atlas/trans/Trans.cc @@ -27,10 +27,10 @@ #else #define TRANS_DEFAULT "local" #endif -#include "atlas/trans/local/TransLocal.h" +#include "atlas/trans/local_noopt/TransLocal.h" #include "atlas/trans/localopt/TransLocalopt.h" #include 
"atlas/trans/localopt2/TransLocalopt2.h" -#include "atlas/trans/localopt3/TransLocalopt3.h" +#include "atlas/trans/localopt3/TransLocalopt3.h" // --> recommended "local" namespace atlas { namespace trans { diff --git a/src/atlas/trans/Trans.h b/src/atlas/trans/Trans.h index c5c417201..a78cc1e49 100644 --- a/src/atlas/trans/Trans.h +++ b/src/atlas/trans/Trans.h @@ -19,6 +19,8 @@ #include "eckit/memory/SharedPtr.h" #include "atlas/util/Config.h" +#include "atlas/runtime/Trace.h" +#include "atlas/runtime/Log.h" //----------------------------------------------------------------------------- // Forward declarations @@ -55,6 +57,8 @@ class TransCacheFileEntry final : public TransCacheEntry { public: TransCacheFileEntry( const eckit::PathName& path ) : buffer_( path.size() ) { + ATLAS_TRACE(); + Log::debug() << "Loading cache from file " << path << std::endl; std::unique_ptr dh( path.fileHandle() ); dh->openForRead(); dh->read( buffer_.data(), buffer_.size() ); @@ -102,10 +106,24 @@ class Cache { class LegendreCache : public Cache { public: LegendreCache(const void* address, size_t size) : - Cache(std::make_shared(address, size)) { + Cache( std::make_shared( address, size ) ) { } LegendreCache( const eckit::PathName& path ) : - Cache( std::shared_ptr( new TransCacheFileEntry( path ) ) ) {} + Cache( std::shared_ptr( new TransCacheFileEntry( path ) ) ) { + } +}; + +class LegendreFFTCache : public Cache { +public: + LegendreFFTCache( const void* legendre_address, size_t legendre_size, + const void* fft_address, size_t fft_size ) : + Cache( std::make_shared( legendre_address, legendre_size ), + std::make_shared( fft_address, fft_size ) ) { + } + LegendreFFTCache( const eckit::PathName& legendre_path, const eckit::PathName& fft_path ) : + Cache( std::shared_ptr( new TransCacheFileEntry( legendre_path ) ), + std::shared_ptr( new TransCacheFileEntry( fft_path ) ) ) { + } }; class TransImpl : public eckit::Owned { diff --git a/src/atlas/trans/VorDivToUV.cc 
b/src/atlas/trans/VorDivToUV.cc index 727ead312..70feef4d8 100644 --- a/src/atlas/trans/VorDivToUV.cc +++ b/src/atlas/trans/VorDivToUV.cc @@ -26,10 +26,10 @@ #else #define TRANS_DEFAULT "local" #endif -#include "atlas/trans/local/VorDivToUVLocal.h" +#include "atlas/trans/local_noopt/VorDivToUVLocal.h" #include "atlas/trans/localopt/VorDivToUVLocalopt.h" #include "atlas/trans/localopt2/VorDivToUVLocalopt2.h" -#include "atlas/trans/localopt3/VorDivToUVLocalopt3.h" +#include "atlas/trans/localopt3/VorDivToUVLocalopt3.h" // --> recommended "local" namespace atlas { namespace trans { diff --git a/src/atlas/trans/local/FourierTransforms.cc b/src/atlas/trans/local_noopt/FourierTransforms.cc similarity index 98% rename from src/atlas/trans/local/FourierTransforms.cc rename to src/atlas/trans/local_noopt/FourierTransforms.cc index 886cc2ee7..c9f6f2974 100644 --- a/src/atlas/trans/local/FourierTransforms.cc +++ b/src/atlas/trans/local_noopt/FourierTransforms.cc @@ -13,7 +13,7 @@ #include #include -#include "atlas/trans/local/FourierTransforms.h" +#include "atlas/trans/local_noopt/FourierTransforms.h" namespace atlas { namespace trans { diff --git a/src/atlas/trans/local/FourierTransforms.h b/src/atlas/trans/local_noopt/FourierTransforms.h similarity index 100% rename from src/atlas/trans/local/FourierTransforms.h rename to src/atlas/trans/local_noopt/FourierTransforms.h diff --git a/src/atlas/trans/local/LegendrePolynomials.cc b/src/atlas/trans/local_noopt/LegendrePolynomials.cc similarity index 99% rename from src/atlas/trans/local/LegendrePolynomials.cc rename to src/atlas/trans/local_noopt/LegendrePolynomials.cc index 639f76a82..26854fd69 100644 --- a/src/atlas/trans/local/LegendrePolynomials.cc +++ b/src/atlas/trans/local_noopt/LegendrePolynomials.cc @@ -13,7 +13,7 @@ #include #include "atlas/array.h" -#include "atlas/trans/local/LegendrePolynomials.h" +#include "atlas/trans/local_noopt/LegendrePolynomials.h" namespace atlas { namespace trans { diff --git 
a/src/atlas/trans/local/LegendrePolynomials.h b/src/atlas/trans/local_noopt/LegendrePolynomials.h similarity index 100% rename from src/atlas/trans/local/LegendrePolynomials.h rename to src/atlas/trans/local_noopt/LegendrePolynomials.h diff --git a/src/atlas/trans/local/LegendreTransforms.cc b/src/atlas/trans/local_noopt/LegendreTransforms.cc similarity index 97% rename from src/atlas/trans/local/LegendreTransforms.cc rename to src/atlas/trans/local_noopt/LegendreTransforms.cc index b18d28ca8..f82d9f401 100644 --- a/src/atlas/trans/local/LegendreTransforms.cc +++ b/src/atlas/trans/local_noopt/LegendreTransforms.cc @@ -10,7 +10,7 @@ #include -#include "atlas/trans/local/LegendreTransforms.h" +#include "atlas/trans/local_noopt/LegendreTransforms.h" namespace atlas { namespace trans { diff --git a/src/atlas/trans/local/LegendreTransforms.h b/src/atlas/trans/local_noopt/LegendreTransforms.h similarity index 100% rename from src/atlas/trans/local/LegendreTransforms.h rename to src/atlas/trans/local_noopt/LegendreTransforms.h diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local_noopt/TransLocal.cc similarity index 98% rename from src/atlas/trans/local/TransLocal.cc rename to src/atlas/trans/local_noopt/TransLocal.cc index f0f5973ef..33947d15f 100644 --- a/src/atlas/trans/local/TransLocal.cc +++ b/src/atlas/trans/local_noopt/TransLocal.cc @@ -8,23 +8,23 @@ * nor does it submit to any jurisdiction. 
*/ -#include "atlas/trans/local/TransLocal.h" +#include "atlas/trans/local_noopt/TransLocal.h" #include "atlas/array.h" #include "atlas/option.h" #include "atlas/parallel/mpi/mpi.h" #include "atlas/runtime/ErrorHandling.h" #include "atlas/runtime/Log.h" #include "atlas/trans/VorDivToUV.h" -#include "atlas/trans/local/FourierTransforms.h" -#include "atlas/trans/local/LegendrePolynomials.h" -#include "atlas/trans/local/LegendreTransforms.h" +#include "atlas/trans/local_noopt/FourierTransforms.h" +#include "atlas/trans/local_noopt/LegendrePolynomials.h" +#include "atlas/trans/local_noopt/LegendreTransforms.h" #include "atlas/util/Constants.h" namespace atlas { namespace trans { namespace { -static TransBuilderGrid builder( "local" ); +static TransBuilderGrid builder( "local_noopt" ); } // -------------------------------------------------------------------------------------------------------------------- diff --git a/src/atlas/trans/local/TransLocal.h b/src/atlas/trans/local_noopt/TransLocal.h similarity index 100% rename from src/atlas/trans/local/TransLocal.h rename to src/atlas/trans/local_noopt/TransLocal.h diff --git a/src/atlas/trans/local/VorDivToUVLocal.cc b/src/atlas/trans/local_noopt/VorDivToUVLocal.cc similarity index 98% rename from src/atlas/trans/local/VorDivToUVLocal.cc rename to src/atlas/trans/local_noopt/VorDivToUVLocal.cc index 15065d4e1..0db63e792 100644 --- a/src/atlas/trans/local/VorDivToUVLocal.cc +++ b/src/atlas/trans/local_noopt/VorDivToUVLocal.cc @@ -9,7 +9,7 @@ */ #include // for std::sqrt -#include "atlas/trans/local/VorDivToUVLocal.h" +#include "atlas/trans/local_noopt/VorDivToUVLocal.h" #include "atlas/functionspace/Spectral.h" #include "atlas/runtime/Log.h" #include "atlas/util/Earth.h" @@ -21,7 +21,7 @@ namespace atlas { namespace trans { namespace { -static VorDivToUVBuilder builder( "local" ); +static VorDivToUVBuilder builder( "local_noopt" ); } // 
-------------------------------------------------------------------------------------------------------------------- diff --git a/src/atlas/trans/local/VorDivToUVLocal.h b/src/atlas/trans/local_noopt/VorDivToUVLocal.h similarity index 100% rename from src/atlas/trans/local/VorDivToUVLocal.h rename to src/atlas/trans/local_noopt/VorDivToUVLocal.h diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index d0dfbfe4a..29ac535c1 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -16,7 +16,7 @@ #include "atlas/runtime/ErrorHandling.h" #include "atlas/runtime/Log.h" #include "atlas/trans/VorDivToUV.h" -#include "atlas/trans/local/LegendrePolynomials.h" +#include "atlas/trans/local_noopt/LegendrePolynomials.h" #include "atlas/trans/localopt/FourierTransformsopt.h" #include "atlas/trans/localopt/LegendrePolynomialsopt.h" #include "atlas/trans/localopt/LegendreTransformsopt.h" diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index 6b51c2dc5..7a46c7245 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -16,7 +16,7 @@ #include "atlas/runtime/ErrorHandling.h" #include "atlas/runtime/Log.h" #include "atlas/trans/VorDivToUV.h" -#include "atlas/trans/local/LegendrePolynomials.h" +#include "atlas/trans/local_noopt/LegendrePolynomials.h" #include "atlas/trans/localopt2/FourierTransformsopt2.h" #include "atlas/trans/localopt2/LegendrePolynomialsopt2.h" #include "atlas/trans/localopt2/LegendreTransformsopt2.h" diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc index 1cddbc18b..2f16cc43b 100644 --- a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc +++ b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc @@ -158,6 +158,7 @@ void compute_legendre_polynomialsopt3( size_t leg_start_sym[], // start 
indices for different zonal wave numbers, symmetric part size_t leg_start_asym[] ) // start indices for different zonal wave numbers, asymmetric part { + ATLAS_TRACE(); auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; }; std::vector legpol( legendre_size( trc ) ); std::vector zfn( ( trc + 1 ) * ( trc + 1 ) ); diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index f47c3a68b..eefe99c22 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -16,12 +16,13 @@ #include "atlas/runtime/ErrorHandling.h" #include "atlas/runtime/Log.h" #include "atlas/trans/VorDivToUV.h" -#include "atlas/trans/local/LegendrePolynomials.h" +#include "atlas/trans/local_noopt/LegendrePolynomials.h" #include "atlas/trans/localopt3/LegendrePolynomialsopt3.h" #include "atlas/util/Constants.h" #include "eckit/eckit_config.h" #include "eckit/linalg/LinearAlgebra.h" #include "eckit/linalg/Matrix.h" +#include "eckit/log/Bytes.h" #ifdef ECKIT_HAVE_MKL #include "mkl.h" #endif @@ -30,7 +31,84 @@ namespace atlas { namespace trans { namespace { -static TransBuilderGrid builder( "localopt3" ); +static TransBuilderGrid builder_deprecated( "localopt3" ); +static TransBuilderGrid builder( "local" ); +} + +namespace { +class TransParameters { +public: + TransParameters( const eckit::Configuration& config ) : config_( config ) {} + ~TransParameters() {} + + bool scalar_derivatives() const { return config_.getBool( "scalar_derivatives", false ); } + + bool wind_EW_derivatives() const { return config_.getBool( "wind_EW_derivatives", false ); } + + bool vorticity_divergence_fields() const { return config_.getBool( "vorticity_divergence_fields", false ); } + + std::string read_legendre() const { return config_.getString( "read_legendre", "" ); } + + std::string write_legendre() const { return config_.getString( "write_legendre", "" ); } + + std::string read_fft() 
const { return config_.getString( "read_fft", "" ); } + + std::string write_fft() const { return config_.getString( "write_fft", "" ); } + + bool global() const { return config_.getBool( "global", false ); } + +private: + const eckit::Configuration& config_; +}; + +struct ReadCache { +ReadCache( const void* cache ) { + begin = (char*) cache; + pos = 0; +} +template T* read(size_t size) { + T* v = (T*) (begin + pos); + pos += size * sizeof(T); + return v; +} +char* begin; +size_t pos; +}; + +struct WriteCache { +WriteCache( const eckit::PathName& file_path, long estimated_length = 0 ) : + dh_( file_path.fileHandle( /*overwrite = */ true ) ) +{ + dh_->openForWrite( estimated_length ); + pos = 0; +} +~WriteCache() { + dh_->close(); +} +template void write( const T* v, long size) { + dh_->write( v , size * sizeof(T) ); + pos += size * sizeof(T); +} +std::unique_ptr dh_; +size_t pos; +}; + +#if ATLAS_HAVE_FFTW +struct FFTW_Wisdom { + char* wisdom; + FFTW_Wisdom() { + wisdom = fftw_export_wisdom_to_string(); + } + ~FFTW_Wisdom() { + free( wisdom ); + } +}; +std::ostream& operator<< (std::ostream& out, const FFTW_Wisdom& w) { + out << w.wisdom; + return out; +} +#endif + } // -------------------------------------------------------------------------------------------------------------------- @@ -119,7 +197,13 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long const eckit::Configuration& config ) : grid_( grid ), truncation_( truncation ), - precompute_( config.getBool( "precompute", true ) ) { + precompute_( config.getBool( "precompute", true ) ), + cache_( cache ), + legendre_cache_( cache.legendre().data() ), + legendre_cachesize_( cache.legendre().size() ), + fft_cache_( cache.fft().data() ), + fft_cachesize_( cache.fft().size() ) +{ ATLAS_TRACE( "Precompute legendre opt3" ); #ifdef ECKIT_HAVE_MKL eckit::linalg::LinearAlgebra::backend( "mkl" ); // might want to choose backend with this command @@ -272,28 +356,38 @@ 
TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long legendre_sym_begin_[0] = 0; legendre_asym_begin_[0] = 0; for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { - size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ ); + size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ ); size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ ); legendre_sym_begin_[jm + 1] = size_sym; legendre_asym_begin_[jm + 1] = size_asym; } - alloc_aligned( legendre_sym_, size_sym ); - alloc_aligned( legendre_asym_, size_asym ); - FILE* file_leg; - file_leg = fopen( "legendre.bin", "r" ); - if ( file_leg ) { - fread( legendre_sym_, sizeof( double ), size_sym, file_leg ); - fread( legendre_asym_, sizeof( double ), size_asym, file_leg ); - fclose( file_leg ); - } - else { + + if( legendre_cache_ ) { + ReadCache legendre( legendre_cache_ ); + legendre_sym_ = legendre.read( size_sym ); + legendre_asym_ = legendre.read( size_asym ); + ASSERT( legendre.pos == legendre_cachesize_ ); + // TODO: check this is all aligned... + } else { + + alloc_aligned( legendre_sym_, size_sym ); + alloc_aligned( legendre_asym_, size_asym ); + compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_, legendre_sym_begin_.data(), legendre_asym_begin_.data() ); - file_leg = fopen( "legendre.bin", "wb" ); - fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg ); - fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg ); - fclose( file_leg ); + std::string file_path = TransParameters(config).write_legendre(); + if( file_path.size() ) { + ATLAS_TRACE( "write_legendre" ); + size_t estimated_length = sizeof(double) * ( size_sym + size_asym ); + Log::debug() << "Writing Legendre cache file ..." 
<< std::endl; + Log::debug() << " path = " << file_path << std::endl; + Log::debug() << " estimated = " << eckit::Bytes(estimated_length) << std::endl; + WriteCache legendre( file_path, estimated_length ); + legendre.write( legendre_sym_, size_sym ); + legendre.write( legendre_asym_, size_asym ); + Log::debug() << "Cache file size: " << eckit::Bytes(legendre.pos) << std::endl; + } } } @@ -305,18 +399,23 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long int num_complex = ( nlonsMaxGlobal_ / 2 ) + 1; fft_in_ = fftw_alloc_complex( nlats * num_complex ); fft_out_ = fftw_alloc_real( nlats * nlonsMaxGlobal_ ); - std::string wisdomString( "" ); - std::ifstream read( "wisdom.bin" ); - if ( read.is_open() ) { - std::getline( read, wisdomString ); - while ( read ) { - std::string line; - std::getline( read, line ); - wisdomString += line; - } + + if( fft_cache_ ) { + Log::debug() << "Import FFTW wisdom from cache" << std::endl; + fftw_import_wisdom_from_string( (const char*)fft_cache_ ); } - read.close(); - if ( wisdomString.length() > 0 ) { fftw_import_wisdom_from_string( &wisdomString[0u] ); } +// std::string wisdomString( "" ); +// std::ifstream read( "wisdom.bin" ); +// if ( read.is_open() ) { +// std::getline( read, wisdomString ); +// while ( read ) { +// std::string line; +// std::getline( read, line ); +// wisdomString += line; +// } +// } +// read.close(); +// if ( wisdomString.length() > 0 ) { fftw_import_wisdom_from_string( &wisdomString[0u] ); } if ( grid::RegularGrid( gridGlobal_ ) ) { plans_.resize( 1 ); plans_[0] = fftw_plan_many_dft_c2r( 1, &nlonsMaxGlobal_, nlats, fft_in_, NULL, 1, num_complex, @@ -330,16 +429,36 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long plans_[j] = fftw_plan_dft_c2r_1d( nlonsGlobalj, fft_in_, fft_out_, FFTW_ESTIMATE ); } } - std::string newWisdom( fftw_export_wisdom_to_string() ); - if ( 1.1 * wisdomString.length() < newWisdom.length() ) { - std::ofstream write( 
"wisdom.bin" ); - write << newWisdom; - write.close(); + std::string file_path = TransParameters(config).write_fft(); + if( file_path.size() ) { + Log::debug() << "Write FFTW wisdom to file " << file_path << std::endl; + //bool success = fftw_export_wisdom_to_filename( "wisdom.bin" ); + //ASSERT( success ); + //std::ofstream write( file_path ); + //write << FFTW_Wisdom(); + + FILE* file_fftw = fopen( file_path.c_str(), "wb" ); + fftw_export_wisdom_to_file( file_fftw ); + fclose( file_fftw ); + } +// std::string newWisdom( fftw_export_wisdom_to_string() ); +// if ( 1.1 * wisdomString.length() < newWisdom.length() ) { +// std::ofstream write( "wisdom.bin" ); +// write << newWisdom; +// write.close(); +// } } // other FFT implementations should be added with #elif statements #else useFFT_ = false; // no FFT implemented => default to dgemm + std::string file_path = TransParameters(config).write_fft(); + if( file_path.size() ) { + std::ofstream write( file_path ); + write << "No cache available, as FFTW is not enabled" << std::endl; + write.close(); + } + #endif } if ( !useFFT_ ) { @@ -399,8 +518,10 @@ TransLocalopt3::TransLocalopt3( const Grid& grid, const long truncation, const e TransLocalopt3::~TransLocalopt3() { if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { - free_aligned( legendre_sym_ ); - free_aligned( legendre_asym_ ); + if( not legendre_cache_ ) { + free_aligned( legendre_sym_ ); + free_aligned( legendre_asym_ ); + } if ( useFFT_ ) { #if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2 for ( int j = 0; j < plans_.size(); j++ ) { @@ -662,6 +783,9 @@ void TransLocalopt3::invtrans_fourier_regularopt3( const int nlats, const int nl #endif } else { + + throw eckit::SeriousBug("dgemm for Fourier transforms currently broken. 
Make sure atlas is compiled with FFTW.",Here()); + #if !TRANSLOCAL_DGEMM2 // dgemm-method 1 { @@ -757,9 +881,15 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid:: #endif } else { + + throw eckit::SeriousBug("dgemm for Fourier transforms currently broken. Make sure atlas is compiled with FFTW.",Here()); + #if !TRANSLOCAL_DGEMM2 // dgemm-method 1 { +#warning dgemm currently broken for Fourier transforms. FFTW required! +// Noticed that Matrix C is trying to access more than is actually allocated +// Memory error!!! BEWARE!!! ATLAS_TRACE( "opt3 Fourier dgemm method 1" ); eckit::linalg::Matrix A( fourier_, nlonsMax, ( truncation_ + 1 ) * 2 ); eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats ); @@ -797,6 +927,7 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid:: free_aligned( gp_opt3 ); #endif } + } // -------------------------------------------------------------------------------------------------------------------- diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h index 056d4e304..0651abeab 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.h +++ b/src/atlas/trans/localopt3/TransLocalopt3.h @@ -172,6 +172,12 @@ class TransLocalopt3 : public trans::TransImpl { double* fft_out_; std::vector plans_; #endif + + Cache cache_; + const void* legendre_cache_{nullptr}; + size_t legendre_cachesize_{0}; + const void* fft_cache_{nullptr}; + size_t fft_cachesize_{0}; }; //----------------------------------------------------------------------------- diff --git a/src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc b/src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc index 4e7267748..b7b9474af 100644 --- a/src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc +++ b/src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc @@ -21,7 +21,8 @@ namespace atlas { namespace trans { namespace { -static VorDivToUVBuilder builder( "localopt3" ); +static 
VorDivToUVBuilder builder_deprecated( "localopt3" ); +static VorDivToUVBuilder builder( "local" ); } // -------------------------------------------------------------------------------------------------------------------- diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 1a411227a..1d34fb7fb 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -29,9 +29,9 @@ #include "atlas/parallel/mpi/mpi.h" #include "atlas/runtime/Trace.h" #include "atlas/trans/Trans.h" -#include "atlas/trans/local/FourierTransforms.h" -#include "atlas/trans/local/LegendrePolynomials.h" -#include "atlas/trans/local/LegendreTransforms.h" +#include "atlas/trans/local_noopt/FourierTransforms.h" +#include "atlas/trans/local_noopt/LegendrePolynomials.h" +#include "atlas/trans/local_noopt/LegendreTransforms.h" #include "atlas/util/Constants.h" #include "atlas/util/Earth.h" @@ -965,15 +965,23 @@ CASE( "test_trans_domain" ) { //Domain testdomain1 = RectangularDomain( {-1., 1.}, {50., 55.} ); Domain testdomain2 = RectangularDomain( {-1., 1.}, {-5., 40.} ); // Grid: (Adjust the following line if the test takes too long!) 
+ std::string gridString = "O640"; Grid g1( gridString, testdomain1 ); Grid g2( gridString, testdomain2 ); int trc = 640; //Log::info() << "rgp1:" << std::endl; - trans::Trans transLocal1( g1, trc, util::Config( "type", "localopt3" ) ); + Trace t1(Here(),"translocal1 construction"); + trans::Trans transLocal1( g1, trc, option::type("local") | option::write_legendre("legcache.bin") ); + t1.stop(); //Log::info() << "rgp2:" << std::endl; - trans::Trans transLocal2( g2, trc, util::Config( "type", "localopt3" ) ); + trans::Cache cache; + ATLAS_TRACE_SCOPE("Read cache") cache = trans::LegendreCache("legcache.bin"); + Trace t2(Here(),"translocal2 construction"); + trans::Trans transLocal2( cache, g2, trc, option::type("local") ); + t2.stop(); + double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 functionspace::Spectral spectral( trc ); From 948a74e2b76f617faee2fb8233e7aa1ebf5738f0 Mon Sep 17 00:00:00 2001 From: Pedro Maciel Date: Fri, 13 Apr 2018 18:34:46 +0100 Subject: [PATCH 051/123] MIR-178, MIR-191, MIR-192, MIR-193: finite element 'linear'/'bilinear' using point k-d tree --- .../meshgenerator/StructuredMeshGenerator.cc | 37 +++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/src/atlas/meshgenerator/StructuredMeshGenerator.cc b/src/atlas/meshgenerator/StructuredMeshGenerator.cc index 342cf608c..85c4f0a28 100644 --- a/src/atlas/meshgenerator/StructuredMeshGenerator.cc +++ b/src/atlas/meshgenerator/StructuredMeshGenerator.cc @@ -77,6 +77,10 @@ StructuredMeshGenerator::StructuredMeshGenerator( const eckit::Parametrisation& bool unique_pole; if ( p.get( "unique_pole", unique_pole ) ) options.set( "unique_pole", unique_pole ); + bool force_include_pole; + if ( p.get( "force_include_north_pole", force_include_pole ) ) options.set( "force_include_north_pole", force_include_pole ); + if ( p.get( "force_include_south_pole", force_include_pole ) ) options.set( "force_include_south_pole", force_include_pole ); 
+ bool three_dimensional; if ( p.get( "three_dimensional", three_dimensional ) || p.get( "3d", three_dimensional ) ) options.set( "3d", three_dimensional ); @@ -689,24 +693,23 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con int nparts = options.get( "nb_parts" ); int n, l; - bool has_point_at_north_pole = rg.y().front() == 90 && rg.nx().front() > 0; - bool has_point_at_south_pole = rg.y().back() == -90 && rg.nx().back() > 0; bool three_dimensional = options.get( "3d" ); bool periodic_east_west = rg.periodic(); bool include_periodic_ghost_points = periodic_east_west && !three_dimensional; bool remove_periodic_ghost_points = periodic_east_west && three_dimensional; - bool include_north_pole = ( mypart == 0 ) && options.get( "include_pole" ) && !has_point_at_north_pole && - rg.domain().containsNorthPole(); - - bool include_south_pole = ( mypart == nparts - 1 ) && options.get( "include_pole" ) && - !has_point_at_south_pole && rg.domain().containsSouthPole(); + bool has_point_at_north_pole = rg.y().front() == 90 && rg.nx().front() > 0; + bool has_point_at_south_pole = rg.y().back() == -90 && rg.nx().back() > 0; + bool possible_north_pole = !has_point_at_north_pole && rg.domain().containsNorthPole() && ( mypart == 0 ); + bool possible_south_pole = !has_point_at_south_pole && rg.domain().containsSouthPole() && ( mypart == nparts - 1 ); - bool patch_north_pole = ( mypart == 0 ) && options.get( "patch_pole" ) && !has_point_at_north_pole && - rg.domain().containsNorthPole() && rg.nx( 1 ) > 0; + bool include_north_pole = (possible_north_pole && options.get( "include_pole" )) || options.get( "force_include_north_pole" ); + bool include_south_pole = (possible_south_pole && options.get( "include_pole" )) || options.get( "force_include_south_pole" ); + bool patch_north_pole = possible_north_pole && options.get( "patch_pole" ) && rg.nx( 1 ) > 0; + bool patch_south_pole = possible_south_pole && options.get( "patch_pole" ) && rg.nx( rg.ny() - 2 ) 
> 0; - bool patch_south_pole = ( mypart == nparts - 1 ) && options.get( "patch_pole" ) && !has_point_at_south_pole && - rg.domain().containsSouthPole() && rg.nx( rg.ny() - 2 ) > 0; + int nnewnodes = (!has_point_at_north_pole && include_north_pole ? 1 : 0) + + (!has_point_at_south_pole && include_south_pole ? 1 : 0); if ( three_dimensional && nparts != 1 ) throw BadParameter( "Cannot generate three_dimensional mesh in parallel", Here() ); @@ -735,6 +738,7 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con if ( region.lat_end[jlat] >= rg.nx( jlat ) ) --nnodes; } } + ASSERT( nnodes >= nnewnodes ); #if DEBUG_OUTPUT ATLAS_DEBUG_VAR( include_periodic_ghost_points ); @@ -976,6 +980,9 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con ++jnode; } + nodes.metadata().set( "NbRealPts", size_t(nnodes - nnewnodes) ); + nodes.metadata().set( "NbVirtualPts", size_t(nnewnodes) ); + nodes.global_index().metadata().set( "human_readable", true ); nodes.global_index().metadata().set( "min", 1 ); nodes.global_index().metadata().set( "max", max_glb_idx ); @@ -992,13 +999,13 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con array::ArrayView cells_patch = array::make_view( mesh.cells().field( "patch" ) ); /* - * label all patch cells a non-patch - */ + * label all patch cells a non-patch + */ cells_patch.assign( 0 ); /* -Fill in connectivity tables with global node indices first -*/ + * Fill in connectivity tables with global node indices first + */ int jcell; int jquad = 0; int jtriag = 0; From 271616cd86bef4982b94b921a89bfdba4811a7ea Mon Sep 17 00:00:00 2001 From: Pedro Maciel Date: Wed, 25 Apr 2018 16:25:52 +0100 Subject: [PATCH 052/123] MIR-178, MIR-191, MIR-192, MIR-193: finite element 'linear'/'bilinear' using point k-d tree --- src/atlas/meshgenerator/StructuredMeshGenerator.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git 
a/src/atlas/meshgenerator/StructuredMeshGenerator.cc b/src/atlas/meshgenerator/StructuredMeshGenerator.cc index 85c4f0a28..6f5497e69 100644 --- a/src/atlas/meshgenerator/StructuredMeshGenerator.cc +++ b/src/atlas/meshgenerator/StructuredMeshGenerator.cc @@ -703,8 +703,11 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con bool possible_north_pole = !has_point_at_north_pole && rg.domain().containsNorthPole() && ( mypart == 0 ); bool possible_south_pole = !has_point_at_south_pole && rg.domain().containsSouthPole() && ( mypart == nparts - 1 ); - bool include_north_pole = (possible_north_pole && options.get( "include_pole" )) || options.get( "force_include_north_pole" ); - bool include_south_pole = (possible_south_pole && options.get( "include_pole" )) || options.get( "force_include_south_pole" ); + bool force_include_north_pole(options.has("force_include_north_pole") && options.get( "force_include_north_pole" )); + bool force_include_south_pole(options.has("force_include_south_pole") && options.get( "force_include_south_pole" )); + + bool include_north_pole = (possible_north_pole && options.get( "include_pole" )) || force_include_north_pole; + bool include_south_pole = (possible_south_pole && options.get( "include_pole" )) || force_include_south_pole; bool patch_north_pole = possible_north_pole && options.get( "patch_pole" ) && rg.nx( 1 ) > 0; bool patch_south_pole = possible_south_pole && options.get( "patch_pole" ) && rg.nx( rg.ny() - 2 ) > 0; From a7f2e4ccf6412b1e1683d4c7a7962ed14982fd84 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Fri, 27 Apr 2018 14:07:17 +0100 Subject: [PATCH 053/123] Add unit-tests for caching indicating some work todo --- src/atlas/option/TransOptions.cc | 7 +- src/atlas/option/TransOptions.h | 19 +- src/atlas/runtime/trace/CallStack.cc | 4 +- src/atlas/runtime/trace/CallStack.h | 3 +- src/atlas/runtime/trace/Nesting.cc | 12 +- src/atlas/runtime/trace/Nesting.h | 3 +- src/atlas/runtime/trace/TraceT.h | 
4 +- src/atlas/trans/localopt3/TransLocalopt3.cc | 94 ++++++-- src/tests/trans/CMakeLists.txt | 6 + src/tests/trans/test_trans_localcache.cc | 243 ++++++++++++++++++++ src/tests/trans/test_transgeneral.cc | 5 +- 11 files changed, 372 insertions(+), 28 deletions(-) create mode 100644 src/tests/trans/test_trans_localcache.cc diff --git a/src/atlas/option/TransOptions.cc b/src/atlas/option/TransOptions.cc index b321838fd..82131506f 100644 --- a/src/atlas/option/TransOptions.cc +++ b/src/atlas/option/TransOptions.cc @@ -9,6 +9,7 @@ */ #include "atlas/option/TransOptions.h" +#include "atlas/grid.h" // ---------------------------------------------------------------------------- @@ -32,7 +33,7 @@ flt::flt( bool flt ) { } fft::fft( FFT fft ) { - static const std::map FFT_to_string = {{FFT::FFT992, "FFT992"}, {FFT::FFTW, "FFTW"}}; + static const std::map FFT_to_string = { {FFT::OFF, "OFF"}, {FFT::FFT992, "FFT992"}, {FFT::FFTW, "FFTW"}}; set( "fft", FFT_to_string.at( fft ) ); } @@ -48,6 +49,10 @@ write_legendre::write_legendre( const eckit::PathName& filepath ) { set( "write_legendre", filepath ); } +global_grid::global_grid( const Grid& grid ) { + set( "global_grid", grid.spec() ); +} + read_legendre::read_legendre( const eckit::PathName& filepath ) { set( "read_legendre", filepath ); } diff --git a/src/atlas/option/TransOptions.h b/src/atlas/option/TransOptions.h index 952ea7465..a43e836c8 100644 --- a/src/atlas/option/TransOptions.h +++ b/src/atlas/option/TransOptions.h @@ -14,6 +14,10 @@ // ---------------------------------------------------------------------------- +namespace atlas { class Grid; } + +// ---------------------------------------------------------------------------- + namespace atlas { namespace option { @@ -21,8 +25,9 @@ namespace option { enum class FFT { + OFF = 0, FFT992 = 1, - FFTW = 2 + FFTW = 2, }; // ---------------------------------------------------------------------------- @@ -61,6 +66,11 @@ class fft : public util::Config { fft( const 
std::string& ); }; +class no_fft : public fft { +public: + no_fft() : fft( FFT::OFF ) {} +}; + // ---------------------------------------------------------------------------- class split_latitudes : public util::Config { @@ -77,6 +87,13 @@ class write_legendre : public util::Config { // ---------------------------------------------------------------------------- +class global_grid : public util::Config { +public: + global_grid( const Grid& ); +}; + +// ---------------------------------------------------------------------------- + class read_legendre : public util::Config { public: read_legendre( const eckit::PathName& ); diff --git a/src/atlas/runtime/trace/CallStack.cc b/src/atlas/runtime/trace/CallStack.cc index bb2c043d5..41b6366a9 100644 --- a/src/atlas/runtime/trace/CallStack.cc +++ b/src/atlas/runtime/trace/CallStack.cc @@ -9,8 +9,8 @@ namespace atlas { namespace runtime { namespace trace { -void CallStack::push_front( const eckit::CodeLocation& loc ) { - stack_.push_front( std::hash{}( loc.asString() ) ); +void CallStack::push_front( const eckit::CodeLocation& loc, const std::string& id ) { + stack_.push_front( std::hash{}( loc.asString()+id ) ); } void CallStack::pop_front() { diff --git a/src/atlas/runtime/trace/CallStack.h b/src/atlas/runtime/trace/CallStack.h index 793234dd7..72c50d06c 100644 --- a/src/atlas/runtime/trace/CallStack.h +++ b/src/atlas/runtime/trace/CallStack.h @@ -2,6 +2,7 @@ #include #include +#include namespace eckit { class CodeLocation; @@ -19,7 +20,7 @@ class CallStack { using const_reverse_iterator = std::list::const_reverse_iterator; public: - void push_front( const eckit::CodeLocation& ); + void push_front( const eckit::CodeLocation&, const std::string& id = "" ); void pop_front(); const_iterator begin() const { return stack_.begin(); } diff --git a/src/atlas/runtime/trace/Nesting.cc b/src/atlas/runtime/trace/Nesting.cc index b34f98513..fe46dbc8c 100644 --- a/src/atlas/runtime/trace/Nesting.cc +++ 
b/src/atlas/runtime/trace/Nesting.cc @@ -29,14 +29,18 @@ class NestingState { return state; } operator CallStack() const { return stack_; } - CallStack& push( const eckit::CodeLocation& loc ) { - stack_.push_front( loc ); + CallStack& push( const eckit::CodeLocation& loc, const std::string& id ) { + stack_.push_front( loc, id ); return stack_; } void pop() { stack_.pop_front(); } }; -Nesting::Nesting( const eckit::CodeLocation& loc ) : loc_( loc ), stack_( NestingState::instance().push( loc ) ) {} +Nesting::Nesting( const eckit::CodeLocation& loc, const std::string& id ) : + loc_( loc ), + id_( id ), + stack_( NestingState::instance().push( loc, id ) ) { +} Nesting::~Nesting() { stop(); @@ -51,7 +55,7 @@ void Nesting::stop() { void Nesting::start() { if ( not running_ ) { - NestingState::instance().push( loc_ ); + NestingState::instance().push( loc_, id_ ); running_ = true; } } diff --git a/src/atlas/runtime/trace/Nesting.h b/src/atlas/runtime/trace/Nesting.h index ae8a5effd..ccf64cc49 100644 --- a/src/atlas/runtime/trace/Nesting.h +++ b/src/atlas/runtime/trace/Nesting.h @@ -22,7 +22,7 @@ namespace trace { class Nesting { public: - Nesting( const eckit::CodeLocation& ); + Nesting( const eckit::CodeLocation&, const std::string& id = "" ); ~Nesting(); operator CallStack() const { return stack_; } void stop(); @@ -31,6 +31,7 @@ class Nesting { private: CallStack stack_; eckit::CodeLocation loc_; + std::string id_; bool running_{true}; }; diff --git a/src/atlas/runtime/trace/TraceT.h b/src/atlas/runtime/trace/TraceT.h index 672f14af6..8f86e863f 100644 --- a/src/atlas/runtime/trace/TraceT.h +++ b/src/atlas/runtime/trace/TraceT.h @@ -90,7 +90,7 @@ template inline TraceT::TraceT( const eckit::CodeLocation& loc, const std::string& title ) : loc_( loc ), title_( title ), - nesting_( loc ) { + nesting_( loc, title ) { start(); } @@ -106,7 +106,7 @@ template inline TraceT::TraceT( const eckit::CodeLocation& loc, const std::string& title, const Labels& labels ) : loc_( loc ), 
title_( title ), - nesting_( loc ), + nesting_( loc, title ), labels_( labels ) { start(); } diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index eefe99c22..7e7851bda 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -22,7 +22,9 @@ #include "eckit/eckit_config.h" #include "eckit/linalg/LinearAlgebra.h" #include "eckit/linalg/Matrix.h" +#include "eckit/parser/JSON.h" #include "eckit/log/Bytes.h" +#include "eckit/config/YAMLConfiguration.h" #ifdef ECKIT_HAVE_MKL #include "mkl.h" #endif @@ -55,8 +57,28 @@ class TransParameters { std::string write_fft() const { return config_.getString( "write_fft", "" ); } + Grid global_grid() const { + Grid g; + util::Config spec; + if( config_.get("global_grid",spec) ) { + g = Grid( spec ); + } + return g; + } + bool global() const { return config_.getBool( "global", false ); } + int fft() const { + static const std::map string_to_FFT = + { { "OFF", (int) option::FFT::OFF }, { "FFTW", (int) option::FFT::FFTW } }; +#ifdef ATLAS_HAVE_FFTW + std::string fft_default = "FFTW"; +#else + std::string fft_default = "OFF"; +#endif + return string_to_FFT.at( config_.getString( "fft", fft_default ) ); + } + private: const eckit::Configuration& config_; }; @@ -71,15 +93,27 @@ template T* read(size_t size) { pos += size * sizeof(T); return v; } + +Grid read_grid() { + long& size = *read(1); + char* json = read(size); + return Grid( eckit::YAMLConfiguration( std::string( json, size ) ) ); +} + char* begin; size_t pos; }; struct WriteCache { -WriteCache( const eckit::PathName& file_path, long estimated_length = 0 ) : +WriteCache( const eckit::PathName& file_path) : dh_( file_path.fileHandle( /*overwrite = */ true ) ) { - dh_->openForWrite( estimated_length ); + if( file_path.exists() ) { + std::stringstream err; + err << "Cannot open cache file " << file_path << " for writing as it already exists. 
Remove first."; + throw eckit::BadParameter( err.str(), Here() ); + } + dh_->openForWrite(0); pos = 0; } ~WriteCache() { @@ -89,6 +123,24 @@ template void write( const T* v, long size) { dh_->write( v , size * sizeof(T) ); pos += size * sizeof(T); } + +//void write( long v ) { +// dh_->write( &v , sizeof(long) ); +// pos += sizeof(long); +//} + +//void write( const Grid& grid ) { +// std::stringstream s; +// eckit::JSON json(s); +// json << grid.spec(); +// std::string grid_spec( s.str() ); +// long size = grid_spec.size(); +// write( size ); +// dh_->write( grid_spec.c_str(), grid_spec.size() ); +// pos += grid_spec.size(); +//} + + std::unique_ptr dh_; size_t pos; }; @@ -135,6 +187,11 @@ int num_n( const int truncation, const int m, const bool symmetric ) { } void alloc_aligned( double*& ptr, size_t n ) { +#warning todo1 + // If we can assume that posix_memalign gives the same result, we would not need to support mkl_malloc + // We can then remove the include of mkl.h above (simplifying things). + // As well there is the C++ functions "std::align" (http://en.cppreference.com/w/cpp/memory/align) + // that we could look into. 
#ifdef ECKIT_HAVE_MKL int al = 64; ptr = (double*)mkl_malloc( sizeof( double ) * n, al ); @@ -216,7 +273,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long int nlats = 0; int nlonsMax = 0; int neqtr = 0; - useFFT_ = true; + useFFT_ = TransParameters(config).fft(); unstruct_precomp_ = true; nlatsNH_ = 0; nlatsSH_ = 0; @@ -247,11 +304,19 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long nlatsLegDomain_ = nlatsSH_; } - // compute latitudinal location of domain relative to global grid: - gridGlobal_ = Grid( grid.name() ); + + gridGlobal_ = TransParameters(config).global_grid(); + if( not gridGlobal_ ) { + if ( grid_.domain().global() ) { + gridGlobal_ = grid_; + } else { + throw eckit::BadParameter("A global structured grid is required to be passed in the optional arguments",Here()); + } + } + grid::StructuredGrid gs_global( gridGlobal_ ); - grid::StructuredGrid* gsLeg = &g; - if ( useGlobalLeg ) { gsLeg = &gs_global; }; + ASSERT( gs_global ); // assert structured grid + grid::StructuredGrid gsLeg = ( useGlobalLeg ? gs_global : g ); nlonsMaxGlobal_ = gs_global.nxmax(); jlonMin_.resize( 1 ); jlonMin_[0] = 0; @@ -298,7 +363,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long // compute longitudinal location of domain within global grid for using FFT: auto wrapAngle = [&]( double angle ) { - double result = fmod( angle, 360 ); + double result = std::fmod( angle, 360. ); if ( result < 0. 
) { result += 360.; } return result; }; @@ -329,12 +394,12 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long std::vector lons( nlonsMax ); if ( nlatsNH_ >= nlatsSH_ || useGlobalLeg ) { for ( size_t j = 0; j < nlatsLeg_; ++j ) { - lats[j] = gsLeg->y( j ) * util::Constants::degreesToRadians(); + lats[j] = gsLeg.y( j ) * util::Constants::degreesToRadians(); } } else { for ( size_t j = nlats - 1, idx = 0; idx < nlatsLeg_; --j, ++idx ) { - lats[idx] = -gsLeg->y( j ) * util::Constants::degreesToRadians(); + lats[idx] = -gsLeg.y( j ) * util::Constants::degreesToRadians(); } } for ( size_t j = 0; j < nlonsMax; ++j ) { @@ -379,11 +444,9 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long std::string file_path = TransParameters(config).write_legendre(); if( file_path.size() ) { ATLAS_TRACE( "write_legendre" ); - size_t estimated_length = sizeof(double) * ( size_sym + size_asym ); Log::debug() << "Writing Legendre cache file ..." << std::endl; Log::debug() << " path = " << file_path << std::endl; - Log::debug() << " estimated = " << eckit::Bytes(estimated_length) << std::endl; - WriteCache legendre( file_path, estimated_length ); + WriteCache legendre( file_path ); legendre.write( legendre_sym_, size_sym ); legendre.write( legendre_asym_, size_asym ); Log::debug() << "Cache file size: " << eckit::Bytes(legendre.pos) << std::endl; @@ -440,7 +503,6 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long FILE* file_fftw = fopen( file_path.c_str(), "wb" ); fftw_export_wisdom_to_file( file_fftw ); fclose( file_fftw ); - } // std::string newWisdom( fftw_export_wisdom_to_string() ); // if ( 1.1 * wisdomString.length() < newWisdom.length() ) { @@ -506,6 +568,10 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long } compute_legendre_polynomials_allopt3( truncation_, grid_.size(), lats.data(), legendre_ ); } + if( TransParameters(config).write_legendre().size() ) 
{ + throw eckit::NotImplemented("Caching for unstructured grids not implemented",Here()); + } + } } // namespace trans diff --git a/src/tests/trans/CMakeLists.txt b/src/tests/trans/CMakeLists.txt index 98139106c..e9cbad59f 100644 --- a/src/tests/trans/CMakeLists.txt +++ b/src/tests/trans/CMakeLists.txt @@ -51,3 +51,9 @@ ecbuild_add_test( TARGET atlas_test_transgeneral ENVIRONMENT ATLAS_TRACE_REPORT=1 ) +ecbuild_add_test( TARGET atlas_test_trans_localcache + SOURCES test_trans_localcache.cc + LIBS atlas + ENVIRONMENT ATLAS_TRACE_REPORT=1 +) + diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc new file mode 100644 index 000000000..9dce62122 --- /dev/null +++ b/src/tests/trans/test_trans_localcache.cc @@ -0,0 +1,243 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. 
+ */ + +#include +#include + +#include "atlas/array/MakeView.h" +#include "atlas/field/FieldSet.h" +#include "atlas/functionspace/NodeColumns.h" +#include "atlas/functionspace/Spectral.h" +#include "atlas/functionspace/StructuredColumns.h" +#include "atlas/grid.h" +#include "atlas/grid/Distribution.h" +#include "atlas/grid/Partitioner.h" +#include "atlas/grid/detail/partitioner/EqualRegionsPartitioner.h" +#include "atlas/grid/detail/partitioner/TransPartitioner.h" +#include "atlas/library/Library.h" +#include "atlas/mesh/Mesh.h" +#include "atlas/mesh/Nodes.h" +#include "atlas/meshgenerator/StructuredMeshGenerator.h" +#include "atlas/output/Gmsh.h" +#include "atlas/parallel/mpi/mpi.h" +#include "atlas/runtime/Trace.h" +#include "atlas/trans/Trans.h" +#include "atlas/trans/local_noopt/FourierTransforms.h" +#include "atlas/trans/local_noopt/LegendrePolynomials.h" +#include "atlas/trans/local_noopt/LegendreTransforms.h" +#include "atlas/util/Constants.h" +#include "atlas/util/Earth.h" +#include "eckit/utils/MD5.h" + +#include "tests/AtlasTestEnvironment.h" + +#if ATLAS_HAVE_TRANS +#include "transi/trans.h" +#endif + +using namespace eckit; + +using atlas::array::Array; +using atlas::array::ArrayView; +using atlas::array::make_view; + +namespace atlas { +namespace test { + +//----------------------------------------------------------------------------- + +struct AtlasTransEnvironment : public AtlasTestEnvironment { + AtlasTransEnvironment( int argc, char* argv[] ) : AtlasTestEnvironment( argc, argv ) { +#if ATLAS_HAVE_TRANS + trans_use_mpi( mpi::comm().size() > 1 ); + trans_init(); +#endif + } + + ~AtlasTransEnvironment() { +#if ATLAS_HAVE_TRANS + trans_finalize(); +#endif + } +}; + +using trans::Trans; +using trans::LegendreCache; +using trans::Cache; +using grid::StructuredGrid; +using grid::GaussianGrid; +using XSpace = StructuredGrid::XSpace; +using YSpace = StructuredGrid::YSpace; +using LinearSpacing = grid::LinearSpacing; + +eckit::PathName CacheFile(const 
std::string& path) { + eckit::PathName cachefile(path); + if( cachefile.exists() ) cachefile.unlink(); + return cachefile; +} + +std::string hash( const trans::Cache& c ) { + return eckit::MD5( c.legendre().data(), c.legendre().size() ).digest(); +} + +std::string hash( const eckit::PathName& f ) { + return hash( LegendreCache(f) ); +} + +std::string F(int n) { return "F" +std::to_string(n); } +std::string O(int n) { return "O" +std::to_string(n); } +std::string N(int n) { return "N" +std::to_string(n); } +std::string L(int n) { return "L" +std::to_string(n); } +std::string S(int n) { return "S" +std::to_string(n); } +std::string Slon(int n) { return "Slon"+std::to_string(n); } +std::string Slat(int n) { return "Slat"+std::to_string(n); } + +//----------------------------------------------------------------------------- + +CASE( "test_global_grids" ) { + // auto resolutions = { 32, 64, 160, 320, 640 }; + auto resolutions = { 32, 64 }; + for( int n : resolutions ) { + int t = n-1; + auto cases = { + std::make_pair(F(n),t), + std::make_pair(O(n),t), + std::make_pair(N(n),t), + std::make_pair(L(n),t), + std::make_pair(S(n),t), + std::make_pair(Slon(n),t), + std::make_pair(Slat(n),t), + }; + + auto F_cachefile = CacheFile("leg_"+F(n)+"-T"+std::to_string(t)+".bin"); + Trans( Grid(F(n)), t, option::type("local") | option::write_legendre( F_cachefile ) ); + Cache F_cache = LegendreCache( F_cachefile ); + auto F_cache_hash = hash(F_cache); + + Cache cache; + for( auto _case : cases ) + { + auto gridname = _case.first; + auto truncation = _case.second; + Log::info() << "Case "+gridname+" T"+std::to_string(truncation) << std::endl; + ATLAS_TRACE("Case "+gridname+" T"+std::to_string(truncation)); + Grid grid(gridname); + auto cachefile = CacheFile("leg_"+gridname+"-T"+std::to_string(truncation)+".bin"); + ATLAS_TRACE_SCOPE("create without cache") + Trans( grid, truncation, option::type("local") ); + ATLAS_TRACE_SCOPE("create without cache and write") + Trans( grid, 
truncation, option::type("local") | option::write_legendre( cachefile ) ); + ATLAS_TRACE_SCOPE("read cache") + cache = LegendreCache( cachefile ); + ATLAS_TRACE_SCOPE("create with cache") + Trans( cache, grid, truncation, option::type("local") ); + + if( GaussianGrid(grid) ) { + ASSERT( hash(cache) == F_cache_hash ); + } + } + } +} + +CASE( "test_global_grids_with_subdomain" ) { + int n = 64; + int t = n-1; + auto cases = { + std::make_pair(F(n),t), + std::make_pair(O(n),t), + std::make_pair(N(n),t), + std::make_pair(L(n),t), + std::make_pair(S(n),t), + std::make_pair(Slon(n),t), + std::make_pair(Slat(n),t) + }; + auto domains = std::vector{ + ZonalBandDomain ( {-10., 5.} ), + RectangularDomain( {-1., 1.}, {50., 55.} ), + RectangularDomain( {-1., 1.}, {-5., 40.} ), + }; + for( auto _case : cases ) + { + auto gridname = _case.first; + auto truncation = _case.second; + + ATLAS_TRACE("Case "+gridname+" T"+std::to_string(truncation)); + + Grid global_grid( gridname ); + + auto global_cachefile = CacheFile( "leg_"+gridname+"-T"+std::to_string(truncation)+".bin" ); + Trans( Grid(gridname), truncation, option::type("local") | option::write_legendre( global_cachefile ) ); + + Cache global_cache; + ATLAS_TRACE_SCOPE("read cache") + global_cache = LegendreCache( global_cachefile ); + auto global_hash = hash(global_cache); + + for( auto domain : domains ) { + Grid grid( gridname, domain ); + auto cachefile = CacheFile("leg_"+gridname+"-T"+std::to_string(truncation)+"-domain.bin"); + ATLAS_TRACE_SCOPE("create without cache and write") + Trans( Grid(gridname), truncation, option::type("local") | option::global_grid(global_grid) | option::write_legendre( cachefile ) ); + LegendreCache new_cache = LegendreCache(cachefile); + ASSERT( hash(new_cache) == global_hash ); + ATLAS_TRACE_SCOPE("create with cache") + Trans( global_cache, Grid(gridname), truncation, option::type("local") ); + } + } +} + +CASE( "test_regional_grids_nested_in_global" ) { + Cache cache; + { + auto truncation 
= 89; + + ATLAS_TRACE("regional_lonlat"); + + auto cachefile = CacheFile("regional_lonlat.bin"); + StructuredGrid grid_global( + LinearSpacing( { 0., 360.}, 360, false ), + LinearSpacing( {-90., 90.}, 181, true ) + ); + ASSERT( grid_global.domain().global() ); + StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) ); + ATLAS_TRACE_SCOPE("create without cache") + Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) ); + ATLAS_TRACE_SCOPE("create without cache and write") + Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) | option::write_legendre( cachefile ) ); + ATLAS_TRACE_SCOPE("read cache") + cache = LegendreCache( cachefile ); + ATLAS_TRACE_SCOPE("create with cache") + Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) ); + } +// { +// StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) ); +// Trans( grid, 89 ); +// } +} + +CASE( "test_regional_grids not nested" ) { + if (false) { + StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) ); + Trans( grid, 89 ); + } else { + Log::warning() << "This test fails if enabled!!! 
" << Here() << std::endl; + } +} + +CASE( "test_regional_grids with projection" ) { + Log::warning() << "TODO" << std::endl; +} + +} // namespace test +} // namespace atlas + +int main( int argc, char** argv ) { + return atlas::test::run( argc, argv ); +} diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 1d34fb7fb..e2b6be252 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -972,14 +972,15 @@ CASE( "test_trans_domain" ) { int trc = 640; //Log::info() << "rgp1:" << std::endl; + if( eckit::PathName("legcache.bin").exists() ) eckit::PathName("legcache.bin").unlink(); Trace t1(Here(),"translocal1 construction"); - trans::Trans transLocal1( g1, trc, option::type("local") | option::write_legendre("legcache.bin") ); + trans::Trans transLocal1( g1, trc, option::type("local") | option::write_legendre("legcache.bin" ) | option::global_grid( Grid("O640")) ); t1.stop(); //Log::info() << "rgp2:" << std::endl; trans::Cache cache; ATLAS_TRACE_SCOPE("Read cache") cache = trans::LegendreCache("legcache.bin"); Trace t2(Here(),"translocal2 construction"); - trans::Trans transLocal2( cache, g2, trc, option::type("local") ); + trans::Trans transLocal2( cache, g2, trc, option::type("local") | option::global_grid( Grid("O640")) ); t2.stop(); double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 From 89a8f176a08914bc77fa125ed0dec4130e9bd17a Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 1 May 2018 14:18:42 +0100 Subject: [PATCH 054/123] added some debug output and applied clang-format --- src/atlas/trans/localopt3/TransLocalopt3.cc | 243 ++++++++++---------- src/tests/trans/test_transgeneral.cc | 16 +- 2 files changed, 127 insertions(+), 132 deletions(-) diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index 7e7851bda..f768f6c9e 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ 
b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -19,12 +19,12 @@ #include "atlas/trans/local_noopt/LegendrePolynomials.h" #include "atlas/trans/localopt3/LegendrePolynomialsopt3.h" #include "atlas/util/Constants.h" +#include "eckit/config/YAMLConfiguration.h" #include "eckit/eckit_config.h" #include "eckit/linalg/LinearAlgebra.h" #include "eckit/linalg/Matrix.h" -#include "eckit/parser/JSON.h" #include "eckit/log/Bytes.h" -#include "eckit/config/YAMLConfiguration.h" +#include "eckit/parser/JSON.h" #ifdef ECKIT_HAVE_MKL #include "mkl.h" #endif @@ -35,7 +35,7 @@ namespace trans { namespace { static TransBuilderGrid builder_deprecated( "localopt3" ); static TransBuilderGrid builder( "local" ); -} +} // namespace namespace { class TransParameters { @@ -60,17 +60,15 @@ class TransParameters { Grid global_grid() const { Grid g; util::Config spec; - if( config_.get("global_grid",spec) ) { - g = Grid( spec ); - } + if ( config_.get( "global_grid", spec ) ) { g = Grid( spec ); } return g; } bool global() const { return config_.getBool( "global", false ); } int fft() const { - static const std::map string_to_FFT = - { { "OFF", (int) option::FFT::OFF }, { "FFTW", (int) option::FFT::FFTW } }; + static const std::map string_to_FFT = {{"OFF", (int)option::FFT::OFF}, + {"FFTW", (int)option::FFT::FFTW}}; #ifdef ATLAS_HAVE_FFTW std::string fft_default = "FFTW"; #else @@ -84,84 +82,78 @@ class TransParameters { }; struct ReadCache { -ReadCache( const void* cache ) { - begin = (char*) cache; - pos = 0; -} -template T* read(size_t size) { - T* v = (T*) (begin + pos); - pos += size * sizeof(T); - return v; -} + ReadCache( const void* cache ) { + begin = (char*)cache; + pos = 0; + } + template + T* read( size_t size ) { + T* v = (T*)( begin + pos ); + pos += size * sizeof( T ); + return v; + } -Grid read_grid() { - long& size = *read(1); - char* json = read(size); - return Grid( eckit::YAMLConfiguration( std::string( json, size ) ) ); -} + Grid read_grid() { + long& size = *read( 1 ); 
+ char* json = read( size ); + return Grid( eckit::YAMLConfiguration( std::string( json, size ) ) ); + } -char* begin; -size_t pos; + char* begin; + size_t pos; }; struct WriteCache { -WriteCache( const eckit::PathName& file_path) : - dh_( file_path.fileHandle( /*overwrite = */ true ) ) -{ - if( file_path.exists() ) { - std::stringstream err; - err << "Cannot open cache file " << file_path << " for writing as it already exists. Remove first."; - throw eckit::BadParameter( err.str(), Here() ); + WriteCache( const eckit::PathName& file_path ) : dh_( file_path.fileHandle( /*overwrite = */ true ) ) { + if ( file_path.exists() ) { + std::stringstream err; + err << "Cannot open cache file " << file_path << " for writing as it already exists. Remove first."; + throw eckit::BadParameter( err.str(), Here() ); + } + dh_->openForWrite( 0 ); + pos = 0; + } + ~WriteCache() { dh_->close(); } + template + void write( const T* v, long size ) { + dh_->write( v, size * sizeof( T ) ); + pos += size * sizeof( T ); } - dh_->openForWrite(0); - pos = 0; -} -~WriteCache() { - dh_->close(); -} -template void write( const T* v, long size) { - dh_->write( v , size * sizeof(T) ); - pos += size * sizeof(T); -} -//void write( long v ) { -// dh_->write( &v , sizeof(long) ); -// pos += sizeof(long); -//} - -//void write( const Grid& grid ) { -// std::stringstream s; -// eckit::JSON json(s); -// json << grid.spec(); -// std::string grid_spec( s.str() ); -// long size = grid_spec.size(); -// write( size ); -// dh_->write( grid_spec.c_str(), grid_spec.size() ); -// pos += grid_spec.size(); -//} - - -std::unique_ptr dh_; -size_t pos; + //void write( long v ) { + // dh_->write( &v , sizeof(long) ); + // pos += sizeof(long); + //} + + //void write( const Grid& grid ) { + // std::stringstream s; + // eckit::JSON json(s); + // json << grid.spec(); + // std::string grid_spec( s.str() ); + // long size = grid_spec.size(); + // write( size ); + // dh_->write( grid_spec.c_str(), grid_spec.size() ); + // pos 
+= grid_spec.size(); + //} + + + std::unique_ptr dh_; + size_t pos; }; #if ATLAS_HAVE_FFTW struct FFTW_Wisdom { char* wisdom; - FFTW_Wisdom() { - wisdom = fftw_export_wisdom_to_string(); - } - ~FFTW_Wisdom() { - free( wisdom ); - } + FFTW_Wisdom() { wisdom = fftw_export_wisdom_to_string(); } + ~FFTW_Wisdom() { free( wisdom ); } }; -std::ostream& operator<< (std::ostream& out, const FFTW_Wisdom& w) { +std::ostream& operator<<( std::ostream& out, const FFTW_Wisdom& w ) { out << w.wisdom; return out; } #endif -} +} // namespace // -------------------------------------------------------------------------------------------------------------------- // Helper functions @@ -188,10 +180,10 @@ int num_n( const int truncation, const int m, const bool symmetric ) { void alloc_aligned( double*& ptr, size_t n ) { #warning todo1 - // If we can assume that posix_memalign gives the same result, we would not need to support mkl_malloc - // We can then remove the include of mkl.h above (simplifying things). - // As well there is the C++ functions "std::align" (http://en.cppreference.com/w/cpp/memory/align) - // that we could look into. +// If we can assume that posix_memalign gives the same result, we would not need to support mkl_malloc +// We can then remove the include of mkl.h above (simplifying things). +// As well there is the C++ functions "std::align" (http://en.cppreference.com/w/cpp/memory/align) +// that we could look into. 
#ifdef ECKIT_HAVE_MKL int al = 64; ptr = (double*)mkl_malloc( sizeof( double ) * n, al ); @@ -259,8 +251,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long legendre_cache_( cache.legendre().data() ), legendre_cachesize_( cache.legendre().size() ), fft_cache_( cache.fft().data() ), - fft_cachesize_( cache.fft().size() ) -{ + fft_cachesize_( cache.fft().size() ) { ATLAS_TRACE( "Precompute legendre opt3" ); #ifdef ECKIT_HAVE_MKL eckit::linalg::LinearAlgebra::backend( "mkl" ); // might want to choose backend with this command @@ -273,7 +264,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long int nlats = 0; int nlonsMax = 0; int neqtr = 0; - useFFT_ = TransParameters(config).fft(); + useFFT_ = TransParameters( config ).fft(); unstruct_precomp_ = true; nlatsNH_ = 0; nlatsSH_ = 0; @@ -305,19 +296,19 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long } - gridGlobal_ = TransParameters(config).global_grid(); - if( not gridGlobal_ ) { - if ( grid_.domain().global() ) { - gridGlobal_ = grid_; - } else { - throw eckit::BadParameter("A global structured grid is required to be passed in the optional arguments",Here()); + gridGlobal_ = TransParameters( config ).global_grid(); + if ( not gridGlobal_ ) { + if ( grid_.domain().global() ) { gridGlobal_ = grid_; } + else { + throw eckit::BadParameter( + "A global structured grid is required to be passed in the optional arguments", Here() ); } } grid::StructuredGrid gs_global( gridGlobal_ ); - ASSERT( gs_global ); // assert structured grid + ASSERT( gs_global ); // assert structured grid grid::StructuredGrid gsLeg = ( useGlobalLeg ? 
gs_global : g ); - nlonsMaxGlobal_ = gs_global.nxmax(); + nlonsMaxGlobal_ = gs_global.nxmax(); jlonMin_.resize( 1 ); jlonMin_[0] = 0; jlatMin_ = 0; @@ -421,35 +412,35 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long legendre_sym_begin_[0] = 0; legendre_asym_begin_[0] = 0; for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { - size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ ); + size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ ); size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ ); legendre_sym_begin_[jm + 1] = size_sym; legendre_asym_begin_[jm + 1] = size_asym; } - if( legendre_cache_ ) { + if ( legendre_cache_ ) { ReadCache legendre( legendre_cache_ ); - legendre_sym_ = legendre.read( size_sym ); + legendre_sym_ = legendre.read( size_sym ); legendre_asym_ = legendre.read( size_asym ); ASSERT( legendre.pos == legendre_cachesize_ ); // TODO: check this is all aligned... - } else { - + } + else { alloc_aligned( legendre_sym_, size_sym ); alloc_aligned( legendre_asym_, size_asym ); compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_, legendre_sym_begin_.data(), legendre_asym_begin_.data() ); - std::string file_path = TransParameters(config).write_legendre(); - if( file_path.size() ) { + std::string file_path = TransParameters( config ).write_legendre(); + if ( file_path.size() ) { ATLAS_TRACE( "write_legendre" ); Log::debug() << "Writing Legendre cache file ..." 
<< std::endl; Log::debug() << " path = " << file_path << std::endl; WriteCache legendre( file_path ); - legendre.write( legendre_sym_, size_sym ); + legendre.write( legendre_sym_, size_sym ); legendre.write( legendre_asym_, size_asym ); - Log::debug() << "Cache file size: " << eckit::Bytes(legendre.pos) << std::endl; + Log::debug() << "Cache file size: " << eckit::Bytes( legendre.pos ) << std::endl; } } } @@ -463,22 +454,22 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long fft_in_ = fftw_alloc_complex( nlats * num_complex ); fft_out_ = fftw_alloc_real( nlats * nlonsMaxGlobal_ ); - if( fft_cache_ ) { + if ( fft_cache_ ) { Log::debug() << "Import FFTW wisdom from cache" << std::endl; fftw_import_wisdom_from_string( (const char*)fft_cache_ ); } -// std::string wisdomString( "" ); -// std::ifstream read( "wisdom.bin" ); -// if ( read.is_open() ) { -// std::getline( read, wisdomString ); -// while ( read ) { -// std::string line; -// std::getline( read, line ); -// wisdomString += line; -// } -// } -// read.close(); -// if ( wisdomString.length() > 0 ) { fftw_import_wisdom_from_string( &wisdomString[0u] ); } + // std::string wisdomString( "" ); + // std::ifstream read( "wisdom.bin" ); + // if ( read.is_open() ) { + // std::getline( read, wisdomString ); + // while ( read ) { + // std::string line; + // std::getline( read, line ); + // wisdomString += line; + // } + // } + // read.close(); + // if ( wisdomString.length() > 0 ) { fftw_import_wisdom_from_string( &wisdomString[0u] ); } if ( grid::RegularGrid( gridGlobal_ ) ) { plans_.resize( 1 ); plans_[0] = fftw_plan_many_dft_c2r( 1, &nlonsMaxGlobal_, nlats, fft_in_, NULL, 1, num_complex, @@ -492,8 +483,8 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long plans_[j] = fftw_plan_dft_c2r_1d( nlonsGlobalj, fft_in_, fft_out_, FFTW_ESTIMATE ); } } - std::string file_path = TransParameters(config).write_fft(); - if( file_path.size() ) { + std::string file_path = 
TransParameters( config ).write_fft(); + if ( file_path.size() ) { Log::debug() << "Write FFTW wisdom to file " << file_path << std::endl; //bool success = fftw_export_wisdom_to_filename( "wisdom.bin" ); //ASSERT( success ); @@ -504,18 +495,18 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long fftw_export_wisdom_to_file( file_fftw ); fclose( file_fftw ); } -// std::string newWisdom( fftw_export_wisdom_to_string() ); -// if ( 1.1 * wisdomString.length() < newWisdom.length() ) { -// std::ofstream write( "wisdom.bin" ); -// write << newWisdom; -// write.close(); -// } + // std::string newWisdom( fftw_export_wisdom_to_string() ); + // if ( 1.1 * wisdomString.length() < newWisdom.length() ) { + // std::ofstream write( "wisdom.bin" ); + // write << newWisdom; + // write.close(); + // } } // other FFT implementations should be added with #elif statements #else useFFT_ = false; // no FFT implemented => default to dgemm - std::string file_path = TransParameters(config).write_fft(); - if( file_path.size() ) { + std::string file_path = TransParameters( config ).write_fft(); + if ( file_path.size() ) { std::ofstream write( file_path ); write << "No cache available, as FFTW is not enabled" << std::endl; write.close(); @@ -568,10 +559,9 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long } compute_legendre_polynomials_allopt3( truncation_, grid_.size(), lats.data(), legendre_ ); } - if( TransParameters(config).write_legendre().size() ) { - throw eckit::NotImplemented("Caching for unstructured grids not implemented",Here()); + if ( TransParameters( config ).write_legendre().size() ) { + throw eckit::NotImplemented( "Caching for unstructured grids not implemented", Here() ); } - } } // namespace trans @@ -584,7 +574,7 @@ TransLocalopt3::TransLocalopt3( const Grid& grid, const long truncation, const e TransLocalopt3::~TransLocalopt3() { if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { - if( not 
legendre_cache_ ) { + if ( not legendre_cache_ ) { free_aligned( legendre_sym_ ); free_aligned( legendre_asym_ ); } @@ -663,6 +653,8 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat const eckit::Configuration& config ) const { // Legendre transform: { + Log::debug() << "Legendre dgemm: using " << nlatsLegReduced_ - nlat0_[0] << " latitudes out of " + << nlatsGlobal_ / 2 << std::endl; ATLAS_TRACE( "opt3 Legendre dgemm" ); for ( int jm = 0; jm <= truncation_; jm++ ) { int size_sym = num_n( truncation_ + 1, jm, true ); @@ -849,8 +841,8 @@ void TransLocalopt3::invtrans_fourier_regularopt3( const int nlats, const int nl #endif } else { - - throw eckit::SeriousBug("dgemm for Fourier transforms currently broken. Make sure atlas is compiled with FFTW.",Here()); + throw eckit::SeriousBug( + "dgemm for Fourier transforms currently broken. Make sure atlas is compiled with FFTW.", Here() ); #if !TRANSLOCAL_DGEMM2 // dgemm-method 1 @@ -947,15 +939,15 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid:: #endif } else { - - throw eckit::SeriousBug("dgemm for Fourier transforms currently broken. Make sure atlas is compiled with FFTW.",Here()); + throw eckit::SeriousBug( + "dgemm for Fourier transforms currently broken. Make sure atlas is compiled with FFTW.", Here() ); #if !TRANSLOCAL_DGEMM2 // dgemm-method 1 { #warning dgemm currently broken for Fourier transforms. FFTW required! -// Noticed that Matrix C is trying to access more than is actually allocated -// Memory error!!! BEWARE!!! + // Noticed that Matrix C is trying to access more than is actually allocated + // Memory error!!! BEWARE!!! 
ATLAS_TRACE( "opt3 Fourier dgemm method 1" ); eckit::linalg::Matrix A( fourier_, nlonsMax, ( truncation_ + 1 ) * 2 ); eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats ); @@ -993,7 +985,6 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid:: free_aligned( gp_opt3 ); #endif } - } // -------------------------------------------------------------------------------------------------------------------- diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index e2b6be252..58362327c 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -972,15 +972,17 @@ CASE( "test_trans_domain" ) { int trc = 640; //Log::info() << "rgp1:" << std::endl; - if( eckit::PathName("legcache.bin").exists() ) eckit::PathName("legcache.bin").unlink(); - Trace t1(Here(),"translocal1 construction"); - trans::Trans transLocal1( g1, trc, option::type("local") | option::write_legendre("legcache.bin" ) | option::global_grid( Grid("O640")) ); + if ( eckit::PathName( "legcache.bin" ).exists() ) eckit::PathName( "legcache.bin" ).unlink(); + Trace t1( Here(), "translocal1 construction" ); + trans::Trans transLocal1( + g1, trc, + option::type( "local" ) | option::write_legendre( "legcache.bin" ) | option::global_grid( Grid( "O640" ) ) ); t1.stop(); //Log::info() << "rgp2:" << std::endl; trans::Cache cache; - ATLAS_TRACE_SCOPE("Read cache") cache = trans::LegendreCache("legcache.bin"); - Trace t2(Here(),"translocal2 construction"); - trans::Trans transLocal2( cache, g2, trc, option::type("local") | option::global_grid( Grid("O640")) ); + ATLAS_TRACE_SCOPE( "Read cache" ) cache = trans::LegendreCache( "legcache.bin" ); + Trace t2( Here(), "translocal2 construction" ); + trans::Trans transLocal2( cache, g2, trc, option::type( "local" ) | option::global_grid( Grid( "O640" ) ) ); t2.stop(); double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and 
transLocal2 @@ -1050,10 +1052,12 @@ CASE( "test_trans_domain" ) { rgp2_analytic.data(), ivar_in, ivar_out ); //Log::info() << std::endl << "rgp1:"; + ATLAS_TRACE_SCOPE( "translocal1" ) EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), div.data(), rgp1.data() ) ); //Log::info() << std::endl << "rgp2:"; + ATLAS_TRACE_SCOPE( "translocal2" ) EXPECT_NO_THROW( transLocal2.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), div.data(), rgp2.data() ) ); From 2616b79f2bc593b56f4862f4f233b4f3536c5b25 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 1 May 2018 14:39:43 +0100 Subject: [PATCH 055/123] added again option to run local transform without passing global grid --- src/atlas/trans/localopt3/TransLocalopt3.cc | 11 +++++++++-- src/tests/trans/test_transgeneral.cc | 3 ++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index f768f6c9e..91f8ccfa8 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -300,8 +300,15 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long if ( not gridGlobal_ ) { if ( grid_.domain().global() ) { gridGlobal_ = grid_; } else { - throw eckit::BadParameter( - "A global structured grid is required to be passed in the optional arguments", Here() ); + if ( Grid( grid_.name() ).domain().global() ) { + Log::warning() << Here() << " Deprecated. 
We should pass a global grid as optional argument" + << std::endl; + gridGlobal_ = Grid( grid_.name() ); + } + else { + throw eckit::BadParameter( + "A global structured grid is required to be passed in the optional arguments", Here() ); + } } } diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 58362327c..2c734b432 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -982,7 +982,8 @@ CASE( "test_trans_domain" ) { trans::Cache cache; ATLAS_TRACE_SCOPE( "Read cache" ) cache = trans::LegendreCache( "legcache.bin" ); Trace t2( Here(), "translocal2 construction" ); - trans::Trans transLocal2( cache, g2, trc, option::type( "local" ) | option::global_grid( Grid( "O640" ) ) ); + // trans::Trans transLocal2( cache, g2, trc, option::type( "local" ) | option::global_grid( Grid( "O640" ) ) ); + trans::Trans transLocal2( cache, g2, trc, option::type( "local" ) ); t2.stop(); double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 From 7de89623dab2865cfec76a549537b9f2237f5f0f Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 1 May 2018 15:03:30 +0100 Subject: [PATCH 056/123] some more debug output --- src/atlas/trans/localopt3/TransLocalopt3.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index 91f8ccfa8..3807bfcbe 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -320,6 +320,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long jlonMin_[0] = 0; jlatMin_ = 0; nlatsGlobal_ = gs_global.ny(); + Log::debug() << "Grid has " << nlats << " latitudes. 
Global grid has " << nlatsGlobal_ << std::endl; if ( useGlobalLeg ) { nlatsLeg_ = nlatsGlobal_ / 2; } else { nlatsLeg_ = nlatsLegDomain_; From 549ce04d930d162ca0187e5ba4d97d3cb7270331 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 2 May 2018 09:48:07 +0100 Subject: [PATCH 057/123] updated the debug output --- src/atlas/trans/localopt3/TransLocalopt3.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index 3807bfcbe..1e380faa6 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -320,7 +320,10 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long jlonMin_[0] = 0; jlatMin_ = 0; nlatsGlobal_ = gs_global.ny(); - Log::debug() << "Grid has " << nlats << " latitudes. Global grid has " << nlatsGlobal_ << std::endl; + if ( grid_.domain().global() ) { Log::debug() << "Global grid with " << nlats << " latitudes." << std::endl; } + else { + Log::debug() << "Grid has " << nlats << " latitudes. 
Global grid has " << nlatsGlobal_ << std::endl; + } if ( useGlobalLeg ) { nlatsLeg_ = nlatsGlobal_ / 2; } else { nlatsLeg_ = nlatsLegDomain_; From 96da40e577fdb16534521296db79dd3a02922cc7 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 3 May 2018 16:01:40 +0100 Subject: [PATCH 058/123] no_fft working for regular grids --- src/atlas/trans/localopt3/TransLocalopt3.cc | 49 +-------------------- src/tests/trans/test_transgeneral.cc | 13 +++--- 2 files changed, 8 insertions(+), 54 deletions(-) diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index 1e380faa6..5099add36 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -852,9 +852,6 @@ void TransLocalopt3::invtrans_fourier_regularopt3( const int nlats, const int nl #endif } else { - throw eckit::SeriousBug( - "dgemm for Fourier transforms currently broken. Make sure atlas is compiled with FFTW.", Here() ); - #if !TRANSLOCAL_DGEMM2 // dgemm-method 1 { @@ -950,51 +947,7 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid:: #endif } else { - throw eckit::SeriousBug( - "dgemm for Fourier transforms currently broken. Make sure atlas is compiled with FFTW.", Here() ); - -#if !TRANSLOCAL_DGEMM2 - // dgemm-method 1 - { -#warning dgemm currently broken for Fourier transforms. FFTW required! - // Noticed that Matrix C is trying to access more than is actually allocated - // Memory error!!! BEWARE!!! 
- ATLAS_TRACE( "opt3 Fourier dgemm method 1" ); - eckit::linalg::Matrix A( fourier_, nlonsMax, ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats ); - eckit::linalg::Matrix C( gp_fields, nlonsMax, nb_fields * nlats ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } -#else - // dgemm-method 2 - // should be faster for small domains or large truncation - // but have not found any significant speedup so far - double* gp_opt3; - alloc_aligned( gp_opt3, nb_fields * grid_.size() ); - { - ATLAS_TRACE( "opt3 Fourier dgemm method 2" ); - eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlonsMax ); - eckit::linalg::Matrix C( gp_opt3, nb_fields * nlats, nlonsMax ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - - // Transposition in grid point space: - { - ATLAS_TRACE( "opt3 transposition in gp-space" ); - int idx = 0; - for ( int jlon = 0; jlon < nlonsMax; jlon++ ) { - for ( int jlat = 0; jlat < nlats; jlat++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int pos_tp = jlon + nlonsMax * ( jlat + nlats * ( jfld ) ); - //int pos = jfld + nb_fields * ( jlat + nlats * ( jlon ) ); - gp_fields[pos_tp] = gp_opt3[idx++]; // = gp_opt3[pos] - } - } - } - } - free_aligned( gp_opt3 ); -#endif + throw eckit::SeriousBug( "dgemm for Fourier transforms not implemented for reduced grids", Here() ); } } diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 2c734b432..13b8dd711 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -966,7 +966,7 @@ CASE( "test_trans_domain" ) { Domain testdomain2 = RectangularDomain( {-1., 1.}, {-5., 40.} ); // Grid: (Adjust the following line if the test takes too long!) 
- std::string gridString = "O640"; + std::string gridString = "F640"; Grid g1( gridString, testdomain1 ); Grid g2( gridString, testdomain2 ); @@ -974,16 +974,17 @@ CASE( "test_trans_domain" ) { //Log::info() << "rgp1:" << std::endl; if ( eckit::PathName( "legcache.bin" ).exists() ) eckit::PathName( "legcache.bin" ).unlink(); Trace t1( Here(), "translocal1 construction" ); - trans::Trans transLocal1( - g1, trc, - option::type( "local" ) | option::write_legendre( "legcache.bin" ) | option::global_grid( Grid( "O640" ) ) ); + trans::Trans transLocal1( g1, trc, + option::type( "local" ) | option::write_legendre( "legcache.bin" ) | + option::global_grid( Grid( gridString ) ) ); t1.stop(); //Log::info() << "rgp2:" << std::endl; trans::Cache cache; ATLAS_TRACE_SCOPE( "Read cache" ) cache = trans::LegendreCache( "legcache.bin" ); Trace t2( Here(), "translocal2 construction" ); - // trans::Trans transLocal2( cache, g2, trc, option::type( "local" ) | option::global_grid( Grid( "O640" ) ) ); - trans::Trans transLocal2( cache, g2, trc, option::type( "local" ) ); + trans::Trans transLocal2( cache, g2, trc, + option::type( "local" ) | option::global_grid( Grid( gridString ) ) | option::no_fft() ); + //trans::Trans transLocal2( cache, g2, trc, option::type( "local" ) ); t2.stop(); double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 From 0202fb16204de84c149959dbd59f48591dd2ad59 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 3 May 2018 16:10:03 +0100 Subject: [PATCH 059/123] added NOTIMP for Fourier dgemm with reduced grids --- src/atlas/trans/localopt3/TransLocalopt3.cc | 4 +++- src/tests/trans/test_transgeneral.cc | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index 5099add36..e8d8fc679 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -947,7 +947,9 @@ void 
TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid:: #endif } else { - throw eckit::SeriousBug( "dgemm for Fourier transforms not implemented for reduced grids", Here() ); + NOTIMP; + // Using dgemm in Fourier transform for reduced grids is extremely slow. + // Please install and use FFTW! } } diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 13b8dd711..2804192c9 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -966,7 +966,7 @@ CASE( "test_trans_domain" ) { Domain testdomain2 = RectangularDomain( {-1., 1.}, {-5., 40.} ); // Grid: (Adjust the following line if the test takes too long!) - std::string gridString = "F640"; + std::string gridString = "O640"; Grid g1( gridString, testdomain1 ); Grid g2( gridString, testdomain2 ); From ed80d65da2b7de20cf9fbbce4c5976c0b769e28f Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 3 May 2018 18:58:46 +0100 Subject: [PATCH 060/123] non-nested regular grids are now supported --- src/atlas/trans/localopt3/TransLocalopt3.cc | 96 +++++++++++++-------- src/atlas/trans/localopt3/TransLocalopt3.h | 1 + src/tests/trans/test_transgeneral.cc | 44 ++++++---- 3 files changed, 89 insertions(+), 52 deletions(-) diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index e8d8fc679..b878a337c 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -266,12 +266,14 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long int neqtr = 0; useFFT_ = TransParameters( config ).fft(); unstruct_precomp_ = true; + no_symmetry_ = false; nlatsNH_ = 0; nlatsSH_ = 0; nlatsLeg_ = 0; nlatsLegDomain_ = 0; nlatsLegReduced_ = 0; bool useGlobalLeg = true; + bool no_nest = false; if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { grid::StructuredGrid g( grid_ ); nlats = g.ny(); @@ -300,15 +302,28 @@ 
TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long if ( not gridGlobal_ ) { if ( grid_.domain().global() ) { gridGlobal_ = grid_; } else { - if ( Grid( grid_.name() ).domain().global() ) { + /*if ( Grid( grid_.name() ).domain().global() ) { Log::warning() << Here() << " Deprecated. We should pass a global grid as optional argument" << std::endl; gridGlobal_ = Grid( grid_.name() ); } + else {*/ + if ( grid::RegularGrid( grid_ ) ) { + // non-nested regular grid + no_nest = true; + no_symmetry_ = true; + useFFT_ = false; + nlatsNH_ = nlats; + nlatsSH_ = 0; + nlatsLegDomain_ = nlatsNH_; + gridGlobal_ = grid_; + useGlobalLeg = false; + } else { - throw eckit::BadParameter( - "A global structured grid is required to be passed in the optional arguments", Here() ); + NOTIMP; + // non-nested reduced grids are not supported } + //} } } @@ -343,21 +358,28 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long // reduce truncation towards the pole for reduced meshes: nlat0_.resize( truncation_ + 1 ); - int nmen0 = -1; - for ( int jlat = 0; jlat < nlatsGlobal_ / 2; jlat++ ) { - double lat = gs_global.y( jlat ) * util::Constants::degreesToRadians(); - int nmen = fourier_truncation( truncation_, gs_global.nx( jlat ), gs_global.nxmax(), nlatsGlobal_, lat, - grid::RegularGrid( gs_global ) ); - nmen = std::max( nmen0, nmen ); - int ndgluj = nlatsLeg_ - std::min( nlatsLeg_, nlatsLeg_ + jlatMinLeg_ - jlat ); - if ( useGlobalLeg ) { ndgluj = std::max( jlatMinLeg_, jlat ); } - for ( int j = nmen0 + 1; j <= nmen; j++ ) { - nlat0_[j] = ndgluj; + if ( no_nest ) { + for ( int j = 0; j <= truncation_; j++ ) { + nlat0_[j] = 0; } - nmen0 = nmen; } - for ( int j = nmen0 + 1; j <= truncation_; j++ ) { - nlat0_[j] = nlatsLeg_; + else { + int nmen0 = -1; + for ( int jlat = 0; jlat < nlatsGlobal_ / 2; jlat++ ) { + double lat = gs_global.y( jlat ) * util::Constants::degreesToRadians(); + int nmen = fourier_truncation( truncation_, 
gs_global.nx( jlat ), gs_global.nxmax(), nlatsGlobal_, lat, + grid::RegularGrid( gs_global ) ); + nmen = std::max( nmen0, nmen ); + int ndgluj = nlatsLeg_ - std::min( nlatsLeg_, nlatsLeg_ + jlatMinLeg_ - jlat ); + if ( useGlobalLeg ) { ndgluj = std::max( jlatMinLeg_, jlat ); } + for ( int j = nmen0 + 1; j <= nmen; j++ ) { + nlat0_[j] = ndgluj; + } + nmen0 = nmen; + } + for ( int j = nmen0 + 1; j <= truncation_; j++ ) { + nlat0_[j] = nlatsLeg_; + } } /*Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << " jlatMin=" << jlatMin_ << " jlatMinLeg=" << jlatMinLeg_ << " nlatsGlobal/2-nlatsLeg=" << nlatsGlobal_ / 2 - nlatsLeg_ @@ -369,24 +391,26 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long if ( result < 0. ) { result += 360.; } return result; }; - double lonmin = wrapAngle( g.x( 0, 0 ) ); - if ( nlonsMax < fft_threshold * nlonsMaxGlobal_ ) { useFFT_ = false; } - else { - // need to use FFT with cropped grid - if ( grid::RegularGrid( gridGlobal_ ) ) { - for ( size_t jlon = 0; jlon < nlonsMaxGlobal_; ++jlon ) { - if ( gs_global.x( jlon, 0 ) < lonmin ) { jlonMin_[0]++; } - } - } + if ( useFFT_ ) { + double lonmin = wrapAngle( g.x( 0, 0 ) ); + if ( nlonsMax < fft_threshold * nlonsMaxGlobal_ ) { useFFT_ = false; } else { - nlonsGlobal_.resize( nlats ); - jlonMin_.resize( nlats ); - for ( size_t jlat = 0; jlat < nlats; jlat++ ) { - double lonmin = wrapAngle( g.x( 0, jlat ) ); - nlonsGlobal_[jlat] = gs_global.nx( jlat + jlatMin_ ); - jlonMin_[jlat] = 0; - for ( size_t jlon = 0; jlon < nlonsGlobal_[jlat]; ++jlon ) { - if ( gs_global.x( jlon, jlat + jlatMin_ ) < lonmin ) { jlonMin_[jlat]++; } + // need to use FFT with cropped grid + if ( grid::RegularGrid( gridGlobal_ ) ) { + for ( size_t jlon = 0; jlon < nlonsMaxGlobal_; ++jlon ) { + if ( gs_global.x( jlon, 0 ) < lonmin ) { jlonMin_[0]++; } + } + } + else { + nlonsGlobal_.resize( nlats ); + jlonMin_.resize( nlats ); + for ( size_t jlat = 0; jlat < nlats; jlat++ ) { + 
double lonmin = wrapAngle( g.x( 0, jlat ) ); + nlonsGlobal_[jlat] = gs_global.nx( jlat + jlatMin_ ); + jlonMin_[jlat] = 0; + for ( size_t jlon = 0; jlon < nlonsGlobal_[jlat]; ++jlon ) { + if ( gs_global.x( jlon, jlat + jlatMin_ ) < lonmin ) { jlonMin_[jlat]++; } + } } } } @@ -947,9 +971,9 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid:: #endif } else { - NOTIMP; - // Using dgemm in Fourier transform for reduced grids is extremely slow. - // Please install and use FFTW! + throw eckit::NotImplemented( + "Using dgemm in Fourier transform for reduced grids is extremely slow. Please install and use FFTW!", + Here() ); } } diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h index 0651abeab..3826dbc9c 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.h +++ b/src/atlas/trans/localopt3/TransLocalopt3.h @@ -145,6 +145,7 @@ class TransLocalopt3 : public trans::TransImpl { bool useFFT_; bool dgemmMethod1_; bool unstruct_precomp_; + bool no_symmetry_; int truncation_; int nlatsNH_; int nlatsSH_; diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 2804192c9..ff8185943 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -427,7 +427,7 @@ double sphericalharmonics_analytic_point( // void spectral_transform_grid_analytic( const size_t trc, // truncation (in) - const size_t trcFT, // truncation for Fourier transformation (in) + bool trcFT, // truncation for Fourier transformation (in) const double n, // total wave number (implemented so far for n<4 const double m, // zonal wave number (implemented so far for m<4, m g.y( 0 ) ) { jlatMin++; }; + Grid gridGlobal; + grid::StructuredGrid gs_global; + int jlatMin = 0; + if ( trcFT ) { + gridGlobal = Grid( grid.name() ); + gs_global = grid::StructuredGrid( gridGlobal ); + int nlatsGlobal = gs_global.ny(); + for ( int jlat = 0; jlat < nlatsGlobal; jlat++ ) { + if ( 
gs_global.y( jlat ) > g.y( 0 ) ) { jlatMin++; }; + } } int idx = 0; for ( size_t j = 0; j < g.ny(); ++j ) { double lat = g.y( j ) * util::Constants::degreesToRadians(); - - int ftrc = trans::fourier_truncation( trc, gs_global.nx( jlatMin + j ), gs_global.nxmax(), gs_global.ny(), + int ftrc = trc + 1; + if ( trcFT ) { + ftrc = trans::fourier_truncation( trc, gs_global.nx( jlatMin + j ), gs_global.nxmax(), gs_global.ny(), lat, grid::RegularGrid( gs_global ) ); + } /*Log::info() << "j=" << j << " ftrc=" << ftrc << " trc=" << trc << " nx=" << gs_global.nx( jlatMin + j ) << " nxmax=" << gs_global.nxmax() << " nlats=" << gs_global.ny() << " lat=" << g.y( j ) << " jlatMin=" << jlatMin << std::endl;*/ @@ -545,7 +551,7 @@ double spectral_transform_test( double trc, // truncation // compute analytic solution (this also initializes rspecg and needs to be // done before the actual transform): - spectral_transform_grid_analytic( trc, trc, n, m, imag, g, rspecg, rgp_analytic, 2, 2 ); + spectral_transform_grid_analytic( trc, true, n, m, imag, g, rspecg, rgp_analytic, 2, 2 ); // perform spectral transform: spectral_transform_grid( trc, trc, g, rspecg, rgp, pointwise ); @@ -968,7 +974,13 @@ CASE( "test_trans_domain" ) { std::string gridString = "O640"; Grid g1( gridString, testdomain1 ); - Grid g2( gridString, testdomain2 ); + //Grid g2( gridString, testdomain2 ); + + bool fourierTrc1 = true; + bool fourierTrc2 = false; + using grid::StructuredGrid; + using LinearSpacing = grid::LinearSpacing; + StructuredGrid g2( LinearSpacing( {0., 180.}, 181 ), LinearSpacing( {0., 45.}, 46 ) ); int trc = 640; //Log::info() << "rgp1:" << std::endl; @@ -982,9 +994,9 @@ CASE( "test_trans_domain" ) { trans::Cache cache; ATLAS_TRACE_SCOPE( "Read cache" ) cache = trans::LegendreCache( "legcache.bin" ); Trace t2( Here(), "translocal2 construction" ); - trans::Trans transLocal2( cache, g2, trc, - option::type( "local" ) | option::global_grid( Grid( gridString ) ) | option::no_fft() ); - //trans::Trans 
transLocal2( cache, g2, trc, option::type( "local" ) ); + //trans::Trans transLocal2( cache, g2, trc, + // option::type( "local" ) | option::global_grid( Grid( gridString ) ) | option::no_fft() ); + trans::Trans transLocal2( g2, trc, option::type( "local" ) ); t2.stop(); double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 @@ -1047,10 +1059,10 @@ CASE( "test_trans_domain" ) { rgp2_analytic[j] = 0.; } - spectral_transform_grid_analytic( trc, trc, n, m, imag, g1, rspecg.data(), + spectral_transform_grid_analytic( trc, fourierTrc1, n, m, imag, g1, rspecg.data(), rgp1_analytic.data(), ivar_in, ivar_out ); - spectral_transform_grid_analytic( trc, trc, n, m, imag, g2, rspecg.data(), + spectral_transform_grid_analytic( trc, fourierTrc2, n, m, imag, g2, rspecg.data(), rgp2_analytic.data(), ivar_in, ivar_out ); //Log::info() << std::endl << "rgp1:"; From b5a7092c4911df6f126b30273d3d0fcac7a344cf Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Fri, 4 May 2018 14:56:46 +0100 Subject: [PATCH 061/123] Cleanup --- .../localopt3/LegendrePolynomialsopt3.cc | 1 - src/atlas/trans/localopt3/TransLocalopt3.cc | 129 ++++++++---------- src/atlas/trans/localopt3/TransLocalopt3.h | 8 ++ src/tests/trans/test_trans_localcache.cc | 80 ++++++----- 4 files changed, 113 insertions(+), 105 deletions(-) diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc index 2f16cc43b..1cddbc18b 100644 --- a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc +++ b/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc @@ -158,7 +158,6 @@ void compute_legendre_polynomialsopt3( size_t leg_start_sym[], // start indices for different zonal wave numbers, symmetric part size_t leg_start_asym[] ) // start indices for different zonal wave numbers, asymmetric part { - ATLAS_TRACE(); auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; }; std::vector legpol( 
legendre_size( trc ) ); std::vector zfn( ( trc + 1 ) * ( trc + 1 ) ); diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index b878a337c..50f3ee7a6 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -9,6 +9,7 @@ */ #include "atlas/trans/localopt3/TransLocalopt3.h" +#include #include #include "atlas/array.h" #include "atlas/option.h" @@ -25,9 +26,6 @@ #include "eckit/linalg/Matrix.h" #include "eckit/log/Bytes.h" #include "eckit/parser/JSON.h" -#ifdef ECKIT_HAVE_MKL -#include "mkl.h" -#endif namespace atlas { namespace trans { @@ -179,27 +177,13 @@ int num_n( const int truncation, const int m, const bool symmetric ) { } void alloc_aligned( double*& ptr, size_t n ) { -#warning todo1 -// If we can assume that posix_memalign gives the same result, we would not need to support mkl_malloc -// We can then remove the include of mkl.h above (simplifying things). -// As well there is the C++ functions "std::align" (http://en.cppreference.com/w/cpp/memory/align) -// that we could look into. 
-#ifdef ECKIT_HAVE_MKL - int al = 64; - ptr = (double*)mkl_malloc( sizeof( double ) * n, al ); -#else - posix_memalign( (void**)&ptr, sizeof( double ) * 64, sizeof( double ) * n ); - //ptr = (double*)malloc( sizeof( double ) * n ); - //ptr = new double[n]; -#endif + const size_t alignment = 64 * sizeof( double ); + ptr = (double*) aligned_alloc( alignment, sizeof( double ) * n ); } void free_aligned( double*& ptr ) { -#ifdef ECKIT_HAVE_MKL - mkl_free( ptr ); -#else free( ptr ); -#endif + ptr = nullptr; } int add_padding( int n ) { @@ -242,6 +226,14 @@ int fourier_truncation( const int truncation, // truncation // Class TransLocalopt3 // -------------------------------------------------------------------------------------------------------------------- +const eckit::linalg::LinearAlgebra& linear_algebra_backend() { + if( eckit::linalg::LinearAlgebra::hasBackend("mkl") ) { + return eckit::linalg::LinearAlgebra::getBackend("mkl"); + } + // Default backend + return eckit::linalg::LinearAlgebra::backend(); +} + TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long truncation, const eckit::Configuration& config ) : grid_( grid ), @@ -251,13 +243,10 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long legendre_cache_( cache.legendre().data() ), legendre_cachesize_( cache.legendre().size() ), fft_cache_( cache.fft().data() ), - fft_cachesize_( cache.fft().size() ) { - ATLAS_TRACE( "Precompute legendre opt3" ); -#ifdef ECKIT_HAVE_MKL - eckit::linalg::LinearAlgebra::backend( "mkl" ); // might want to choose backend with this command -#else - eckit::linalg::LinearAlgebra::backend( "generic" ); // might want to choose backend with this command -#endif + fft_cachesize_( cache.fft().size() ), + linalg_( linear_algebra_backend() ) +{ + ATLAS_TRACE( "TransLocalOpt3 constructor" ); double fft_threshold = 0.0; // fraction of latitudes of the full grid down to which FFT is used. 
// This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine // on which this code is running! @@ -439,7 +428,6 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long // precomputations for Legendre polynomials: { - ATLAS_TRACE( "opt3 precomp Legendre" ); int size_sym = 0; int size_asym = 0; legendre_sym_begin_.resize( truncation_ + 3 ); @@ -461,15 +449,18 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long // TODO: check this is all aligned... } else { - alloc_aligned( legendre_sym_, size_sym ); - alloc_aligned( legendre_asym_, size_asym ); + ATLAS_TRACE_SCOPE( "Legendre precomputations (structured)" ) { - compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, - legendre_asym_, legendre_sym_begin_.data(), - legendre_asym_begin_.data() ); + alloc_aligned( legendre_sym_, size_sym ); + alloc_aligned( legendre_asym_, size_asym ); + + compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, + legendre_asym_, legendre_sym_begin_.data(), + legendre_asym_begin_.data() ); + } std::string file_path = TransParameters( config ).write_legendre(); if ( file_path.size() ) { - ATLAS_TRACE( "write_legendre" ); + ATLAS_TRACE( "Write LegendreCache to file" ); Log::debug() << "Writing Legendre cache file ..." 
<< std::endl; Log::debug() << " path = " << file_path << std::endl; WriteCache legendre( file_path ); @@ -484,7 +475,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long if ( useFFT_ ) { #if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2 { - ATLAS_TRACE( "opt3 precomp FFTW" ); + ATLAS_TRACE( "Fourier precomputations (FFTW)" ); int num_complex = ( nlonsMaxGlobal_ / 2 ) + 1; fft_in_ = fftw_alloc_complex( nlats * num_complex ); fft_out_ = fftw_alloc_real( nlats * nlonsMaxGlobal_ ); @@ -553,7 +544,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlonsMax ); #if !TRANSLOCAL_DGEMM2 { - ATLAS_TRACE( "opt3 precomp Fourier tp" ); + ATLAS_TRACE( "Fourier precomputations (NoFFT)" ); int idx = 0; for ( int jm = 0; jm < truncation_ + 1; jm++ ) { double factor = 1.; @@ -585,17 +576,17 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long else { // unstructured grid if ( unstruct_precomp_ ) { - ATLAS_TRACE( "opt3 precomp unstructured" ); + ATLAS_TRACE( "Legendre precomputations (unstructured)" ); std::vector lats( grid_.size() ); alloc_aligned( legendre_, legendre_size( truncation_ ) * grid_.size() ); int j( 0 ); - for ( PointXY p : grid_.xy() ) { - lats[j++] = p.y() * util::Constants::degreesToRadians(); + for ( PointLonLat p : grid_.lonlat() ) { + lats[j++] = p.lat() * util::Constants::degreesToRadians(); } compute_legendre_polynomials_allopt3( truncation_, grid_.size(), lats.data(), legendre_ ); } if ( TransParameters( config ).write_legendre().size() ) { - throw eckit::NotImplemented( "Caching for unstructured grids not implemented", Here() ); + throw eckit::NotImplemented( "Caching for unstructured grids or structured grids with projections not yet implemented", Here() ); } } } // namespace trans @@ -690,7 +681,7 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat { Log::debug() << "Legendre dgemm: using 
" << nlatsLegReduced_ - nlat0_[0] << " latitudes out of " << nlatsGlobal_ / 2 << std::endl; - ATLAS_TRACE( "opt3 Legendre dgemm" ); + ATLAS_TRACE( "Inverse Legendre Transform (GEMM)" ); for ( int jm = 0; jm <= truncation_; jm++ ) { int size_sym = num_n( truncation_ + 1, jm, true ); int size_asym = num_n( truncation_ + 1, jm, false ); @@ -746,7 +737,7 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm] + nlat0_[jm] * size_sym, size_sym, nlatsLegReduced_ - nlat0_[jm] ); eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLegReduced_ - nlat0_[jm] ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + linalg_.gemm( A, B, C ); /*Log::info() << "sym: "; for ( int j = 0; j < size_sym * ( nlatsLegReduced_ - nlat0_[jm] ); j++ ) { Log::info() << legendre_sym_[j + legendre_sym_begin_[jm] + nlat0_[jm] * size_sym] << " "; @@ -758,7 +749,7 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm] + nlat0_[jm] * size_asym, size_asym, nlatsLegReduced_ - nlat0_[jm] ); eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLegReduced_ - nlat0_[jm] ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + linalg_.gemm( A, B, C ); /*Log::info() << "asym: "; for ( int j = 0; j < size_asym * ( nlatsLegReduced_ - nlat0_[jm] ); j++ ) { Log::info() << legendre_asym_[j + legendre_asym_begin_[jm] + nlat0_[jm] * size_asym] << " "; @@ -845,7 +836,7 @@ void TransLocalopt3::invtrans_fourier_regularopt3( const int nlats, const int nl { int num_complex = ( nlonsMaxGlobal_ / 2 ) + 1; { - ATLAS_TRACE( "opt3 FFTW regular" ); + ATLAS_TRACE( "Inverse Fourier Transform (FFTW, RegularGrid)" ); for ( int jfld = 0; jfld < nb_fields; jfld++ ) { int idx = 0; for ( int jlat = 0; jlat < nlats; jlat++ ) { @@ -879,11 +870,11 @@ void TransLocalopt3::invtrans_fourier_regularopt3( const 
int nlats, const int nl #if !TRANSLOCAL_DGEMM2 // dgemm-method 1 { - ATLAS_TRACE( "opt3 Fourier dgemm method 1" ); + ATLAS_TRACE( "Inverse Fourier Transform (NoFFT)" ); eckit::linalg::Matrix A( fourier_, nlons, ( truncation_ + 1 ) * 2 ); eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats ); eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + linalg_.gemm( A, B, C ); } #else // dgemm-method 2 @@ -896,7 +887,7 @@ void TransLocalopt3::invtrans_fourier_regularopt3( const int nlats, const int nl eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 ); eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlons ); eckit::linalg::Matrix C( gp_opt3, nb_fields * nlats, nlons ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + linalg_.gemm( A, B, C ); } // Transposition in grid point space: @@ -929,7 +920,7 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid:: #if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2 { { - ATLAS_TRACE( "opt3 FFTW reduced" ); + ATLAS_TRACE( "Inverse Fourier Transform (FFTW, ReducedGid)" ); int jgp = 0; for ( int jfld = 0; jfld < nb_fields; jfld++ ) { for ( int jlat = 0; jlat < nlats; jlat++ ) { @@ -984,8 +975,8 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const double gp_fields[], const eckit::Configuration& config ) const { ATLAS_TRACE( "invtrans_uv unstructured opt3" ); grid::UnstructuredGrid gu = grid_; - int nlats = grid_.size(); - int size_fourier = nb_fields * 2; + const int nlats = grid_.size(); + const int size_fourier = nb_fields * 2; double* legendre; double* scl_fourier; double* scl_fourier_tp; @@ -997,32 +988,32 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const alloc_aligned( gp_opt, nb_fields ); { - ATLAS_TRACE( "opt3 Legendre dgemm" ); + ATLAS_TRACE( "Inverse Legendre Transform (GEMM)" ); for ( int jm = 0; 
jm < truncation; jm++ ) { - int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; + const int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; eckit::linalg::Matrix A( eckit::linalg::Matrix( const_cast( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) ); eckit::linalg::Matrix B( legendre_ + noff * nlats, ns, nlats ); eckit::linalg::Matrix C( scl_fourier + jm * size_fourier * nlats, nb_fields * 2, nlats ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + linalg_.gemm( A, B, C ); } } // loop over all points: { - ATLAS_TRACE( "opt3 Fourier dgemm" ); + ATLAS_TRACE( "Inverse Fourier Transform (NoFFT)" ); for ( int ip = 0; ip < grid_.size(); ip++ ) { - PointXY p = gu.xy( ip ); - double lon = p.x() * util::Constants::degreesToRadians(); - double lat = p.y() * util::Constants::degreesToRadians(); + const PointLonLat p = gu.lonlat( ip ); + const double lon = p.lon() * util::Constants::degreesToRadians(); + const double lat = p.lat() * util::Constants::degreesToRadians(); { //ATLAS_TRACE( "opt transposition in Fourier" ); for ( int jm = 0; jm < truncation; jm++ ) { int idx = nb_fields * 2 * ( ip + nlats * jm ); for ( int imag = 0; imag < 2; imag++ ) { for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int pos_tp = imag + 2 * ( jm + ( truncation ) * ( jfld ) ); + const int pos_tp = imag + 2 * ( jm + ( truncation ) * ( jfld ) ); //int pos = jfld + nb_fields * ( imag + 2 * ( jm ) ); scl_fourier_tp[pos_tp] = scl_fourier[idx++]; // = scl_fourier[pos] } @@ -1046,7 +1037,7 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const eckit::linalg::Matrix A( fouriertp, 1, (truncation)*2 ); eckit::linalg::Matrix B( scl_fourier_tp, (truncation)*2, nb_fields ); eckit::linalg::Matrix C( gp_opt, 1, nb_fields ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + linalg_.gemm( A, B, C ); for ( int j = 0; j < nb_fields; j++ ) { gp_fields[ip + j * grid_.size()] = gp_opt[j]; } @@ -1074,7 +1065,7 
@@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_fields, const int nb_vordiv_fields, const double scalar_spectra[], double gp_fields[], const eckit::Configuration& config ) const { - ATLAS_TRACE( "invtrans_uv unstructured opt3" ); + ATLAS_TRACE( "invtrans_uv unstructured" ); grid::UnstructuredGrid gu = grid_; double* zfn; alloc_aligned( zfn, ( truncation + 1 ) * ( truncation + 1 ) ); @@ -1093,20 +1084,20 @@ void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_f // loop over all points: for ( int ip = 0; ip < grid_.size(); ip++ ) { - PointXY p = gu.xy( ip ); - double lon = p.x() * util::Constants::degreesToRadians(); - double lat = p.y() * util::Constants::degreesToRadians(); + const PointLonLat p = gu.lonlat( ip ); + const double lon = p.lon() * util::Constants::degreesToRadians(); + const double lat = p.lat() * util::Constants::degreesToRadians(); compute_legendre_polynomials_latopt3( truncation, lat, legendre, zfn ); // Legendre transform: { //ATLAS_TRACE( "opt Legendre dgemm" ); for ( int jm = 0; jm <= truncation; jm++ ) { - int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; + const int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; eckit::linalg::Matrix A( eckit::linalg::Matrix( const_cast( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) ); eckit::linalg::Matrix B( legendre + noff, ns, 1 ); eckit::linalg::Matrix C( scl_fourier + jm * size_fourier, nb_fields * 2, 1 ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + linalg_.gemm( A, B, C ); } } { @@ -1115,7 +1106,7 @@ void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_f for ( int jm = 0; jm < truncation + 1; jm++ ) { for ( int imag = 0; imag < 2; imag++ ) { for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int pos_tp = imag + 2 * ( jm + ( truncation + 1 ) * ( jfld ) ); + 
const int pos_tp = imag + 2 * ( jm + ( truncation + 1 ) * ( jfld ) ); //int pos = jfld + nb_fields * ( imag + 2 * ( jm ) ); scl_fourier_tp[pos_tp] = scl_fourier[idx++]; // = scl_fourier[pos] } @@ -1136,7 +1127,7 @@ void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_f eckit::linalg::Matrix A( fouriertp, 1, ( truncation + 1 ) * 2 ); eckit::linalg::Matrix B( scl_fourier_tp, ( truncation + 1 ) * 2, nb_fields ); eckit::linalg::Matrix C( gp_opt, 1, nb_fields ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); + linalg_.gemm( A, B, C ); for ( int j = 0; j < nb_fields; j++ ) { gp_fields[ip + j * grid_.size()] = gp_opt[j]; } @@ -1145,7 +1136,7 @@ void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_f { if ( nb_vordiv_fields > 0 ) { //ATLAS_TRACE( "opt3 u,v from U,V" ); - double coslat = std::cos( lat ); + const double coslat = std::cos( lat ); for ( int j = 0; j < nb_fields; j++ ) { gp_fields[ip + j * grid_.size()] /= coslat; } @@ -1182,7 +1173,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel // Transform if ( grid::StructuredGrid g = grid_ ) { - ATLAS_TRACE( "invtrans_uv structured opt3" ); + ATLAS_TRACE( "invtrans_uv structured" ); int nlats = g.ny(); int nlons = g.nxmax(); int size_fourier_max = nb_fields * 2 * nlats; diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h index 3826dbc9c..22d77b48b 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.h +++ b/src/atlas/trans/localopt3/TransLocalopt3.h @@ -24,6 +24,12 @@ //----------------------------------------------------------------------------- // Forward declarations +namespace eckit { +namespace linalg { +class LinearAlgebra; +} // namespace linalg +} // namespace eckit + namespace atlas { class Field; class FieldSet; @@ -179,6 +185,8 @@ class TransLocalopt3 : public trans::TransImpl { size_t legendre_cachesize_{0}; const void* fft_cache_{nullptr}; size_t 
fft_cachesize_{0}; + + const eckit::linalg::LinearAlgebra& linalg_; }; //----------------------------------------------------------------------------- diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc index 9dce62122..2a0e442ac 100644 --- a/src/tests/trans/test_trans_localcache.cc +++ b/src/tests/trans/test_trans_localcache.cc @@ -106,7 +106,7 @@ CASE( "test_global_grids" ) { auto resolutions = { 32, 64 }; for( int n : resolutions ) { int t = n-1; - auto cases = { + auto cases = { std::make_pair(F(n),t), std::make_pair(O(n),t), std::make_pair(N(n),t), @@ -149,7 +149,7 @@ CASE( "test_global_grids" ) { CASE( "test_global_grids_with_subdomain" ) { int n = 64; int t = n-1; - auto cases = { + auto cases = { std::make_pair(F(n),t), std::make_pair(O(n),t), std::make_pair(N(n),t), @@ -193,46 +193,56 @@ CASE( "test_global_grids_with_subdomain" ) { } } -CASE( "test_regional_grids_nested_in_global" ) { +CASE( "test_regional_grids nested_in_global" ) { + auto cachefile = CacheFile("regional_lonlat.bin"); + auto truncation = 89; Cache cache; - { - auto truncation = 89; - - ATLAS_TRACE("regional_lonlat"); - - auto cachefile = CacheFile("regional_lonlat.bin"); - StructuredGrid grid_global( - LinearSpacing( { 0., 360.}, 360, false ), - LinearSpacing( {-90., 90.}, 181, true ) - ); - ASSERT( grid_global.domain().global() ); - StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) ); - ATLAS_TRACE_SCOPE("create without cache") - Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) ); - ATLAS_TRACE_SCOPE("create without cache and write") - Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) | option::write_legendre( cachefile ) ); - ATLAS_TRACE_SCOPE("read cache") - cache = LegendreCache( cachefile ); - ATLAS_TRACE_SCOPE("create with cache") - Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) ); - } -// { -// 
StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) ); -// Trans( grid, 89 ); -// } + StructuredGrid grid_global( + LinearSpacing( { 0., 360.}, 360, false ), + LinearSpacing( {-90., 90.}, 181, true ) + ); + ASSERT( grid_global.domain().global() ); + StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) ); + ATLAS_TRACE_SCOPE("create without cache") + Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) ); + ATLAS_TRACE_SCOPE("create without cache and write") + Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) | option::write_legendre( cachefile ) ); + ATLAS_TRACE_SCOPE("read cache") + cache = LegendreCache( cachefile ); + ATLAS_TRACE_SCOPE("create with cache") + Trans( cache, grid, truncation, option::type("local") | option::global_grid( grid_global ) ); } CASE( "test_regional_grids not nested" ) { - if (false) { - StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) ); - Trans( grid, 89 ); - } else { - Log::warning() << "This test fails if enabled!!! 
" << Here() << std::endl; - } + auto cachefile = CacheFile("cache-regional.bin"); + auto truncation = 89; + Cache cache; + + StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) ); + ATLAS_TRACE_SCOPE("create without cache") + Trans( grid, truncation, option::type("local") ); + ATLAS_TRACE_SCOPE("create without cache and write") + Trans( grid, truncation, option::type("local") | option::write_legendre( cachefile ) ); + ATLAS_TRACE_SCOPE("read cache") + cache = LegendreCache( cachefile ); + ATLAS_TRACE_SCOPE("create with cache") + Trans( cache, grid, truncation, option::type("local") ); } CASE( "test_regional_grids with projection" ) { - Log::warning() << "TODO" << std::endl; + auto cachefile = CacheFile("cache-regional.bin"); + auto truncation = 89; + Cache cache; + + Projection projection( util::Config + ( "type", "rotated_lonlat") + ("north_pole", std::vector{ 4., 54.} ) ); + + StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ), projection ); + ATLAS_TRACE_SCOPE("create without cache") + Trans( grid, truncation, option::type("local") ); + + // Note: caching not yet implemented for unstructured and projected grids } } // namespace test From b237a8ce8ac83c18df17679a3cec7b73455cbb15 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Fri, 4 May 2018 18:39:20 +0100 Subject: [PATCH 062/123] Introduce trans::LegendreCacheCreator --- src/atlas/CMakeLists.txt | 4 + src/atlas/trans/LegendreCacheCreator.cc | 148 ++++++++++++++++++ src/atlas/trans/LegendreCacheCreator.h | 108 +++++++++++++ .../localopt3/LegendreCacheCreatorLocal.cc | 74 +++++++++ .../localopt3/LegendreCacheCreatorLocal.h | 42 +++++ src/tests/trans/test_trans_localcache.cc | 27 ++++ 6 files changed, 403 insertions(+) create mode 100644 src/atlas/trans/LegendreCacheCreator.cc create mode 100644 src/atlas/trans/LegendreCacheCreator.h create mode 100644 src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc create mode 100644 
src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt index 6a10f2ff1..341cfa21a 100644 --- a/src/atlas/CMakeLists.txt +++ b/src/atlas/CMakeLists.txt @@ -321,6 +321,8 @@ trans/Trans.h trans/Trans.cc trans/VorDivToUV.h trans/VorDivToUV.cc +trans/LegendreCacheCreator.h +trans/LegendreCacheCreator.cc trans/local_noopt/TransLocal.h trans/local_noopt/TransLocal.cc trans/local_noopt/LegendrePolynomials.h @@ -357,6 +359,8 @@ trans/localopt3/LegendrePolynomialsopt3.h trans/localopt3/LegendrePolynomialsopt3.cc trans/localopt3/VorDivToUVLocalopt3.h trans/localopt3/VorDivToUVLocalopt3.cc +trans/localopt3/LegendreCacheCreatorLocal.h +trans/localopt3/LegendreCacheCreatorLocal.cc ) if( ATLAS_HAVE_TRANS ) diff --git a/src/atlas/trans/LegendreCacheCreator.cc b/src/atlas/trans/LegendreCacheCreator.cc new file mode 100644 index 000000000..9a0932e5c --- /dev/null +++ b/src/atlas/trans/LegendreCacheCreator.cc @@ -0,0 +1,148 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. 
+ */ + +#include "eckit/exception/Exceptions.h" +#include "eckit/thread/AutoLock.h" +#include "eckit/thread/Mutex.h" + +#include "atlas/grid/Grid.h" +#include "atlas/library/defines.h" +#include "atlas/runtime/Log.h" +#include "atlas/trans/LegendreCacheCreator.h" + +// For factory registration only: +#if ATLAS_HAVE_TRANS +#define TRANS_DEFAULT "ifs" +#else +#define TRANS_DEFAULT "local" +#endif +#include "atlas/trans/localopt3/LegendreCacheCreatorLocal.h" + +namespace atlas { +namespace trans { + +LegendreCacheCreatorImpl::~LegendreCacheCreatorImpl() {} + +namespace { + +static eckit::Mutex* local_mutex = 0; +static std::map* m = 0; +static pthread_once_t once = PTHREAD_ONCE_INIT; + +static void init() { + local_mutex = new eckit::Mutex(); + m = new std::map(); +} + +template +void load_builder() { + LegendreCacheCreatorBuilder( "tmp" ); +} + +struct force_link { + force_link() { +#if ATLAS_HAVE_TRANS + //load_builder(); +#endif + load_builder(); + } +}; + +LegendreCacheCreatorFactory& factory( const std::string& name ) { + std::map::const_iterator j = m->find( name ); + if ( j == m->end() ) { + Log::error() << "No LegendreCacheCreatorFactory for [" << name << "]" << std::endl; + Log::error() << "TransFactories are:" << std::endl; + for ( j = m->begin(); j != m->end(); ++j ) + Log::error() << " " << ( *j ).first << std::endl; + throw eckit::SeriousBug( std::string( "No LegendreCacheCreatorFactory called " ) + name ); + } + return *j->second; +} + +} // namespace + +LegendreCacheCreatorFactory::LegendreCacheCreatorFactory( const std::string& name ) : name_( name ) { + pthread_once( &once, init ); + + eckit::AutoLock lock( local_mutex ); + + ASSERT( m->find( name ) == m->end() ); + ( *m )[name] = this; +} + +LegendreCacheCreatorFactory::~LegendreCacheCreatorFactory() { + eckit::AutoLock lock( local_mutex ); + m->erase( name_ ); +} + +bool LegendreCacheCreatorFactory::has( const std::string& name ) { + pthread_once( &once, init ); + + eckit::AutoLock lock( local_mutex 
); + + static force_link static_linking; + + return ( m->find( name ) != m->end() ); +} + +void LegendreCacheCreatorFactory::list( std::ostream& out ) { + pthread_once( &once, init ); + + eckit::AutoLock lock( local_mutex ); + + static force_link static_linking; + + const char* sep = ""; + for ( std::map::const_iterator j = m->begin(); j != m->end(); ++j ) { + out << sep << ( *j ).first; + sep = ", "; + } +} + +LegendreCacheCreator::Implementation* LegendreCacheCreatorFactory::build( const Grid& grid, int truncation, + const eckit::Configuration& config ) { + pthread_once( &once, init ); + + eckit::AutoLock lock( local_mutex ); + + static force_link static_linking; + + std::string name = config.getString( "type", TRANS_DEFAULT ); + + Log::debug() << "Looking for LegendreCacheCreatorFactory [" << name << "]" << std::endl; + + if ( not config.has( "type" ) and not has( name ) ) { + name = std::string( "local" ); + Log::debug() << "Looking for LegendreCacheCreatorFactory [" << name << "]" << std::endl; + } + + return factory( name ).make( grid, truncation, config ); +} + +LegendreCacheCreator::LegendreCacheCreator() {} + +LegendreCacheCreator::LegendreCacheCreator( Implementation* impl ) : impl_( impl ) {} + +LegendreCacheCreator::LegendreCacheCreator( const Grid& grid, int truncation, const eckit::Configuration& config ) : + impl_( LegendreCacheCreatorFactory::build( grid, truncation, config ) ) {} + +LegendreCacheCreator::LegendreCacheCreator( const LegendreCacheCreator& creator ) : impl_( creator.impl_ ) {} + +void LegendreCacheCreator::create( const std::string& path ) const { + impl_->create( path ); +} + +std::string LegendreCacheCreator::uid() const { + return impl_->uid(); +} + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/LegendreCacheCreator.h b/src/atlas/trans/LegendreCacheCreator.h new file mode 100644 index 000000000..97a21250a --- /dev/null +++ b/src/atlas/trans/LegendreCacheCreator.h @@ -0,0 +1,108 @@ +/* + * (C) Copyright 
2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#pragma once + +#include + +#include "eckit/config/Configuration.h" +#include "eckit/memory/Owned.h" +#include "eckit/memory/SharedPtr.h" + +#include "atlas/util/Config.h" + +//----------------------------------------------------------------------------- +// Forward declarations + +namespace atlas { +class Grid; +} // namespace atlas + +//----------------------------------------------------------------------------- + +namespace atlas { +namespace trans { + +//----------------------------------------------------------------------------- + +class LegendreCacheCreatorImpl : public eckit::Owned { +public: + virtual ~LegendreCacheCreatorImpl() = 0; + + virtual std::string uid() const = 0; + + virtual void create( const std::string& path ) const = 0; +}; + +// ------------------------------------------------------------------ + +class LegendreCacheCreator { +public: + using Implementation = LegendreCacheCreatorImpl; + +private: + eckit::SharedPtr impl_; + +public: + LegendreCacheCreator(); + LegendreCacheCreator( Implementation* ); + LegendreCacheCreator( const LegendreCacheCreator& ); + + LegendreCacheCreator( const Grid&, int truncation, const eckit::Configuration& = util::NoConfig() ); + + const Implementation* get() const { return impl_.get(); } + operator bool() const { return impl_.owners(); } + + std::string uid() const; + void create( const std::string& path ) const; +}; + +//---------------------------------------------------------------------------------------------------------------------- + +class LegendreCacheCreatorFactory { +public: + /*! 
+ * \brief build Trans + * \return TransImpl + */ + static LegendreCacheCreatorImpl* build( const Grid&, int truncation, const eckit::Configuration& = util::Config() ); + + /*! + * \brief list all registered trans implementations + */ + static void list( std::ostream& ); + + static bool has( const std::string& name ); + +private: + std::string name_; + virtual LegendreCacheCreatorImpl* make( const Grid& gp, int truncation, const eckit::Configuration& ) { return nullptr; } + +protected: + LegendreCacheCreatorFactory( const std::string& ); + virtual ~LegendreCacheCreatorFactory(); +}; + +//---------------------------------------------------------------------------------------------------------------------- + +template +class LegendreCacheCreatorBuilder : public LegendreCacheCreatorFactory { + virtual LegendreCacheCreatorImpl* make( const Grid& grid, int truncation, const eckit::Configuration& config ) { + return new T( grid, truncation, config ); + } + +public: + LegendreCacheCreatorBuilder( const std::string& name ) : LegendreCacheCreatorFactory( name ) {} +}; + +//---------------------------------------------------------------------------------------------------------------------- + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc new file mode 100644 index 000000000..e67609e8d --- /dev/null +++ b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc @@ -0,0 +1,74 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. 
+ */ + +#include "atlas/trans/localopt3/LegendreCacheCreatorLocal.h" +#include +#include +#include "eckit/utils/MD5.h" +#include "atlas/grid.h" +#include "atlas/option.h" +#include "atlas/trans/Trans.h" + +namespace atlas { +namespace trans { + +namespace { +static LegendreCacheCreatorBuilder builder( "local" ); +} + +namespace { +std::string hash( const Grid& grid ) { + eckit::MD5 h; + if( grid::StructuredGrid( grid ) && not grid.projection() ) { + auto g = grid::StructuredGrid( grid ); + h.add( g.y().data(), g.y().size() * sizeof(double) ); + } else { + grid.hash( h ); + } + return h.digest(); +} +} + +std::string LegendreCacheCreatorLocal::uid() const { + if( unique_identifier_.empty() ) { + std::ostringstream stream; + stream << "local-T" << truncation_ << "-"; + if( grid::GaussianGrid( grid_ ) ) { + // Same cache for any global Gaussian grid + stream << "F" << grid::GaussianGrid( grid_ ).N(); + } else if( grid::RegularLonLatGrid( grid_ ) ) { + // Same cache for any global regular grid + auto g = grid::RegularLonLatGrid( grid_ ); + stream << ( g.shiftedLat() ? 
"S" : "L" ) << "+x" << g.ny(); + // The above '+' is a placeholder for any g.nx() + } else { + // We cannot make more assumptions on reusability for different grids + stream << hash( grid_ ); + } + unique_identifier_ = stream.str(); + } + return unique_identifier_; +} + +LegendreCacheCreatorLocal::~LegendreCacheCreatorLocal() {} + +LegendreCacheCreatorLocal::LegendreCacheCreatorLocal( const Grid& grid, int truncation, const eckit::Configuration& ) : + grid_(grid), + truncation_(truncation) { +} + +void LegendreCacheCreatorLocal::create( const std::string& path ) const { + Trans( grid_, truncation_, option::type("local") | option::write_legendre( path ) ); +} + + + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h new file mode 100644 index 000000000..d10df945a --- /dev/null +++ b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h @@ -0,0 +1,42 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. 
+ */ + +#pragma once + +#include "atlas/trans/LegendreCacheCreator.h" +#include "atlas/grid/Grid.h" + +//----------------------------------------------------------------------------- + +namespace atlas { +namespace trans { + +//----------------------------------------------------------------------------- + +class LegendreCacheCreatorLocal : public trans::LegendreCacheCreatorImpl { +public: + LegendreCacheCreatorLocal( const Grid&, int truncation, const eckit::Configuration& = util::NoConfig() ); + + virtual ~LegendreCacheCreatorLocal(); + + virtual std::string uid() const override; + + virtual void create(const std::string &path) const override; + +private: + Grid grid_; + int truncation_; + mutable std::string unique_identifier_; +}; + +// ------------------------------------------------------------------ + +} // namespace trans +} // namespace atlas diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc index 2a0e442ac..153956766 100644 --- a/src/tests/trans/test_trans_localcache.cc +++ b/src/tests/trans/test_trans_localcache.cc @@ -29,6 +29,7 @@ #include "atlas/parallel/mpi/mpi.h" #include "atlas/runtime/Trace.h" #include "atlas/trans/Trans.h" +#include "atlas/trans/LegendreCacheCreator.h" #include "atlas/trans/local_noopt/FourierTransforms.h" #include "atlas/trans/local_noopt/LegendrePolynomials.h" #include "atlas/trans/local_noopt/LegendreTransforms.h" @@ -70,6 +71,7 @@ struct AtlasTransEnvironment : public AtlasTestEnvironment { using trans::Trans; using trans::LegendreCache; +using trans::LegendreCacheCreator; using trans::Cache; using grid::StructuredGrid; using grid::GaussianGrid; @@ -245,6 +247,31 @@ CASE( "test_regional_grids with projection" ) { // Note: caching not yet implemented for unstructured and projected grids } + +CASE( "test_regional_grids nested_in_global NEW" ) { + + auto truncation = 89; + StructuredGrid grid_global( + LinearSpacing( { 0., 360.}, 360, false ), + LinearSpacing( {-90., 90.}, 181, true 
) + ); + + LegendreCacheCreator legendre_cache_creator( grid_global, truncation, option::type("local") ); + auto cachefile = CacheFile( legendre_cache_creator.uid() ); + ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) ) + legendre_cache_creator.create( cachefile ); + + Cache cache; + ASSERT( grid_global.domain().global() ); + StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) ); + ATLAS_TRACE_SCOPE("create without cache") + Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) ); + ATLAS_TRACE_SCOPE("read cache") + cache = LegendreCache( cachefile ); + ATLAS_TRACE_SCOPE("create with cache") + Trans( cache, grid, truncation, option::type("local") | option::global_grid( grid_global ) ); +} + } // namespace test } // namespace atlas From 96f96775f1f8c152a2242012ec788148f629813b Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Tue, 8 May 2018 15:03:54 +0100 Subject: [PATCH 063/123] LegendreCacheCreator also for TransIFS completed --- src/atlas/CMakeLists.txt | 2 + src/atlas/trans/LegendreCacheCreator.cc | 11 +- src/atlas/trans/LegendreCacheCreator.h | 5 + .../trans/ifs/LegendreCacheCreatorIFS.cc | 111 ++++++++++++++++++ src/atlas/trans/ifs/LegendreCacheCreatorIFS.h | 46 ++++++++ .../localopt3/LegendreCacheCreatorLocal.cc | 34 +++++- .../localopt3/LegendreCacheCreatorLocal.h | 8 +- src/tests/trans/test_trans_localcache.cc | 79 +++++++------ 8 files changed, 249 insertions(+), 47 deletions(-) create mode 100644 src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc create mode 100644 src/atlas/trans/ifs/LegendreCacheCreatorIFS.h diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt index 341cfa21a..119b81c24 100644 --- a/src/atlas/CMakeLists.txt +++ b/src/atlas/CMakeLists.txt @@ -365,6 +365,8 @@ trans/localopt3/LegendreCacheCreatorLocal.cc ) if( ATLAS_HAVE_TRANS ) list( APPEND atlas_numerics_srcs + trans/ifs/LegendreCacheCreatorIFS.h + trans/ifs/LegendreCacheCreatorIFS.cc 
trans/ifs/TransIFS.h trans/ifs/TransIFS.cc trans/ifs/TransIFSNodeColumns.h diff --git a/src/atlas/trans/LegendreCacheCreator.cc b/src/atlas/trans/LegendreCacheCreator.cc index 9a0932e5c..66e842323 100644 --- a/src/atlas/trans/LegendreCacheCreator.cc +++ b/src/atlas/trans/LegendreCacheCreator.cc @@ -20,6 +20,7 @@ // For factory registration only: #if ATLAS_HAVE_TRANS #define TRANS_DEFAULT "ifs" +#include "atlas/trans/ifs/LegendreCacheCreatorIFS.h" #else #define TRANS_DEFAULT "local" #endif @@ -49,7 +50,7 @@ void load_builder() { struct force_link { force_link() { #if ATLAS_HAVE_TRANS - //load_builder(); + load_builder(); #endif load_builder(); } @@ -136,13 +137,17 @@ LegendreCacheCreator::LegendreCacheCreator( const Grid& grid, int truncation, co LegendreCacheCreator::LegendreCacheCreator( const LegendreCacheCreator& creator ) : impl_( creator.impl_ ) {} -void LegendreCacheCreator::create( const std::string& path ) const { - impl_->create( path ); +bool LegendreCacheCreator::supported() const { + return impl_->supported(); } std::string LegendreCacheCreator::uid() const { return impl_->uid(); } +void LegendreCacheCreator::create( const std::string& path ) const { + impl_->create( path ); +} + } // namespace trans } // namespace atlas diff --git a/src/atlas/trans/LegendreCacheCreator.h b/src/atlas/trans/LegendreCacheCreator.h index 97a21250a..e552c5c80 100644 --- a/src/atlas/trans/LegendreCacheCreator.h +++ b/src/atlas/trans/LegendreCacheCreator.h @@ -36,6 +36,8 @@ class LegendreCacheCreatorImpl : public eckit::Owned { public: virtual ~LegendreCacheCreatorImpl() = 0; + virtual bool supported() const = 0; + virtual std::string uid() const = 0; virtual void create( const std::string& path ) const = 0; @@ -60,7 +62,10 @@ class LegendreCacheCreator { const Implementation* get() const { return impl_.get(); } operator bool() const { return impl_.owners(); } + bool supported() const; + std::string uid() const; + void create( const std::string& path ) const; }; diff --git 
a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc new file mode 100644 index 000000000..7d7bffa47 --- /dev/null +++ b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc @@ -0,0 +1,111 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "atlas/trans/ifs/LegendreCacheCreatorIFS.h" +#include +#include +#include "eckit/utils/MD5.h" +#include "atlas/grid.h" +#include "atlas/option.h" +#include "atlas/trans/Trans.h" + +namespace atlas { +namespace trans { + +namespace { +static LegendreCacheCreatorBuilder builder( "ifs" ); +} + +namespace { + +std::string truncate( const std::string& str ) { + const int trunc = std::min(10ul,str.size()); + return str.substr( 0, trunc ); +} + +std::string hash( const Grid& grid ) { + eckit::MD5 h; + if( grid::StructuredGrid( grid ) && not grid.projection() ) { + auto g = grid::StructuredGrid( grid ); + h.add( g.y().data(), g.y().size() * sizeof(double) ); + } else { + grid.hash( h ); + } + return truncate( h.digest() ); +} + +std::string hash( const eckit::Configuration& config ) { + eckit::MD5 h; + + // Add options and other unique keys + h << "flt" << config.getBool( "flt", false ); + + return truncate( h.digest() ); +} + +} + +std::string LegendreCacheCreatorIFS::uid() const { + if( unique_identifier_.empty() ) { + std::ostringstream stream; + stream << "ifs-T" << truncation_ << "-"; + if( grid::GaussianGrid( grid_ ) ) { + if( grid::RegularGaussianGrid( grid_ ) ) { + stream << "RegularGaussianN" << grid::GaussianGrid( grid_ ).N(); + } else { + stream << "ReducedGaussianN" << grid::GaussianGrid( grid_ ).N() << "-PL"; + stream << hash( 
grid_ ); + } + } else if( grid::RegularLonLatGrid( grid_ ) ) { + auto g = grid::RegularLonLatGrid( grid_ ); + if( g.standard() || g.shifted() ) { + stream << ( g.standard() ? "L" : "S" ) << g.nx() << "x" << g.ny(); + } else { + // We cannot make more assumptions on reusability for different grids + stream << "grid-" << hash( grid_ ); + } + } else { + // We cannot make more assumptions on reusability for different grids + stream << "grid-" << hash( grid_ ); + } + stream << "-OPT" << hash( config_ ); + unique_identifier_ = stream.str(); + } + return unique_identifier_; +} + +LegendreCacheCreatorIFS::~LegendreCacheCreatorIFS() {} + +bool LegendreCacheCreatorIFS::supported() const { + if( grid::GaussianGrid( grid_ ) ) { + return true; + } else if( grid::RegularLonLatGrid( grid_ ) ) { + auto g = grid::RegularLonLatGrid( grid_ ); + if( g.standard() || g.shifted() ) { + return true; + } + } + return false; +} + +LegendreCacheCreatorIFS::LegendreCacheCreatorIFS( const Grid& grid, int truncation, const eckit::Configuration& config ) : + grid_(grid), + truncation_(truncation), + config_(config) { +} + +void LegendreCacheCreatorIFS::create( const std::string& path ) const { + Trans( grid_, truncation_, config_ | option::type("ifs") | option::write_legendre( path ) ); +} + + + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h new file mode 100644 index 000000000..bc76cf00d --- /dev/null +++ b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h @@ -0,0 +1,46 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. 
+ */ + +#pragma once + +#include "atlas/trans/LegendreCacheCreator.h" +#include "atlas/grid/Grid.h" +#include "atlas/util/Config.h" + +//----------------------------------------------------------------------------- + +namespace atlas { +namespace trans { + +//----------------------------------------------------------------------------- + +class LegendreCacheCreatorIFS : public trans::LegendreCacheCreatorImpl { +public: + LegendreCacheCreatorIFS( const Grid&, int truncation, const eckit::Configuration& = util::NoConfig() ); + + virtual ~LegendreCacheCreatorIFS(); + + virtual bool supported() const override; + + virtual std::string uid() const override; + + virtual void create(const std::string &path) const override; + +private: + const Grid grid_; + const int truncation_; + const util::Config config_; + mutable std::string unique_identifier_; +}; + +// ------------------------------------------------------------------ + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc index e67609e8d..14bf740a7 100644 --- a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc +++ b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc @@ -24,6 +24,12 @@ static LegendreCacheCreatorBuilder builder( "local" ) } namespace { + +std::string truncate( const std::string& str ) { + const int trunc = std::min(10ul,str.size()); + return str.substr( 0, trunc ); +} + std::string hash( const Grid& grid ) { eckit::MD5 h; if( grid::StructuredGrid( grid ) && not grid.projection() ) { @@ -32,8 +38,18 @@ std::string hash( const Grid& grid ) { } else { grid.hash( h ); } - return h.digest(); + return truncate( h.digest() ); } + +std::string hash( const eckit::Configuration& config ) { + eckit::MD5 h; + + // Add options and other unique keys + h << "flt" << config.getBool( "flt", false ); + + return truncate( h.digest() ); +} + } std::string LegendreCacheCreatorLocal::uid() const { 
@@ -42,7 +58,7 @@ std::string LegendreCacheCreatorLocal::uid() const { stream << "local-T" << truncation_ << "-"; if( grid::GaussianGrid( grid_ ) ) { // Same cache for any global Gaussian grid - stream << "F" << grid::GaussianGrid( grid_ ).N(); + stream << "GaussianN" << grid::GaussianGrid( grid_ ).N(); } else if( grid::RegularLonLatGrid( grid_ ) ) { // Same cache for any global regular grid auto g = grid::RegularLonLatGrid( grid_ ); @@ -50,8 +66,9 @@ std::string LegendreCacheCreatorLocal::uid() const { // The above '+' is a placeholder for any g.nx() } else { // We cannot make more assumptions on reusability for different grids - stream << hash( grid_ ); + stream << "grid-" << hash( grid_ ); } + stream << "-OPT" << hash( config_ ); unique_identifier_ = stream.str(); } return unique_identifier_; @@ -59,13 +76,18 @@ std::string LegendreCacheCreatorLocal::uid() const { LegendreCacheCreatorLocal::~LegendreCacheCreatorLocal() {} -LegendreCacheCreatorLocal::LegendreCacheCreatorLocal( const Grid& grid, int truncation, const eckit::Configuration& ) : +LegendreCacheCreatorLocal::LegendreCacheCreatorLocal( const Grid& grid, int truncation, const eckit::Configuration& config ) : grid_(grid), - truncation_(truncation) { + truncation_(truncation), + config_(config) { +} + +bool LegendreCacheCreatorLocal::supported() const { + return true; } void LegendreCacheCreatorLocal::create( const std::string& path ) const { - Trans( grid_, truncation_, option::type("local") | option::write_legendre( path ) ); + Trans( grid_, truncation_, config_ | option::type("local") | option::write_legendre( path ) ); } diff --git a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h index d10df945a..8ef10295e 100644 --- a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h +++ b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h @@ -12,6 +12,7 @@ #include "atlas/trans/LegendreCacheCreator.h" #include "atlas/grid/Grid.h" +#include 
"atlas/util/Config.h" //----------------------------------------------------------------------------- @@ -26,13 +27,16 @@ class LegendreCacheCreatorLocal : public trans::LegendreCacheCreatorImpl { virtual ~LegendreCacheCreatorLocal(); + virtual bool supported() const override; + virtual std::string uid() const override; virtual void create(const std::string &path) const override; private: - Grid grid_; - int truncation_; + const Grid grid_; + const int truncation_; + const util::Config config_; mutable std::string unique_identifier_; }; diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc index 153956766..8dd7aaafb 100644 --- a/src/tests/trans/test_trans_localcache.cc +++ b/src/tests/trans/test_trans_localcache.cc @@ -118,12 +118,13 @@ CASE( "test_global_grids" ) { std::make_pair(Slat(n),t), }; - auto F_cachefile = CacheFile("leg_"+F(n)+"-T"+std::to_string(t)+".bin"); - Trans( Grid(F(n)), t, option::type("local") | option::write_legendre( F_cachefile ) ); + LegendreCacheCreator F_cache_creator( Grid(F(n)), t, option::type("local") ); + EXPECT( F_cache_creator.supported() ); + auto F_cachefile = CacheFile("leg_"+F_cache_creator.uid()+".bin"); + F_cache_creator.create( F_cachefile ); Cache F_cache = LegendreCache( F_cachefile ); auto F_cache_hash = hash(F_cache); - Cache cache; for( auto _case : cases ) { auto gridname = _case.first; @@ -131,19 +132,24 @@ CASE( "test_global_grids" ) { Log::info() << "Case "+gridname+" T"+std::to_string(truncation) << std::endl; ATLAS_TRACE("Case "+gridname+" T"+std::to_string(truncation)); Grid grid(gridname); - auto cachefile = CacheFile("leg_"+gridname+"-T"+std::to_string(truncation)+".bin"); + + LegendreCacheCreator cache_creator( grid, truncation, option::type("local") ); + EXPECT( cache_creator.supported() ); + auto cachefile = CacheFile("leg_"+cache_creator.uid()+".bin"); + cache_creator.create( cachefile ); + if( GaussianGrid(grid) ) { + EXPECT( hash(cachefile) == F_cache_hash ); + } 
+ ATLAS_TRACE_SCOPE("create without cache") Trans( grid, truncation, option::type("local") ); - ATLAS_TRACE_SCOPE("create without cache and write") - Trans( grid, truncation, option::type("local") | option::write_legendre( cachefile ) ); + + Cache cache; ATLAS_TRACE_SCOPE("read cache") cache = LegendreCache( cachefile ); ATLAS_TRACE_SCOPE("create with cache") Trans( cache, grid, truncation, option::type("local") ); - if( GaussianGrid(grid) ) { - ASSERT( hash(cache) == F_cache_hash ); - } } } } @@ -174,8 +180,11 @@ CASE( "test_global_grids_with_subdomain" ) { Grid global_grid( gridname ); - auto global_cachefile = CacheFile( "leg_"+gridname+"-T"+std::to_string(truncation)+".bin" ); - Trans( Grid(gridname), truncation, option::type("local") | option::write_legendre( global_cachefile ) ); + LegendreCacheCreator global_cache_creator( Grid(gridname), truncation, option::type("local") ); + EXPECT( global_cache_creator.supported() ); + auto global_cachefile = CacheFile( "leg_" + global_cache_creator.uid() + ".bin" ); + ATLAS_TRACE_SCOPE( "Creating cache " + std::string( global_cachefile ) ) + global_cache_creator.create( global_cachefile ); Cache global_cache; ATLAS_TRACE_SCOPE("read cache") @@ -184,13 +193,8 @@ CASE( "test_global_grids_with_subdomain" ) { for( auto domain : domains ) { Grid grid( gridname, domain ); - auto cachefile = CacheFile("leg_"+gridname+"-T"+std::to_string(truncation)+"-domain.bin"); - ATLAS_TRACE_SCOPE("create without cache and write") - Trans( Grid(gridname), truncation, option::type("local") | option::global_grid(global_grid) | option::write_legendre( cachefile ) ); - LegendreCache new_cache = LegendreCache(cachefile); - ASSERT( hash(new_cache) == global_hash ); ATLAS_TRACE_SCOPE("create with cache") - Trans( global_cache, Grid(gridname), truncation, option::type("local") ); + Trans( global_cache, grid, truncation, option::type("local") | option::global_grid( global_grid) ); } } } @@ -201,16 +205,25 @@ CASE( "test_regional_grids 
nested_in_global" ) { Cache cache; StructuredGrid grid_global( LinearSpacing( { 0., 360.}, 360, false ), - LinearSpacing( {-90., 90.}, 181, true ) + LinearSpacing( { 90., -90.}, 181, true ) ); - ASSERT( grid_global.domain().global() ); + EXPECT( grid_global.domain().global() ); + + LegendreCacheCreator global_cache_creator( grid_global, truncation, option::type("local") ); + EXPECT( global_cache_creator.supported() ); + auto global_cachefile = CacheFile( "leg_" + global_cache_creator.uid() + ".bin" ); + ATLAS_TRACE_SCOPE( "Creating cache "+std::string(global_cachefile) ) + global_cache_creator.create( global_cachefile ); + + + StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) ); + + ATLAS_TRACE_SCOPE("create without cache") Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) ); - ATLAS_TRACE_SCOPE("create without cache and write") - Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) | option::write_legendre( cachefile ) ); ATLAS_TRACE_SCOPE("read cache") - cache = LegendreCache( cachefile ); + cache = LegendreCache( global_cachefile ); ATLAS_TRACE_SCOPE("create with cache") Trans( cache, grid, truncation, option::type("local") | option::global_grid( grid_global ) ); } @@ -221,10 +234,14 @@ CASE( "test_regional_grids not nested" ) { Cache cache; StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) ); + + LegendreCacheCreator cache_creator( grid, truncation, option::type("local") ); + EXPECT( cache_creator.supported() ); + ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) ) + cache_creator.create( cachefile ); + ATLAS_TRACE_SCOPE("create without cache") Trans( grid, truncation, option::type("local") ); - ATLAS_TRACE_SCOPE("create without cache and write") - Trans( grid, truncation, option::type("local") | option::write_legendre( cachefile ) ); ATLAS_TRACE_SCOPE("read cache") cache = LegendreCache( cachefile ); ATLAS_TRACE_SCOPE("create 
with cache") @@ -248,28 +265,18 @@ CASE( "test_regional_grids with projection" ) { } -CASE( "test_regional_grids nested_in_global NEW" ) { +CASE( "test cache creator" ) { auto truncation = 89; StructuredGrid grid_global( LinearSpacing( { 0., 360.}, 360, false ), - LinearSpacing( {-90., 90.}, 181, true ) + LinearSpacing( { 90., -90.}, 181, true ) ); LegendreCacheCreator legendre_cache_creator( grid_global, truncation, option::type("local") ); auto cachefile = CacheFile( legendre_cache_creator.uid() ); ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) ) legendre_cache_creator.create( cachefile ); - - Cache cache; - ASSERT( grid_global.domain().global() ); - StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) ); - ATLAS_TRACE_SCOPE("create without cache") - Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) ); - ATLAS_TRACE_SCOPE("read cache") - cache = LegendreCache( cachefile ); - ATLAS_TRACE_SCOPE("create with cache") - Trans( cache, grid, truncation, option::type("local") | option::global_grid( grid_global ) ); } } // namespace test From 75e1061c27bfe6d3a1fd4a9fa839d3222fbc9ef9 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Tue, 8 May 2018 16:09:13 +0100 Subject: [PATCH 064/123] Grid cropping made easier --- src/atlas/grid/Grid.cc | 9 +++++++++ src/atlas/grid/Grid.h | 2 ++ src/atlas/grid/detail/grid/Grid.cc | 10 ++++++++++ src/atlas/grid/detail/grid/Grid.h | 2 ++ src/atlas/grid/detail/grid/Structured.h | 1 + src/tests/grid/test_grids.cc | 11 +++++++++++ 6 files changed, 35 insertions(+) diff --git a/src/atlas/grid/Grid.cc b/src/atlas/grid/Grid.cc index 889d3d11c..1c6a03b22 100644 --- a/src/atlas/grid/Grid.cc +++ b/src/atlas/grid/Grid.cc @@ -36,6 +36,11 @@ Grid::Grid( const std::string& shortname, const Domain& domain ) { grid_ = Grid::Implementation::create( shortname, Config( "domain", domain.spec() ) ); } +Grid::Grid( const Grid& grid, const Grid::Domain& domain ) { + ASSERT( grid 
); + grid_ = Grid::Implementation::create( *grid.get(), domain ); +} + Grid::Grid( const Config& p ) { grid_ = Grid::Implementation::create( p ); } @@ -89,6 +94,10 @@ StructuredGrid::StructuredGrid( const XSpace& xspace, const YSpace& yspace, cons Grid( new detail::grid::Structured( xspace, yspace, projection, domain ) ), grid_( structured_grid( get() ) ) {} +StructuredGrid::StructuredGrid( const Grid& grid , const Grid::Domain& domain ) : + Grid( grid, domain ), + grid_( structured_grid( get() ) ) {} + ReducedGaussianGrid::ReducedGaussianGrid( const std::vector& nx, const Domain& domain ) : ReducedGaussianGrid::grid_t( detail::grid::reduced_gaussian( nx, domain ) ) {} diff --git a/src/atlas/grid/Grid.h b/src/atlas/grid/Grid.h index 9ac565825..e8d497878 100644 --- a/src/atlas/grid/Grid.h +++ b/src/atlas/grid/Grid.h @@ -80,6 +80,7 @@ class Grid { Grid( const Grid& ); Grid( const Implementation* ); Grid( const std::string& name, const Domain& = Domain() ); + Grid( const Grid&, const Domain& ); Grid( const Config& ); operator bool() const { return grid_; } @@ -186,6 +187,7 @@ class StructuredGrid : public Grid { StructuredGrid( const std::string& name, const Domain& = Domain() ); StructuredGrid( const Config& ); StructuredGrid( const XSpace&, const YSpace&, const Projection& = Projection(), const Domain& = Domain() ); + StructuredGrid( const Grid&, const Domain& ); operator bool() const { return valid(); } diff --git a/src/atlas/grid/detail/grid/Grid.cc b/src/atlas/grid/detail/grid/Grid.cc index 1903335b9..68e937678 100644 --- a/src/atlas/grid/detail/grid/Grid.cc +++ b/src/atlas/grid/detail/grid/Grid.cc @@ -76,6 +76,16 @@ const Grid* Grid::create( const std::string& name, const Grid::Config& config ) // return GridBuilder::createNamed(name); } +const Grid* Grid::create( const Grid& grid, const Domain& domain) { + if( grid.type() == "structured" ) { + const Structured& g = dynamic_cast(grid); + return new Structured( g.name(), g.xspace(), g.yspace(), g.projection(), 
domain ); + } else { + NOTIMP; + } +} + + Grid::Grid() { checkSizeOfPoint(); } diff --git a/src/atlas/grid/detail/grid/Grid.h b/src/atlas/grid/detail/grid/Grid.h index b93ffba73..3b2011b28 100644 --- a/src/atlas/grid/detail/grid/Grid.h +++ b/src/atlas/grid/detail/grid/Grid.h @@ -66,6 +66,8 @@ class Grid : public eckit::Owned { static const Grid* create( const std::string& name, const Config& = Config() ); + static const Grid* create( const Grid&, const Domain& ); + /// ctor (default) Grid(); diff --git a/src/atlas/grid/detail/grid/Structured.h b/src/atlas/grid/detail/grid/Structured.h index 20ca91e12..13be697a4 100644 --- a/src/atlas/grid/detail/grid/Structured.h +++ b/src/atlas/grid/detail/grid/Structured.h @@ -289,6 +289,7 @@ class Structured : public Grid { public: Structured( const std::string&, XSpace, YSpace, Projection, Domain ); Structured( XSpace, YSpace, Projection, Domain ); + Structured( const Structured&, Domain ); virtual ~Structured(); diff --git a/src/tests/grid/test_grids.cc b/src/tests/grid/test_grids.cc index 37f63e750..64f87095d 100644 --- a/src/tests/grid/test_grids.cc +++ b/src/tests/grid/test_grids.cc @@ -137,6 +137,17 @@ CASE( "test_reducedgaussian" ) { EXPECT( N640.size() == custom.size() ); } +CASE( "test_cropping previous case" ) { + StructuredGrid grid( "N32" ); + EXPECT( grid.ny() == 64 ); + EXPECT( grid.size() == 6114 ); + + StructuredGrid cropped( grid, RectangularDomain( {-27, 45}, {33, 73} ) ); + EXPECT( cropped.ny() == 14 ); + EXPECT( cropped.size() == 267 ); +} + + //----------------------------------------------------------------------------- } // namespace test From 1d8024bf85ed21684040ca6e009b5ebaeb80f366 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Tue, 8 May 2018 15:29:55 +0100 Subject: [PATCH 065/123] New Trans API for passing Grid and Domain --- src/atlas/option/TransOptions.cc | 4 -- src/atlas/option/TransOptions.h | 7 --- src/atlas/trans/Trans.cc | 20 ++++++- src/atlas/trans/Trans.h | 35 +++++------- 
src/atlas/trans/ifs/TransIFS.cc | 12 ++++ src/atlas/trans/ifs/TransIFS.h | 6 +- src/atlas/trans/local_noopt/TransLocal.cc | 6 ++ src/atlas/trans/local_noopt/TransLocal.h | 6 +- src/atlas/trans/localopt/TransLocalopt.cc | 6 ++ src/atlas/trans/localopt/TransLocalopt.h | 6 +- src/atlas/trans/localopt2/TransLocalopt2.cc | 6 ++ src/atlas/trans/localopt2/TransLocalopt2.h | 7 ++- src/atlas/trans/localopt3/TransLocalopt3.cc | 61 ++++++++------------- src/atlas/trans/localopt3/TransLocalopt3.h | 7 ++- src/tests/trans/test_trans_localcache.cc | 8 +-- src/tests/trans/test_transgeneral.cc | 11 ++-- 16 files changed, 117 insertions(+), 91 deletions(-) diff --git a/src/atlas/option/TransOptions.cc b/src/atlas/option/TransOptions.cc index 82131506f..a80582a78 100644 --- a/src/atlas/option/TransOptions.cc +++ b/src/atlas/option/TransOptions.cc @@ -49,10 +49,6 @@ write_legendre::write_legendre( const eckit::PathName& filepath ) { set( "write_legendre", filepath ); } -global_grid::global_grid( const Grid& grid ) { - set( "global_grid", grid.spec() ); -} - read_legendre::read_legendre( const eckit::PathName& filepath ) { set( "read_legendre", filepath ); } diff --git a/src/atlas/option/TransOptions.h b/src/atlas/option/TransOptions.h index a43e836c8..d9c11593d 100644 --- a/src/atlas/option/TransOptions.h +++ b/src/atlas/option/TransOptions.h @@ -87,13 +87,6 @@ class write_legendre : public util::Config { // ---------------------------------------------------------------------------- -class global_grid : public util::Config { -public: - global_grid( const Grid& ); -}; - -// ---------------------------------------------------------------------------- - class read_legendre : public util::Config { public: read_legendre( const eckit::PathName& ); diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc index af6b325eb..2bf1a0096 100644 --- a/src/atlas/trans/Trans.cc +++ b/src/atlas/trans/Trans.cc @@ -153,8 +153,17 @@ Trans::Implementation* TransFactory::build( const Grid& grid, 
int truncation, co return build( Cache(), grid, truncation, config ); } +Trans::Implementation* TransFactory::build( const Grid& grid, const Domain& domain, int truncation, const eckit::Configuration& config ) { + return build( Cache(), grid, domain, truncation, config ); +} + Trans::Implementation* TransFactory::build( const Cache& cache, const Grid& grid, int truncation, const eckit::Configuration& config ) { + return build( cache, grid, grid.domain(), truncation, config ); +} + +Trans::Implementation* TransFactory::build( const Cache& cache, const Grid& grid, const Domain& domain, int truncation, + const eckit::Configuration& config ) { pthread_once( &once, init ); eckit::AutoLock lock( local_mutex ); @@ -170,9 +179,12 @@ Trans::Implementation* TransFactory::build( const Cache& cache, const Grid& grid Log::debug() << "Looking for TransFactory [" << name << "]" << std::endl; } - return factory( name ).make( cache, grid, truncation, config ); + return factory( name ).make( cache, grid, domain, truncation, config ); } + + + Trans::Trans() {} Trans::Trans( Implementation* impl ) : impl_( impl ) {} @@ -183,6 +195,9 @@ Trans::Trans( const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Con Trans::Trans( const Grid& grid, int truncation, const eckit::Configuration& config ) : impl_( TransFactory::build( grid, truncation, config ) ) {} +Trans::Trans( const Grid& grid, const Domain& domain, int truncation, const eckit::Configuration& config ) : + impl_( TransFactory::build( grid, domain, truncation, config ) ) {} + Trans::Trans( const Cache& cache, const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& config ) : impl_( TransFactory::build( cache, gp, sp, config ) ) {} @@ -190,6 +205,9 @@ Trans::Trans( const Cache& cache, const FunctionSpace& gp, const FunctionSpace& Trans::Trans( const Cache& cache, const Grid& grid, int truncation, const eckit::Configuration& config ) : impl_( TransFactory::build( cache, grid, truncation, config ) ) {} 
+Trans::Trans( const Cache& cache, const Grid& grid, const Domain& domain, int truncation, const eckit::Configuration& config ) : + impl_( TransFactory::build( cache, grid, domain, truncation, config ) ) {} + Trans::Trans( const Trans& trans ) : impl_( trans.impl_ ) {} int Trans::truncation() const { diff --git a/src/atlas/trans/Trans.h b/src/atlas/trans/Trans.h index a78cc1e49..d3f690f8e 100644 --- a/src/atlas/trans/Trans.h +++ b/src/atlas/trans/Trans.h @@ -30,6 +30,7 @@ class Field; class FieldSet; class FunctionSpace; class Grid; +class Domain; } // namespace atlas //----------------------------------------------------------------------------- @@ -223,10 +224,15 @@ class TransFactory { const eckit::Configuration& = util::Config() ); static TransImpl* build( const Grid&, int truncation, const eckit::Configuration& = util::Config() ); + static TransImpl* build( const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::Config() ); + static TransImpl* build( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& = util::Config() ); + static TransImpl* build( const Cache&, const Grid&, int truncation, const eckit::Configuration& = util::Config() ); + static TransImpl* build( const Cache&, const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::Config() ); + /*! 
* \brief list all registered trans implementations */ @@ -236,15 +242,10 @@ class TransFactory { private: std::string name_; - virtual TransImpl* make( const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& ) { + virtual TransImpl* make( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& ) { return nullptr; } - virtual TransImpl* make( const Grid& gp, int truncation, const eckit::Configuration& ) { return nullptr; } - virtual TransImpl* make( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp, - const eckit::Configuration& ) { - return nullptr; - } - virtual TransImpl* make( const Cache&, const Grid& gp, int truncation, const eckit::Configuration& ) { + virtual TransImpl* make( const Cache&, const Grid& gp, const Domain&, int truncation, const eckit::Configuration& ) { return nullptr; } @@ -257,17 +258,11 @@ class TransFactory { template class TransBuilderFunctionSpace : public TransFactory { - virtual TransImpl* make( const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& config ) { - return new T( gp, sp, config ); - } virtual TransImpl* make( const Cache& cache, const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& config ) { return new T( cache, gp, sp, config ); } - virtual TransImpl* make( const Grid&, int, const eckit::Configuration& ) { - throw eckit::SeriousBug( "This function should not be called", Here() ); - } - virtual TransImpl* make( const Cache&, const Grid&, int, const eckit::Configuration& ) { + virtual TransImpl* make( const Cache&, const Grid&, const Domain&, int, const eckit::Configuration& ) { throw eckit::SeriousBug( "This function should not be called", Here() ); } @@ -277,15 +272,9 @@ class TransBuilderFunctionSpace : public TransFactory { template class TransBuilderGrid : public TransFactory { - virtual TransImpl* make( const Grid& grid, int truncation, const eckit::Configuration& config ) { - return new T( grid, 
truncation, config ); - } - virtual TransImpl* make( const Cache& cache, const Grid& grid, int truncation, + virtual TransImpl* make( const Cache& cache, const Grid& grid, const Domain& domain, int truncation, const eckit::Configuration& config ) { - return new T( cache, grid, truncation, config ); - } - virtual TransImpl* make( const FunctionSpace&, const FunctionSpace&, const eckit::Configuration& ) { - throw eckit::SeriousBug( "This function should not be called", Here() ); + return new T( cache, grid, domain, truncation, config ); } virtual TransImpl* make( const Cache&, const FunctionSpace&, const FunctionSpace&, const eckit::Configuration& ) { throw eckit::SeriousBug( "This function should not be called", Here() ); @@ -311,10 +300,12 @@ class Trans { Trans( const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& = util::NoConfig() ); Trans( const Grid&, int truncation, const eckit::Configuration& = util::NoConfig() ); + Trans( const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::NoConfig() ); Trans( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& = util::NoConfig() ); Trans( const Cache&, const Grid&, int truncation, const eckit::Configuration& = util::NoConfig() ); + Trans( const Cache&, const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::NoConfig() ); void hash( eckit::Hash& ) const; const Implementation* get() const { return impl_.get(); } diff --git a/src/atlas/trans/ifs/TransIFS.cc b/src/atlas/trans/ifs/TransIFS.cc index 0d80fcdf6..7532f237d 100644 --- a/src/atlas/trans/ifs/TransIFS.cc +++ b/src/atlas/trans/ifs/TransIFS.cc @@ -637,6 +637,18 @@ TransIFS::TransIFS( const Grid& grid, const long truncation, const eckit::Config TransIFS::TransIFS( const Grid& grid, const eckit::Configuration& config ) : TransIFS( grid, /*grid-only*/ -1, config ) {} + + +TransIFS::TransIFS( const Grid& grid, const Domain& domain, const long truncation, const 
eckit::Configuration& config ) : + TransIFS( Cache(), grid, truncation, config ) { + ASSERT( domain.global() ); +} + +TransIFS::TransIFS( const Cache& cache, const Grid& grid, const Domain& domain, const long truncation, const eckit::Configuration& config ) : + TransIFS( cache, grid, truncation, config ) { + ASSERT( domain.global() ); +} + TransIFS::~TransIFS() {} void TransIFS::ctor( const Grid& grid, long truncation, const eckit::Configuration& config ) { diff --git a/src/atlas/trans/ifs/TransIFS.h b/src/atlas/trans/ifs/TransIFS.h index 8d4835180..ee6bf8909 100644 --- a/src/atlas/trans/ifs/TransIFS.h +++ b/src/atlas/trans/ifs/TransIFS.h @@ -75,8 +75,10 @@ class TransIFS : public trans::TransImpl { typedef struct ::Trans_t Trans_t; public: - TransIFS( const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() ); - TransIFS( const Cache&, const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransIFS( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransIFS( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransIFS( const Cache&, const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransIFS( const Cache&, const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); virtual ~TransIFS(); operator ::Trans_t*() const { return trans(); } diff --git a/src/atlas/trans/local_noopt/TransLocal.cc b/src/atlas/trans/local_noopt/TransLocal.cc index 33947d15f..77d3af78f 100644 --- a/src/atlas/trans/local_noopt/TransLocal.cc +++ b/src/atlas/trans/local_noopt/TransLocal.cc @@ -87,6 +87,12 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const long truncat TransLocal::TransLocal( const Grid& grid, const long truncation, const eckit::Configuration& config ) : TransLocal( Cache(), grid, truncation, config ) {} +TransLocal::TransLocal( const 
Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) : + TransLocal( Cache(), grid, truncation, config ) {} + +TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) : + TransLocal( cache, grid, truncation, config ) {} + // -------------------------------------------------------------------------------------------------------------------- TransLocal::~TransLocal() {} diff --git a/src/atlas/trans/local_noopt/TransLocal.h b/src/atlas/trans/local_noopt/TransLocal.h index 29f8984ff..9ad2cb5db 100644 --- a/src/atlas/trans/local_noopt/TransLocal.h +++ b/src/atlas/trans/local_noopt/TransLocal.h @@ -44,8 +44,10 @@ namespace trans { /// the grid is global. There are no plans to support this at the moment. class TransLocal : public trans::TransImpl { public: - TransLocal( const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() ); - TransLocal( const Cache&, const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransLocal( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransLocal( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransLocal( const Cache&, const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransLocal( const Cache&, const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); virtual ~TransLocal(); diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc index 29ac535c1..9b139fc4f 100644 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ b/src/atlas/trans/localopt/TransLocalopt.cc @@ -268,6 +268,12 @@ TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long t TransLocalopt::TransLocalopt( const Grid& grid, const long truncation, const 
eckit::Configuration& config ) : TransLocalopt( Cache(), grid, truncation, config ) {} +TransLocalopt::TransLocalopt( const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) : + TransLocalopt( Cache(), grid, truncation, config ) {} + +TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) : + TransLocalopt( cache, grid, truncation, config ) {} + // -------------------------------------------------------------------------------------------------------------------- TransLocalopt::~TransLocalopt() { diff --git a/src/atlas/trans/localopt/TransLocalopt.h b/src/atlas/trans/localopt/TransLocalopt.h index d81445f5b..19bbab862 100644 --- a/src/atlas/trans/localopt/TransLocalopt.h +++ b/src/atlas/trans/localopt/TransLocalopt.h @@ -48,8 +48,10 @@ namespace trans { /// the grid is global. There are no plans to support this at the moment. class TransLocalopt : public trans::TransImpl { public: - TransLocalopt( const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() ); - TransLocalopt( const Cache&, const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransLocalopt( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransLocalopt( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransLocalopt( const Cache&, const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransLocalopt( const Cache&, const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); virtual ~TransLocalopt(); diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc index 7a46c7245..073ba312e 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ b/src/atlas/trans/localopt2/TransLocalopt2.cc @@ -251,6 +251,12 @@ 
TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long TransLocalopt2::TransLocalopt2( const Grid& grid, const long truncation, const eckit::Configuration& config ) : TransLocalopt2( Cache(), grid, truncation, config ) {} +TransLocalopt2::TransLocalopt2( const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) : + TransLocalopt2( Cache(), grid, truncation, config ) {} + +TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) : + TransLocalopt2( cache, grid, truncation, config ) {} + // -------------------------------------------------------------------------------------------------------------------- TransLocalopt2::~TransLocalopt2() { diff --git a/src/atlas/trans/localopt2/TransLocalopt2.h b/src/atlas/trans/localopt2/TransLocalopt2.h index 2bc500ea6..1a2e20801 100644 --- a/src/atlas/trans/localopt2/TransLocalopt2.h +++ b/src/atlas/trans/localopt2/TransLocalopt2.h @@ -48,8 +48,11 @@ namespace trans { /// the grid is global. There are no plans to support this at the moment. 
class TransLocalopt2 : public trans::TransImpl { public: - TransLocalopt2( const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() ); - TransLocalopt2( const Cache&, const Grid& g, const long truncation, + TransLocalopt2( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransLocalopt2( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransLocalopt2( const Cache&, const Grid&, const long truncation, + const eckit::Configuration& = util::NoConfig() ); + TransLocalopt2( const Cache&, const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); virtual ~TransLocalopt2(); diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index 50f3ee7a6..08ecbcb70 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -55,13 +55,6 @@ class TransParameters { std::string write_fft() const { return config_.getString( "write_fft", "" ); } - Grid global_grid() const { - Grid g; - util::Config spec; - if ( config_.get( "global_grid", spec ) ) { g = Grid( spec ); } - return g; - } - bool global() const { return config_.getBool( "global", false ); } int fft() const { @@ -234,9 +227,9 @@ const eckit::linalg::LinearAlgebra& linear_algebra_backend() { return eckit::linalg::LinearAlgebra::backend(); } -TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long truncation, +TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const Domain& domain, const long truncation, const eckit::Configuration& config ) : - grid_( grid ), + grid_( grid, domain ), truncation_( truncation ), precompute_( config.getBool( "precompute", true ) ), cache_( cache ), @@ -286,33 +279,21 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long nlatsLegDomain_ = nlatsSH_; } - - gridGlobal_ = 
TransParameters( config ).global_grid(); - if ( not gridGlobal_ ) { - if ( grid_.domain().global() ) { gridGlobal_ = grid_; } - else { - /*if ( Grid( grid_.name() ).domain().global() ) { - Log::warning() << Here() << " Deprecated. We should pass a global grid as optional argument" - << std::endl; - gridGlobal_ = Grid( grid_.name() ); - } - else {*/ - if ( grid::RegularGrid( grid_ ) ) { - // non-nested regular grid - no_nest = true; - no_symmetry_ = true; - useFFT_ = false; - nlatsNH_ = nlats; - nlatsSH_ = 0; - nlatsLegDomain_ = nlatsNH_; - gridGlobal_ = grid_; - useGlobalLeg = false; - } - else { - NOTIMP; - // non-nested reduced grids are not supported - } - //} + gridGlobal_ = grid; + if( not gridGlobal_.domain().global() ) { + if ( grid::RegularGrid( grid_ ) ) { + // non-nested regular grid + no_nest = true; + no_symmetry_ = true; + useFFT_ = false; + nlatsNH_ = nlats; + nlatsSH_ = 0; + nlatsLegDomain_ = nlatsNH_; + gridGlobal_ = grid_; + useGlobalLeg = false; + } else { + NOTIMP; + // non-nested reduced grids are not supported } } @@ -594,7 +575,13 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long // -------------------------------------------------------------------------------------------------------------------- TransLocalopt3::TransLocalopt3( const Grid& grid, const long truncation, const eckit::Configuration& config ) : - TransLocalopt3( Cache(), grid, truncation, config ) {} + TransLocalopt3( Cache(), grid, grid.domain(), truncation, config ) {} + +TransLocalopt3::TransLocalopt3( const Grid& grid, const Domain& domain, const long truncation, const eckit::Configuration& config ) : + TransLocalopt3( Cache(), grid, domain, truncation, config ) {} + +TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long truncation, const eckit::Configuration& config ) : + TransLocalopt3( cache, grid, grid.domain(), truncation, config ) {} // 
-------------------------------------------------------------------------------------------------------------------- diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h index 22d77b48b..14d8d4753 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.h +++ b/src/atlas/trans/localopt3/TransLocalopt3.h @@ -56,8 +56,11 @@ namespace trans { /// the grid is global. There are no plans to support this at the moment. class TransLocalopt3 : public trans::TransImpl { public: - TransLocalopt3( const Grid& g, const long truncation, const eckit::Configuration& = util::NoConfig() ); - TransLocalopt3( const Cache&, const Grid& g, const long truncation, + TransLocalopt3( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransLocalopt3( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransLocalopt3( const Cache&, const Grid&, const long truncation, + const eckit::Configuration& = util::NoConfig() ); + TransLocalopt3( const Cache&, const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); virtual ~TransLocalopt3(); diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc index 8dd7aaafb..1b31af0eb 100644 --- a/src/tests/trans/test_trans_localcache.cc +++ b/src/tests/trans/test_trans_localcache.cc @@ -194,7 +194,7 @@ CASE( "test_global_grids_with_subdomain" ) { for( auto domain : domains ) { Grid grid( gridname, domain ); ATLAS_TRACE_SCOPE("create with cache") - Trans( global_cache, grid, truncation, option::type("local") | option::global_grid( global_grid) ); + Trans( global_cache, global_grid, domain, truncation, option::type("local") ); } } } @@ -217,15 +217,15 @@ CASE( "test_regional_grids nested_in_global" ) { - StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) ); + StructuredGrid regional( LinearSpacing( {0.,180.}, 181 ), 
LinearSpacing( {0.,45.}, 46 ) ); ATLAS_TRACE_SCOPE("create without cache") - Trans( grid, truncation, option::type("local") | option::global_grid( grid_global ) ); + Trans( grid_global, regional.domain(), truncation, option::type("local") ); ATLAS_TRACE_SCOPE("read cache") cache = LegendreCache( global_cachefile ); ATLAS_TRACE_SCOPE("create with cache") - Trans( cache, grid, truncation, option::type("local") | option::global_grid( grid_global ) ); + Trans( cache, grid_global, regional.domain(), truncation, option::type("local") ); } CASE( "test_regional_grids not nested" ) { diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index ff8185943..9c2fb2801 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -972,8 +972,8 @@ CASE( "test_trans_domain" ) { Domain testdomain2 = RectangularDomain( {-1., 1.}, {-5., 40.} ); // Grid: (Adjust the following line if the test takes too long!) - std::string gridString = "O640"; - Grid g1( gridString, testdomain1 ); + Grid global_grid( "O640" ); + Grid g1( global_grid, testdomain1 ); //Grid g2( gridString, testdomain2 ); bool fourierTrc1 = true; @@ -986,16 +986,15 @@ CASE( "test_trans_domain" ) { //Log::info() << "rgp1:" << std::endl; if ( eckit::PathName( "legcache.bin" ).exists() ) eckit::PathName( "legcache.bin" ).unlink(); Trace t1( Here(), "translocal1 construction" ); - trans::Trans transLocal1( g1, trc, - option::type( "local" ) | option::write_legendre( "legcache.bin" ) | - option::global_grid( Grid( gridString ) ) ); + trans::Trans transLocal1( global_grid, g1.domain(), trc, + option::type( "local" ) | option::write_legendre( "legcache.bin" ) ); t1.stop(); //Log::info() << "rgp2:" << std::endl; trans::Cache cache; ATLAS_TRACE_SCOPE( "Read cache" ) cache = trans::LegendreCache( "legcache.bin" ); Trace t2( Here(), "translocal2 construction" ); //trans::Trans transLocal2( cache, g2, trc, - // option::type( "local" ) | option::global_grid( Grid( 
gridString ) ) | option::no_fft() ); + // option::type( "local" ) | option::no_fft() ); trans::Trans transLocal2( g2, trc, option::type( "local" ) ); t2.stop(); From f0c1b7def5c8d0ad7d8d669d6685e563736a7bd5 Mon Sep 17 00:00:00 2001 From: Pedro Maciel Date: Tue, 8 May 2018 19:11:00 +0100 Subject: [PATCH 066/123] MIR-178, MIR-191, MIR-192, MIR-193, MIR-270: support caching for "local" transforms --- src/atlas/grid/detail/grid/Structured.cc | 2 ++ src/atlas/trans/LegendreCacheCreator.cc | 8 ++++++++ src/atlas/trans/LegendreCacheCreator.h | 9 +++++++++ src/atlas/trans/Trans.h | 2 ++ src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc | 7 +++++++ src/atlas/trans/ifs/LegendreCacheCreatorIFS.h | 4 ++++ src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc | 7 +++++++ src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h | 4 ++++ 8 files changed, 43 insertions(+) diff --git a/src/atlas/grid/detail/grid/Structured.cc b/src/atlas/grid/detail/grid/Structured.cc index 76fbc0a41..a2c6f0dcb 100644 --- a/src/atlas/grid/detail/grid/Structured.cc +++ b/src/atlas/grid/detail/grid/Structured.cc @@ -363,6 +363,8 @@ void Structured::crop( const Domain& dom ) { jmax = std::max( j, jmax ); } } + ASSERT(jmax >= jmin); + size_t cropped_ny = jmax - jmin + 1; std::vector cropped_y( y_.begin() + jmin, y_.begin() + jmin + cropped_ny ); std::vector cropped_dx( dx_.begin() + jmin, dx_.begin() + jmin + cropped_ny ); diff --git a/src/atlas/trans/LegendreCacheCreator.cc b/src/atlas/trans/LegendreCacheCreator.cc index 66e842323..6d65c76c1 100644 --- a/src/atlas/trans/LegendreCacheCreator.cc +++ b/src/atlas/trans/LegendreCacheCreator.cc @@ -149,5 +149,13 @@ void LegendreCacheCreator::create( const std::string& path ) const { impl_->create( path ); } +Cache LegendreCacheCreator::create() const { + return impl_->create(); +} + +size_t LegendreCacheCreator::estimate() const { + return impl_->estimate(); +} + } // namespace trans } // namespace atlas diff --git a/src/atlas/trans/LegendreCacheCreator.h 
b/src/atlas/trans/LegendreCacheCreator.h index e552c5c80..db0f71aa8 100644 --- a/src/atlas/trans/LegendreCacheCreator.h +++ b/src/atlas/trans/LegendreCacheCreator.h @@ -17,6 +17,7 @@ #include "eckit/memory/SharedPtr.h" #include "atlas/util/Config.h" +#include "atlas/trans/Trans.h" //----------------------------------------------------------------------------- // Forward declarations @@ -41,6 +42,10 @@ class LegendreCacheCreatorImpl : public eckit::Owned { virtual std::string uid() const = 0; virtual void create( const std::string& path ) const = 0; + + virtual Cache create() const = 0; + + virtual size_t estimate() const = 0; }; // ------------------------------------------------------------------ @@ -67,6 +72,10 @@ class LegendreCacheCreator { std::string uid() const; void create( const std::string& path ) const; + + Cache create() const; + + size_t estimate() const; }; //---------------------------------------------------------------------------------------------------------------------- diff --git a/src/atlas/trans/Trans.h b/src/atlas/trans/Trans.h index d3f690f8e..20cb8ae65 100644 --- a/src/atlas/trans/Trans.h +++ b/src/atlas/trans/Trans.h @@ -96,6 +96,8 @@ class Cache { fft_( fft ) {} public: + operator bool() const { return bool(legendre()) ; } + const TransCacheEntry& legendre() const { return *legendre_; } const TransCacheEntry& fft() const { return *fft_; } diff --git a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc index 7d7bffa47..2c9276270 100644 --- a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc +++ b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc @@ -105,6 +105,13 @@ void LegendreCacheCreatorIFS::create( const std::string& path ) const { Trans( grid_, truncation_, config_ | option::type("ifs") | option::write_legendre( path ) ); } +Cache LegendreCacheCreatorIFS::create() const { + NOTIMP; +} + +size_t LegendreCacheCreatorIFS::estimate() const { + return size_t(truncation_ * truncation_ * truncation_) 
/ 2 * sizeof(double); +} } // namespace trans diff --git a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h index bc76cf00d..e6629ebea 100644 --- a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h +++ b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h @@ -33,6 +33,10 @@ class LegendreCacheCreatorIFS : public trans::LegendreCacheCreatorImpl { virtual void create(const std::string &path) const override; + virtual Cache create() const override; + + virtual size_t estimate() const override; + private: const Grid grid_; const int truncation_; diff --git a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc index 14bf740a7..c87bb4641 100644 --- a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc +++ b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc @@ -90,6 +90,13 @@ void LegendreCacheCreatorLocal::create( const std::string& path ) const { Trans( grid_, truncation_, config_ | option::type("local") | option::write_legendre( path ) ); } +Cache LegendreCacheCreatorLocal::create() const { + NOTIMP; +} + +size_t LegendreCacheCreatorLocal::estimate() const { + return size_t(truncation_ * truncation_ * truncation_) / 2 * sizeof(double); +} } // namespace trans diff --git a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h index 8ef10295e..b21237e5b 100644 --- a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h +++ b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h @@ -33,6 +33,10 @@ class LegendreCacheCreatorLocal : public trans::LegendreCacheCreatorImpl { virtual void create(const std::string &path) const override; + virtual Cache create() const override; + + size_t estimate() const override; + private: const Grid grid_; const int truncation_; From 38e02bf162c520204743aeec98774a1bf9ee8f0b Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 9 May 2018 12:20:26 +0100 Subject: [PATCH 067/123] 
MIR-178, MIR-191, MIR-192, MIR-193, MIR-270: support in memory caching --- src/atlas/CMakeLists.txt | 2 + src/atlas/trans/Cache.cc | 132 +++++++++++++++++ src/atlas/trans/Cache.h | 135 ++++++++++++++++++ src/atlas/trans/Trans.cc | 10 ++ src/atlas/trans/Trans.h | 127 +++------------- .../trans/ifs/LegendreCacheCreatorIFS.cc | 3 +- .../localopt3/LegendreCacheCreatorLocal.cc | 8 +- src/atlas/trans/localopt3/TransLocalopt3.cc | 17 ++- src/atlas/trans/localopt3/TransLocalopt3.h | 5 + src/tests/trans/test_trans_localcache.cc | 25 +++- 10 files changed, 347 insertions(+), 117 deletions(-) create mode 100644 src/atlas/trans/Cache.cc create mode 100644 src/atlas/trans/Cache.h diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt index 119b81c24..2e6384ef0 100644 --- a/src/atlas/CMakeLists.txt +++ b/src/atlas/CMakeLists.txt @@ -317,6 +317,8 @@ numerics/fvm/Method.cc numerics/fvm/Nabla.h numerics/fvm/Nabla.cc +trans/Cache.h +trans/Cache.cc trans/Trans.h trans/Trans.cc trans/VorDivToUV.h diff --git a/src/atlas/trans/Cache.cc b/src/atlas/trans/Cache.cc new file mode 100644 index 000000000..14e3726d1 --- /dev/null +++ b/src/atlas/trans/Cache.cc @@ -0,0 +1,132 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. 
+ */ + +#include +#include "atlas/trans/Cache.h" + +#include "eckit/io/DataHandle.h" +#include "eckit/exception/Exceptions.h" +#include "eckit/thread/AutoLock.h" +#include "eckit/thread/Mutex.h" + +#include "atlas/runtime/Trace.h" +#include "atlas/runtime/Log.h" +#include "atlas/trans/Trans.h" + +namespace { +static eckit::Mutex* local_mutex = 0; +static pthread_once_t once = PTHREAD_ONCE_INIT; +static void init() { + local_mutex = new eckit::Mutex(); +} +} + +namespace atlas { +namespace trans { + +TransCacheFileEntry::TransCacheFileEntry(const eckit::PathName& path) : buffer_( path.size() ) { + ATLAS_TRACE(); + Log::debug() << "Loading cache from file " << path << std::endl; + std::unique_ptr dh( path.fileHandle() ); + dh->openForRead(); + dh->read( buffer_.data(), buffer_.size() ); + dh->close(); +} + +TransCacheMemoryEntry::TransCacheMemoryEntry(const void* data, size_t size) : data_(data), size_(size) { + ASSERT(data_); + ASSERT(size_); +} + +LegendreFFTCache::LegendreFFTCache( const void* legendre_address, size_t legendre_size, const void* fft_address, size_t fft_size ) : + Cache( std::make_shared( legendre_address, legendre_size ), + std::make_shared( fft_address, fft_size ) ) { +} + +LegendreFFTCache::LegendreFFTCache( const eckit::PathName& legendre_path, const eckit::PathName& fft_path ) : + Cache( std::shared_ptr( new TransCacheFileEntry( legendre_path ) ), + std::shared_ptr( new TransCacheFileEntry( fft_path ) ) ) { +} + +LegendreCache::LegendreCache( const eckit::PathName& path ) : + Cache( std::shared_ptr( new TransCacheFileEntry( path ) ) ) { +} + +LegendreCache::LegendreCache( size_t size) : + Cache( std::make_shared( size ) ) { +} + +LegendreCache::LegendreCache( const void* address, size_t size ) : + Cache( std::make_shared( address, size ) ) { +} + +Cache::Cache(const std::shared_ptr& legendre) : + legendre_( legendre ), + fft_( new EmptyCacheEntry() ) {} + +Cache::Cache(const std::shared_ptr& legendre, const std::shared_ptr& fft) : + legendre_( 
legendre ), + fft_( fft ) {} + +Cache::Cache( const TransImpl* trans ) : + trans_( trans ), + legendre_( new EmptyCacheEntry() ), + fft_( new EmptyCacheEntry() ) { + if( trans_ ) + trans_->attach(); +} + +Cache::Cache() : + legendre_( new EmptyCacheEntry() ), + fft_( new EmptyCacheEntry() ) {} + +Cache::Cache(const Cache& other) : + trans_( other.trans_ ), + legendre_( other.legendre_ ), + fft_( other.fft_ ) { + if( trans_ ) + trans_->attach(); +} + +Cache::operator bool() const { + return trans_ || bool(legendre()) ; +} + +Cache::~Cache() { + pthread_once( &once, init ); + eckit::AutoLock lock( local_mutex ); + if( trans_ ) { + trans_->detach(); + if( trans_->owners() == 0 ) { + delete trans_; + } + trans_ = nullptr; + } +} + +TransCache::TransCache( const Trans& trans ) : + Cache( trans.get() ) { +} + +TransCacheOwnedMemoryEntry::TransCacheOwnedMemoryEntry(size_t size) : + size_(size) { + if( size_ ) { + data_ = std::malloc( size_ ); + } +} + +TransCacheOwnedMemoryEntry::~TransCacheOwnedMemoryEntry() { + if( size_ ) { + std::free( data_ ); + } +} + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/Cache.h b/src/atlas/trans/Cache.h new file mode 100644 index 000000000..e362d6c34 --- /dev/null +++ b/src/atlas/trans/Cache.h @@ -0,0 +1,135 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. 
+ */ + +#pragma once + +#include + +#include "eckit/filesystem/PathName.h" +#include "eckit/io/Buffer.h" + +//----------------------------------------------------------------------------- +// Forward declarations + +namespace atlas { +class Field; +class FieldSet; +class FunctionSpace; +class Grid; +class Domain; +namespace trans { +class TransImpl; +class Trans; +} // namespace trans +} // namespace atlas + +//----------------------------------------------------------------------------- + +namespace atlas { +namespace trans { + +//----------------------------------------------------------------------------- + +class TransCacheEntry { +public: + operator bool() const { return size() != 0; } + virtual size_t size() const = 0; + virtual const void* data() const = 0; +}; + +//----------------------------------------------------------------------------- + +class EmptyCacheEntry final : public TransCacheEntry { +public: + virtual size_t size() const override { return 0; } + virtual const void* data() const override { return nullptr; } +}; + +//----------------------------------------------------------------------------- + +class TransCacheFileEntry final : public TransCacheEntry { +private: + eckit::Buffer buffer_; +public: + TransCacheFileEntry( const eckit::PathName& path ); + virtual size_t size() const override { return buffer_.size(); } + virtual const void* data() const override { return buffer_.data(); } +}; + +//----------------------------------------------------------------------------- + +class TransCacheMemoryEntry final : public TransCacheEntry { +public: + TransCacheMemoryEntry(const void* data, size_t size); + virtual const void* data() const override { return data_; } + virtual size_t size() const override { return size_; } +private: + const void* data_; + const size_t size_; +}; + +//----------------------------------------------------------------------------- + +class TransCacheOwnedMemoryEntry final : public TransCacheEntry { +public: + 
TransCacheOwnedMemoryEntry(size_t size); + ~TransCacheOwnedMemoryEntry(); + virtual const void* data() const override { return data_; } + virtual size_t size() const override { return size_; } +private: + void* data_ = nullptr; + const size_t size_ = 0; +}; + +//----------------------------------------------------------------------------- + +class Cache { +public: + Cache(); + Cache( const Cache& other ); + operator bool() const; + const TransImpl* trans() const { return trans_; } + const TransCacheEntry& legendre() const { return *legendre_; } + const TransCacheEntry& fft() const { return *fft_; } + ~Cache(); +protected: + Cache( const std::shared_ptr& legendre ); + Cache( const std::shared_ptr& legendre, const std::shared_ptr& fft ); + Cache( const TransImpl* ); +private: + const TransImpl* trans_ = nullptr; + std::shared_ptr legendre_; + std::shared_ptr fft_; +}; + +class TransCache : public Cache { +public: + TransCache( const Trans& ); +}; + + +class LegendreCache : public Cache { +public: + LegendreCache( size_t size ); + LegendreCache( const void* address, size_t size ); + LegendreCache( const eckit::PathName& path ); +}; + +class LegendreFFTCache : public Cache { +public: + LegendreFFTCache( const void* legendre_address, size_t legendre_size, + const void* fft_address, size_t fft_size ); + LegendreFFTCache( const eckit::PathName& legendre_path, const eckit::PathName& fft_path ); +}; + +//---------------------------------------------------------------------------------------------------------------------- + +} // namespace trans +} // namespace atlas diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc index 2bf1a0096..cb9452d8e 100644 --- a/src/atlas/trans/Trans.cc +++ b/src/atlas/trans/Trans.cc @@ -130,6 +130,11 @@ Trans::Implementation* TransFactory::build( const FunctionSpace& gp, const Funct Trans::Implementation* TransFactory::build( const Cache& cache, const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& config ) { 
+ if( cache.trans() ) { + Log::debug() << "Creating Trans from cache, ignoring any other arguments" << std::endl; + return cache.trans(); + } + pthread_once( &once, init ); eckit::AutoLock lock( local_mutex ); @@ -164,6 +169,11 @@ Trans::Implementation* TransFactory::build( const Cache& cache, const Grid& grid Trans::Implementation* TransFactory::build( const Cache& cache, const Grid& grid, const Domain& domain, int truncation, const eckit::Configuration& config ) { + if( cache.trans() ) { + Log::debug() << "Creating Trans from cache, ignoring any other arguments" << std::endl; + return cache.trans(); + } + pthread_once( &once, init ); eckit::AutoLock lock( local_mutex ); diff --git a/src/atlas/trans/Trans.h b/src/atlas/trans/Trans.h index 20cb8ae65..63a38817b 100644 --- a/src/atlas/trans/Trans.h +++ b/src/atlas/trans/Trans.h @@ -10,17 +10,11 @@ #pragma once -#include - -#include "eckit/config/Configuration.h" -#include "eckit/io/Buffer.h" -#include "eckit/io/DataHandle.h" #include "eckit/memory/Owned.h" #include "eckit/memory/SharedPtr.h" #include "atlas/util/Config.h" -#include "atlas/runtime/Trace.h" -#include "atlas/runtime/Log.h" +#include "atlas/trans/Cache.h" //----------------------------------------------------------------------------- // Forward declarations @@ -40,95 +34,6 @@ namespace trans { //----------------------------------------------------------------------------- -class TransCacheEntry { -public: - operator bool() const { return size() != 0; } - virtual size_t size() const = 0; - virtual const void* data() const = 0; -}; - -class EmptyCacheEntry final : public TransCacheEntry { -public: - virtual size_t size() const override { return 0; } - virtual const void* data() const override { return nullptr; } -}; - -class TransCacheFileEntry final : public TransCacheEntry { - eckit::Buffer buffer_; - -public: - TransCacheFileEntry( const eckit::PathName& path ) : buffer_( path.size() ) { - ATLAS_TRACE(); - Log::debug() << "Loading cache from file " << 
path << std::endl; - std::unique_ptr dh( path.fileHandle() ); - dh->openForRead(); - dh->read( buffer_.data(), buffer_.size() ); - dh->close(); - } - virtual size_t size() const override { return buffer_.size(); } - virtual const void* data() const override { return buffer_.data(); } -}; - -class TransCacheMemoryEntry final : public TransCacheEntry { -public: - TransCacheMemoryEntry(const void* data, size_t size) : data_(data), size_(size) { - ASSERT(data_); - ASSERT(size_); - } - virtual const void* data() const override { return data_; } - virtual size_t size() const override { return size_; } -private: - const void* data_; - const size_t size_; -}; - -//----------------------------------------------------------------------------- - -class Cache { -public: - Cache() : legendre_( new EmptyCacheEntry() ), fft_( new EmptyCacheEntry() ) {} - Cache( const Cache& other ) = default; - -protected: - Cache( const std::shared_ptr& legendre ) : legendre_( legendre ), fft_( new EmptyCacheEntry() ) {} - Cache( const std::shared_ptr& legendre, const std::shared_ptr& fft ) : - legendre_( legendre ), - fft_( fft ) {} - -public: - operator bool() const { return bool(legendre()) ; } - - const TransCacheEntry& legendre() const { return *legendre_; } - const TransCacheEntry& fft() const { return *fft_; } - -private: - std::shared_ptr legendre_; - std::shared_ptr fft_; -}; - -class LegendreCache : public Cache { -public: - LegendreCache(const void* address, size_t size) : - Cache( std::make_shared( address, size ) ) { - } - LegendreCache( const eckit::PathName& path ) : - Cache( std::shared_ptr( new TransCacheFileEntry( path ) ) ) { - } -}; - -class LegendreFFTCache : public Cache { -public: - LegendreFFTCache( const void* legendre_address, size_t legendre_size, - const void* fft_address, size_t fft_size ) : - Cache( std::make_shared( legendre_address, legendre_size ), - std::make_shared( fft_address, fft_size ) ) { - } - LegendreFFTCache( const eckit::PathName& legendre_path, const 
eckit::PathName& fft_path ) : - Cache( std::shared_ptr( new TransCacheFileEntry( legendre_path ) ), - std::shared_ptr( new TransCacheFileEntry( fft_path ) ) ) { - } -}; - class TransImpl : public eckit::Owned { public: virtual ~TransImpl() = 0; @@ -217,23 +122,25 @@ class TransImpl : public eckit::Owned { // ------------------------------------------------------------------ class TransFactory { +protected: + using Trans_t = const TransImpl; public: /*! * \brief build Trans * \return TransImpl */ - static TransImpl* build( const FunctionSpace& gp, const FunctionSpace& sp, + static Trans_t* build( const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& = util::Config() ); - static TransImpl* build( const Grid&, int truncation, const eckit::Configuration& = util::Config() ); + static Trans_t* build( const Grid&, int truncation, const eckit::Configuration& = util::Config() ); - static TransImpl* build( const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::Config() ); + static Trans_t* build( const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::Config() ); - static TransImpl* build( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp, + static Trans_t* build( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& = util::Config() ); - static TransImpl* build( const Cache&, const Grid&, int truncation, const eckit::Configuration& = util::Config() ); + static Trans_t* build( const Cache&, const Grid&, int truncation, const eckit::Configuration& = util::Config() ); - static TransImpl* build( const Cache&, const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::Config() ); + static Trans_t* build( const Cache&, const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::Config() ); /*! 
* \brief list all registered trans implementations @@ -244,10 +151,10 @@ class TransFactory { private: std::string name_; - virtual TransImpl* make( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& ) { + virtual Trans_t* make( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& ) { return nullptr; } - virtual TransImpl* make( const Cache&, const Grid& gp, const Domain&, int truncation, const eckit::Configuration& ) { + virtual Trans_t* make( const Cache&, const Grid& gp, const Domain&, int truncation, const eckit::Configuration& ) { return nullptr; } @@ -260,11 +167,11 @@ class TransFactory { template class TransBuilderFunctionSpace : public TransFactory { - virtual TransImpl* make( const Cache& cache, const FunctionSpace& gp, const FunctionSpace& sp, + virtual Trans_t* make( const Cache& cache, const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& config ) { return new T( cache, gp, sp, config ); } - virtual TransImpl* make( const Cache&, const Grid&, const Domain&, int, const eckit::Configuration& ) { + virtual Trans_t* make( const Cache&, const Grid&, const Domain&, int, const eckit::Configuration& ) { throw eckit::SeriousBug( "This function should not be called", Here() ); } @@ -274,11 +181,11 @@ class TransBuilderFunctionSpace : public TransFactory { template class TransBuilderGrid : public TransFactory { - virtual TransImpl* make( const Cache& cache, const Grid& grid, const Domain& domain, int truncation, + virtual Trans_t* make( const Cache& cache, const Grid& grid, const Domain& domain, int truncation, const eckit::Configuration& config ) { return new T( cache, grid, domain, truncation, config ); } - virtual TransImpl* make( const Cache&, const FunctionSpace&, const FunctionSpace&, const eckit::Configuration& ) { + virtual Trans_t* make( const Cache&, const FunctionSpace&, const FunctionSpace&, const eckit::Configuration& ) { throw eckit::SeriousBug( 
"This function should not be called", Here() ); } @@ -290,7 +197,7 @@ class TransBuilderGrid : public TransFactory { class Trans { public: - using Implementation = TransImpl; + using Implementation = const TransImpl; private: eckit::SharedPtr impl_; @@ -310,7 +217,7 @@ class Trans { Trans( const Cache&, const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::NoConfig() ); void hash( eckit::Hash& ) const; - const Implementation* get() const { return impl_.get(); } + Implementation* get() const { return impl_.get(); } operator bool() const { return impl_.owners(); } int truncation() const; diff --git a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc index 2c9276270..7ac8e74b0 100644 --- a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc +++ b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc @@ -106,7 +106,8 @@ void LegendreCacheCreatorIFS::create( const std::string& path ) const { } Cache LegendreCacheCreatorIFS::create() const { - NOTIMP; + return TransCache( + Trans( grid_, truncation_, config_ | option::type("ifs") ) ); } size_t LegendreCacheCreatorIFS::estimate() const { diff --git a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc index c87bb4641..a004348b9 100644 --- a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc +++ b/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc @@ -15,6 +15,7 @@ #include "atlas/grid.h" #include "atlas/option.h" #include "atlas/trans/Trans.h" +#include "atlas/trans/localopt3/TransLocalopt3.h" namespace atlas { namespace trans { @@ -87,11 +88,14 @@ bool LegendreCacheCreatorLocal::supported() const { } void LegendreCacheCreatorLocal::create( const std::string& path ) const { - Trans( grid_, truncation_, config_ | option::type("local") | option::write_legendre( path ) ); + Trans tmp( grid_, truncation_, config_ | option::type("local") | option::write_legendre( path ) ); } Cache 
LegendreCacheCreatorLocal::create() const { - NOTIMP; + util::Config export_legendre("export_legendre",true); + Trans tmp( grid_, truncation_, config_ | option::type("local") | export_legendre ); + auto impl = dynamic_cast( tmp.get() ); + return impl->export_legendre_; } size_t LegendreCacheCreatorLocal::estimate() const { diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index 08ecbcb70..1effd3584 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -51,6 +51,8 @@ class TransParameters { std::string write_legendre() const { return config_.getString( "write_legendre", "" ); } + bool export_legendre() const { return config_.getBool( "export_legendre", false ); } + std::string read_fft() const { return config_.getString( "read_fft", "" ); } std::string write_fft() const { return config_.getString( "write_fft", "" ); } @@ -432,8 +434,19 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const Doma else { ATLAS_TRACE_SCOPE( "Legendre precomputations (structured)" ) { - alloc_aligned( legendre_sym_, size_sym ); - alloc_aligned( legendre_asym_, size_asym ); + if( TransParameters(config).export_legendre() ) { + ASSERT( not cache_.legendre() ); + export_legendre_ = LegendreCache( sizeof(double) * ( size_sym + size_asym ) ); + legendre_cachesize_ = export_legendre_.legendre().size(); + legendre_cache_ = export_legendre_.legendre().data(); + legendre_cache_ = std::malloc( legendre_cachesize_ ); + ReadCache legendre( legendre_cache_ ); + legendre_sym_ = legendre.read( size_sym ); + legendre_asym_ = legendre.read( size_asym ); + } else { + alloc_aligned( legendre_sym_, size_sym ); + alloc_aligned( legendre_asym_, size_asym ); + } compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_, legendre_sym_begin_.data(), diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h 
b/src/atlas/trans/localopt3/TransLocalopt3.h index 14d8d4753..dda875ae5 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.h +++ b/src/atlas/trans/localopt3/TransLocalopt3.h @@ -40,6 +40,8 @@ class FieldSet; namespace atlas { namespace trans { +class LegendreCacheCreatorLocal; + //----------------------------------------------------------------------------- /// @class TransLocalopt3 @@ -148,6 +150,8 @@ class TransLocalopt3 : public trans::TransImpl { const double scalar_spectra[], double gp_fields[], const eckit::Configuration& = util::NoConfig() ) const; +friend class LegendreCacheCreatorLocal; + private: Grid grid_; Grid gridGlobal_; @@ -184,6 +188,7 @@ class TransLocalopt3 : public trans::TransImpl { #endif Cache cache_; + Cache export_legendre_; const void* legendre_cache_{nullptr}; size_t legendre_cachesize_{0}; const void* fft_cache_{nullptr}; diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc index 1b31af0eb..247200315 100644 --- a/src/tests/trans/test_trans_localcache.cc +++ b/src/tests/trans/test_trans_localcache.cc @@ -264,8 +264,7 @@ CASE( "test_regional_grids with projection" ) { // Note: caching not yet implemented for unstructured and projected grids } - -CASE( "test cache creator" ) { +CASE( "test cache creator to file" ) { auto truncation = 89; StructuredGrid grid_global( @@ -277,6 +276,28 @@ CASE( "test cache creator" ) { auto cachefile = CacheFile( legendre_cache_creator.uid() ); ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) ) legendre_cache_creator.create( cachefile ); + + Cache c = legendre_cache_creator.create(); + auto trans1 = Trans( c, grid_global, truncation, option::type("local") ); + auto trans2 = Trans( c, grid_global, truncation, option::type("local") ); +} + +CASE( "test cache creator in memory" ) { + + auto truncation = 89; + StructuredGrid grid_global( + LinearSpacing( { 0., 360.}, 360, false ), + LinearSpacing( { 90., -90.}, 181, true ) + ); + + LegendreCacheCreator 
legendre_cache_creator( grid_global, truncation, option::type("local") ); + + Cache cache; + ATLAS_TRACE_SCOPE( "Creating cache in memory" ) + cache = legendre_cache_creator.create(); + + auto trans1 = Trans( cache, grid_global, truncation, option::type("local") ); + auto trans2 = Trans( cache, grid_global, truncation, option::type("local") ); } } // namespace test From 389280b0b8522fbf911536e6a536c29de241e7ef Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 9 May 2018 15:00:24 +0100 Subject: [PATCH 068/123] Removed trans/localopt trans/localopt2 --- src/atlas/CMakeLists.txt | 20 - src/atlas/trans/Trans.cc | 4 - src/atlas/trans/VorDivToUV.cc | 4 - .../trans/localopt/FourierTransformsopt.cc | 78 -- .../trans/localopt/FourierTransformsopt.h | 38 - .../trans/localopt/LegendrePolynomialsopt.cc | 198 ----- .../trans/localopt/LegendrePolynomialsopt.h | 47 -- .../trans/localopt/LegendreTransformsopt.cc | 62 -- .../trans/localopt/LegendreTransformsopt.h | 36 - src/atlas/trans/localopt/TransLocalopt.cc | 704 ----------------- src/atlas/trans/localopt/TransLocalopt.h | 141 ---- .../trans/localopt/VorDivToUVLocalopt.cc | 184 ----- src/atlas/trans/localopt/VorDivToUVLocalopt.h | 67 -- .../trans/localopt2/FourierTransformsopt2.cc | 78 -- .../trans/localopt2/FourierTransformsopt2.h | 38 - .../localopt2/LegendrePolynomialsopt2.cc | 237 ------ .../trans/localopt2/LegendrePolynomialsopt2.h | 59 -- .../trans/localopt2/LegendreTransformsopt2.cc | 62 -- .../trans/localopt2/LegendreTransformsopt2.h | 37 - src/atlas/trans/localopt2/TransLocalopt2.cc | 746 ------------------ src/atlas/trans/localopt2/TransLocalopt2.h | 143 ---- .../trans/localopt2/VorDivToUVLocalopt2.cc | 184 ----- .../trans/localopt2/VorDivToUVLocalopt2.h | 67 -- src/atlas/trans/localopt3/TransLocalopt3.cc | 1 - src/tests/trans/test_trans_localcache.cc | 26 +- 25 files changed, 3 insertions(+), 3258 deletions(-) delete mode 100644 src/atlas/trans/localopt/FourierTransformsopt.cc delete mode 100644 
src/atlas/trans/localopt/FourierTransformsopt.h delete mode 100644 src/atlas/trans/localopt/LegendrePolynomialsopt.cc delete mode 100644 src/atlas/trans/localopt/LegendrePolynomialsopt.h delete mode 100644 src/atlas/trans/localopt/LegendreTransformsopt.cc delete mode 100644 src/atlas/trans/localopt/LegendreTransformsopt.h delete mode 100644 src/atlas/trans/localopt/TransLocalopt.cc delete mode 100644 src/atlas/trans/localopt/TransLocalopt.h delete mode 100644 src/atlas/trans/localopt/VorDivToUVLocalopt.cc delete mode 100644 src/atlas/trans/localopt/VorDivToUVLocalopt.h delete mode 100644 src/atlas/trans/localopt2/FourierTransformsopt2.cc delete mode 100644 src/atlas/trans/localopt2/FourierTransformsopt2.h delete mode 100644 src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc delete mode 100644 src/atlas/trans/localopt2/LegendrePolynomialsopt2.h delete mode 100644 src/atlas/trans/localopt2/LegendreTransformsopt2.cc delete mode 100644 src/atlas/trans/localopt2/LegendreTransformsopt2.h delete mode 100644 src/atlas/trans/localopt2/TransLocalopt2.cc delete mode 100644 src/atlas/trans/localopt2/TransLocalopt2.h delete mode 100644 src/atlas/trans/localopt2/VorDivToUVLocalopt2.cc delete mode 100644 src/atlas/trans/localopt2/VorDivToUVLocalopt2.h diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt index 2e6384ef0..4ed6679f4 100644 --- a/src/atlas/CMakeLists.txt +++ b/src/atlas/CMakeLists.txt @@ -335,26 +335,6 @@ trans/local_noopt/FourierTransforms.h trans/local_noopt/FourierTransforms.cc trans/local_noopt/VorDivToUVLocal.h trans/local_noopt/VorDivToUVLocal.cc -trans/localopt/TransLocalopt.h -trans/localopt/TransLocalopt.cc -trans/localopt/LegendrePolynomialsopt.h -trans/localopt/LegendrePolynomialsopt.cc -trans/localopt/LegendreTransformsopt.h -trans/localopt/LegendreTransformsopt.cc -trans/localopt/FourierTransformsopt.h -trans/localopt/FourierTransformsopt.cc -trans/localopt/VorDivToUVLocalopt.h -trans/localopt/VorDivToUVLocalopt.cc 
-trans/localopt2/TransLocalopt2.h -trans/localopt2/TransLocalopt2.cc -trans/localopt2/LegendrePolynomialsopt2.h -trans/localopt2/LegendrePolynomialsopt2.cc -trans/localopt2/LegendreTransformsopt2.h -trans/localopt2/LegendreTransformsopt2.cc -trans/localopt2/FourierTransformsopt2.h -trans/localopt2/FourierTransformsopt2.cc -trans/localopt2/VorDivToUVLocalopt2.h -trans/localopt2/VorDivToUVLocalopt2.cc trans/localopt3/TransLocalopt3.h trans/localopt3/TransLocalopt3.cc trans/localopt3/LegendrePolynomialsopt3.h diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc index cb9452d8e..d14660060 100644 --- a/src/atlas/trans/Trans.cc +++ b/src/atlas/trans/Trans.cc @@ -28,8 +28,6 @@ #define TRANS_DEFAULT "local" #endif #include "atlas/trans/local_noopt/TransLocal.h" -#include "atlas/trans/localopt/TransLocalopt.h" -#include "atlas/trans/localopt2/TransLocalopt2.h" #include "atlas/trans/localopt3/TransLocalopt3.h" // --> recommended "local" namespace atlas { @@ -65,8 +63,6 @@ struct force_link { load_builder_grid(); #endif load_builder_grid(); - load_builder_grid(); - load_builder_grid(); load_builder_grid(); } }; diff --git a/src/atlas/trans/VorDivToUV.cc b/src/atlas/trans/VorDivToUV.cc index 70feef4d8..aa8fc5781 100644 --- a/src/atlas/trans/VorDivToUV.cc +++ b/src/atlas/trans/VorDivToUV.cc @@ -27,8 +27,6 @@ #define TRANS_DEFAULT "local" #endif #include "atlas/trans/local_noopt/VorDivToUVLocal.h" -#include "atlas/trans/localopt/VorDivToUVLocalopt.h" -#include "atlas/trans/localopt2/VorDivToUVLocalopt2.h" #include "atlas/trans/localopt3/VorDivToUVLocalopt3.h" // --> recommended "local" namespace atlas { @@ -58,8 +56,6 @@ struct force_link { load_builder(); #endif load_builder(); - load_builder(); - load_builder(); load_builder(); } }; diff --git a/src/atlas/trans/localopt/FourierTransformsopt.cc b/src/atlas/trans/localopt/FourierTransformsopt.cc deleted file mode 100644 index d854f5aaa..000000000 --- a/src/atlas/trans/localopt/FourierTransformsopt.cc +++ /dev/null @@ 
-1,78 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor - * does it submit to any jurisdiction. - */ - -#include -#include -#include - -#include "atlas/trans/localopt/FourierTransformsopt.h" - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- - -void invtrans_fourieropt( const size_t trcFT, - const double lon, // longitude in radians (in) - const int nb_fields, // Number of fields - const double rlegReal[], // associated Legendre functions, size (trc+1)*trc/2 (in) - const double rlegImag[], // associated Legendre functions, size (trc+1)*trc/2 (in) - double rgp[] ) // gridpoint -{ - for ( int jfld = 0; jfld < nb_fields; ++jfld ) { - rgp[jfld] = 0.; - } - // local Fourier transformation: - for ( int jm = 0; jm <= trcFT; ++jm ) { - const double cos = std::cos( jm * lon ); - const double sin = std::sin( jm * lon ); - for ( int jfld = 0; jfld < nb_fields; ++jfld ) { - double real = cos * rlegReal[jm * nb_fields + jfld]; - double imag = sin * rlegImag[jm * nb_fields + jfld]; - rgp[jfld] += real - imag; - } - } -} - -int fourier_truncationopt( const int truncation, // truncation - const int nx, // number of longitudes - const int nxmax, // maximum nx - const int ndgl, // number of latitudes - const double lat, // latitude in radian - const bool fullgrid ) { // regular grid - int trc = truncation; - int trclin = ndgl - 1; - int trcquad = ndgl * 2 / 3 - 1; - if ( truncation >= trclin || fullgrid ) { - // linear - trc = ( nx - 1 ) / 2; - } - else if ( truncation >= trcquad ) { - // quadratic - double weight = 3 * ( trclin - truncation ) / ndgl; - double sqcos = std::pow( std::cos( lat ), 2 ); - 
- trc = ( nx - 1 ) / ( 2 + weight * sqcos ); - } - else { - // cubic - double sqcos = std::pow( std::cos( lat ), 2 ); - - trc = ( nx - 1 ) / ( 2 + sqcos ) - 1; - } - trc = std::min( truncation, trc ); - return trc; -} - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt/FourierTransformsopt.h b/src/atlas/trans/localopt/FourierTransformsopt.h deleted file mode 100644 index 0ddfdfcdf..000000000 --- a/src/atlas/trans/localopt/FourierTransformsopt.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. 
- */ - -#pragma once - -#include - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- -// Routine to compute the local Fourier transformation -// -// Author: -// Andreas Mueller *ECMWF* -// - -void invtrans_fourieropt( const size_t trcFT, - const double lon, // longitude in radians (in) - const int nb_fields, // Number of fields - const double rlegReal[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) - const double rlegImag[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) - double rgp[] ); // gridpoint - -int fourier_truncationopt( const int truncation, const int nx, const int nxmax, const int ndgl, const double lat, - const bool fullgrid ); - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc b/src/atlas/trans/localopt/LegendrePolynomialsopt.cc deleted file mode 100644 index 9c5562fa2..000000000 --- a/src/atlas/trans/localopt/LegendrePolynomialsopt.cc +++ /dev/null @@ -1,198 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor - * does it submit to any jurisdiction. 
- */ - -#include -#include - -#include "atlas/array.h" -#include "atlas/parallel/mpi/mpi.h" -#include "atlas/trans/localopt/LegendrePolynomialsopt.h" - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- - -void compute_legendre_polynomialsopt( - const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // latitudes in radians (in) - double leg_sym[], // values of associated Legendre functions, symmetric part - double leg_asym[], // values of associated Legendre functions, asymmetric part - size_t leg_start_sym[], // start indices for different zonal wave numbers, symmetric part - size_t leg_start_asym[] ) // start indices for different zonal wave numbers, asymmetric part -{ - auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; }; - array::ArrayT zfn_( trc + 1, trc + 1 ); - array::ArrayView zfn = array::make_view( zfn_ ); - std::vector legpol( legendre_size( trc ) ); - auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; }; - int iodd; - - // Compute coefficients for Taylor series in Belousov (19) and (21) - // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.) - // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1 - zfn( 0, 0 ) = 2.; - for ( int jn = 1; jn <= trc; ++jn ) { - double zfnn = zfn( 0, 0 ); - for ( int jgl = 1; jgl <= jn; ++jgl ) { - zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) ); - } - iodd = jn % 2; - zfn( jn, jn ) = zfnn; - for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) { - double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) ); // new factor numerator - double zfjd = ( jgl * ( 2. * jn - jgl + 1. 
) ); // new factor denominator - - zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd; - } - } - - // Loop over latitudes: - for ( int jlat = 0; jlat < nlats; ++jlat ) { - { - //ATLAS_TRACE( "compute Legendre polynomials" ); - // -------------------- - // 1. First two columns - // -------------------- - double lat = lats[jlat]; - double zdlx1 = ( M_PI_2 - lat ); // theta - double zdlx = std::cos( zdlx1 ); // cos(theta) - double zdlsita = std::sqrt( 1. - zdlx * zdlx ); // sin(theta) (this is how trans library does it) - - legpol[idxmn( 0, 0 )] = 1.; - double vsin[trc + 1], vcos[trc + 1]; - for ( int j = 1; j <= trc; j++ ) { - vsin[j] = std::sin( j * zdlx1 ); - } - for ( int j = 1; j <= trc; j++ ) { - vcos[j] = std::cos( j * zdlx1 ); - } - - double zdl1sita = 0.; - // if we are less than 1 meter from the pole, - if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits::epsilon() ) ) { - zdlx = 1.; - zdlsita = 0.; - } - else { - zdl1sita = 1. / zdlsita; - } - - // ordinary Legendre polynomials from series expansion - // --------------------------------------------------- - - // even N - for ( int jn = 2; jn <= trc; jn += 2 ) { - double zdlk = 0.5 * zfn( jn, 0 ); - double zdlldn = 0.0; - double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); - // represented by only even k - for ( int jk = 2; jk <= jn; jk += 2 ) { - // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn( jn, jk ) * vcos[jk]; - // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk]; - } - legpol[idxmn( 0, jn )] = zdlk; - legpol[idxmn( 1, jn )] = zdlldn; - } - - // odd N - for ( int jn = 1; jn <= trc; jn += 2 ) { - zfn( jn, 0 ) = 0.; - double zdlk = 0.; - double zdlldn = 0.0; - double zdsq = 1. / std::sqrt( jn * ( jn + 1. 
) ); - // represented by only even k - for ( int jk = 1; jk <= jn; jk += 2 ) { - // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn( jn, jk ) * vcos[jk]; - // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * vsin[jk]; - } - legpol[idxmn( 0, jn )] = zdlk; - legpol[idxmn( 1, jn )] = zdlldn; - } - - // -------------------------------------------------------------- - // 2. Diagonal (the terms 0,0 and 1,1 have already been computed) - // Belousov, equation (23) - // -------------------------------------------------------------- - - double zdls = zdl1sita * std::numeric_limits::min(); - for ( int jn = 2; jn <= trc; ++jn ) { - double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) ); - - legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq; - if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0; - } - - // --------------------------------------------- - // 3. General recurrence (Belousov, equation 17) - // --------------------------------------------- - - for ( int jn = 3; jn <= trc; ++jn ) { - for ( int jm = 2; jm < jn; ++jm ) { - double cn = - ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) ); // numerator of c in Belousov - double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of c in Belousov - double dn = - ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) ); // numerator of d in Belousov - double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of d in Belousov - double en = ( ( 2. * jn + 1. ) * ( jn - jm ) ); // numerator of e in Belousov - double ed = ( ( 2. * jn - 1. 
) * ( jn + jm ) ); // denominator of e in Belousov - - legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] - - std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx + - std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx; - } - } - } - - { - //ATLAS_TRACE( "add to global arrays" ); - - for ( int jm = 0; jm <= trc; jm++ ) { - int is1 = 0, ia1 = 0; - for ( int jn = jm; jn <= trc; jn++ ) { - if ( ( jn - jm ) % 2 == 0 ) { is1++; } - else { - ia1++; - } - } - int is2 = 0, ia2 = 0; - // the choice between the following two code lines determines whether - // total wavenumbers are summed in an ascending or descending order. - // The trans library in IFS uses descending order because it should - // be more accurate (higher wavenumbers have smaller contributions). - // This also needs to be changed when splitting the spectral data in - // TransLocalopt::invtrans_uv! - //for ( int jn = jm; jn <= trc; jn++ ) { - for ( int jn = trc; jn >= jm; jn-- ) { - if ( ( jn - jm ) % 2 == 0 ) { - int is = leg_start_sym[jm] + is1 * jlat + is2++; - leg_sym[is] = legpol[idxmn( jm, jn )]; - } - else { - int ia = leg_start_asym[jm] + ia1 * jlat + ia2++; - leg_asym[ia] = legpol[idxmn( jm, jn )]; - } - } - } - } - } -} - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt/LegendrePolynomialsopt.h b/src/atlas/trans/localopt/LegendrePolynomialsopt.h deleted file mode 100644 index 4a2f004c7..000000000 --- a/src/atlas/trans/localopt/LegendrePolynomialsopt.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
- * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#pragma once - -#include - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- -// Routine to compute the Legendre polynomials in serial according to Belousov -// (using correction by Swarztrauber) -// -// Reference: -// S.L. Belousov, Tables of normalized associated Legendre Polynomials, Pergamon -// Press (1962) -// P.N. Swarztrauber, On computing the points and weights for Gauss-Legendre -// quadrature, -// SIAM J. Sci. Comput. Vol. 24 (3) pp. 945-954 (2002) -// -// Author of Fortran version: -// Mats Hamrud, Philippe Courtier, Nils Wedi *ECMWF* -// -// Ported to C++ by: -// Andreas Mueller *ECMWF* -// -void compute_legendre_polynomialsopt( - const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // latitudes in radians (in) - double legendre_sym[], // values of associated Legendre functions, symmetric part - double legendre_asym[], // values of associated Legendre functions, asymmetric part - size_t leg_start_sym[], // start indices for different zonal wave numbers, symmetric part - size_t leg_start_asym[] ); // start indices for different zonal wave numbers, asymmetric part - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt/LegendreTransformsopt.cc b/src/atlas/trans/localopt/LegendreTransformsopt.cc deleted file mode 100644 index 29dd5dad4..000000000 --- a/src/atlas/trans/localopt/LegendreTransformsopt.cc +++ /dev/null @@ -1,62 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. 
- * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#include - -#include "atlas/trans/localopt/LegendreTransformsopt.h" - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- - -void invtrans_legendreopt( - const size_t trc, // truncation (in) - const size_t trcFT, // truncation for Fourier transformation (in) - const size_t trcLP, // truncation of Legendre polynomials data legpol. Needs to be >= trc (in) - const double legpol[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) - const int nb_fields, // number of fields - const double spec[], // spectral data, size (trc+1)*trc (in) - double leg_real[], // values of associated Legendre functions, size (trc+1)*trc/2 (out) - double leg_imag[] ) // values of associated Legendre functions, size (trc+1)*trc/2 (out) -{ - // Legendre transformation: - int k = 0, klp = 0; - for ( int jm = 0; jm <= trcFT; ++jm ) { - for ( int jfld = 0; jfld < nb_fields; ++jfld ) { - leg_real[jm * nb_fields + jfld] = 0.; - leg_imag[jm * nb_fields + jfld] = 0.; - } - for ( int jn = jm; jn <= trcLP; ++jn, ++klp ) { - if ( jn <= trc ) { - for ( int jfld = 0; jfld < nb_fields; ++jfld ) { - // not completely sure where this factor 2 comes from. One possible - // explanation: - // normalization of trigonometric functions in the spherical harmonics - // integral over square of trig function is 1 for m=0 and 0.5 (?) for - // m>0 - leg_real[jm * nb_fields + jfld] += 2. * spec[( 2 * k ) * nb_fields + jfld] * legpol[klp]; - leg_imag[jm * nb_fields + jfld] += 2. 
* spec[( 2 * k + 1 ) * nb_fields + jfld] * legpol[klp]; - } - ++k; - } - } - } - // Undo factor 2 for (jm == 0) - for ( int jfld = 0; jfld < nb_fields; ++jfld ) { - leg_real[jfld] /= 2.; - leg_imag[jfld] /= 2.; - } -} - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt/LegendreTransformsopt.h b/src/atlas/trans/localopt/LegendreTransformsopt.h deleted file mode 100644 index 55dcf0212..000000000 --- a/src/atlas/trans/localopt/LegendreTransformsopt.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#pragma once - -#include - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- -// Routine to compute the Legendre transformation -// -// Author: -// Andreas Mueller *ECMWF* -// -void invtrans_legendreopt( const size_t trc, // truncation (in) - const size_t trcFT, // truncation for Fourier transformation (in) - const size_t trcLP, // truncation of Legendre polynomials data legpol. 
Needs to be >= trc (in) - const double legpol[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) - const int nb_fields, // number of fields - const double spec[], // spectral data, size (trc+1)*trc (in) - double leg_real[], // values of associated Legendre functions, size (trc+1)*trc/2 (out) - double leg_imag[] ); // values of associated Legendre functions, size (trc+1)*trc/2 (out) - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt/TransLocalopt.cc b/src/atlas/trans/localopt/TransLocalopt.cc deleted file mode 100644 index 9b139fc4f..000000000 --- a/src/atlas/trans/localopt/TransLocalopt.cc +++ /dev/null @@ -1,704 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. 
- */ - -#include "atlas/trans/localopt/TransLocalopt.h" -#include -#include "atlas/array.h" -#include "atlas/option.h" -#include "atlas/parallel/mpi/mpi.h" -#include "atlas/runtime/ErrorHandling.h" -#include "atlas/runtime/Log.h" -#include "atlas/trans/VorDivToUV.h" -#include "atlas/trans/local_noopt/LegendrePolynomials.h" -#include "atlas/trans/localopt/FourierTransformsopt.h" -#include "atlas/trans/localopt/LegendrePolynomialsopt.h" -#include "atlas/trans/localopt/LegendreTransformsopt.h" -#include "atlas/util/Constants.h" -#include "eckit/linalg/LinearAlgebra.h" -#include "eckit/linalg/Matrix.h" -#include "eckit/eckit_config.h" -#ifdef ECKIT_HAVE_MKL -#include "mkl.h" -#endif - -namespace atlas { -namespace trans { - -namespace { -static TransBuilderGrid builder( "localopt" ); -} - -// -------------------------------------------------------------------------------------------------------------------- -// Helper functions -// -------------------------------------------------------------------------------------------------------------------- -namespace { // anonymous - -size_t legendre_size( const size_t truncation ) { - return ( truncation + 2 ) * ( truncation + 1 ) / 2; -} - -int nlats_northernHemisphere( const int nlats ) { - return ceil( nlats / 2. 
); - // using ceil here should make it possible to have odd number of latitudes (with the centre latitude being the equator) -} - -int num_n( const int truncation, const int m, const bool symmetric ) { - int len = 0; - if ( symmetric ) { len = ( truncation - m + 2 ) / 2; } - else { - len = ( truncation - m + 1 ) / 2; - } - return len; -} - -void alloc_aligned( double*& ptr, size_t n ) { -#ifdef ECKIT_HAVE_MKL - int al = 64; - ptr = (double*)mkl_malloc( sizeof( double ) * n, al ); -#else - posix_memalign( (void**)&ptr, sizeof( double ) * 64, sizeof( double ) * n ); - //ptr = (double*)malloc( sizeof( double ) * n ); - //ptr = new double[n]; -#endif -} - -void free_aligned( double*& ptr ) { -#ifdef ECKIT_HAVE_MKL - mkl_free( ptr ); -#else - free( ptr ); -#endif -} - -int add_padding( int n ) { - return std::ceil( n / 8. ) * 8; -} -} // namespace - -// -------------------------------------------------------------------------------------------------------------------- -// Class TransLocalopt -// -------------------------------------------------------------------------------------------------------------------- - -TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const long truncation, - const eckit::Configuration& config ) : - grid_( grid ), - truncation_( truncation ), - precompute_( config.getBool( "precompute", true ) ) { - ATLAS_TRACE( "Precompute legendre opt" ); -#ifdef ECKIT_HAVE_MKL - eckit::linalg::LinearAlgebra::backend( "mkl" ); // might want to choose backend with this command -#else - eckit::linalg::LinearAlgebra::backend( "generic" ); // might want to choose backend with this command -#endif - double fft_threshold = 0.; // 0.05; // fraction of latitudes of the full grid up to which FFT is used. - // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine - // on which this code is running! 
- int nlats = 0; - int nlons = 0; - int neqtr = 0; - useFFT_ = true; - dgemmMethod1_ = true; - nlatsNH_ = 0; - nlatsSH_ = 0; - nlatsLeg_ = 0; - if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { - grid::StructuredGrid g( grid_ ); - nlats = g.ny(); - nlons = g.nxmax(); - for ( size_t j = 0; j < nlats; ++j ) { - // assumptions: latitudes in g.y(j) are monotone and decreasing - // no assumption on whether we have 0, 1 or 2 latitudes at the equator - double lat = g.y( j ); - if ( lat > 0. ) { nlatsNH_++; } - if ( lat == 0. ) { neqtr++; } - if ( lat < 0. ) { nlatsSH_++; } - } - if ( neqtr > 0 ) { - nlatsNH_++; - nlatsSH_++; - } - if ( nlatsNH_ >= nlatsSH_ ) { nlatsLeg_ = nlatsNH_; } - else { - nlatsLeg_ = nlatsSH_; - } - Grid g_global( grid.name() ); - grid::StructuredGrid gs_global( g_global ); - nlonsGlobal_ = gs_global.nxmax(); - jlonMin_ = 0; - double lonmin = fmod( g.x( 0, 0 ), 360 ); - if ( lonmin < 0. ) { lonmin += 360.; } - if ( nlons < fft_threshold * nlonsGlobal_ ) { useFFT_ = false; } - else { - if ( nlons < nlonsGlobal_ ) { - // need to use FFT with cropped grid - for ( size_t j = 0; j < nlonsGlobal_; ++j ) { - if ( gs_global.x( j, 0 ) == lonmin ) { jlonMin_ = j; } - } - } - } - //Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << std::endl; - } - else { - // unstructured grid - useFFT_ = false; - nlats = grid_.size(); - nlons = grid_.size(); - nlatsNH_ = nlats; - nlatsLeg_ = nlats; - } - std::vector lats( nlatsLeg_ ); - std::vector lons( nlons ); - if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { - grid::StructuredGrid g( grid_ ); - if ( nlatsNH_ >= nlatsSH_ ) { - for ( size_t j = 0; j < nlatsLeg_; ++j ) { - lats[j] = g.y( j ) * util::Constants::degreesToRadians(); - } - } - else { - for ( size_t j = nlats - 1, idx = 0; idx < nlatsLeg_; --j, ++idx ) { - lats[idx] = -g.y( j ) * util::Constants::degreesToRadians(); - } - } - for ( size_t j = 0; j < nlons; ++j ) { - lons[j] = g.x( j, 0 ) * 
util::Constants::degreesToRadians(); - } - } - else { - int j( 0 ); - for ( PointXY p : grid_.xy() ) { - lats[j++] = p.y() * util::Constants::degreesToRadians(); - lons[j++] = p.x() * util::Constants::degreesToRadians(); - } - } - // precomputations for Legendre polynomials: - { - ATLAS_TRACE( "opt precomp Legendre" ); - int size_sym = 0; - int size_asym = 0; - legendre_sym_begin_.resize( truncation_ + 3 ); - legendre_asym_begin_.resize( truncation_ + 3 ); - legendre_sym_begin_[0] = 0; - legendre_asym_begin_[0] = 0; - for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { - size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ ); - size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ ); - legendre_sym_begin_[jm + 1] = size_sym; - legendre_asym_begin_[jm + 1] = size_asym; - } - alloc_aligned( legendre_sym_, size_sym ); - alloc_aligned( legendre_asym_, size_asym ); - FILE* file_leg; - file_leg = fopen( "legendre.bin", "r" ); - if ( file_leg ) { - fread( legendre_sym_, sizeof( double ), size_sym, file_leg ); - fread( legendre_asym_, sizeof( double ), size_asym, file_leg ); - fclose( file_leg ); - } - else { - compute_legendre_polynomialsopt( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_, - legendre_sym_begin_.data(), legendre_asym_begin_.data() ); - file_leg = fopen( "legendre.bin", "wb" ); - fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg ); - fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg ); - fclose( file_leg ); - } - } - - // precomputations for Fourier transformations: - if ( useFFT_ ) { -#if ATLAS_HAVE_FFTW - { - ATLAS_TRACE( "opt precomp FFTW" ); - int num_complex = ( nlonsGlobal_ / 2 ) + 1; - fft_in_ = fftw_alloc_complex( nlats * num_complex ); - fft_out_ = fftw_alloc_real( nlats * nlonsGlobal_ ); - plan_ = fftw_plan_many_dft_c2r( 1, &nlonsGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, 1, - nlonsGlobal_, FFTW_ESTIMATE ); - } - // other FFT implementations 
should be added with #elif statements -#else - useFFT_ = false; // no FFT implemented => default to dgemm -#endif - } - if ( !useFFT_ ) { - alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons ); - if ( dgemmMethod1_ ) { - { - ATLAS_TRACE( "opt precomp Fourier" ); - int idx = 0; - for ( int jlon = 0; jlon < nlons; jlon++ ) { - double factor = 1.; - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - if ( jm > 0 ) { factor = 2.; } - fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part - fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part - } - } - } - } - else { - { - ATLAS_TRACE( "opt precomp Fourier tp" ); - int idx = 0; - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - double factor = 1.; - if ( jm > 0 ) { factor = 2.; } - for ( int jlon = 0; jlon < nlons; jlon++ ) { - fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part - } - for ( int jlon = 0; jlon < nlons; jlon++ ) { - fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part - } - } - } - } - } -} // namespace trans - -// -------------------------------------------------------------------------------------------------------------------- - -TransLocalopt::TransLocalopt( const Grid& grid, const long truncation, const eckit::Configuration& config ) : - TransLocalopt( Cache(), grid, truncation, config ) {} - -TransLocalopt::TransLocalopt( const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) : - TransLocalopt( Cache(), grid, truncation, config ) {} - -TransLocalopt::TransLocalopt( const Cache& cache, const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) : - TransLocalopt( cache, grid, truncation, config ) {} - -// -------------------------------------------------------------------------------------------------------------------- - -TransLocalopt::~TransLocalopt() { - free_aligned( legendre_sym_ ); - free_aligned( legendre_asym_ ); - if ( useFFT_ ) { -#if 
ATLAS_HAVE_FFTW - fftw_destroy_plan( plan_ ); - fftw_free( fft_in_ ); - fftw_free( fft_out_ ); -#endif - } - else { - free_aligned( fourier_ ); - } -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt::invtrans( const Field& spfield, Field& gpfield, const eckit::Configuration& config ) const { - NOTIMP; -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt::invtrans( const FieldSet& spfields, FieldSet& gpfields, const eckit::Configuration& config ) const { - NOTIMP; -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt::invtrans_grad( const Field& spfield, Field& gradfield, const eckit::Configuration& config ) const { - NOTIMP; -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields, - const eckit::Configuration& config ) const { - NOTIMP; -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind, - const eckit::Configuration& config ) const { - NOTIMP; -} - -void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields, config ); -} - -void gp_transposeopt( const int nb_size, const int nb_fields, const double gp_tmp[], double gp_fields[] ) { - for ( int jgp = 0; jgp < nb_size; jgp++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - gp_fields[jfld * nb_size + jgp] = gp_tmp[jgp * nb_fields + jfld]; - 
} - } -} - -//----------------------------------------------------------------------------- -// Routine to compute the spectral transform by using a localopt Fourier transformation -// for a grid (same latitude for all longitudes, allows to compute Legendre functions -// once for all longitudes). U and v components are divided by cos(latitude) for -// nb_vordiv_fields > 0. -// -// Legendre polynomials are computed up to truncation_+1 to be accurate for vorticity and -// divergence computation. The parameter truncation is the truncation used in storing the -// spectral data scalar_spectra and can be different from truncation_. If truncation is -// larger than truncation_+1 the transform will behave as if the spectral data was truncated -// to truncation_+1. -// -// Author: -// Andreas Mueller *ECMWF* -// -void TransLocalopt::invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, - const double scalar_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - if ( nb_scalar_fields > 0 ) { - int nb_fields = nb_scalar_fields; - - // Transform - if ( grid::StructuredGrid g = grid_ ) { - ATLAS_TRACE( "invtrans_uv structured opt" ); - int nlats = g.ny(); - int nlons = g.nxmax(); - auto posMethod = [&]( int jfld, int imag, int jlat, int jm ) { - if ( useFFT_ || !dgemmMethod1_ ) { - return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); - } - else { - return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) ); - }; - }; - int size_fourier_max = nb_fields * 2 * nlats; - double* scl_fourier; - alloc_aligned( scl_fourier, size_fourier_max * ( truncation_ + 1 ) ); - - // Legendre transform: - { - ATLAS_TRACE( "opt Legendre dgemm" ); - for ( int jm = 0; jm <= truncation_; jm++ ) { - int size_sym = num_n( truncation_ + 1, jm, true ); - int size_asym = num_n( truncation_ + 1, jm, false ); - int n_imag = 2; - if ( jm == 0 ) { n_imag = 1; } - int size_fourier = nb_fields * n_imag * nlatsLeg_; - auto 
posFourier = [&]( int jfld, int imag, int jlat, int jm, int nlatsH ) { - return jfld + nb_fields * ( imag + n_imag * ( nlatsLeg_ - nlatsH + jlat ) ); - }; - double* scalar_sym; - double* scalar_asym; - double* scl_fourier_sym; - double* scl_fourier_asym; - alloc_aligned( scalar_sym, n_imag * nb_fields * size_sym ); - alloc_aligned( scalar_asym, n_imag * nb_fields * size_asym ); - alloc_aligned( scl_fourier_sym, size_fourier ); - alloc_aligned( scl_fourier_asym, size_fourier ); - { - //ATLAS_TRACE( "opt Legendre split" ); - int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2; - // the choice between the following two code lines determines whether - // total wavenumbers are summed in an ascending or descending order. - // The trans library in IFS uses descending order because it should - // be more accurate (higher wavenumbers have smaller contributions). - // This also needs to be changed when splitting the spectral data in - // compute_legendre_polynomialsopt! 
- //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { - for ( int jn = truncation_ + 1; jn >= jm; jn-- ) { - for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) ); - if ( jn <= truncation && jm < truncation ) { - if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } - else { - scalar_asym[ia++] = scalar_spectra[idx + ioff]; - } - } - else { - if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = 0.; } - else { - scalar_asym[ia++] = 0.; - } - } - } - } - } - ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym ); - } - { - eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym ); - eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsLeg_ ); - eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLeg_ ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - if ( size_asym > 0 ) { - eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym ); - eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsLeg_ ); - eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - { - //ATLAS_TRACE( "opt merge spheres" ); - // northern hemisphere: - for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) { - for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ ); - scl_fourier[posMethod( jfld, imag, jlat, jm )] = - scl_fourier_sym[idx] + scl_fourier_asym[idx]; - } - } - } - // southern hemisphere: - for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) { - for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int idx = posFourier( jfld, imag, jlat, jm, nlatsSH_ ); - int jslat = nlats - jlat - 1; - scl_fourier[posMethod( jfld, imag, 
jslat, jm )] = - scl_fourier_sym[idx] - scl_fourier_asym[idx]; - } - } - } - } - free_aligned( scalar_sym ); - free_aligned( scalar_asym ); - free_aligned( scl_fourier_sym ); - free_aligned( scl_fourier_asym ); - } - } - // Fourier transformation: - if ( useFFT_ ) { -#if ATLAS_HAVE_FFTW - { - int num_complex = ( nlonsGlobal_ / 2 ) + 1; - { - ATLAS_TRACE( "opt FFTW" ); - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int idx = 0; - for ( int jlat = 0; jlat < nlats; jlat++ ) { - fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0 )]; - for ( int jm = 1; jm < num_complex; jm++, idx++ ) { - for ( int imag = 0; imag < 2; imag++ ) { - if ( jm <= truncation_ ) { - fft_in_[idx][imag] = scl_fourier[posMethod( jfld, imag, jlat, jm )]; - } - else { - fft_in_[idx][imag] = 0.; - } - } - } - } - fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ ); - for ( int jlat = 0; jlat < nlats; jlat++ ) { - for ( int jlon = 0; jlon < nlons; jlon++ ) { - int j = jlon + jlonMin_; - if ( j >= nlonsGlobal_ ) { j -= nlonsGlobal_; } - gp_fields[jlon + nlons * ( jlat + nlats * jfld )] = - fft_out_[j + nlonsGlobal_ * jlat]; - } - } - } - } - } -#endif - } - else { - if ( dgemmMethod1_ ) { - // dgemm-method 1 - // should be faster for small domains or large truncation - double* gp_opt; - alloc_aligned( gp_opt, nb_fields * grid_.size() ); - { - ATLAS_TRACE( "opt Fourier dgemm method 1" ); - eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlons ); - eckit::linalg::Matrix C( gp_opt, nb_fields * nlats, nlons ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - - // Transposition in grid point space: - { - ATLAS_TRACE( "opt transposition in gp-space" ); - int idx = 0; - for ( int jlon = 0; jlon < nlons; jlon++ ) { - for ( int jlat = 0; jlat < nlats; jlat++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int pos_tp = jlon + nlons * ( jlat + nlats * ( jfld ) ); - //int pos = 
jfld + nb_fields * ( jlat + nlats * ( jlon ) ); - gp_fields[pos_tp] = gp_opt[idx++]; // = gp_opt[pos] - } - } - } - } - free_aligned( gp_opt ); - } - else { - // dgemm-method 2 - { - ATLAS_TRACE( "opt Fourier dgemm method 2" ); - eckit::linalg::Matrix A( fourier_, nlons, ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats ); - eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - } - } // namespace trans - // Computing u,v from U,V: - { - if ( nb_vordiv_fields > 0 ) { - ATLAS_TRACE( "opt u,v from U,V" ); - std::vector coslats( nlats ); - for ( size_t j = 0; j < nlats; ++j ) { - coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); - } - int idx = 0; - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { - gp_fields[idx] /= coslats[jlat]; - idx++; - } - } - } - } - } - free_aligned( scl_fourier ); - } // namespace atlas - else { - ATLAS_TRACE( "invtrans_uv unstructured opt" ); - int idx = 0; - for ( PointXY p : grid_.xy() ) { - double lon = p.x() * util::Constants::degreesToRadians(); - double lat = p.y() * util::Constants::degreesToRadians(); - double trcFT = truncation; - - // Legendre transform: - //invtrans_legendreopt( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, scalar_spectra, - // legReal.data(), legImag.data() ); - - // Fourier transform: - //invtrans_fourieropt( trcFT, lon, nb_fields, legReal.data(), legImag.data(), - // gp_tmp.data() + ( nb_fields * idx ) ); - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - //gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); - } - ++idx; - } - } - } // namespace trans -} // namespace atlas - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt::invtrans( 
const int nb_vordiv_fields, const double vorticity_spectra[], - const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); -} - -void extend_truncationopt( const int old_truncation, const int nb_fields, const double old_spectra[], - double new_spectra[] ) { - int k = 0, k_old = 0; - for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber - for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber - for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { // field - if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; } - else { - new_spectra[k++] = old_spectra[k_old++]; - } - } - } - } - } -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, - const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - ATLAS_TRACE( "TransLocalopt::invtrans" ); - int nb_gp = grid_.size(); - int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; - if ( nb_vordiv_fields > 0 ) { - std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector U_ext( nb_vordiv_spec_ext, 0. ); - std::vector V_ext( nb_vordiv_spec_ext, 0. 
); - - { - ATLAS_TRACE( "opt extend vordiv" ); - // increase truncation in vorticity_spectra and divergence_spectra: - extend_truncationopt( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() ); - extend_truncationopt( truncation_, nb_vordiv_fields, divergence_spectra, - divergence_spectra_extended.data() ); - } - - { - ATLAS_TRACE( "vordiv to UV opt" ); - // call vd2uv to compute u and v in spectral space - trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) ); - vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), - divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); - } - - // perform spectral transform to compute all fields in grid point space - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), - gp_fields + nb_gp * nb_vordiv_fields, config ); - } - if ( nb_scalar_fields > 0 ) { - invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields, - config ); - } -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt::dirtrans( const FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. 
-} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, - const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], - const eckit::Configuration& ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], - double divergence_spectra[], const eckit::Configuration& ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt/TransLocalopt.h b/src/atlas/trans/localopt/TransLocalopt.h deleted file mode 100644 index 19bbab862..000000000 --- a/src/atlas/trans/localopt/TransLocalopt.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. 
- */ - -#pragma once - -#include - -#include "atlas/array.h" -#include "atlas/grid/Grid.h" -#include "atlas/trans/Trans.h" -#if ATLAS_HAVE_FFTW -#include -#endif - -//----------------------------------------------------------------------------- -// Forward declarations - -namespace atlas { -class Field; -class FieldSet; -} // namespace atlas - -//----------------------------------------------------------------------------- - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- - -/// @class TransLocalopt -/// -/// Localopt spherical harmonics transformations to any grid -/// Optimisations are present for structured grids -/// For global grids, please consider using TransIFS instead. -/// -/// @todo: -/// - support multiple fields -/// - support atlas::Field and atlas::FieldSet based on function spaces -/// -/// @note: Direct transforms are not implemented and cannot be unless -/// the grid is global. There are no plans to support this at the moment. 
-class TransLocalopt : public trans::TransImpl { -public: - TransLocalopt( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() ); - TransLocalopt( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); - TransLocalopt( const Cache&, const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() ); - TransLocalopt( const Cache&, const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); - - virtual ~TransLocalopt(); - - virtual int truncation() const override { return truncation_; } - virtual size_t spectralCoefficients() const override { return ( truncation_ + 1 ) * ( truncation_ + 2 ); } - - virtual const Grid& grid() const override { return grid_; } - - virtual void invtrans( const Field& spfield, Field& gpfield, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void invtrans( const FieldSet& spfields, FieldSet& gpfields, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void invtrans_grad( const Field& spfield, Field& gradfield, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void invtrans_grad( const FieldSet& spfields, FieldSet& gradfields, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind, - const eckit::Configuration& = util::NoConfig() ) const override; - - // -- IFS style API -- - - virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, - const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[], - const eckit::Configuration& = util::NoConfig() ) const override; - - 
virtual void invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], - const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& = util::NoConfig() ) const override; - - // -- NOT SUPPORTED -- // - - virtual void dirtrans( const Field& gpfield, Field& spfield, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void dirtrans( const FieldSet& gpfields, FieldSet& spfields, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], - double divergence_spectra[], const eckit::Configuration& = util::NoConfig() ) const override; - -private: - void invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, - const double scalar_spectra[], double gp_fields[], - const eckit::Configuration& = util::NoConfig() ) const; - -private: - Grid grid_; - bool useFFT_; - bool dgemmMethod1_; - int truncation_; - int nlatsNH_; - int nlatsSH_; - int nlatsLeg_; - int jlonMin_; - int nlonsGlobal_; - bool precompute_; - double* legendre_sym_; - double* legendre_asym_; - double* fourier_; - double* fouriertp_; - std::vector legendre_begin_; - std::vector legendre_sym_begin_; - std::vector legendre_asym_begin_; -#if ATLAS_HAVE_FFTW - fftw_complex* fft_in_; - double* fft_out_; - fftw_plan plan_; -#endif -}; - -//----------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt/VorDivToUVLocalopt.cc b/src/atlas/trans/localopt/VorDivToUVLocalopt.cc deleted file mode 100644 
index 032aa6073..000000000 --- a/src/atlas/trans/localopt/VorDivToUVLocalopt.cc +++ /dev/null @@ -1,184 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#include "atlas/trans/localopt/VorDivToUVLocalopt.h" -#include // for std::sqrt -#include "atlas/functionspace/Spectral.h" -#include "atlas/runtime/Log.h" -#include "atlas/util/Earth.h" - -using atlas::FunctionSpace; -using atlas::functionspace::Spectral; - -namespace atlas { -namespace trans { - -namespace { -static VorDivToUVBuilder builder( "localopt" ); -} - -// -------------------------------------------------------------------------------------------------------------------- -// Routine to copy spectral data into internal storage form of IFS trans -// Ported to C++ by: Andreas Mueller *ECMWF* -void prfi1bopt( const int truncation, - const int km, // zonal wavenumber - const int nb_fields, // number of fields - const double rspec[], // spectral data - double pia[] ) // spectral components in data layout of trans library -{ - int ilcm = truncation + 1 - km, ioff = ( 2 * truncation - km + 3 ) * km, - nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2; - for ( int j = 1; j <= ilcm; j++ ) { - int inm = ioff + ( ilcm - j ) * 2; - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int ir = 2 * jfld, ii = ir + 1; - pia[ir * nlei1 + j + 1] = rspec[inm * nb_fields + jfld]; - pia[ii * nlei1 + j + 1] = rspec[( inm + 1 ) * nb_fields + jfld]; - } - } - - for ( int jfld = 0; jfld < 2 * nb_fields; jfld++ ) { - pia[jfld * nlei1] = 0.; - pia[jfld * nlei1 + 1] = 0.; - pia[jfld * nlei1 + ilcm + 2] = 0.; - } -} - -// 
-------------------------------------------------------------------------------------------------------------------- -// Routine to compute spectral velocities (*cos(latitude)) out of spectral -// vorticity and divergence -// Reference: -// ECMWF Research Department documentation of the IFS -// Temperton, 1991, MWR 119 p1303 -// Ported to C++ by: Andreas Mueller *ECMWF* -void vd2uvopt( const int truncation, // truncation - const int km, // zonal wavenumber - const int nb_vordiv_fields, // number of vorticity and divergence fields - const double vorticity_spectra[], // spectral data of vorticity - const double divergence_spectra[], // spectral data of divergence - double U[], // spectral data of U - double V[], // spectral data of V - const eckit::Configuration& config ) { - int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2; - - // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991] - std::vector repsnm( ( truncation + 1 ) * ( truncation + 6 ) / 2 ); - int idx = 0; - for ( int jm = 0; jm <= truncation; ++jm ) { - for ( int jn = jm; jn <= truncation + 2; ++jn, ++idx ) { - repsnm[idx] = std::sqrt( ( jn * jn - jm * jm ) / ( 4. * jn * jn - 1. ) ); - } - } - repsnm[0] = 0.; - - // rlapin: constant factor from eq.(2.2) and (2.3) in [Temperton 1991] - double ra = util::Earth::radius(); - std::vector rlapin( truncation + 3 ); - for ( int jn = 1; jn <= truncation + 2; ++jn ) { - rlapin[jn] = -ra * ra / ( jn * ( jn + 1. 
) ); - } - rlapin[0] = 0.; - - // inverse the order of repsnm and rlapin for improved accuracy - std::vector zepsnm( truncation + 6 ); - std::vector zlapin( truncation + 6 ); - std::vector zn( truncation + 6 ); - for ( int jn = km - 1; jn <= truncation + 2; ++jn ) { - int ij = truncation + 3 - jn; - if ( jn >= 0 ) { - zlapin[ij] = rlapin[jn]; - if ( jn < km ) { zepsnm[ij] = 0.; } - else { - zepsnm[ij] = repsnm[jn + ( 2 * truncation - km + 5 ) * km / 2]; - } - } - else { - zlapin[ij] = 0.; - zepsnm[ij] = 0.; - } - zn[ij] = jn; - } - zn[0] = truncation + 3; - - // copy spectral data into internal trans storage: - std::vector rvor( 2 * nb_vordiv_fields * nlei1 ); - std::vector rdiv( 2 * nb_vordiv_fields * nlei1 ); - std::vector ru( 2 * nb_vordiv_fields * nlei1 ); - std::vector rv( 2 * nb_vordiv_fields * nlei1 ); - prfi1bopt( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() ); - prfi1bopt( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() ); - - // compute eq.(2.12) and (2.13) in [Temperton 1991]: - if ( km == 0 ) { - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - int ir = 2 * jfld * nlei1 - 1; - for ( int ji = 2; ji < truncation + 4 - km; ++ji ) { - double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1]; - double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1]; - ru[ir + ji] = +psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1]; - rv[ir + ji] = -psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1]; - } - } - } - else { - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - int ir = 2 * jfld * nlei1 - 1, ii = ir + nlei1; - for ( int ji = 2; ji < truncation + 4 - km; ++ji ) { - double chiIm = km * zlapin[ji]; - double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1]; - double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1]; - ru[ir + ji] = -chiIm * rdiv[ii + ji] + psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1]; - ru[ii + ji] = +chiIm * rdiv[ir + ji] + psiM1 * rvor[ii + ji + 1] - psiP1 * rvor[ii + ji - 
1]; - rv[ir + ji] = -chiIm * rvor[ii + ji] - psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1]; - rv[ii + ji] = +chiIm * rvor[ir + ji] - psiM1 * rdiv[ii + ji + 1] + psiP1 * rdiv[ii + ji - 1]; - } - } - } - - // copy data from internal storage back to external spectral data: - int ilcm = truncation - km; - int ioff = ( 2 * truncation - km + 3 ) * km; - // ioff: start index of zonal wavenumber km in spectral data - double za_r = 1. / util::Earth::radius(); - for ( int j = 0; j <= ilcm; ++j ) { - // ilcm-j = total wavenumber - int inm = ioff + ( ilcm - j ) * 2; - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - int ir = 2 * jfld * nlei1, ii = ir + nlei1; - int idx = inm * nb_vordiv_fields + jfld; - // real part: - U[idx] = ru[ir + j + 2] * za_r; - V[idx] = rv[ir + j + 2] * za_r; - idx += nb_vordiv_fields; - // imaginary part: - U[idx] = ru[ii + j + 2] * za_r; - V[idx] = rv[ii + j + 2] * za_r; - } - } -} - -void VorDivToUVLocalopt::execute( const int nb_coeff, const int nb_fields, const double vorticity[], - const double divergence[], double U[], double V[], - const eckit::Configuration& config ) const { - for ( int jm = 0; jm <= truncation_; ++jm ) { - vd2uvopt( truncation_, jm, nb_fields, vorticity, divergence, U, V, config ); - } -} - -VorDivToUVLocalopt::VorDivToUVLocalopt( const int truncation, const eckit::Configuration& config ) : - truncation_( truncation ) {} - -VorDivToUVLocalopt::VorDivToUVLocalopt( const FunctionSpace& fs, const eckit::Configuration& config ) : - truncation_( Spectral( fs ).truncation() ) {} - -VorDivToUVLocalopt::~VorDivToUVLocalopt() {} - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt/VorDivToUVLocalopt.h b/src/atlas/trans/localopt/VorDivToUVLocalopt.h deleted file mode 100644 index 4bb7eda88..000000000 --- a/src/atlas/trans/localopt/VorDivToUVLocalopt.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. 
- * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#pragma once - -#include "atlas/trans/VorDivToUV.h" - -//----------------------------------------------------------------------------- -// Forward declarations - -namespace atlas { -class FunctionSpace; -} - -//----------------------------------------------------------------------------- - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- - -class VorDivToUVLocalopt : public trans::VorDivToUVImpl { -public: - VorDivToUVLocalopt( const FunctionSpace&, const eckit::Configuration& = util::NoConfig() ); - VorDivToUVLocalopt( int truncation, const eckit::Configuration& = util::NoConfig() ); - - virtual ~VorDivToUVLocalopt(); - - virtual int truncation() const override { return truncation_; } - - // pure virtual interface - - // -- IFS style API -- - // These fields have special interpretation required. You need to know what - // you're doing. - // See IFS trans library. - - /*! 
- * @brief Compute spectral wind (U/V) from spectral vorticity/divergence - * - * U = u*cos(lat) - * V = v*cos(lat) - * - * @param nb_fields [in] Number of fields - * @param vorticity [in] Spectral vorticity - * @param divergence [in] Spectral divergence - * @param U [out] Spectral wind U = u*cos(lat) - * @param V [out] Spectral wind V = v*cos(lat) - */ - virtual void execute( const int nb_coeff, const int nb_fields, const double vorticity[], const double divergence[], - double U[], double V[], const eckit::Configuration& = util::NoConfig() ) const override; - -private: - int truncation_; -}; - -// ------------------------------------------------------------------ - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt2/FourierTransformsopt2.cc b/src/atlas/trans/localopt2/FourierTransformsopt2.cc deleted file mode 100644 index 71d3202dd..000000000 --- a/src/atlas/trans/localopt2/FourierTransformsopt2.cc +++ /dev/null @@ -1,78 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor - * does it submit to any jurisdiction. 
- */ - -#include -#include -#include - -#include "atlas/trans/localopt2/FourierTransformsopt2.h" - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- - -void invtrans_fourieropt2( const size_t trcFT, - const double lon, // longitude in radians (in) - const int nb_fields, // Number of fields - const double rlegReal[], // associated Legendre functions, size (trc+1)*trc/2 (in) - const double rlegImag[], // associated Legendre functions, size (trc+1)*trc/2 (in) - double rgp[] ) // gridpoint -{ - for ( int jfld = 0; jfld < nb_fields; ++jfld ) { - rgp[jfld] = 0.; - } - // local Fourier transformation: - for ( int jm = 0; jm <= trcFT; ++jm ) { - const double cos = std::cos( jm * lon ); - const double sin = std::sin( jm * lon ); - for ( int jfld = 0; jfld < nb_fields; ++jfld ) { - double real = cos * rlegReal[jm * nb_fields + jfld]; - double imag = sin * rlegImag[jm * nb_fields + jfld]; - rgp[jfld] += real - imag; - } - } -} - -int fourier_truncationopt2( const int truncation, // truncation - const int nx, // number of longitudes - const int nxmax, // maximum nx - const int ndgl, // number of latitudes - const double lat, // latitude in radian - const bool fullgrid ) { // regular grid - int trc = truncation; - int trclin = ndgl - 1; - int trcquad = ndgl * 2 / 3 - 1; - if ( truncation >= trclin || fullgrid ) { - // linear - trc = ( nx - 1 ) / 2; - } - else if ( truncation >= trcquad ) { - // quadratic - double weight = 3 * ( trclin - truncation ) / ndgl; - double sqcos = std::pow( std::cos( lat ), 2 ); - - trc = ( nx - 1 ) / ( 2 + weight * sqcos ); - } - else { - // cubic - double sqcos = std::pow( std::cos( lat ), 2 ); - - trc = ( nx - 1 ) / ( 2 + sqcos ) - 1; - } - trc = std::min( truncation, trc ); - return trc; -} - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git 
a/src/atlas/trans/localopt2/FourierTransformsopt2.h b/src/atlas/trans/localopt2/FourierTransformsopt2.h deleted file mode 100644 index 4281a92cc..000000000 --- a/src/atlas/trans/localopt2/FourierTransformsopt2.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#pragma once - -#include - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- -// Routine to compute the local Fourier transformation -// -// Author: -// Andreas Mueller *ECMWF* -// - -void invtrans_fourieropt2( const size_t trcFT, - const double lon, // longitude in radians (in) - const int nb_fields, // Number of fields - const double rlegReal[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) - const double rlegImag[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) - double rgp[] ); // gridpoint - -int fourier_truncationopt2( const int truncation, const int nx, const int nxmax, const int ndgl, const double lat, - const bool fullgrid ); - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc deleted file mode 100644 index c3e3fe2f6..000000000 --- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.cc +++ /dev/null @@ -1,237 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. 
- * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor - * does it submit to any jurisdiction. - */ - -#include -#include - -#include "atlas/array.h" -#include "atlas/parallel/mpi/mpi.h" -#include "atlas/trans/localopt2/LegendrePolynomialsopt2.h" - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- - -void compute_zfnopt2( const size_t trc, double zfn[] ) { - auto idxzfn = [&]( int jn, int jk ) { return jk + ( trc + 1 ) * jn; }; - int iodd = 0; - // Compute coefficients for Taylor series in Belousov (19) and (21) - // Belousov, Swarztrauber use zfn[0]=std::sqrt(2.) - // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1 - zfn[idxzfn( 0, 0 )] = 2.; - for ( int jn = 1; jn <= trc; ++jn ) { - double zfnn = zfn[idxzfn( 0, 0 )]; - for ( int jgl = 1; jgl <= jn; ++jgl ) { - zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) ); - } - iodd = jn % 2; - zfn[idxzfn( jn, jn )] = zfnn; - for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) { - double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) ); // new factor numerator - double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) ); // new factor denominator - - zfn[idxzfn( jn, jn - jgl )] = zfn[idxzfn( jn, jn - jgl + 2 )] * zfjn / zfjd; - } - } -} - - -void compute_legendre_polynomials_latopt2( const size_t trc, // truncation (in) - const double lat, // latitude in radians (in) - double legpol[], // legendre polynomials - double zfn[] ) { - auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; }; - auto idxzfn = [&]( int jn, int jk ) { return jk + ( trc + 1 ) * jn; }; - { //ATLAS_TRACE( "compute Legendre polynomials" ); - // -------------------- - // 1. 
First two columns - // -------------------- - double zdlx1 = ( M_PI_2 - lat ); // theta - double zdlx = std::cos( zdlx1 ); // cos(theta) - double zdlsita = std::sqrt( 1. - zdlx * zdlx ); // sin(theta) (this is how trans library does it) - - legpol[idxmn( 0, 0 )] = 1.; - double vsin[trc + 1], vcos[trc + 1]; - for ( int j = 1; j <= trc; j++ ) { - vsin[j] = std::sin( j * zdlx1 ); - } - for ( int j = 1; j <= trc; j++ ) { - vcos[j] = std::cos( j * zdlx1 ); - } - - double zdl1sita = 0.; - // if we are less than 1 meter from the pole, - if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits::epsilon() ) ) { - zdlx = 1.; - zdlsita = 0.; - } - else { - zdl1sita = 1. / zdlsita; - } - - // ordinary Legendre polynomials from series expansion - // --------------------------------------------------- - - // even N - for ( int jn = 2; jn <= trc; jn += 2 ) { - double zdlk = 0.5 * zfn[idxzfn( jn, 0 )]; - double zdlldn = 0.0; - double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); - // represented by only even k - for ( int jk = 2; jk <= jn; jk += 2 ) { - // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn[idxzfn( jn, jk )] * vcos[jk]; - // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn[idxzfn( jn, jk )] * jk * vsin[jk]; - } - legpol[idxmn( 0, jn )] = zdlk; - legpol[idxmn( 1, jn )] = zdlldn; - } - - // odd N - for ( int jn = 1; jn <= trc; jn += 2 ) { - zfn[idxzfn( jn, 0 )] = 0.; - double zdlk = 0.; - double zdlldn = 0.0; - double zdsq = 1. / std::sqrt( jn * ( jn + 1. 
) ); - // represented by only even k - for ( int jk = 1; jk <= jn; jk += 2 ) { - // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn[idxzfn( jn, jk )] * vcos[jk]; - // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn[idxzfn( jn, jk )] * jk * vsin[jk]; - } - legpol[idxmn( 0, jn )] = zdlk; - legpol[idxmn( 1, jn )] = zdlldn; - } - - // -------------------------------------------------------------- - // 2. Diagonal (the terms 0,0 and 1,1 have already been computed) - // Belousov, equation (23) - // -------------------------------------------------------------- - - double zdls = zdl1sita * std::numeric_limits::min(); - for ( int jn = 2; jn <= trc; ++jn ) { - double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) ); - - legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq; - if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0; - } - - // --------------------------------------------- - // 3. General recurrence (Belousov, equation 17) - // --------------------------------------------- - - for ( int jn = 3; jn <= trc; ++jn ) { - for ( int jm = 2; jm < jn; ++jm ) { - double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) ); // numerator of c in Belousov - double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of c in Belousov - double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) ); // numerator of d in Belousov - double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of d in Belousov - double en = ( ( 2. * jn + 1. ) * ( jn - jm ) ); // numerator of e in Belousov - double ed = ( ( 2. * jn - 1. 
) * ( jn + jm ) ); // denominator of e in Belousov - - legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] - - std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx + - std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx; - } - } - } -} - - -void compute_legendre_polynomialsopt2( - const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // latitudes in radians (in) - double leg_sym[], // values of associated Legendre functions, symmetric part - double leg_asym[], // values of associated Legendre functions, asymmetric part - size_t leg_start_sym[], // start indices for different zonal wave numbers, symmetric part - size_t leg_start_asym[] ) // start indices for different zonal wave numbers, asymmetric part -{ - auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; }; - std::vector legpol( legendre_size( trc ) ); - std::vector zfn( ( trc + 1 ) * ( trc + 1 ) ); - auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; }; - compute_zfnopt2( trc, zfn.data() ); - - // Loop over latitudes: - for ( int jlat = 0; jlat < nlats; ++jlat ) { - // compute legendre polynomials for current latitude: - compute_legendre_polynomials_latopt2( trc, lats[jlat], legpol.data(), zfn.data() ); - - // split polynomials into symmetric and antisymmetric parts: - { - //ATLAS_TRACE( "add to global arrays" ); - - for ( int jm = 0; jm <= trc; jm++ ) { - int is1 = 0, ia1 = 0; - for ( int jn = jm; jn <= trc; jn++ ) { - if ( ( jn - jm ) % 2 == 0 ) { is1++; } - else { - ia1++; - } - } - int is2 = 0, ia2 = 0; - // the choice between the following two code lines determines whether - // total wavenumbers are summed in an ascending or descending order. - // The trans library in IFS uses descending order because it should - // be more accurate (higher wavenumbers have smaller contributions). 
- // This also needs to be changed when splitting the spectral data in - // TransLocalopt2::invtrans_uv! - //for ( int jn = jm; jn <= trc; jn++ ) { - for ( int jn = trc; jn >= jm; jn-- ) { - if ( ( jn - jm ) % 2 == 0 ) { - int is = leg_start_sym[jm] + is1 * jlat + is2++; - leg_sym[is] = legpol[idxmn( jm, jn )]; - } - else { - int ia = leg_start_asym[jm] + ia1 * jlat + ia2++; - leg_asym[ia] = legpol[idxmn( jm, jn )]; - } - } - } - } - } -} - -void compute_legendre_polynomials_allopt2( const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // latitudes in radians (in) - double legendre[] ) // legendre polynomials for all latitudes -{ - auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; }; - std::vector legpol( legendre_size( trc ) ); - std::vector zfn( ( trc + 1 ) * ( trc + 1 ) ); - auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; }; - auto idxmnl = [&]( int jm, int jn, int jlat ) { - return ( 2 * trc + 3 - jm ) * jm / 2 * nlats + jlat * ( trc - jm + 1 ) + jn - jm; - }; - compute_zfnopt2( trc, zfn.data() ); - - // Loop over latitudes: - for ( int jlat = 0; jlat < nlats; ++jlat ) { - // compute legendre polynomials for current latitude: - compute_legendre_polynomials_latopt2( trc, lats[jlat], legpol.data(), zfn.data() ); - - for ( int jm = 1; jm <= trc; ++jm ) { - for ( int jn = jm; jn <= trc; ++jn ) { - legendre[idxmnl( jm, jn, jlat )] = legpol[idxmn( jm, jn )]; - } - } - } -} // namespace trans - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h b/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h deleted file mode 100644 index f79438aea..000000000 --- a/src/atlas/trans/localopt2/LegendrePolynomialsopt2.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * (C) Copyright 
2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#pragma once - -#include - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- -// Routine to compute the Legendre polynomials in serial according to Belousov -// (using correction by Swarztrauber) -// -// Reference: -// S.L. Belousov, Tables of normalized associated Legendre Polynomials, Pergamon -// Press (1962) -// P.N. Swarztrauber, On computing the points and weights for Gauss-Legendre -// quadrature, -// SIAM J. Sci. Comput. Vol. 24 (3) pp. 945-954 (2002) -// -// Author of Fortran version: -// Mats Hamrud, Philippe Courtier, Nils Wedi *ECMWF* -// -// Ported to C++ by: -// Andreas Mueller *ECMWF* -// -void compute_zfnopt2( const size_t trc, double zfn[] ); - -void compute_legendre_polynomials_latopt2( const size_t trc, // truncation (in) - const double lat, // latitude in radians (in) - double legpol[], // legendre polynomials - double zfn[] ); - -void compute_legendre_polynomialsopt2( - const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // latitudes in radians (in) - double legendre_sym[], // values of associated Legendre functions, symmetric part - double legendre_asym[], // values of associated Legendre functions, asymmetric part - size_t leg_start_sym[], // start indices for different zonal wave numbers, symmetric part - size_t leg_start_asym[] ); // start indices for different zonal wave numbers, asymmetric part - -void compute_legendre_polynomials_allopt2( const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // 
latitudes in radians (in) - double legendre[] ); // legendre polynomials for all latitudes - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt2/LegendreTransformsopt2.cc b/src/atlas/trans/localopt2/LegendreTransformsopt2.cc deleted file mode 100644 index ddb71a967..000000000 --- a/src/atlas/trans/localopt2/LegendreTransformsopt2.cc +++ /dev/null @@ -1,62 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#include - -#include "atlas/trans/localopt2/LegendreTransformsopt2.h" - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- - -void invtrans_legendreopt2( - const size_t trc, // truncation (in) - const size_t trcFT, // truncation for Fourier transformation (in) - const size_t trcLP, // truncation of Legendre polynomials data legpol. 
Needs to be >= trc (in) - const double legpol[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) - const int nb_fields, // number of fields - const double spec[], // spectral data, size (trc+1)*trc (in) - double leg_real[], // values of associated Legendre functions, size (trc+1)*trc/2 (out) - double leg_imag[] ) // values of associated Legendre functions, size (trc+1)*trc/2 (out) -{ - // Legendre transformation: - int k = 0, klp = 0; - for ( int jm = 0; jm <= trcFT; ++jm ) { - for ( int jfld = 0; jfld < nb_fields; ++jfld ) { - leg_real[jm * nb_fields + jfld] = 0.; - leg_imag[jm * nb_fields + jfld] = 0.; - } - for ( int jn = jm; jn <= trcLP; ++jn, ++klp ) { - if ( jn <= trc ) { - for ( int jfld = 0; jfld < nb_fields; ++jfld ) { - // not completely sure where this factor 2 comes from. One possible - // explanation: - // normalization of trigonometric functions in the spherical harmonics - // integral over square of trig function is 1 for m=0 and 0.5 (?) for - // m>0 - leg_real[jm * nb_fields + jfld] += 2. * spec[( 2 * k ) * nb_fields + jfld] * legpol[klp]; - leg_imag[jm * nb_fields + jfld] += 2. * spec[( 2 * k + 1 ) * nb_fields + jfld] * legpol[klp]; - } - ++k; - } - } - } - // Undo factor 2 for (jm == 0) - for ( int jfld = 0; jfld < nb_fields; ++jfld ) { - leg_real[jfld] /= 2.; - leg_imag[jfld] /= 2.; - } -} - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt2/LegendreTransformsopt2.h b/src/atlas/trans/localopt2/LegendreTransformsopt2.h deleted file mode 100644 index ef10eb885..000000000 --- a/src/atlas/trans/localopt2/LegendreTransformsopt2.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
- * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#pragma once - -#include - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- -// Routine to compute the Legendre transformation -// -// Author: -// Andreas Mueller *ECMWF* -// -void invtrans_legendreopt2( - const size_t trc, // truncation (in) - const size_t trcFT, // truncation for Fourier transformation (in) - const size_t trcLP, // truncation of Legendre polynomials data legpol. Needs to be >= trc (in) - const double legpol[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) - const int nb_fields, // number of fields - const double spec[], // spectral data, size (trc+1)*trc (in) - double leg_real[], // values of associated Legendre functions, size (trc+1)*trc/2 (out) - double leg_imag[] ); // values of associated Legendre functions, size (trc+1)*trc/2 (out) - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt2/TransLocalopt2.cc b/src/atlas/trans/localopt2/TransLocalopt2.cc deleted file mode 100644 index 073ba312e..000000000 --- a/src/atlas/trans/localopt2/TransLocalopt2.cc +++ /dev/null @@ -1,746 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. 
- */ - -#include "atlas/trans/localopt2/TransLocalopt2.h" -#include -#include "atlas/array.h" -#include "atlas/option.h" -#include "atlas/parallel/mpi/mpi.h" -#include "atlas/runtime/ErrorHandling.h" -#include "atlas/runtime/Log.h" -#include "atlas/trans/VorDivToUV.h" -#include "atlas/trans/local_noopt/LegendrePolynomials.h" -#include "atlas/trans/localopt2/FourierTransformsopt2.h" -#include "atlas/trans/localopt2/LegendrePolynomialsopt2.h" -#include "atlas/trans/localopt2/LegendreTransformsopt2.h" -#include "atlas/util/Constants.h" -#include "eckit/linalg/LinearAlgebra.h" -#include "eckit/linalg/Matrix.h" -#include "eckit/eckit_config.h" -#ifdef ECKIT_HAVE_MKL -#include "mkl.h" -#endif - -namespace atlas { -namespace trans { - -namespace { -static TransBuilderGrid builder( "localopt2" ); -} - -// -------------------------------------------------------------------------------------------------------------------- -// Helper functions -// -------------------------------------------------------------------------------------------------------------------- -namespace { // anonymous - -size_t legendre_size( const size_t truncation ) { - return ( truncation + 2 ) * ( truncation + 1 ) / 2; -} - -int nlats_northernHemisphere( const int nlats ) { - return ceil( nlats / 2. 
); - // using ceil here should make it possible to have odd number of latitudes (with the centre latitude being the equator) -} - -int num_n( const int truncation, const int m, const bool symmetric ) { - int len = 0; - if ( symmetric ) { len = ( truncation - m + 2 ) / 2; } - else { - len = ( truncation - m + 1 ) / 2; - } - return len; -} - -void alloc_aligned( double*& ptr, size_t n ) { -#ifdef ECKIT_HAVE_MKL - int al = 64; - ptr = (double*)mkl_malloc( sizeof( double ) * n, al ); -#else - posix_memalign( (void**)&ptr, sizeof( double ) * 64, sizeof( double ) * n ); - //ptr = (double*)malloc( sizeof( double ) * n ); - //ptr = new double[n]; -#endif -} - -void free_aligned( double*& ptr ) { -#ifdef ECKIT_HAVE_MKL - mkl_free( ptr ); -#else - free( ptr ); -#endif -} - -int add_padding( int n ) { - return std::ceil( n / 8. ) * 8; -} -} // namespace - -// -------------------------------------------------------------------------------------------------------------------- -// Class TransLocalopt2 -// -------------------------------------------------------------------------------------------------------------------- - -TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const long truncation, - const eckit::Configuration& config ) : - grid_( grid ), - truncation_( truncation ), - precompute_( config.getBool( "precompute", true ) ) { - ATLAS_TRACE( "Precompute legendre opt2" ); -#ifdef ECKIT_HAVE_MKL - eckit::linalg::LinearAlgebra::backend( "mkl" ); // might want to choose backend with this command -#else - eckit::linalg::LinearAlgebra::backend( "generic" ); // might want to choose backend with this command -#endif - double fft_threshold = 0.05; // fraction of latitudes of the full grid up to which FFT is used. - // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine - // on which this code is running! 
- int nlats = 0; - int nlons = 0; - int neqtr = 0; - useFFT_ = true; - dgemmMethod1_ = false; - nlatsNH_ = 0; - nlatsSH_ = 0; - nlatsLeg_ = 0; - if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { - grid::StructuredGrid g( grid_ ); - nlats = g.ny(); - nlons = g.nxmax(); - for ( size_t j = 0; j < nlats; ++j ) { - // assumptions: latitudes in g.y(j) are monotone and decreasing - // no assumption on whether we have 0, 1 or 2 latitudes at the equator - double lat = g.y( j ); - if ( lat > 0. ) { nlatsNH_++; } - if ( lat == 0. ) { neqtr++; } - if ( lat < 0. ) { nlatsSH_++; } - } - if ( neqtr > 0 ) { - nlatsNH_++; - nlatsSH_++; - } - if ( nlatsNH_ >= nlatsSH_ ) { nlatsLeg_ = nlatsNH_; } - else { - nlatsLeg_ = nlatsSH_; - } - Grid g_global( grid.name() ); - grid::StructuredGrid gs_global( g_global ); - nlonsGlobal_ = gs_global.nxmax(); - jlonMin_ = 0; - double lonmin = fmod( g.x( 0, 0 ), 360 ); - if ( lonmin < 0. ) { lonmin += 360.; } - if ( nlons < fft_threshold * nlonsGlobal_ ) { useFFT_ = false; } - else { - if ( nlons < nlonsGlobal_ ) { - // need to use FFT with cropped grid - for ( size_t j = 0; j < nlonsGlobal_; ++j ) { - if ( gs_global.x( j, 0 ) == lonmin ) { jlonMin_ = j; } - } - } - } - //Log::info() << "nlats=" << g.ny() << " nlatsGlobal=" << gs_global.ny() << std::endl; - std::vector lats( nlatsLeg_ ); - std::vector lons( nlons ); - if ( nlatsNH_ >= nlatsSH_ ) { - for ( size_t j = 0; j < nlatsLeg_; ++j ) { - lats[j] = g.y( j ) * util::Constants::degreesToRadians(); - } - } - else { - for ( size_t j = nlats - 1, idx = 0; idx < nlatsLeg_; --j, ++idx ) { - lats[idx] = -g.y( j ) * util::Constants::degreesToRadians(); - } - } - for ( size_t j = 0; j < nlons; ++j ) { - lons[j] = g.x( j, 0 ) * util::Constants::degreesToRadians(); - } - // precomputations for Legendre polynomials: - { - ATLAS_TRACE( "opt2 precomp Legendre" ); - int size_sym = 0; - int size_asym = 0; - legendre_sym_begin_.resize( truncation_ + 3 ); - legendre_asym_begin_.resize( truncation_ 
+ 3 ); - legendre_sym_begin_[0] = 0; - legendre_asym_begin_[0] = 0; - for ( int jm = 0; jm <= truncation_ + 1; jm++ ) { - size_sym += add_padding( num_n( truncation_ + 1, jm, true ) * nlatsLeg_ ); - size_asym += add_padding( num_n( truncation_ + 1, jm, false ) * nlatsLeg_ ); - legendre_sym_begin_[jm + 1] = size_sym; - legendre_asym_begin_[jm + 1] = size_asym; - } - alloc_aligned( legendre_sym_, size_sym ); - alloc_aligned( legendre_asym_, size_asym ); - FILE* file_leg; - file_leg = fopen( "legendre.bin", "r" ); - if ( false ) { //if ( file_leg ) { - fread( legendre_sym_, sizeof( double ), size_sym, file_leg ); - fread( legendre_asym_, sizeof( double ), size_asym, file_leg ); - fclose( file_leg ); - } - else { - compute_legendre_polynomialsopt2( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, - legendre_asym_, legendre_sym_begin_.data(), - legendre_asym_begin_.data() ); - /*file_leg = fopen( "legendre.bin", "wb" ); - fwrite( legendre_sym_, sizeof( double ), size_sym, file_leg ); - fwrite( legendre_asym_, sizeof( double ), size_asym, file_leg ); - fclose( file_leg );*/ - } - } - - // precomputations for Fourier transformations: - if ( useFFT_ ) { -#if ATLAS_HAVE_FFTW - { - ATLAS_TRACE( "opt2 precomp FFTW" ); - int num_complex = ( nlonsGlobal_ / 2 ) + 1; - fft_in_ = fftw_alloc_complex( nlats * num_complex ); - fft_out_ = fftw_alloc_real( nlats * nlonsGlobal_ ); - plan_ = fftw_plan_many_dft_c2r( 1, &nlonsGlobal_, nlats, fft_in_, NULL, 1, num_complex, fft_out_, NULL, - 1, nlonsGlobal_, FFTW_ESTIMATE ); - } - // other FFT implementations should be added with #elif statements -#else - useFFT_ = false; // no FFT implemented => default to dgemm -#endif - } - if ( !useFFT_ ) { - alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlons ); - if ( dgemmMethod1_ ) { - { - ATLAS_TRACE( "opt2 precomp Fourier" ); - int idx = 0; - for ( int jlon = 0; jlon < nlons; jlon++ ) { - double factor = 1.; - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - if ( jm > 0 ) { factor = 
2.; } - fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part - fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part - } - } - } - } - else { - { - ATLAS_TRACE( "opt2 precomp Fourier tp" ); - int idx = 0; - for ( int jm = 0; jm < truncation_ + 1; jm++ ) { - double factor = 1.; - if ( jm > 0 ) { factor = 2.; } - for ( int jlon = 0; jlon < nlons; jlon++ ) { - fourier_[idx++] = +std::cos( jm * lons[jlon] ) * factor; // real part - } - for ( int jlon = 0; jlon < nlons; jlon++ ) { - fourier_[idx++] = -std::sin( jm * lons[jlon] ) * factor; // imaginary part - } - } - } - } - } - } -} // namespace trans - -// -------------------------------------------------------------------------------------------------------------------- - -TransLocalopt2::TransLocalopt2( const Grid& grid, const long truncation, const eckit::Configuration& config ) : - TransLocalopt2( Cache(), grid, truncation, config ) {} - -TransLocalopt2::TransLocalopt2( const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) : - TransLocalopt2( Cache(), grid, truncation, config ) {} - -TransLocalopt2::TransLocalopt2( const Cache& cache, const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) : - TransLocalopt2( cache, grid, truncation, config ) {} - -// -------------------------------------------------------------------------------------------------------------------- - -TransLocalopt2::~TransLocalopt2() { - if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { - free_aligned( legendre_sym_ ); - free_aligned( legendre_asym_ ); - if ( useFFT_ ) { -#if ATLAS_HAVE_FFTW - fftw_destroy_plan( plan_ ); - fftw_free( fft_in_ ); - fftw_free( fft_out_ ); -#endif - } - else { - free_aligned( fourier_ ); - } - } -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt2::invtrans( const Field& spfield, Field& gpfield, 
const eckit::Configuration& config ) const { - NOTIMP; -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt2::invtrans( const FieldSet& spfields, FieldSet& gpfields, - const eckit::Configuration& config ) const { - NOTIMP; -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt2::invtrans_grad( const Field& spfield, Field& gradfield, const eckit::Configuration& config ) const { - NOTIMP; -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt2::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields, - const eckit::Configuration& config ) const { - NOTIMP; -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt2::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind, - const eckit::Configuration& config ) const { - NOTIMP; -} - -void TransLocalopt2::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields, config ); -} - -void gp_transposeopt2( const int nb_size, const int nb_fields, const double gp_tmp[], double gp_fields[] ) { - for ( int jgp = 0; jgp < nb_size; jgp++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - gp_fields[jfld * nb_size + jgp] = gp_tmp[jgp * nb_fields + jfld]; - } - } -} - -//----------------------------------------------------------------------------- -// Routine to compute the spectral transform by using a localopt2 Fourier transformation -// for a grid (same latitude for all longitudes, allows to compute Legendre functions -// once for all longitudes). 
U and v components are divided by cos(latitude) for -// nb_vordiv_fields > 0. -// -// Legendre polynomials are computed up to truncation_+1 to be accurate for vorticity and -// divergence computation. The parameter truncation is the truncation used in storing the -// spectral data scalar_spectra and can be different from truncation_. If truncation is -// larger than truncation_+1 the transform will behave as if the spectral data was truncated -// to truncation_+1. -// -// Author: -// Andreas Mueller *ECMWF* -// -void TransLocalopt2::invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, - const double scalar_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - if ( nb_scalar_fields > 0 ) { - int nb_fields = nb_scalar_fields; - - // Transform - if ( grid::StructuredGrid g = grid_ ) { - ATLAS_TRACE( "invtrans_uv structured opt2" ); - int nlats = g.ny(); - int nlons = g.nxmax(); - auto posMethod = [&]( int jfld, int imag, int jlat, int jm ) { - if ( useFFT_ || !dgemmMethod1_ ) { - return imag + 2 * ( jm + ( truncation_ + 1 ) * ( jlat + nlats * jfld ) ); - } - else { - return jfld + nb_fields * ( jlat + nlats * ( imag + 2 * ( jm ) ) ); - }; - }; - int size_fourier_max = nb_fields * 2 * nlats; - double* scl_fourier; - alloc_aligned( scl_fourier, size_fourier_max * ( truncation_ + 1 ) ); - - // Legendre transform: - { - ATLAS_TRACE( "opt2 Legendre dgemm" ); - for ( int jm = 0; jm <= truncation_; jm++ ) { - int size_sym = num_n( truncation_ + 1, jm, true ); - int size_asym = num_n( truncation_ + 1, jm, false ); - int n_imag = 2; - if ( jm == 0 ) { n_imag = 1; } - int size_fourier = nb_fields * n_imag * nlatsLeg_; - auto posFourier = [&]( int jfld, int imag, int jlat, int jm, int nlatsH ) { - return jfld + nb_fields * ( imag + n_imag * ( nlatsLeg_ - nlatsH + jlat ) ); - }; - double* scalar_sym; - double* scalar_asym; - double* scl_fourier_sym; - double* scl_fourier_asym; - alloc_aligned( scalar_sym, n_imag * 
nb_fields * size_sym ); - alloc_aligned( scalar_asym, n_imag * nb_fields * size_asym ); - alloc_aligned( scl_fourier_sym, size_fourier ); - alloc_aligned( scl_fourier_asym, size_fourier ); - { - //ATLAS_TRACE( "opt2 Legendre split" ); - int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2; - // the choice between the following two code lines determines whether - // total wavenumbers are summed in an ascending or descending order. - // The trans library in IFS uses descending order because it should - // be more accurate (higher wavenumbers have smaller contributions). - // This also needs to be changed when splitting the spectral data in - // compute_legendre_polynomialsopt2! - //for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { - for ( int jn = truncation_ + 1; jn >= jm; jn-- ) { - for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - idx = jfld + nb_fields * ( imag + 2 * ( jn - jm ) ); - if ( jn <= truncation && jm < truncation ) { - if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = scalar_spectra[idx + ioff]; } - else { - scalar_asym[ia++] = scalar_spectra[idx + ioff]; - } - } - else { - if ( ( jn - jm ) % 2 == 0 ) { scalar_sym[is++] = 0.; } - else { - scalar_asym[ia++] = 0.; - } - } - } - } - } - ASSERT( ia == n_imag * nb_fields * size_asym && is == n_imag * nb_fields * size_sym ); - } - { - eckit::linalg::Matrix A( scalar_sym, nb_fields * n_imag, size_sym ); - eckit::linalg::Matrix B( legendre_sym_ + legendre_sym_begin_[jm], size_sym, nlatsLeg_ ); - eckit::linalg::Matrix C( scl_fourier_sym, nb_fields * n_imag, nlatsLeg_ ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - if ( size_asym > 0 ) { - eckit::linalg::Matrix A( scalar_asym, nb_fields * n_imag, size_asym ); - eckit::linalg::Matrix B( legendre_asym_ + legendre_asym_begin_[jm], size_asym, nlatsLeg_ ); - eckit::linalg::Matrix C( scl_fourier_asym, nb_fields * n_imag, nlatsLeg_ ); - 
eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - { - //ATLAS_TRACE( "opt2 merge spheres" ); - // northern hemisphere: - for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) { - for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int idx = posFourier( jfld, imag, jlat, jm, nlatsNH_ ); - scl_fourier[posMethod( jfld, imag, jlat, jm )] = - scl_fourier_sym[idx] + scl_fourier_asym[idx]; - } - } - } - // southern hemisphere: - for ( int jlat = 0; jlat < nlatsSH_; jlat++ ) { - for ( int imag = 0; imag < n_imag; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int idx = posFourier( jfld, imag, jlat, jm, nlatsSH_ ); - int jslat = nlats - jlat - 1; - scl_fourier[posMethod( jfld, imag, jslat, jm )] = - scl_fourier_sym[idx] - scl_fourier_asym[idx]; - } - } - } - } - free_aligned( scalar_sym ); - free_aligned( scalar_asym ); - free_aligned( scl_fourier_sym ); - free_aligned( scl_fourier_asym ); - } - } - // Fourier transformation: - if ( useFFT_ ) { -#if ATLAS_HAVE_FFTW - { - int num_complex = ( nlonsGlobal_ / 2 ) + 1; - { - ATLAS_TRACE( "opt2 FFTW" ); - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int idx = 0; - for ( int jlat = 0; jlat < nlats; jlat++ ) { - fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0 )]; - for ( int jm = 1; jm < num_complex; jm++, idx++ ) { - for ( int imag = 0; imag < 2; imag++ ) { - if ( jm <= truncation_ ) { - fft_in_[idx][imag] = scl_fourier[posMethod( jfld, imag, jlat, jm )]; - } - else { - fft_in_[idx][imag] = 0.; - } - } - } - } - fftw_execute_dft_c2r( plan_, fft_in_, fft_out_ ); - for ( int jlat = 0; jlat < nlats; jlat++ ) { - for ( int jlon = 0; jlon < nlons; jlon++ ) { - int j = jlon + jlonMin_; - if ( j >= nlonsGlobal_ ) { j -= nlonsGlobal_; } - gp_fields[jlon + nlons * ( jlat + nlats * jfld )] = - fft_out_[j + nlonsGlobal_ * jlat]; - } - } - } - } - } -#endif - } - else { - if ( dgemmMethod1_ ) { - // dgemm-method 1 - // should be faster for small domains or 
large truncation - double* gp_opt2; - alloc_aligned( gp_opt2, nb_fields * grid_.size() ); - { - ATLAS_TRACE( "opt2 Fourier dgemm method 1" ); - eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlons ); - eckit::linalg::Matrix C( gp_opt2, nb_fields * nlats, nlons ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - - // Transposition in grid point space: - { - ATLAS_TRACE( "opt2 transposition in gp-space" ); - int idx = 0; - for ( int jlon = 0; jlon < nlons; jlon++ ) { - for ( int jlat = 0; jlat < nlats; jlat++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int pos_tp = jlon + nlons * ( jlat + nlats * ( jfld ) ); - //int pos = jfld + nb_fields * ( jlat + nlats * ( jlon ) ); - gp_fields[pos_tp] = gp_opt2[idx++]; // = gp_opt2[pos] - } - } - } - } - free_aligned( gp_opt2 ); - } - else { - // dgemm-method 2 - { - ATLAS_TRACE( "opt2 Fourier dgemm method 2" ); - eckit::linalg::Matrix A( fourier_, nlons, ( truncation_ + 1 ) * 2 ); - eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats ); - eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - } - } // namespace trans - // Computing u,v from U,V: - { - if ( nb_vordiv_fields > 0 ) { - ATLAS_TRACE( "opt2 u,v from U,V" ); - std::vector coslats( nlats ); - for ( size_t j = 0; j < nlats; ++j ) { - coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); - } - int idx = 0; - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { - gp_fields[idx] /= coslats[jlat]; - idx++; - } - } - } - } - } - free_aligned( scl_fourier ); - } // namespace atlas - else { - ATLAS_TRACE( "invtrans_uv unstructured opt2" ); - grid::UnstructuredGrid gu = grid_; - double* zfn; - alloc_aligned( zfn, ( truncation + 1 ) * ( 
truncation + 1 ) ); - compute_zfnopt2( truncation, zfn ); - int size_fourier = nb_fields * 2; - double* legendre; - double* scl_fourier; - double* scl_fourier_tp; - double* fouriertp; - double* gp_opt; - alloc_aligned( legendre, legendre_size( truncation + 1 ) ); - alloc_aligned( scl_fourier, size_fourier * ( truncation + 1 ) ); - alloc_aligned( scl_fourier_tp, size_fourier * ( truncation + 1 ) ); - alloc_aligned( fouriertp, 2 * ( truncation + 1 ) ); - alloc_aligned( gp_opt, nb_fields ); - - // loop over all points: - for ( int ip = 0; ip < grid_.size(); ip++ ) { - PointXY p = gu.xy( ip ); - double lon = p.x() * util::Constants::degreesToRadians(); - double lat = p.y() * util::Constants::degreesToRadians(); - compute_legendre_polynomials_latopt2( truncation, lat, legendre, zfn ); - // Legendre transform: - { - //ATLAS_TRACE( "opt Legendre dgemm" ); - for ( int jm = 0; jm <= truncation; jm++ ) { - int noff = ( 2 * truncation + 3 - jm ) * jm / 2, ns = truncation - jm + 1; - eckit::linalg::Matrix A( eckit::linalg::Matrix( - const_cast( scalar_spectra ) + nb_fields * 2 * noff, nb_fields * 2, ns ) ); - eckit::linalg::Matrix B( legendre + noff, ns, 1 ); - eckit::linalg::Matrix C( scl_fourier + jm * size_fourier, nb_fields * 2, 1 ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - } - } - { - //ATLAS_TRACE( "opt transposition in Fourier" ); - int idx = 0; - for ( int jm = 0; jm < truncation + 1; jm++ ) { - for ( int imag = 0; imag < 2; imag++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int pos_tp = imag + 2 * ( jm + ( truncation + 1 ) * ( jfld ) ); - //int pos = jfld + nb_fields * ( imag + 2 * ( jm ) ); - scl_fourier_tp[pos_tp] = scl_fourier[idx++]; // = scl_fourier[pos] - } - } - } - } - - // Fourier transformation: - int idx = 0; - fouriertp[idx++] = 1.; // real part - fouriertp[idx++] = 0.; // imaginary part - for ( int jm = 1; jm < truncation + 1; jm++ ) { - fouriertp[idx++] = +2. * std::cos( jm * lon ); // real part - fouriertp[idx++] = -2. 
* std::sin( jm * lon ); // imaginary part - } - { - //ATLAS_TRACE( "opt Fourier dgemm" ); - eckit::linalg::Matrix A( fouriertp, 1, ( truncation + 1 ) * 2 ); - eckit::linalg::Matrix B( scl_fourier_tp, ( truncation + 1 ) * 2, nb_fields ); - eckit::linalg::Matrix C( gp_opt, 1, nb_fields ); - eckit::linalg::LinearAlgebra::backend().gemm( A, B, C ); - for ( int j = 0; j < nb_fields; j++ ) { - gp_fields[ip + j * grid_.size()] = gp_opt[j]; - } - } - } - free_aligned( legendre ); - free_aligned( scl_fourier ); - free_aligned( scl_fourier_tp ); - free_aligned( fouriertp ); - free_aligned( gp_opt ); - } - } // namespace trans -} // namespace atlas - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt2::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], - const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); -} - -void extend_truncationopt2( const int old_truncation, const int nb_fields, const double old_spectra[], - double new_spectra[] ) { - int k = 0, k_old = 0; - for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber - for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber - for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { // field - if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; } - else { - new_spectra[k++] = old_spectra[k_old++]; - } - } - } - } - } -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt2::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, - const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], - 
const eckit::Configuration& config ) const { - ATLAS_TRACE( "TransLocalopt2::invtrans" ); - int nb_gp = grid_.size(); - int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; - if ( nb_vordiv_fields > 0 ) { - std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector U_ext( nb_vordiv_spec_ext, 0. ); - std::vector V_ext( nb_vordiv_spec_ext, 0. ); - - { - ATLAS_TRACE( "opt2 extend vordiv" ); - // increase truncation in vorticity_spectra and divergence_spectra: - extend_truncationopt2( truncation_, nb_vordiv_fields, vorticity_spectra, - vorticity_spectra_extended.data() ); - extend_truncationopt2( truncation_, nb_vordiv_fields, divergence_spectra, - divergence_spectra_extended.data() ); - } - - { - ATLAS_TRACE( "vordiv to UV opt2" ); - // call vd2uv to compute u and v in spectral space - trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt2" ) ); - vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), - divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); - } - - // perform spectral transform to compute all fields in grid point space - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), - gp_fields + nb_gp * nb_vordiv_fields, config ); - } - if ( nb_scalar_fields > 0 ) { - invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields, - config ); - } -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt2::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. 
-} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt2::dirtrans( const FieldSet& gpfields, FieldSet& spfields, - const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt2::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, - const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt2::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], - const eckit::Configuration& ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocalopt2::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], - double divergence_spectra[], const eckit::Configuration& ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt2/TransLocalopt2.h b/src/atlas/trans/localopt2/TransLocalopt2.h deleted file mode 100644 index 1a2e20801..000000000 --- a/src/atlas/trans/localopt2/TransLocalopt2.h +++ /dev/null @@ -1,143 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. 
- * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#pragma once - -#include - -#include "atlas/array.h" -#include "atlas/grid/Grid.h" -#include "atlas/trans/Trans.h" -#if ATLAS_HAVE_FFTW -#include -#endif - -//----------------------------------------------------------------------------- -// Forward declarations - -namespace atlas { -class Field; -class FieldSet; -} // namespace atlas - -//----------------------------------------------------------------------------- - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- - -/// @class TransLocalopt2 -/// -/// Localopt2 spherical harmonics transformations to any grid -/// Optimisations are present for structured grids -/// For global grids, please consider using TransIFS instead. -/// -/// @todo: -/// - support multiple fields -/// - support atlas::Field and atlas::FieldSet based on function spaces -/// -/// @note: Direct transforms are not implemented and cannot be unless -/// the grid is global. There are no plans to support this at the moment. 
-class TransLocalopt2 : public trans::TransImpl { -public: - TransLocalopt2( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() ); - TransLocalopt2( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); - TransLocalopt2( const Cache&, const Grid&, const long truncation, - const eckit::Configuration& = util::NoConfig() ); - TransLocalopt2( const Cache&, const Grid&, const Domain&, const long truncation, - const eckit::Configuration& = util::NoConfig() ); - - virtual ~TransLocalopt2(); - - virtual int truncation() const override { return truncation_; } - virtual size_t spectralCoefficients() const override { return ( truncation_ + 1 ) * ( truncation_ + 2 ); } - - virtual const Grid& grid() const override { return grid_; } - - virtual void invtrans( const Field& spfield, Field& gpfield, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void invtrans( const FieldSet& spfields, FieldSet& gpfields, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void invtrans_grad( const Field& spfield, Field& gradfield, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void invtrans_grad( const FieldSet& spfields, FieldSet& gradfields, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind, - const eckit::Configuration& = util::NoConfig() ) const override; - - // -- IFS style API -- - - virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, - const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[], - const eckit::Configuration& = util::NoConfig() ) const override; 
- - virtual void invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], - const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& = util::NoConfig() ) const override; - - // -- NOT SUPPORTED -- // - - virtual void dirtrans( const Field& gpfield, Field& spfield, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void dirtrans( const FieldSet& gpfields, FieldSet& spfields, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], - double divergence_spectra[], const eckit::Configuration& = util::NoConfig() ) const override; - -private: - void invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, - const double scalar_spectra[], double gp_fields[], - const eckit::Configuration& = util::NoConfig() ) const; - -private: - Grid grid_; - bool useFFT_; - bool dgemmMethod1_; - int truncation_; - int nlatsNH_; - int nlatsSH_; - int nlatsLeg_; - int jlonMin_; - int nlonsGlobal_; - bool precompute_; - double* legendre_sym_; - double* legendre_asym_; - double* fourier_; - double* fouriertp_; - std::vector legendre_begin_; - std::vector legendre_sym_begin_; - std::vector legendre_asym_begin_; -#if ATLAS_HAVE_FFTW - fftw_complex* fft_in_; - double* fft_out_; - fftw_plan plan_; -#endif -}; - -//----------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt2/VorDivToUVLocalopt2.cc b/src/atlas/trans/localopt2/VorDivToUVLocalopt2.cc deleted file 
mode 100644 index 72c5a3ac9..000000000 --- a/src/atlas/trans/localopt2/VorDivToUVLocalopt2.cc +++ /dev/null @@ -1,184 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#include "atlas/trans/localopt2/VorDivToUVLocalopt2.h" -#include // for std::sqrt -#include "atlas/functionspace/Spectral.h" -#include "atlas/runtime/Log.h" -#include "atlas/util/Earth.h" - -using atlas::FunctionSpace; -using atlas::functionspace::Spectral; - -namespace atlas { -namespace trans { - -namespace { -static VorDivToUVBuilder builder( "localopt2" ); -} - -// -------------------------------------------------------------------------------------------------------------------- -// Routine to copy spectral data into internal storage form of IFS trans -// Ported to C++ by: Andreas Mueller *ECMWF* -void prfi1bopt2( const int truncation, - const int km, // zonal wavenumber - const int nb_fields, // number of fields - const double rspec[], // spectral data - double pia[] ) // spectral components in data layout of trans library -{ - int ilcm = truncation + 1 - km, ioff = ( 2 * truncation - km + 3 ) * km, - nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2; - for ( int j = 1; j <= ilcm; j++ ) { - int inm = ioff + ( ilcm - j ) * 2; - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int ir = 2 * jfld, ii = ir + 1; - pia[ir * nlei1 + j + 1] = rspec[inm * nb_fields + jfld]; - pia[ii * nlei1 + j + 1] = rspec[( inm + 1 ) * nb_fields + jfld]; - } - } - - for ( int jfld = 0; jfld < 2 * nb_fields; jfld++ ) { - pia[jfld * nlei1] = 0.; - pia[jfld * nlei1 + 1] = 0.; - pia[jfld * nlei1 + ilcm + 2] = 0.; - } -} - -// 
-------------------------------------------------------------------------------------------------------------------- -// Routine to compute spectral velocities (*cos(latitude)) out of spectral -// vorticity and divergence -// Reference: -// ECMWF Research Department documentation of the IFS -// Temperton, 1991, MWR 119 p1303 -// Ported to C++ by: Andreas Mueller *ECMWF* -void vd2uvopt2( const int truncation, // truncation - const int km, // zonal wavenumber - const int nb_vordiv_fields, // number of vorticity and divergence fields - const double vorticity_spectra[], // spectral data of vorticity - const double divergence_spectra[], // spectral data of divergence - double U[], // spectral data of U - double V[], // spectral data of V - const eckit::Configuration& config ) { - int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2; - - // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991] - std::vector repsnm( ( truncation + 1 ) * ( truncation + 6 ) / 2 ); - int idx = 0; - for ( int jm = 0; jm <= truncation; ++jm ) { - for ( int jn = jm; jn <= truncation + 2; ++jn, ++idx ) { - repsnm[idx] = std::sqrt( ( jn * jn - jm * jm ) / ( 4. * jn * jn - 1. ) ); - } - } - repsnm[0] = 0.; - - // rlapin: constant factor from eq.(2.2) and (2.3) in [Temperton 1991] - double ra = util::Earth::radius(); - std::vector rlapin( truncation + 3 ); - for ( int jn = 1; jn <= truncation + 2; ++jn ) { - rlapin[jn] = -ra * ra / ( jn * ( jn + 1. 
) ); - } - rlapin[0] = 0.; - - // inverse the order of repsnm and rlapin for improved accuracy - std::vector zepsnm( truncation + 6 ); - std::vector zlapin( truncation + 6 ); - std::vector zn( truncation + 6 ); - for ( int jn = km - 1; jn <= truncation + 2; ++jn ) { - int ij = truncation + 3 - jn; - if ( jn >= 0 ) { - zlapin[ij] = rlapin[jn]; - if ( jn < km ) { zepsnm[ij] = 0.; } - else { - zepsnm[ij] = repsnm[jn + ( 2 * truncation - km + 5 ) * km / 2]; - } - } - else { - zlapin[ij] = 0.; - zepsnm[ij] = 0.; - } - zn[ij] = jn; - } - zn[0] = truncation + 3; - - // copy spectral data into internal trans storage: - std::vector rvor( 2 * nb_vordiv_fields * nlei1 ); - std::vector rdiv( 2 * nb_vordiv_fields * nlei1 ); - std::vector ru( 2 * nb_vordiv_fields * nlei1 ); - std::vector rv( 2 * nb_vordiv_fields * nlei1 ); - prfi1bopt2( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() ); - prfi1bopt2( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() ); - - // compute eq.(2.12) and (2.13) in [Temperton 1991]: - if ( km == 0 ) { - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - int ir = 2 * jfld * nlei1 - 1; - for ( int ji = 2; ji < truncation + 4 - km; ++ji ) { - double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1]; - double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1]; - ru[ir + ji] = +psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1]; - rv[ir + ji] = -psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1]; - } - } - } - else { - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - int ir = 2 * jfld * nlei1 - 1, ii = ir + nlei1; - for ( int ji = 2; ji < truncation + 4 - km; ++ji ) { - double chiIm = km * zlapin[ji]; - double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1]; - double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1]; - ru[ir + ji] = -chiIm * rdiv[ii + ji] + psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1]; - ru[ii + ji] = +chiIm * rdiv[ir + ji] + psiM1 * rvor[ii + ji + 1] - psiP1 * rvor[ii + ji 
- 1]; - rv[ir + ji] = -chiIm * rvor[ii + ji] - psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1]; - rv[ii + ji] = +chiIm * rvor[ir + ji] - psiM1 * rdiv[ii + ji + 1] + psiP1 * rdiv[ii + ji - 1]; - } - } - } - - // copy data from internal storage back to external spectral data: - int ilcm = truncation - km; - int ioff = ( 2 * truncation - km + 3 ) * km; - // ioff: start index of zonal wavenumber km in spectral data - double za_r = 1. / util::Earth::radius(); - for ( int j = 0; j <= ilcm; ++j ) { - // ilcm-j = total wavenumber - int inm = ioff + ( ilcm - j ) * 2; - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - int ir = 2 * jfld * nlei1, ii = ir + nlei1; - int idx = inm * nb_vordiv_fields + jfld; - // real part: - U[idx] = ru[ir + j + 2] * za_r; - V[idx] = rv[ir + j + 2] * za_r; - idx += nb_vordiv_fields; - // imaginary part: - U[idx] = ru[ii + j + 2] * za_r; - V[idx] = rv[ii + j + 2] * za_r; - } - } -} - -void VorDivToUVLocalopt2::execute( const int nb_coeff, const int nb_fields, const double vorticity[], - const double divergence[], double U[], double V[], - const eckit::Configuration& config ) const { - for ( int jm = 0; jm <= truncation_; ++jm ) { - vd2uvopt2( truncation_, jm, nb_fields, vorticity, divergence, U, V, config ); - } -} - -VorDivToUVLocalopt2::VorDivToUVLocalopt2( const int truncation, const eckit::Configuration& config ) : - truncation_( truncation ) {} - -VorDivToUVLocalopt2::VorDivToUVLocalopt2( const FunctionSpace& fs, const eckit::Configuration& config ) : - truncation_( Spectral( fs ).truncation() ) {} - -VorDivToUVLocalopt2::~VorDivToUVLocalopt2() {} - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt2/VorDivToUVLocalopt2.h b/src/atlas/trans/localopt2/VorDivToUVLocalopt2.h deleted file mode 100644 index 30c386a92..000000000 --- a/src/atlas/trans/localopt2/VorDivToUVLocalopt2.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. 
- * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#pragma once - -#include "atlas/trans/VorDivToUV.h" - -//----------------------------------------------------------------------------- -// Forward declarations - -namespace atlas { -class FunctionSpace; -} - -//----------------------------------------------------------------------------- - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- - -class VorDivToUVLocalopt2 : public trans::VorDivToUVImpl { -public: - VorDivToUVLocalopt2( const FunctionSpace&, const eckit::Configuration& = util::NoConfig() ); - VorDivToUVLocalopt2( int truncation, const eckit::Configuration& = util::NoConfig() ); - - virtual ~VorDivToUVLocalopt2(); - - virtual int truncation() const override { return truncation_; } - - // pure virtual interface - - // -- IFS style API -- - // These fields have special interpretation required. You need to know what - // you're doing. - // See IFS trans library. - - /*! 
- * @brief Compute spectral wind (U/V) from spectral vorticity/divergence - * - * U = u*cos(lat) - * V = v*cos(lat) - * - * @param nb_fields [in] Number of fields - * @param vorticity [in] Spectral vorticity - * @param divergence [in] Spectral divergence - * @param U [out] Spectral wind U = u*cos(lat) - * @param V [out] Spectral wind V = v*cos(lat) - */ - virtual void execute( const int nb_coeff, const int nb_fields, const double vorticity[], const double divergence[], - double U[], double V[], const eckit::Configuration& = util::NoConfig() ) const override; - -private: - int truncation_; -}; - -// ------------------------------------------------------------------ - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index 1effd3584..5d2e50dd9 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -17,7 +17,6 @@ #include "atlas/runtime/ErrorHandling.h" #include "atlas/runtime/Log.h" #include "atlas/trans/VorDivToUV.h" -#include "atlas/trans/local_noopt/LegendrePolynomials.h" #include "atlas/trans/localopt3/LegendrePolynomialsopt3.h" #include "atlas/util/Constants.h" #include "eckit/config/YAMLConfiguration.h" diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc index 247200315..1011719f0 100644 --- a/src/tests/trans/test_trans_localcache.cc +++ b/src/tests/trans/test_trans_localcache.cc @@ -11,31 +11,17 @@ #include #include -#include "atlas/array/MakeView.h" -#include "atlas/field/FieldSet.h" -#include "atlas/functionspace/NodeColumns.h" -#include "atlas/functionspace/Spectral.h" -#include "atlas/functionspace/StructuredColumns.h" +#include "eckit/utils/MD5.h" + #include "atlas/grid.h" -#include "atlas/grid/Distribution.h" -#include "atlas/grid/Partitioner.h" -#include "atlas/grid/detail/partitioner/EqualRegionsPartitioner.h" -#include 
"atlas/grid/detail/partitioner/TransPartitioner.h" +#include "atlas/option.h" #include "atlas/library/Library.h" -#include "atlas/mesh/Mesh.h" -#include "atlas/mesh/Nodes.h" #include "atlas/meshgenerator/StructuredMeshGenerator.h" -#include "atlas/output/Gmsh.h" #include "atlas/parallel/mpi/mpi.h" #include "atlas/runtime/Trace.h" #include "atlas/trans/Trans.h" #include "atlas/trans/LegendreCacheCreator.h" -#include "atlas/trans/local_noopt/FourierTransforms.h" -#include "atlas/trans/local_noopt/LegendrePolynomials.h" -#include "atlas/trans/local_noopt/LegendreTransforms.h" #include "atlas/util/Constants.h" -#include "atlas/util/Earth.h" -#include "eckit/utils/MD5.h" #include "tests/AtlasTestEnvironment.h" @@ -43,12 +29,6 @@ #include "transi/trans.h" #endif -using namespace eckit; - -using atlas::array::Array; -using atlas::array::ArrayView; -using atlas::array::make_view; - namespace atlas { namespace test { From ff9c8513d0a4480c8ccccd9c582d85cab4ed8bae Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 9 May 2018 15:17:53 +0100 Subject: [PATCH 069/123] Removed trans/local_noopt --- src/atlas/CMakeLists.txt | 10 - src/atlas/trans/Trans.cc | 2 - src/atlas/trans/VorDivToUV.cc | 2 - .../trans/local_noopt/FourierTransforms.cc | 78 ---- .../trans/local_noopt/FourierTransforms.h | 38 -- .../trans/local_noopt/LegendrePolynomials.cc | 153 -------- .../trans/local_noopt/LegendrePolynomials.h | 43 --- .../trans/local_noopt/LegendreTransforms.cc | 61 --- .../trans/local_noopt/LegendreTransforms.h | 36 -- src/atlas/trans/local_noopt/TransLocal.cc | 349 ------------------ src/atlas/trans/local_noopt/TransLocal.h | 123 ------ .../trans/local_noopt/VorDivToUVLocal.cc | 184 --------- src/atlas/trans/local_noopt/VorDivToUVLocal.h | 67 ---- src/atlas/trans/localopt3/TransLocalopt3.cc | 4 +- src/atlas/trans/localopt3/TransLocalopt3.h | 6 + src/tests/trans/test_transgeneral.cc | 9 +- 16 files changed, 13 insertions(+), 1152 deletions(-) delete mode 100644 
src/atlas/trans/local_noopt/FourierTransforms.cc delete mode 100644 src/atlas/trans/local_noopt/FourierTransforms.h delete mode 100644 src/atlas/trans/local_noopt/LegendrePolynomials.cc delete mode 100644 src/atlas/trans/local_noopt/LegendrePolynomials.h delete mode 100644 src/atlas/trans/local_noopt/LegendreTransforms.cc delete mode 100644 src/atlas/trans/local_noopt/LegendreTransforms.h delete mode 100644 src/atlas/trans/local_noopt/TransLocal.cc delete mode 100644 src/atlas/trans/local_noopt/TransLocal.h delete mode 100644 src/atlas/trans/local_noopt/VorDivToUVLocal.cc delete mode 100644 src/atlas/trans/local_noopt/VorDivToUVLocal.h diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt index 4ed6679f4..64dd7e1cc 100644 --- a/src/atlas/CMakeLists.txt +++ b/src/atlas/CMakeLists.txt @@ -325,16 +325,6 @@ trans/VorDivToUV.h trans/VorDivToUV.cc trans/LegendreCacheCreator.h trans/LegendreCacheCreator.cc -trans/local_noopt/TransLocal.h -trans/local_noopt/TransLocal.cc -trans/local_noopt/LegendrePolynomials.h -trans/local_noopt/LegendrePolynomials.cc -trans/local_noopt/LegendreTransforms.h -trans/local_noopt/LegendreTransforms.cc -trans/local_noopt/FourierTransforms.h -trans/local_noopt/FourierTransforms.cc -trans/local_noopt/VorDivToUVLocal.h -trans/local_noopt/VorDivToUVLocal.cc trans/localopt3/TransLocalopt3.h trans/localopt3/TransLocalopt3.cc trans/localopt3/LegendrePolynomialsopt3.h diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc index d14660060..57a7e707e 100644 --- a/src/atlas/trans/Trans.cc +++ b/src/atlas/trans/Trans.cc @@ -27,7 +27,6 @@ #else #define TRANS_DEFAULT "local" #endif -#include "atlas/trans/local_noopt/TransLocal.h" #include "atlas/trans/localopt3/TransLocalopt3.h" // --> recommended "local" namespace atlas { @@ -62,7 +61,6 @@ struct force_link { load_builder_functionspace(); load_builder_grid(); #endif - load_builder_grid(); load_builder_grid(); } }; diff --git a/src/atlas/trans/VorDivToUV.cc 
b/src/atlas/trans/VorDivToUV.cc index aa8fc5781..c01076f6e 100644 --- a/src/atlas/trans/VorDivToUV.cc +++ b/src/atlas/trans/VorDivToUV.cc @@ -26,7 +26,6 @@ #else #define TRANS_DEFAULT "local" #endif -#include "atlas/trans/local_noopt/VorDivToUVLocal.h" #include "atlas/trans/localopt3/VorDivToUVLocalopt3.h" // --> recommended "local" namespace atlas { @@ -55,7 +54,6 @@ struct force_link { #if ATLAS_HAVE_TRANS load_builder(); #endif - load_builder(); load_builder(); } }; diff --git a/src/atlas/trans/local_noopt/FourierTransforms.cc b/src/atlas/trans/local_noopt/FourierTransforms.cc deleted file mode 100644 index c9f6f2974..000000000 --- a/src/atlas/trans/local_noopt/FourierTransforms.cc +++ /dev/null @@ -1,78 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor - * does it submit to any jurisdiction. 
- */ - -#include -#include -#include - -#include "atlas/trans/local_noopt/FourierTransforms.h" - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- - -void invtrans_fourier( const size_t trcFT, - const double lon, // longitude in radians (in) - const int nb_fields, // Number of fields - const double rlegReal[], // associated Legendre functions, size (trc+1)*trc/2 (in) - const double rlegImag[], // associated Legendre functions, size (trc+1)*trc/2 (in) - double rgp[] ) // gridpoint -{ - for ( int jfld = 0; jfld < nb_fields; ++jfld ) { - rgp[jfld] = 0.; - } - // local Fourier transformation: - for ( int jm = 0; jm <= trcFT; ++jm ) { - const double cos = std::cos( jm * lon ); - const double sin = std::sin( jm * lon ); - for ( int jfld = 0; jfld < nb_fields; ++jfld ) { - double real = cos * rlegReal[jm * nb_fields + jfld]; - double imag = sin * rlegImag[jm * nb_fields + jfld]; - rgp[jfld] += real - imag; - } - } -} - -int fourier_truncation( const int truncation, // truncation - const int nx, // number of longitudes - const int nxmax, // maximum nx - const int ndgl, // number of latitudes - const double lat, // latitude in radian - const bool fullgrid ) { // regular grid - int trc = truncation; - int trclin = ndgl - 1; - int trcquad = ndgl * 2 / 3 - 1; - if ( truncation >= trclin || fullgrid ) { - // linear - trc = ( nx - 1 ) / 2; - } - else if ( truncation >= trcquad ) { - // quadratic - double weight = 3 * ( trclin - truncation ) / ndgl; - double sqcos = std::pow( std::cos( lat ), 2 ); - - trc = ( nx - 1 ) / ( 2 + weight * sqcos ); - } - else { - // cubic - double sqcos = std::pow( std::cos( lat ), 2 ); - - trc = ( nx - 1 ) / ( 2 + sqcos ) - 1; - } - trc = std::min( truncation, trc ); - return trc; -} - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git 
a/src/atlas/trans/local_noopt/FourierTransforms.h b/src/atlas/trans/local_noopt/FourierTransforms.h deleted file mode 100644 index 8b47a8dd9..000000000 --- a/src/atlas/trans/local_noopt/FourierTransforms.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#pragma once - -#include - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- -// Routine to compute the local Fourier transformation -// -// Author: -// Andreas Mueller *ECMWF* -// - -void invtrans_fourier( const size_t trcFT, - const double lon, // longitude in radians (in) - const int nb_fields, // Number of fields - const double rlegReal[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) - const double rlegImag[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) - double rgp[] ); // gridpoint - -int fourier_truncation( const int truncation, const int nx, const int nxmax, const int ndgl, const double lat, - const bool fullgrid ); - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/local_noopt/LegendrePolynomials.cc b/src/atlas/trans/local_noopt/LegendrePolynomials.cc deleted file mode 100644 index 26854fd69..000000000 --- a/src/atlas/trans/local_noopt/LegendrePolynomials.cc +++ /dev/null @@ -1,153 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. 
- * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor - * does it submit to any jurisdiction. - */ - -#include -#include - -#include "atlas/array.h" -#include "atlas/trans/local_noopt/LegendrePolynomials.h" - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- - -void compute_legendre_polynomials( - const size_t trc, // truncation (in) - const double lat, // latitude in radians (in) - double legpol[] ) // values of associated Legendre functions, size (trc+1)*trc/2 (out) -{ - array::ArrayT idxmn_( trc + 1, trc + 1 ); - array::ArrayView idxmn = array::make_view( idxmn_ ); - - int j = 0; - for ( int jm = 0; jm <= trc; ++jm ) { - for ( int jn = jm; jn <= trc; ++jn ) { - idxmn( jm, jn ) = j++; - } - } - - array::ArrayT zfn_( trc + 1, trc + 1 ); - array::ArrayView zfn = array::make_view( zfn_ ); - - int iodd; - - // Compute coefficients for Taylor series in Belousov (19) and (21) - // Belousov, Swarztrauber use zfn(0,0)=std::sqrt(2.) - // IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1 - zfn( 0, 0 ) = 2.; - for ( int jn = 1; jn <= trc; ++jn ) { - double zfnn = zfn( 0, 0 ); - for ( int jgl = 1; jgl <= jn; ++jgl ) { - zfnn *= std::sqrt( 1. - 0.25 / ( jgl * jgl ) ); - } - iodd = jn % 2; - zfn( jn, jn ) = zfnn; - for ( int jgl = 2; jgl <= jn - iodd; jgl += 2 ) { - double zfjn = ( ( jgl - 1. ) * ( 2. * jn - jgl + 2. ) ); // new factor numerator - double zfjd = ( jgl * ( 2. * jn - jgl + 1. ) ); // new factor denominator - - zfn( jn, jn - jgl ) = zfn( jn, jn - jgl + 2 ) * zfjn / zfjd; - } - } - - // -------------------- - // 1. 
First two columns - // -------------------- - double zdlx1 = ( M_PI_2 - lat ); // theta - double zdlx = std::cos( zdlx1 ); // cos(theta) - double zdlsita = std::sqrt( 1. - zdlx * zdlx ); // sin(theta) (this is how trans library does it) - - legpol[0] = 1.; - double zdl1sita = 0.; - - // if we are less than 1 meter from the pole, - if ( std::abs( zdlsita ) <= std::sqrt( std::numeric_limits::epsilon() ) ) { - zdlx = 1.; - zdlsita = 0.; - } - else { - zdl1sita = 1. / zdlsita; - } - - // ordinary Legendre polynomials from series expansion - // --------------------------------------------------- - - // even N - for ( int jn = 2; jn <= trc; jn += 2 ) { - double zdlk = 0.5 * zfn( jn, 0 ); - double zdlldn = 0.0; - double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); - // represented by only even k - for ( int jk = 2; jk <= jn; jk += 2 ) { - // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); - // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); - } - legpol[idxmn( 0, jn )] = zdlk; - legpol[idxmn( 1, jn )] = zdlldn; - } - - // odd N - for ( int jn = 1; jn <= trc; jn += 2 ) { - zfn( jn, 0 ) = 0.; - double zdlk = 0.; - double zdlldn = 0.0; - double zdsq = 1. / std::sqrt( jn * ( jn + 1. ) ); - // represented by only even k - for ( int jk = 1; jk <= jn; jk += 2 ) { - // normalised ordinary Legendre polynomial == \overbar{P_n}^0 - zdlk = zdlk + zfn( jn, jk ) * std::cos( jk * zdlx1 ); - // normalised associated Legendre polynomial == \overbar{P_n}^1 - zdlldn = zdlldn + zdsq * zfn( jn, jk ) * jk * std::sin( jk * zdlx1 ); - } - legpol[idxmn( 0, jn )] = zdlk; - legpol[idxmn( 1, jn )] = zdlldn; - } - - // -------------------------------------------------------------- - // 2. 
Diagonal (the terms 0,0 and 1,1 have already been computed) - // Belousov, equation (23) - // -------------------------------------------------------------- - - double zdls = zdl1sita * std::numeric_limits::min(); - for ( int jn = 2; jn <= trc; ++jn ) { - double sq = std::sqrt( ( 2. * jn + 1. ) / ( 2. * jn ) ); - - legpol[idxmn( jn, jn )] = legpol[idxmn( jn - 1, jn - 1 )] * zdlsita * sq; - if ( std::abs( legpol[idxmn( jn, jn )] ) < zdls ) legpol[idxmn( jn, jn )] = 0.0; - } - - // --------------------------------------------- - // 3. General recurrence (Belousov, equation 17) - // --------------------------------------------- - - for ( int jn = 3; jn <= trc; ++jn ) { - for ( int jm = 2; jm < jn; ++jm ) { - double cn = ( ( 2. * jn + 1. ) * ( jn + jm - 3. ) * ( jn + jm - 1. ) ); // numerator of c in Belousov - double cd = ( ( 2. * jn - 3. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of c in Belousov - double dn = ( ( 2. * jn + 1. ) * ( jn - jm + 1. ) * ( jn + jm - 1. ) ); // numerator of d in Belousov - double dd = ( ( 2. * jn - 1. ) * ( jn + jm - 2. ) * ( jn + jm ) ); // denominator of d in Belousov - double en = ( ( 2. * jn + 1. ) * ( jn - jm ) ); // numerator of e in Belousov - double ed = ( ( 2. * jn - 1. ) * ( jn + jm ) ); // denominator of e in Belousov - - legpol[idxmn( jm, jn )] = std::sqrt( cn / cd ) * legpol[idxmn( jm - 2, jn - 2 )] - - std::sqrt( dn / dd ) * legpol[idxmn( jm - 2, jn - 1 )] * zdlx + - std::sqrt( en / ed ) * legpol[idxmn( jm, jn - 1 )] * zdlx; - } - } -} - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/local_noopt/LegendrePolynomials.h b/src/atlas/trans/local_noopt/LegendrePolynomials.h deleted file mode 100644 index 56a3e7443..000000000 --- a/src/atlas/trans/local_noopt/LegendrePolynomials.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. 
- * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#pragma once - -#include - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- -// Routine to compute the Legendre polynomials in serial according to Belousov -// (using correction by Swarztrauber) -// -// Reference: -// S.L. Belousov, Tables of normalized associated Legendre Polynomials, Pergamon -// Press (1962) -// P.N. Swarztrauber, On computing the points and weights for Gauss-Legendre -// quadrature, -// SIAM J. Sci. Comput. Vol. 24 (3) pp. 945-954 (2002) -// -// Author of Fortran version: -// Mats Hamrud, Philippe Courtier, Nils Wedi *ECMWF* -// -// Ported to C++ by: -// Andreas Mueller *ECMWF* -// -void compute_legendre_polynomials( - const size_t trc, // truncation (in) - const double lat, // latitude in radians (in) - double legpol[] ); // values of associated Legendre functions, size (trc+1)*trc/2 (out) - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/local_noopt/LegendreTransforms.cc b/src/atlas/trans/local_noopt/LegendreTransforms.cc deleted file mode 100644 index f82d9f401..000000000 --- a/src/atlas/trans/local_noopt/LegendreTransforms.cc +++ /dev/null @@ -1,61 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
- * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#include - -#include "atlas/trans/local_noopt/LegendreTransforms.h" - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- - -void invtrans_legendre( const size_t trc, // truncation (in) - const size_t trcFT, // truncation for Fourier transformation (in) - const size_t trcLP, // truncation of Legendre polynomials data legpol. Needs to be >= trc (in) - const double legpol[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) - const int nb_fields, // number of fields - const double spec[], // spectral data, size (trc+1)*trc (in) - double leg_real[], // values of associated Legendre functions, size (trc+1)*trc/2 (out) - double leg_imag[] ) // values of associated Legendre functions, size (trc+1)*trc/2 (out) -{ - // Legendre transformation: - int k = 0, klp = 0; - for ( int jm = 0; jm <= trcFT; ++jm ) { - for ( int jfld = 0; jfld < nb_fields; ++jfld ) { - leg_real[jm * nb_fields + jfld] = 0.; - leg_imag[jm * nb_fields + jfld] = 0.; - } - for ( int jn = jm; jn <= trcLP; ++jn, ++klp ) { - if ( jn <= trc ) { - for ( int jfld = 0; jfld < nb_fields; ++jfld ) { - // not completely sure where this factor 2 comes from. One possible - // explanation: - // normalization of trigonometric functions in the spherical harmonics - // integral over square of trig function is 1 for m=0 and 0.5 (?) for - // m>0 - leg_real[jm * nb_fields + jfld] += 2. * spec[( 2 * k ) * nb_fields + jfld] * legpol[klp]; - leg_imag[jm * nb_fields + jfld] += 2. 
* spec[( 2 * k + 1 ) * nb_fields + jfld] * legpol[klp]; - } - ++k; - } - } - } - // Undo factor 2 for (jm == 0) - for ( int jfld = 0; jfld < nb_fields; ++jfld ) { - leg_real[jfld] /= 2.; - leg_imag[jfld] /= 2.; - } -} - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/local_noopt/LegendreTransforms.h b/src/atlas/trans/local_noopt/LegendreTransforms.h deleted file mode 100644 index c3152e1f5..000000000 --- a/src/atlas/trans/local_noopt/LegendreTransforms.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#pragma once - -#include - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- -// Routine to compute the Legendre transformation -// -// Author: -// Andreas Mueller *ECMWF* -// -void invtrans_legendre( const size_t trc, // truncation (in) - const size_t trcFT, // truncation for Fourier transformation (in) - const size_t trcLP, // truncation of Legendre polynomials data legpol. 
Needs to be >= trc (in) - const double legpol[], // values of associated Legendre functions, size (trc+1)*trc/2 (in) - const int nb_fields, // number of fields - const double spec[], // spectral data, size (trc+1)*trc (in) - double leg_real[], // values of associated Legendre functions, size (trc+1)*trc/2 (out) - double leg_imag[] ); // values of associated Legendre functions, size (trc+1)*trc/2 (out) - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/local_noopt/TransLocal.cc b/src/atlas/trans/local_noopt/TransLocal.cc deleted file mode 100644 index 77d3af78f..000000000 --- a/src/atlas/trans/local_noopt/TransLocal.cc +++ /dev/null @@ -1,349 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. 
- */ - -#include "atlas/trans/local_noopt/TransLocal.h" -#include "atlas/array.h" -#include "atlas/option.h" -#include "atlas/parallel/mpi/mpi.h" -#include "atlas/runtime/ErrorHandling.h" -#include "atlas/runtime/Log.h" -#include "atlas/trans/VorDivToUV.h" -#include "atlas/trans/local_noopt/FourierTransforms.h" -#include "atlas/trans/local_noopt/LegendrePolynomials.h" -#include "atlas/trans/local_noopt/LegendreTransforms.h" -#include "atlas/util/Constants.h" - -namespace atlas { -namespace trans { - -namespace { -static TransBuilderGrid builder( "local_noopt" ); -} - -// -------------------------------------------------------------------------------------------------------------------- -// Helper functions -// -------------------------------------------------------------------------------------------------------------------- -namespace { // anonymous - -size_t legendre_size( const size_t truncation ) { - return ( truncation + 2 ) * ( truncation + 1 ) / 2; -} - -} // namespace - -// -------------------------------------------------------------------------------------------------------------------- -// Class TransLocal -// -------------------------------------------------------------------------------------------------------------------- - -TransLocal::TransLocal( const Cache& cache, const Grid& grid, const long truncation, - const eckit::Configuration& config ) : - grid_( grid ), - truncation_( truncation ), - precompute_( config.getBool( "precompute", true ) ) { - if ( precompute_ ) { - if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { - ATLAS_TRACE( "Precompute legendre structured" ); - grid::StructuredGrid g( grid_ ); - size_t size( 0 ); - legendre_begin_.resize( g.ny() ); - for ( size_t j = 0; j < g.ny(); ++j ) { - legendre_begin_[j] = size; - size += legendre_size( truncation_ + 1 ); - } - legendre_.resize( size ); - - for ( size_t j = 0; j < g.ny(); ++j ) { - double lat = g.y( j ) * util::Constants::degreesToRadians(); - 
compute_legendre_polynomials( truncation_ + 1, lat, legendre_data( j ) ); - } - } - else { - ATLAS_TRACE( "Precompute legendre unstructured" ); - size_t size( 0 ); - legendre_begin_.resize( grid_.size() ); - for ( size_t j = 0; j < grid_.size(); ++j ) { - legendre_begin_[j] = size; - size += legendre_size( truncation_ + 1 ); - } - legendre_.resize( size ); - int j( 0 ); - for ( PointXY p : grid_.xy() ) { - double lat = p.y() * util::Constants::degreesToRadians(); - compute_legendre_polynomials( truncation_ + 1, lat, legendre_data( j++ ) ); - } - } - } -} - -// -------------------------------------------------------------------------------------------------------------------- - -TransLocal::TransLocal( const Grid& grid, const long truncation, const eckit::Configuration& config ) : - TransLocal( Cache(), grid, truncation, config ) {} - -TransLocal::TransLocal( const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) : - TransLocal( Cache(), grid, truncation, config ) {} - -TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain&, const long truncation, const eckit::Configuration& config ) : - TransLocal( cache, grid, truncation, config ) {} - -// -------------------------------------------------------------------------------------------------------------------- - -TransLocal::~TransLocal() {} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocal::invtrans( const Field& spfield, Field& gpfield, const eckit::Configuration& config ) const { - NOTIMP; -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocal::invtrans( const FieldSet& spfields, FieldSet& gpfields, const eckit::Configuration& config ) const { - NOTIMP; -} - -// -------------------------------------------------------------------------------------------------------------------- - 
-void TransLocal::invtrans_grad( const Field& spfield, Field& gradfield, const eckit::Configuration& config ) const { - NOTIMP; -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocal::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields, - const eckit::Configuration& config ) const { - NOTIMP; -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocal::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind, - const eckit::Configuration& config ) const { - NOTIMP; -} - -void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields, config ); -} - -void gp_transpose( const int nb_size, const int nb_fields, const double gp_tmp[], double gp_fields[] ) { - ATLAS_TRACE( "gp_transpose" ); - for ( int jgp = 0; jgp < nb_size; jgp++ ) { - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - gp_fields[jfld * nb_size + jgp] = gp_tmp[jgp * nb_fields + jfld]; - } - } -} - -//----------------------------------------------------------------------------- -// Routine to compute the spectral transform by using a local Fourier -// transformation -// for a grid (same latitude for all longitudes, allows to compute Legendre -// functions -// once for all longitudes). U and v components are divided by cos(latitude) for -// nb_vordiv_fields > 0. 
-// -// Author: -// Andreas Mueller *ECMWF* -// -void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, - const double scalar_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - if ( nb_scalar_fields > 0 ) { - int nb_fields = nb_scalar_fields; - - // Depending on "precompute_legendre_", we have to compute the - // legendre polynomials for every latitute - std::vector recomputed_legendre_; - - auto legPol = [&]( double lat, int j ) -> const double* { - if ( precompute_ ) { return legendre_data( j ); } - else { - recomputed_legendre_.resize( legendre_size( truncation ) ); - compute_legendre_polynomials( truncation, lat, recomputed_legendre_.data() ); - return recomputed_legendre_.data(); - } - }; - - // Temporary storage for legendre space - std::vector legReal( nb_fields * ( truncation + 1 ) ); - std::vector legImag( nb_fields * ( truncation + 1 ) ); - std::vector gp_tmp( nb_fields * grid_.size(), 0. ); - - // Transform - if ( grid::StructuredGrid g = grid_ ) { - ATLAS_TRACE( "invtrans_uv structured" ); - int idx = 0; - for ( size_t j = 0; j < g.ny(); ++j ) { - double lat = g.y( j ) * util::Constants::degreesToRadians(); - double trcFT = - fourier_truncation( truncation, g.nx( j ), g.nxmax(), g.ny(), lat, grid::RegularGrid( grid_ ) ); - - // Legendre transform: - { - ATLAS_TRACE( "invtrans_legendre" ); - invtrans_legendre( truncation, trcFT, truncation_ + 1, legPol( lat, j ), nb_fields, scalar_spectra, - legReal.data(), legImag.data() ); - } - - // Fourier transform: - { - ATLAS_TRACE( "invtrans_fourier" ); - for ( size_t i = 0; i < g.nx( j ); ++i ) { - double lon = g.x( i, j ) * util::Constants::degreesToRadians(); - invtrans_fourier( trcFT, lon, nb_fields, legReal.data(), legImag.data(), - gp_tmp.data() + ( nb_fields * idx ) ); - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); - } - ++idx; - } - } - } - } - else { - 
ATLAS_TRACE( "invtrans_uv unstructured" ); - int idx = 0; - for ( PointXY p : grid_.xy() ) { - double lon = p.x() * util::Constants::degreesToRadians(); - double lat = p.y() * util::Constants::degreesToRadians(); - double trcFT = truncation; - - // Legendre transform: - { - ATLAS_TRACE( "invtrans_legendre" ); - invtrans_legendre( truncation, trcFT, truncation_ + 1, legPol( lat, idx ), nb_fields, - scalar_spectra, legReal.data(), legImag.data() ); - } - - // Fourier transform: - { - ATLAS_TRACE( "invtrans_fourier" ); - invtrans_fourier( trcFT, lon, nb_fields, legReal.data(), legImag.data(), - gp_tmp.data() + ( nb_fields * idx ) ); - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - gp_tmp[nb_fields * idx + jfld] /= std::cos( lat ); - } - } - ++idx; - } - } - - // transpose result (gp_tmp: jfld is fastest index. gp_fields: jfld needs to - // be slowest index) - gp_transpose( grid_.size(), nb_fields, gp_tmp.data(), gp_fields ); - } -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocal::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], - const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); -} - -void extend_truncation( const int old_truncation, const int nb_fields, const double old_spectra[], - double new_spectra[] ) { - int k = 0, k_old = 0; - for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber - for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber - for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { // field - if ( m == old_truncation + 1 || n == old_truncation + 1 ) { new_spectra[k++] = 0.; } - else { - new_spectra[k++] = old_spectra[k_old++]; - } - } - } - } - } -} - -// 
-------------------------------------------------------------------------------------------------------------------- - -void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, - const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { - ATLAS_TRACE( "TransLocal::invtrans" ); - int nb_gp = grid_.size(); - int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; - std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector U_ext( nb_vordiv_spec_ext, 0. ); - std::vector V_ext( nb_vordiv_spec_ext, 0. ); - - { - ATLAS_TRACE( "vordiv to UV" ); - // increase truncation in vorticity_spectra and divergence_spectra: - extend_truncation( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() ); - extend_truncation( truncation_, nb_vordiv_fields, divergence_spectra, divergence_spectra_extended.data() ); - - // call vd2uv to compute u and v in spectral space - trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt" ) ); - vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), - divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); - } - - // perform spectral transform to compute all fields in grid point space - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), - gp_fields + nb_gp * nb_vordiv_fields, config ); - invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocal::dirtrans( const Field& 
gpfield, Field& spfield, const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocal::dirtrans( const FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocal::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, - const eckit::Configuration& config ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocal::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], - const eckit::Configuration& ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. -} - -// -------------------------------------------------------------------------------------------------------------------- - -void TransLocal::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], - double divergence_spectra[], const eckit::Configuration& ) const { - NOTIMP; - // Not implemented and not planned. - // Use the TransIFS implementation instead. 
-} - -// -------------------------------------------------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/local_noopt/TransLocal.h b/src/atlas/trans/local_noopt/TransLocal.h deleted file mode 100644 index 9ad2cb5db..000000000 --- a/src/atlas/trans/local_noopt/TransLocal.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#pragma once - -#include - -#include "atlas/grid/Grid.h" -#include "atlas/trans/Trans.h" - -//----------------------------------------------------------------------------- -// Forward declarations - -namespace atlas { -class Field; -class FieldSet; -} // namespace atlas - -//----------------------------------------------------------------------------- - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- - -/// @class TransLocal -/// -/// Local spherical harmonics transformations to any grid -/// Optimisations are present for structured grids -/// For global grids, please consider using TransIFS instead. -/// -/// @todo: -/// - support multiple fields -/// - support atlas::Field and atlas::FieldSet based on function spaces -/// -/// @note: Direct transforms are not implemented and cannot be unless -/// the grid is global. There are no plans to support this at the moment. 
-class TransLocal : public trans::TransImpl { -public: - TransLocal( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() ); - TransLocal( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); - TransLocal( const Cache&, const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() ); - TransLocal( const Cache&, const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); - - virtual ~TransLocal(); - - virtual int truncation() const override { return truncation_; } - virtual size_t spectralCoefficients() const override { return ( truncation_ + 1 ) * ( truncation_ + 2 ); } - - virtual const Grid& grid() const override { return grid_; } - - virtual void invtrans( const Field& spfield, Field& gpfield, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void invtrans( const FieldSet& spfields, FieldSet& gpfields, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void invtrans_grad( const Field& spfield, Field& gradfield, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void invtrans_grad( const FieldSet& spfields, FieldSet& gradfields, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind, - const eckit::Configuration& = util::NoConfig() ) const override; - - // -- IFS style API -- - - virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, - const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[], - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void invtrans( 
const int nb_vordiv_fields, const double vorticity_spectra[], - const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& = util::NoConfig() ) const override; - - // -- NOT SUPPORTED -- // - - virtual void dirtrans( const Field& gpfield, Field& spfield, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void dirtrans( const FieldSet& gpfields, FieldSet& spfields, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], - const eckit::Configuration& = util::NoConfig() ) const override; - - virtual void dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], - double divergence_spectra[], const eckit::Configuration& = util::NoConfig() ) const override; - -private: - const double* legendre_data( int j ) const { return legendre_.data() + legendre_begin_[j]; } - double* legendre_data( int j ) { return legendre_.data() + legendre_begin_[j]; } - - void invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, - const double scalar_spectra[], double gp_fields[], - const eckit::Configuration& = util::NoConfig() ) const; - -private: - Grid grid_; - int truncation_; - bool precompute_; - std::vector legendre_; - std::vector legendre_begin_; -}; - -//----------------------------------------------------------------------------- - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/local_noopt/VorDivToUVLocal.cc b/src/atlas/trans/local_noopt/VorDivToUVLocal.cc deleted file mode 100644 index 0db63e792..000000000 --- a/src/atlas/trans/local_noopt/VorDivToUVLocal.cc +++ /dev/null @@ -1,184 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. 
- * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#include // for std::sqrt -#include "atlas/trans/local_noopt/VorDivToUVLocal.h" -#include "atlas/functionspace/Spectral.h" -#include "atlas/runtime/Log.h" -#include "atlas/util/Earth.h" - -using atlas::FunctionSpace; -using atlas::functionspace::Spectral; - -namespace atlas { -namespace trans { - -namespace { -static VorDivToUVBuilder builder( "local_noopt" ); -} - -// -------------------------------------------------------------------------------------------------------------------- -// Routine to copy spectral data into internal storage form of IFS trans -// Ported to C++ by: Andreas Mueller *ECMWF* -void prfi1b( const int truncation, - const int km, // zonal wavenumber - const int nb_fields, // number of fields - const double rspec[], // spectral data - double pia[] ) // spectral components in data layout of trans library -{ - int ilcm = truncation + 1 - km, ioff = ( 2 * truncation - km + 3 ) * km, - nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2; - for ( int j = 1; j <= ilcm; j++ ) { - int inm = ioff + ( ilcm - j ) * 2; - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { - int ir = 2 * jfld, ii = ir + 1; - pia[ir * nlei1 + j + 1] = rspec[inm * nb_fields + jfld]; - pia[ii * nlei1 + j + 1] = rspec[( inm + 1 ) * nb_fields + jfld]; - } - } - - for ( int jfld = 0; jfld < 2 * nb_fields; jfld++ ) { - pia[jfld * nlei1] = 0.; - pia[jfld * nlei1 + 1] = 0.; - pia[jfld * nlei1 + ilcm + 2] = 0.; - } -} - -// -------------------------------------------------------------------------------------------------------------------- -// Routine to compute spectral velocities (*cos(latitude)) out of spectral -// 
vorticity and divergence -// Reference: -// ECMWF Research Department documentation of the IFS -// Temperton, 1991, MWR 119 p1303 -// Ported to C++ by: Andreas Mueller *ECMWF* -void vd2uv( const int truncation, // truncation - const int km, // zonal wavenumber - const int nb_vordiv_fields, // number of vorticity and divergence fields - const double vorticity_spectra[], // spectral data of vorticity - const double divergence_spectra[], // spectral data of divergence - double U[], // spectral data of U - double V[], // spectral data of V - const eckit::Configuration& config ) { - int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2; - - // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991] - std::vector repsnm( ( truncation + 1 ) * ( truncation + 6 ) / 2 ); - int idx = 0; - for ( int jm = 0; jm <= truncation; ++jm ) { - for ( int jn = jm; jn <= truncation + 2; ++jn, ++idx ) { - repsnm[idx] = std::sqrt( ( jn * jn - jm * jm ) / ( 4. * jn * jn - 1. ) ); - } - } - repsnm[0] = 0.; - - // rlapin: constant factor from eq.(2.2) and (2.3) in [Temperton 1991] - double ra = util::Earth::radius(); - std::vector rlapin( truncation + 3 ); - for ( int jn = 1; jn <= truncation + 2; ++jn ) { - rlapin[jn] = -ra * ra / ( jn * ( jn + 1. 
) ); - } - rlapin[0] = 0.; - - // inverse the order of repsnm and rlapin for improved accuracy - std::vector zepsnm( truncation + 6 ); - std::vector zlapin( truncation + 6 ); - std::vector zn( truncation + 6 ); - for ( int jn = km - 1; jn <= truncation + 2; ++jn ) { - int ij = truncation + 3 - jn; - if ( jn >= 0 ) { - zlapin[ij] = rlapin[jn]; - if ( jn < km ) { zepsnm[ij] = 0.; } - else { - zepsnm[ij] = repsnm[jn + ( 2 * truncation - km + 5 ) * km / 2]; - } - } - else { - zlapin[ij] = 0.; - zepsnm[ij] = 0.; - } - zn[ij] = jn; - } - zn[0] = truncation + 3; - - // copy spectral data into internal trans storage: - std::vector rvor( 2 * nb_vordiv_fields * nlei1 ); - std::vector rdiv( 2 * nb_vordiv_fields * nlei1 ); - std::vector ru( 2 * nb_vordiv_fields * nlei1 ); - std::vector rv( 2 * nb_vordiv_fields * nlei1 ); - prfi1b( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() ); - prfi1b( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() ); - - // compute eq.(2.12) and (2.13) in [Temperton 1991]: - if ( km == 0 ) { - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - int ir = 2 * jfld * nlei1 - 1; - for ( int ji = 2; ji < truncation + 4 - km; ++ji ) { - double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1]; - double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1]; - ru[ir + ji] = +psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1]; - rv[ir + ji] = -psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1]; - } - } - } - else { - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - int ir = 2 * jfld * nlei1 - 1, ii = ir + nlei1; - for ( int ji = 2; ji < truncation + 4 - km; ++ji ) { - double chiIm = km * zlapin[ji]; - double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1]; - double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1]; - ru[ir + ji] = -chiIm * rdiv[ii + ji] + psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1]; - ru[ii + ji] = +chiIm * rdiv[ir + ji] + psiM1 * rvor[ii + ji + 1] - psiP1 * rvor[ii + ji - 1]; - 
rv[ir + ji] = -chiIm * rvor[ii + ji] - psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1]; - rv[ii + ji] = +chiIm * rvor[ir + ji] - psiM1 * rdiv[ii + ji + 1] + psiP1 * rdiv[ii + ji - 1]; - } - } - } - - // copy data from internal storage back to external spectral data: - int ilcm = truncation - km; - int ioff = ( 2 * truncation - km + 3 ) * km; - // ioff: start index of zonal wavenumber km in spectral data - double za_r = 1. / util::Earth::radius(); - for ( int j = 0; j <= ilcm; ++j ) { - // ilcm-j = total wavenumber - int inm = ioff + ( ilcm - j ) * 2; - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - int ir = 2 * jfld * nlei1, ii = ir + nlei1; - int idx = inm * nb_vordiv_fields + jfld; - // real part: - U[idx] = ru[ir + j + 2] * za_r; - V[idx] = rv[ir + j + 2] * za_r; - idx += nb_vordiv_fields; - // imaginary part: - U[idx] = ru[ii + j + 2] * za_r; - V[idx] = rv[ii + j + 2] * za_r; - } - } -} - -void VorDivToUVLocal::execute( const int nb_coeff, const int nb_fields, const double vorticity[], - const double divergence[], double U[], double V[], - const eckit::Configuration& config ) const { - for ( int jm = 0; jm <= truncation_; ++jm ) { - vd2uv( truncation_, jm, nb_fields, vorticity, divergence, U, V, config ); - } -} - -VorDivToUVLocal::VorDivToUVLocal( const int truncation, const eckit::Configuration& config ) : - truncation_( truncation ) {} - -VorDivToUVLocal::VorDivToUVLocal( const FunctionSpace& fs, const eckit::Configuration& config ) : - truncation_( Spectral( fs ).truncation() ) {} - -VorDivToUVLocal::~VorDivToUVLocal() {} - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/local_noopt/VorDivToUVLocal.h b/src/atlas/trans/local_noopt/VorDivToUVLocal.h deleted file mode 100644 index ee3903de1..000000000 --- a/src/atlas/trans/local_noopt/VorDivToUVLocal.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * (C) Copyright 2013 ECMWF. 
- * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - -#pragma once - -#include "atlas/trans/VorDivToUV.h" - -//----------------------------------------------------------------------------- -// Forward declarations - -namespace atlas { -class FunctionSpace; -} - -//----------------------------------------------------------------------------- - -namespace atlas { -namespace trans { - -//----------------------------------------------------------------------------- - -class VorDivToUVLocal : public trans::VorDivToUVImpl { -public: - VorDivToUVLocal( const FunctionSpace&, const eckit::Configuration& = util::NoConfig() ); - VorDivToUVLocal( int truncation, const eckit::Configuration& = util::NoConfig() ); - - virtual ~VorDivToUVLocal(); - - virtual int truncation() const override { return truncation_; } - - // pure virtual interface - - // -- IFS style API -- - // These fields have special interpretation required. You need to know what - // you're doing. - // See IFS trans library. - - /*! 
- * @brief Compute spectral wind (U/V) from spectral vorticity/divergence - * - * U = u*cos(lat) - * V = v*cos(lat) - * - * @param nb_fields [in] Number of fields - * @param vorticity [in] Spectral vorticity - * @param divergence [in] Spectral divergence - * @param U [out] Spectral wind U = u*cos(lat) - * @param V [out] Spectral wind V = v*cos(lat) - */ - virtual void execute( const int nb_coeff, const int nb_fields, const double vorticity[], const double divergence[], - double U[], double V[], const eckit::Configuration& = util::NoConfig() ) const override; - -private: - int truncation_; -}; - -// ------------------------------------------------------------------ - -} // namespace trans -} // namespace atlas diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/localopt3/TransLocalopt3.cc index 5d2e50dd9..30995afb1 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/localopt3/TransLocalopt3.cc @@ -184,6 +184,8 @@ int add_padding( int n ) { return std::ceil( n / 8. 
) * 8; } +} // namespace + int fourier_truncation( const int truncation, // truncation const int nx, // number of longitudes const int nxmax, // maximum nx @@ -214,8 +216,6 @@ int fourier_truncation( const int truncation, // truncation return trc; } -} // namespace - // -------------------------------------------------------------------------------------------------------------------- // Class TransLocalopt3 // -------------------------------------------------------------------------------------------------------------------- diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/localopt3/TransLocalopt3.h index dda875ae5..bcc80b1d7 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.h +++ b/src/atlas/trans/localopt3/TransLocalopt3.h @@ -41,6 +41,12 @@ namespace atlas { namespace trans { class LegendreCacheCreatorLocal; +int fourier_truncation( const int truncation, // truncation + const int nx, // number of longitudes + const int nxmax, // maximum nx + const int ndgl, // number of latitudes + const double lat, // latitude in radian + const bool regular ); // regular grid //----------------------------------------------------------------------------- diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 9c2fb2801..92b03f489 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -29,11 +29,9 @@ #include "atlas/parallel/mpi/mpi.h" #include "atlas/runtime/Trace.h" #include "atlas/trans/Trans.h" -#include "atlas/trans/local_noopt/FourierTransforms.h" -#include "atlas/trans/local_noopt/LegendrePolynomials.h" -#include "atlas/trans/local_noopt/LegendreTransforms.h" #include "atlas/util/Constants.h" #include "atlas/util/Earth.h" +#include "atlas/trans/localopt3/TransLocalopt3.h" #include "tests/AtlasTestEnvironment.h" @@ -69,6 +67,7 @@ struct AtlasTransEnvironment : public AtlasTestEnvironment { 
//----------------------------------------------------------------------------- +#if 0 void compute_legendre( const size_t trc, // truncation (in) const double& lat, // latitude in radians (in) array::ArrayView& zlfpol ) // values of @@ -216,7 +215,7 @@ void spectral_transform_grid( const size_t trc, // truncation (in) EXPECT( idx == grid.size() ); } - +#endif //----------------------------------------------------------------------------- // Routine to compute the spherical harmonics analytically at one point // (up to wave number 3) @@ -528,6 +527,7 @@ double compute_rms( const size_t N, // length of the arrays return rms; } +#if 0 //----------------------------------------------------------------------------- // Routine to test the spectral transform by comparing it with the analytically // derived spherical harmonics @@ -566,6 +566,7 @@ double spectral_transform_test( double trc, // truncation return rms; } +#endif //----------------------------------------------------------------------------- #if 0 From 9d79e036c6194d1f8ab283157a0517a358c574ae Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 9 May 2018 15:39:11 +0100 Subject: [PATCH 070/123] rename trans/localopt3 trans/local --- src/atlas/CMakeLists.txt | 16 ++--- src/atlas/trans/LegendreCacheCreator.cc | 2 +- src/atlas/trans/Trans.cc | 4 +- src/atlas/trans/VorDivToUV.cc | 4 +- .../LegendreCacheCreatorLocal.cc | 6 +- .../LegendreCacheCreatorLocal.h | 0 .../LegendrePolynomials.cc} | 4 +- .../LegendrePolynomials.h} | 0 .../TransLocalopt3.cc => local/TransLocal.cc} | 71 +++++++++---------- .../TransLocalopt3.h => local/TransLocal.h} | 16 ++--- .../VorDivToUVLocal.cc} | 13 ++-- .../VorDivToUVLocal.h} | 8 +-- src/tests/trans/test_transgeneral.cc | 16 ++--- 13 files changed, 79 insertions(+), 81 deletions(-) rename src/atlas/trans/{localopt3 => local}/LegendreCacheCreatorLocal.cc (94%) rename src/atlas/trans/{localopt3 => local}/LegendreCacheCreatorLocal.h (100%) rename 
src/atlas/trans/{localopt3/LegendrePolynomialsopt3.cc => local/LegendrePolynomials.cc} (99%) rename src/atlas/trans/{localopt3/LegendrePolynomialsopt3.h => local/LegendrePolynomials.h} (100%) rename src/atlas/trans/{localopt3/TransLocalopt3.cc => local/TransLocal.cc} (94%) rename src/atlas/trans/{localopt3/TransLocalopt3.h => local/TransLocal.h} (93%) rename src/atlas/trans/{localopt3/VorDivToUVLocalopt3.cc => local/VorDivToUVLocal.cc} (93%) rename src/atlas/trans/{localopt3/VorDivToUVLocalopt3.h => local/VorDivToUVLocal.h} (86%) diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt index 64dd7e1cc..9bf883d18 100644 --- a/src/atlas/CMakeLists.txt +++ b/src/atlas/CMakeLists.txt @@ -325,14 +325,14 @@ trans/VorDivToUV.h trans/VorDivToUV.cc trans/LegendreCacheCreator.h trans/LegendreCacheCreator.cc -trans/localopt3/TransLocalopt3.h -trans/localopt3/TransLocalopt3.cc -trans/localopt3/LegendrePolynomialsopt3.h -trans/localopt3/LegendrePolynomialsopt3.cc -trans/localopt3/VorDivToUVLocalopt3.h -trans/localopt3/VorDivToUVLocalopt3.cc -trans/localopt3/LegendreCacheCreatorLocal.h -trans/localopt3/LegendreCacheCreatorLocal.cc +trans/local/TransLocal.h +trans/local/TransLocal.cc +trans/local/LegendrePolynomials.h +trans/local/LegendrePolynomials.cc +trans/local/VorDivToUVLocal.h +trans/local/VorDivToUVLocal.cc +trans/local/LegendreCacheCreatorLocal.h +trans/local/LegendreCacheCreatorLocal.cc ) if( ATLAS_HAVE_TRANS ) diff --git a/src/atlas/trans/LegendreCacheCreator.cc b/src/atlas/trans/LegendreCacheCreator.cc index 6d65c76c1..8c6b402a3 100644 --- a/src/atlas/trans/LegendreCacheCreator.cc +++ b/src/atlas/trans/LegendreCacheCreator.cc @@ -24,7 +24,7 @@ #else #define TRANS_DEFAULT "local" #endif -#include "atlas/trans/localopt3/LegendreCacheCreatorLocal.h" +#include "atlas/trans/local/LegendreCacheCreatorLocal.h" namespace atlas { namespace trans { diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc index 57a7e707e..6f67b57bb 100644 --- 
a/src/atlas/trans/Trans.cc +++ b/src/atlas/trans/Trans.cc @@ -27,7 +27,7 @@ #else #define TRANS_DEFAULT "local" #endif -#include "atlas/trans/localopt3/TransLocalopt3.h" // --> recommended "local" +#include "atlas/trans/local/TransLocal.h" // --> recommended "local" namespace atlas { namespace trans { @@ -61,7 +61,7 @@ struct force_link { load_builder_functionspace(); load_builder_grid(); #endif - load_builder_grid(); + load_builder_grid(); } }; diff --git a/src/atlas/trans/VorDivToUV.cc b/src/atlas/trans/VorDivToUV.cc index c01076f6e..e160062b5 100644 --- a/src/atlas/trans/VorDivToUV.cc +++ b/src/atlas/trans/VorDivToUV.cc @@ -26,7 +26,7 @@ #else #define TRANS_DEFAULT "local" #endif -#include "atlas/trans/localopt3/VorDivToUVLocalopt3.h" // --> recommended "local" +#include "atlas/trans/local/VorDivToUVLocal.h" // --> recommended "local" namespace atlas { namespace trans { @@ -54,7 +54,7 @@ struct force_link { #if ATLAS_HAVE_TRANS load_builder(); #endif - load_builder(); + load_builder(); } }; diff --git a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc b/src/atlas/trans/local/LegendreCacheCreatorLocal.cc similarity index 94% rename from src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc rename to src/atlas/trans/local/LegendreCacheCreatorLocal.cc index a004348b9..9424ea52f 100644 --- a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.cc +++ b/src/atlas/trans/local/LegendreCacheCreatorLocal.cc @@ -8,14 +8,14 @@ * nor does it submit to any jurisdiction. 
*/ -#include "atlas/trans/localopt3/LegendreCacheCreatorLocal.h" +#include "atlas/trans/local/LegendreCacheCreatorLocal.h" #include #include #include "eckit/utils/MD5.h" #include "atlas/grid.h" #include "atlas/option.h" #include "atlas/trans/Trans.h" -#include "atlas/trans/localopt3/TransLocalopt3.h" +#include "atlas/trans/local/TransLocal.h" namespace atlas { namespace trans { @@ -94,7 +94,7 @@ void LegendreCacheCreatorLocal::create( const std::string& path ) const { Cache LegendreCacheCreatorLocal::create() const { util::Config export_legendre("export_legendre",true); Trans tmp( grid_, truncation_, config_ | option::type("local") | export_legendre ); - auto impl = dynamic_cast( tmp.get() ); + auto impl = dynamic_cast( tmp.get() ); return impl->export_legendre_; } diff --git a/src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h b/src/atlas/trans/local/LegendreCacheCreatorLocal.h similarity index 100% rename from src/atlas/trans/localopt3/LegendreCacheCreatorLocal.h rename to src/atlas/trans/local/LegendreCacheCreatorLocal.h diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc b/src/atlas/trans/local/LegendrePolynomials.cc similarity index 99% rename from src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc rename to src/atlas/trans/local/LegendrePolynomials.cc index 1cddbc18b..4bd0d67a1 100644 --- a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.cc +++ b/src/atlas/trans/local/LegendrePolynomials.cc @@ -14,7 +14,7 @@ #include "atlas/array.h" #include "atlas/parallel/mpi/mpi.h" -#include "atlas/trans/localopt3/LegendrePolynomialsopt3.h" +#include "atlas/trans/local/LegendrePolynomials.h" namespace atlas { namespace trans { @@ -187,7 +187,7 @@ void compute_legendre_polynomialsopt3( // The trans library in IFS uses descending order because it should // be more accurate (higher wavenumbers have smaller contributions). // This also needs to be changed when splitting the spectral data in - // TransLocalopt3::invtrans_uv! + // TransLocal::invtrans_uv! 
//for ( int jn = jm; jn <= trc; jn++ ) { for ( int jn = trc; jn >= jm; jn-- ) { if ( ( jn - jm ) % 2 == 0 ) { diff --git a/src/atlas/trans/localopt3/LegendrePolynomialsopt3.h b/src/atlas/trans/local/LegendrePolynomials.h similarity index 100% rename from src/atlas/trans/localopt3/LegendrePolynomialsopt3.h rename to src/atlas/trans/local/LegendrePolynomials.h diff --git a/src/atlas/trans/localopt3/TransLocalopt3.cc b/src/atlas/trans/local/TransLocal.cc similarity index 94% rename from src/atlas/trans/localopt3/TransLocalopt3.cc rename to src/atlas/trans/local/TransLocal.cc index 30995afb1..ac8fd9ac4 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.cc +++ b/src/atlas/trans/local/TransLocal.cc @@ -8,7 +8,7 @@ * nor does it submit to any jurisdiction. */ -#include "atlas/trans/localopt3/TransLocalopt3.h" +#include "atlas/trans/local/TransLocal.h" #include #include #include "atlas/array.h" @@ -17,7 +17,7 @@ #include "atlas/runtime/ErrorHandling.h" #include "atlas/runtime/Log.h" #include "atlas/trans/VorDivToUV.h" -#include "atlas/trans/localopt3/LegendrePolynomialsopt3.h" +#include "atlas/trans/local/LegendrePolynomials.h" #include "atlas/util/Constants.h" #include "eckit/config/YAMLConfiguration.h" #include "eckit/eckit_config.h" @@ -30,8 +30,7 @@ namespace atlas { namespace trans { namespace { -static TransBuilderGrid builder_deprecated( "localopt3" ); -static TransBuilderGrid builder( "local" ); +static TransBuilderGrid builder( "local" ); } // namespace namespace { @@ -217,7 +216,7 @@ int fourier_truncation( const int truncation, // truncation } // -------------------------------------------------------------------------------------------------------------------- -// Class TransLocalopt3 +// Class TransLocal // -------------------------------------------------------------------------------------------------------------------- const eckit::linalg::LinearAlgebra& linear_algebra_backend() { @@ -228,7 +227,7 @@ const eckit::linalg::LinearAlgebra& 
linear_algebra_backend() { return eckit::linalg::LinearAlgebra::backend(); } -TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const Domain& domain, const long truncation, +TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& domain, const long truncation, const eckit::Configuration& config ) : grid_( grid, domain ), truncation_( truncation ), @@ -240,7 +239,7 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const Doma fft_cachesize_( cache.fft().size() ), linalg_( linear_algebra_backend() ) { - ATLAS_TRACE( "TransLocalOpt3 constructor" ); + ATLAS_TRACE( "TransLocal constructor" ); double fft_threshold = 0.0; // fraction of latitudes of the full grid down to which FFT is used. // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine // on which this code is running! @@ -586,18 +585,18 @@ TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const Doma // -------------------------------------------------------------------------------------------------------------------- -TransLocalopt3::TransLocalopt3( const Grid& grid, const long truncation, const eckit::Configuration& config ) : - TransLocalopt3( Cache(), grid, grid.domain(), truncation, config ) {} +TransLocal::TransLocal( const Grid& grid, const long truncation, const eckit::Configuration& config ) : + TransLocal( Cache(), grid, grid.domain(), truncation, config ) {} -TransLocalopt3::TransLocalopt3( const Grid& grid, const Domain& domain, const long truncation, const eckit::Configuration& config ) : - TransLocalopt3( Cache(), grid, domain, truncation, config ) {} +TransLocal::TransLocal( const Grid& grid, const Domain& domain, const long truncation, const eckit::Configuration& config ) : + TransLocal( Cache(), grid, domain, truncation, config ) {} -TransLocalopt3::TransLocalopt3( const Cache& cache, const Grid& grid, const long truncation, const eckit::Configuration& config ) : - 
TransLocalopt3( cache, grid, grid.domain(), truncation, config ) {} +TransLocal::TransLocal( const Cache& cache, const Grid& grid, const long truncation, const eckit::Configuration& config ) : + TransLocal( cache, grid, grid.domain(), truncation, config ) {} // -------------------------------------------------------------------------------------------------------------------- -TransLocalopt3::~TransLocalopt3() { +TransLocal::~TransLocal() { if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { if ( not legendre_cache_ ) { free_aligned( legendre_sym_ ); @@ -623,40 +622,40 @@ TransLocalopt3::~TransLocalopt3() { // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::invtrans( const Field& spfield, Field& gpfield, const eckit::Configuration& config ) const { +void TransLocal::invtrans( const Field& spfield, Field& gpfield, const eckit::Configuration& config ) const { NOTIMP; } // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::invtrans( const FieldSet& spfields, FieldSet& gpfields, +void TransLocal::invtrans( const FieldSet& spfields, FieldSet& gpfields, const eckit::Configuration& config ) const { NOTIMP; } // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::invtrans_grad( const Field& spfield, Field& gradfield, const eckit::Configuration& config ) const { +void TransLocal::invtrans_grad( const Field& spfield, Field& gradfield, const eckit::Configuration& config ) const { NOTIMP; } // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields, +void TransLocal::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields, const eckit::Configuration& 
config ) const { NOTIMP; } // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind, +void TransLocal::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind, const eckit::Configuration& config ) const { NOTIMP; } // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[], +void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[], const eckit::Configuration& config ) const { invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields, config ); } @@ -673,7 +672,7 @@ void gp_transposeopt3( const int nb_size, const int nb_fields, const double gp_t // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlats, const int nb_fields, +void TransLocal::invtrans_legendreopt3( const int truncation, const int nlats, const int nb_fields, const double scalar_spectra[], double scl_fourier[], const eckit::Configuration& config ) const { // Legendre transform: @@ -826,7 +825,7 @@ void TransLocalopt3::invtrans_legendreopt3( const int truncation, const int nlat // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::invtrans_fourier_regularopt3( const int nlats, const int nlons, const int nb_fields, +void TransLocal::invtrans_fourier_regularopt3( const int nlats, const int nlons, const int nb_fields, double scl_fourier[], double gp_fields[], const eckit::Configuration& config ) const { // Fourier transformation: @@ -910,7 +909,7 @@ void 
TransLocalopt3::invtrans_fourier_regularopt3( const int nlats, const int nl // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid::StructuredGrid g, const int nb_fields, +void TransLocal::invtrans_fourier_reducedopt3( const int nlats, const grid::StructuredGrid g, const int nb_fields, double scl_fourier[], double gp_fields[], const eckit::Configuration& config ) const { // Fourier transformation: @@ -969,7 +968,7 @@ void TransLocalopt3::invtrans_fourier_reducedopt3( const int nlats, const grid:: // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const int nb_fields, +void TransLocal::invtrans_unstructured_precomp( const int truncation, const int nb_fields, const int nb_vordiv_fields, const double scalar_spectra[], double gp_fields[], const eckit::Configuration& config ) const { ATLAS_TRACE( "invtrans_uv unstructured opt3" ); @@ -1061,7 +1060,7 @@ void TransLocalopt3::invtrans_unstructured_precomp( const int truncation, const // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_fields, const int nb_vordiv_fields, +void TransLocal::invtrans_unstructured( const int truncation, const int nb_fields, const int nb_vordiv_fields, const double scalar_spectra[], double gp_fields[], const eckit::Configuration& config ) const { ATLAS_TRACE( "invtrans_uv unstructured" ); @@ -1150,7 +1149,7 @@ void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_f } //----------------------------------------------------------------------------- -// Routine to compute the spectral transform by using a localopt3 Fourier transformation +// 
Routine to compute the spectral transform by using a Local Fourier transformation // for a grid (same latitude for all longitudes, allows to compute Legendre functions // once for all longitudes). U and v components are divided by cos(latitude) for // nb_vordiv_fields > 0. @@ -1164,7 +1163,7 @@ void TransLocalopt3::invtrans_unstructured( const int truncation, const int nb_f // Author: // Andreas Mueller *ECMWF* // -void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, +void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, const double scalar_spectra[], double gp_fields[], const eckit::Configuration& config ) const { if ( nb_scalar_fields > 0 ) { @@ -1226,7 +1225,7 @@ void TransLocalopt3::invtrans_uv( const int truncation, const int nb_scalar_fiel // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], +void TransLocal::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], const eckit::Configuration& config ) const { invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); @@ -1253,10 +1252,10 @@ void extend_truncationopt3( const int old_truncation, const int nb_fields, const // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, +void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], const eckit::Configuration& config ) const { - ATLAS_TRACE( 
"TransLocalopt3::invtrans" ); + ATLAS_TRACE( "TransLocal::invtrans" ); int nb_gp = grid_.size(); int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; if ( nb_vordiv_fields > 0 ) { @@ -1277,7 +1276,7 @@ void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_s { ATLAS_TRACE( "vordiv to UV opt3" ); // call vd2uv to compute u and v in spectral space - trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "localopt3" ) ); + trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "Local" ) ); vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); } @@ -1295,7 +1294,7 @@ void TransLocalopt3::invtrans( const int nb_scalar_fields, const double scalar_s // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const { +void TransLocal::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const { NOTIMP; // Not implemented and not planned. // Use the TransIFS implementation instead. @@ -1303,7 +1302,7 @@ void TransLocalopt3::dirtrans( const Field& gpfield, Field& spfield, const eckit // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::dirtrans( const FieldSet& gpfields, FieldSet& spfields, +void TransLocal::dirtrans( const FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config ) const { NOTIMP; // Not implemented and not planned. 
@@ -1312,7 +1311,7 @@ void TransLocalopt3::dirtrans( const FieldSet& gpfields, FieldSet& spfields, // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, +void TransLocal::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, const eckit::Configuration& config ) const { NOTIMP; // Not implemented and not planned. @@ -1321,7 +1320,7 @@ void TransLocalopt3::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Fi // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], +void TransLocal::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], const eckit::Configuration& ) const { NOTIMP; // Not implemented and not planned. @@ -1330,7 +1329,7 @@ void TransLocalopt3::dirtrans( const int nb_fields, const double scalar_fields[] // -------------------------------------------------------------------------------------------------------------------- -void TransLocalopt3::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], +void TransLocal::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], double divergence_spectra[], const eckit::Configuration& ) const { NOTIMP; // Not implemented and not planned. 
diff --git a/src/atlas/trans/localopt3/TransLocalopt3.h b/src/atlas/trans/local/TransLocal.h similarity index 93% rename from src/atlas/trans/localopt3/TransLocalopt3.h rename to src/atlas/trans/local/TransLocal.h index bcc80b1d7..8dd80359d 100644 --- a/src/atlas/trans/localopt3/TransLocalopt3.h +++ b/src/atlas/trans/local/TransLocal.h @@ -50,9 +50,9 @@ int fourier_truncation( const int truncation, // truncation //----------------------------------------------------------------------------- -/// @class TransLocalopt3 +/// @class TransLocal /// -/// Localopt3 spherical harmonics transformations to any grid +/// Local spherical harmonics transformations to any grid /// Optimisations are present for structured grids /// For global grids, please consider using TransIFS instead. /// @@ -62,16 +62,16 @@ int fourier_truncation( const int truncation, // truncation /// /// @note: Direct transforms are not implemented and cannot be unless /// the grid is global. There are no plans to support this at the moment. 
-class TransLocalopt3 : public trans::TransImpl { +class TransLocal : public trans::TransImpl { public: - TransLocalopt3( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() ); - TransLocalopt3( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); - TransLocalopt3( const Cache&, const Grid&, const long truncation, + TransLocal( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransLocal( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransLocal( const Cache&, const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() ); - TransLocalopt3( const Cache&, const Grid&, const Domain&, const long truncation, + TransLocal( const Cache&, const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); - virtual ~TransLocalopt3(); + virtual ~TransLocal(); virtual int truncation() const override { return truncation_; } virtual size_t spectralCoefficients() const override { return ( truncation_ + 1 ) * ( truncation_ + 2 ); } diff --git a/src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc b/src/atlas/trans/local/VorDivToUVLocal.cc similarity index 93% rename from src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc rename to src/atlas/trans/local/VorDivToUVLocal.cc index b7b9474af..9d23a9db7 100644 --- a/src/atlas/trans/localopt3/VorDivToUVLocalopt3.cc +++ b/src/atlas/trans/local/VorDivToUVLocal.cc @@ -8,7 +8,7 @@ * nor does it submit to any jurisdiction. 
*/ -#include "atlas/trans/localopt3/VorDivToUVLocalopt3.h" +#include "atlas/trans/local/VorDivToUVLocal.h" #include // for std::sqrt #include "atlas/functionspace/Spectral.h" #include "atlas/runtime/Log.h" @@ -21,8 +21,7 @@ namespace atlas { namespace trans { namespace { -static VorDivToUVBuilder builder_deprecated( "localopt3" ); -static VorDivToUVBuilder builder( "local" ); +static VorDivToUVBuilder builder( "local" ); } // -------------------------------------------------------------------------------------------------------------------- @@ -165,7 +164,7 @@ void vd2uvopt3( const int truncation, // truncation } } -void VorDivToUVLocalopt3::execute( const int nb_coeff, const int nb_fields, const double vorticity[], +void VorDivToUVLocal::execute( const int nb_coeff, const int nb_fields, const double vorticity[], const double divergence[], double U[], double V[], const eckit::Configuration& config ) const { for ( int jm = 0; jm <= truncation_; ++jm ) { @@ -173,13 +172,13 @@ void VorDivToUVLocalopt3::execute( const int nb_coeff, const int nb_fields, cons } } -VorDivToUVLocalopt3::VorDivToUVLocalopt3( const int truncation, const eckit::Configuration& config ) : +VorDivToUVLocal::VorDivToUVLocal( const int truncation, const eckit::Configuration& config ) : truncation_( truncation ) {} -VorDivToUVLocalopt3::VorDivToUVLocalopt3( const FunctionSpace& fs, const eckit::Configuration& config ) : +VorDivToUVLocal::VorDivToUVLocal( const FunctionSpace& fs, const eckit::Configuration& config ) : truncation_( Spectral( fs ).truncation() ) {} -VorDivToUVLocalopt3::~VorDivToUVLocalopt3() {} +VorDivToUVLocal::~VorDivToUVLocal() {} } // namespace trans } // namespace atlas diff --git a/src/atlas/trans/localopt3/VorDivToUVLocalopt3.h b/src/atlas/trans/local/VorDivToUVLocal.h similarity index 86% rename from src/atlas/trans/localopt3/VorDivToUVLocalopt3.h rename to src/atlas/trans/local/VorDivToUVLocal.h index 44fdc98fe..ee3903de1 100644 --- 
a/src/atlas/trans/localopt3/VorDivToUVLocalopt3.h +++ b/src/atlas/trans/local/VorDivToUVLocal.h @@ -26,12 +26,12 @@ namespace trans { //----------------------------------------------------------------------------- -class VorDivToUVLocalopt3 : public trans::VorDivToUVImpl { +class VorDivToUVLocal : public trans::VorDivToUVImpl { public: - VorDivToUVLocalopt3( const FunctionSpace&, const eckit::Configuration& = util::NoConfig() ); - VorDivToUVLocalopt3( int truncation, const eckit::Configuration& = util::NoConfig() ); + VorDivToUVLocal( const FunctionSpace&, const eckit::Configuration& = util::NoConfig() ); + VorDivToUVLocal( int truncation, const eckit::Configuration& = util::NoConfig() ); - virtual ~VorDivToUVLocalopt3(); + virtual ~VorDivToUVLocal(); virtual int truncation() const override { return truncation_; } diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 92b03f489..fb976bd25 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -31,7 +31,7 @@ #include "atlas/trans/Trans.h" #include "atlas/util/Constants.h" #include "atlas/util/Earth.h" -#include "atlas/trans/localopt3/TransLocalopt3.h" +#include "atlas/trans/local/TransLocal.h" #include "tests/AtlasTestEnvironment.h" @@ -744,7 +744,7 @@ CASE( "test_trans_vordiv_with_translib" ) { double rav = 0.; // compute average rms error of trans library in rav #endif trans::Trans transLocal1( g, trc, util::Config( "type", "localopt2" ) ); - trans::Trans transLocal2( g, trc, util::Config( "type", "localopt3" ) ); + trans::Trans transLocal2( g, trc, util::Config( "type", "Local" ) ); double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 functionspace::Spectral spectral( trc ); @@ -882,10 +882,10 @@ CASE( "test_trans_hires" ) { double tolerance = 1.e-13; #if ATLAS_HAVE_TRANS - //std::string transTypes[4] = {"localopt", "localopt2", "localopt3", "ifs"}; - //std::string transTypes[2] = {"localopt2", 
"localopt3"}; - std::string transTypes[3] = {"localopt3", "localopt2", "localopt"}; - //std::string transTypes[1] = {"localopt3"}; + //std::string transTypes[4] = {"localopt", "localopt2", "Local", "ifs"}; + //std::string transTypes[2] = {"localopt2", "Local"}; + std::string transTypes[3] = {"Local", "localopt2", "localopt"}; + //std::string transTypes[1] = {"Local"}; #else std::string transTypes[1] = {"localopt2"}; #endif @@ -1176,8 +1176,8 @@ CASE( "test_trans_unstructured" ) { std::vector rgp_analytic1( g.size() ); std::vector rgp_analytic2( gu.size() ); - trans::Trans transLocal1( g, trc, util::Config( "type", "localopt3" ) ); - trans::Trans transLocal2( gu, trc, util::Config( "type", "localopt3" ) ); + trans::Trans transLocal1( g, trc, util::Config( "type", "Local" ) ); + trans::Trans transLocal2( gu, trc, util::Config( "type", "Local" ) ); int icase = 0; for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) { // vorticity, divergence, scalar From 160fe1d0caa8eee9d4421b3e7426f3ffbd0ed5cd Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 9 May 2018 18:02:10 +0100 Subject: [PATCH 071/123] Improve uid for Legendre cache files --- .../trans/local/LegendreCacheCreatorLocal.cc | 52 +++++++++++++++---- src/atlas/trans/local/TransLocal.cc | 42 +++++++-------- src/tests/trans/test_trans_localcache.cc | 6 ++- 3 files changed, 68 insertions(+), 32 deletions(-) diff --git a/src/atlas/trans/local/LegendreCacheCreatorLocal.cc b/src/atlas/trans/local/LegendreCacheCreatorLocal.cc index 9424ea52f..e4440e0d4 100644 --- a/src/atlas/trans/local/LegendreCacheCreatorLocal.cc +++ b/src/atlas/trans/local/LegendreCacheCreatorLocal.cc @@ -12,6 +12,7 @@ #include #include #include "eckit/utils/MD5.h" +#include "eckit/types/FloatCompare.h" #include "atlas/grid.h" #include "atlas/option.h" #include "atlas/trans/Trans.h" @@ -56,18 +57,47 @@ std::string hash( const eckit::Configuration& config ) { std::string LegendreCacheCreatorLocal::uid() const { if( unique_identifier_.empty() ) { 
std::ostringstream stream; - stream << "local-T" << truncation_ << "-"; - if( grid::GaussianGrid( grid_ ) ) { - // Same cache for any global Gaussian grid - stream << "GaussianN" << grid::GaussianGrid( grid_ ).N(); - } else if( grid::RegularLonLatGrid( grid_ ) ) { - // Same cache for any global regular grid - auto g = grid::RegularLonLatGrid( grid_ ); - stream << ( g.shiftedLat() ? "S" : "L" ) << "+x" << g.ny(); - // The above '+' is a placeholder for any g.nx() - } else { + auto give_up = [&]() { // We cannot make more assumptions on reusability for different grids stream << "grid-" << hash( grid_ ); + }; + stream << "local-T" << truncation_ << "-"; + grid::StructuredGrid structured ( grid_ ); + if( grid_.domain().global() ) { + if( grid::GaussianGrid( grid_ ) ) { + // Same cache for any global Gaussian grid + stream << "GaussianN" << grid::GaussianGrid( grid_ ).N(); + } else if( grid::RegularLonLatGrid( grid_ ) ) { + // Same cache for any global regular grid + auto g = grid::RegularLonLatGrid( grid_ ); + + const double dy_2 = 90. / double(g.ny()); + bool shifted_lat = eckit::types::is_approximately_equal( g.y().front(), 90. - dy_2 ) && + eckit::types::is_approximately_equal( g.y().back(), -90. + dy_2 ); + bool standard_lat = eckit::types::is_approximately_equal( g.y().front(), 90. ) && + eckit::types::is_approximately_equal( g.y().back(), -90. 
); + + if( standard_lat ) { + stream << "L" << "-ny" << g.ny(); + } else if( shifted_lat ) { + stream << "S" << "-ny" << g.ny(); + } else { // I don't think we get here, but just in case, give up + give_up(); + } + } else { // global but not gaussian or regularlonlat + give_up(); + } + } else { // regional grid + if( grid::RegularGrid( grid_ ) && not grid_.projection() && structured.yspace().type() == "linear" ) { + RectangularDomain domain( grid_.domain() ); + ASSERT( domain ); + stream << "Regional"; + stream << "-south" << domain.ymin(); + stream << "-north" << domain.ymax(); + stream << "-ny" << structured.ny(); + } else { // It gets too complicated, so let's not be smart + give_up(); + } } stream << "-OPT" << hash( config_ ); unique_identifier_ = stream.str(); @@ -84,6 +114,8 @@ LegendreCacheCreatorLocal::LegendreCacheCreatorLocal( const Grid& grid, int trun } bool LegendreCacheCreatorLocal::supported() const { + if( not grid::StructuredGrid( grid_ ) ) return false; + if( grid_.projection() ) return false; return true; } diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc index ac8fd9ac4..f47893e71 100644 --- a/src/atlas/trans/local/TransLocal.cc +++ b/src/atlas/trans/local/TransLocal.cc @@ -228,7 +228,7 @@ const eckit::linalg::LinearAlgebra& linear_algebra_backend() { } TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& domain, const long truncation, - const eckit::Configuration& config ) : + const eckit::Configuration& config ) : grid_( grid, domain ), truncation_( truncation ), precompute_( config.getBool( "precompute", true ) ), @@ -430,22 +430,22 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma // TODO: check this is all aligned... 
} else { - ATLAS_TRACE_SCOPE( "Legendre precomputations (structured)" ) { - - if( TransParameters(config).export_legendre() ) { - ASSERT( not cache_.legendre() ); - export_legendre_ = LegendreCache( sizeof(double) * ( size_sym + size_asym ) ); - legendre_cachesize_ = export_legendre_.legendre().size(); - legendre_cache_ = export_legendre_.legendre().data(); - legendre_cache_ = std::malloc( legendre_cachesize_ ); - ReadCache legendre( legendre_cache_ ); - legendre_sym_ = legendre.read( size_sym ); - legendre_asym_ = legendre.read( size_asym ); - } else { - alloc_aligned( legendre_sym_, size_sym ); - alloc_aligned( legendre_asym_, size_asym ); - } + if( TransParameters(config).export_legendre() ) { + ASSERT( not cache_.legendre() ); + export_legendre_ = LegendreCache( sizeof(double) * ( size_sym + size_asym ) ); + legendre_cachesize_ = export_legendre_.legendre().size(); + legendre_cache_ = export_legendre_.legendre().data(); + legendre_cache_ = std::malloc( legendre_cachesize_ ); + ReadCache legendre( legendre_cache_ ); + legendre_sym_ = legendre.read( size_sym ); + legendre_asym_ = legendre.read( size_asym ); + } else { + alloc_aligned( legendre_sym_, size_sym ); + alloc_aligned( legendre_asym_, size_asym ); + } + + ATLAS_TRACE_SCOPE( "Legendre precomputations (structured)" ) { compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_, legendre_sym_begin_.data(), legendre_asym_begin_.data() ); @@ -454,11 +454,11 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma if ( file_path.size() ) { ATLAS_TRACE( "Write LegendreCache to file" ); Log::debug() << "Writing Legendre cache file ..." 
<< std::endl; - Log::debug() << " path = " << file_path << std::endl; + Log::debug() << " path: " << file_path << std::endl; WriteCache legendre( file_path ); legendre.write( legendre_sym_, size_sym ); legendre.write( legendre_asym_, size_asym ); - Log::debug() << "Cache file size: " << eckit::Bytes( legendre.pos ) << std::endl; + Log::debug() << " size: " << eckit::Bytes( legendre.pos ) << std::endl; } } } @@ -1192,7 +1192,7 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields, // Computing u,v from U,V: { if ( nb_vordiv_fields > 0 ) { - ATLAS_TRACE( "opt3 u,v from U,V" ); + ATLAS_TRACE( "compute u,v from U,V" ); std::vector coslats( nlats ); for ( size_t j = 0; j < nlats; ++j ) { coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); @@ -1265,7 +1265,7 @@ void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spect std::vector V_ext( nb_vordiv_spec_ext, 0. ); { - ATLAS_TRACE( "opt3 extend vordiv" ); + ATLAS_TRACE( "extend vordiv" ); // increase truncation in vorticity_spectra and divergence_spectra: extend_truncationopt3( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() ); @@ -1274,7 +1274,7 @@ void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spect } { - ATLAS_TRACE( "vordiv to UV opt3" ); + ATLAS_TRACE( "vordiv to UV" ); // call vd2uv to compute u and v in spectral space trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "Local" ) ); vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc index 1011719f0..e9bfb6baf 100644 --- a/src/tests/trans/test_trans_localcache.cc +++ b/src/tests/trans/test_trans_localcache.cc @@ -209,7 +209,6 @@ CASE( "test_regional_grids nested_in_global" ) { } CASE( "test_regional_grids not nested" ) { - auto cachefile = CacheFile("cache-regional.bin"); auto 
truncation = 89; Cache cache; @@ -217,6 +216,8 @@ CASE( "test_regional_grids not nested" ) { LegendreCacheCreator cache_creator( grid, truncation, option::type("local") ); EXPECT( cache_creator.supported() ); + auto cachefile = CacheFile( "leg_" + cache_creator.uid() + ".bin" ); + ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) ) cache_creator.create( cachefile ); @@ -242,6 +243,9 @@ CASE( "test_regional_grids with projection" ) { Trans( grid, truncation, option::type("local") ); // Note: caching not yet implemented for unstructured and projected grids + LegendreCacheCreator legendre_cache_creator( grid, truncation, option::type("local") ); + ATLAS_DEBUG_VAR( legendre_cache_creator.uid() ); + EXPECT( not legendre_cache_creator.supported() ); } CASE( "test cache creator to file" ) { From e56f34b34e95807928f5d592eed062459f7a02d0 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 9 May 2018 18:08:01 +0100 Subject: [PATCH 072/123] Improve uid for Legendre cache files --- .../trans/local/LegendreCacheCreatorLocal.cc | 60 +++++++++---------- 1 file changed, 27 insertions(+), 33 deletions(-) diff --git a/src/atlas/trans/local/LegendreCacheCreatorLocal.cc b/src/atlas/trans/local/LegendreCacheCreatorLocal.cc index e4440e0d4..4e3488aed 100644 --- a/src/atlas/trans/local/LegendreCacheCreatorLocal.cc +++ b/src/atlas/trans/local/LegendreCacheCreatorLocal.cc @@ -63,41 +63,35 @@ std::string LegendreCacheCreatorLocal::uid() const { }; stream << "local-T" << truncation_ << "-"; grid::StructuredGrid structured ( grid_ ); - if( grid_.domain().global() ) { - if( grid::GaussianGrid( grid_ ) ) { - // Same cache for any global Gaussian grid - stream << "GaussianN" << grid::GaussianGrid( grid_ ).N(); - } else if( grid::RegularLonLatGrid( grid_ ) ) { - // Same cache for any global regular grid - auto g = grid::RegularLonLatGrid( grid_ ); - - const double dy_2 = 90. / double(g.ny()); - bool shifted_lat = eckit::types::is_approximately_equal( g.y().front(), 90. 
- dy_2 ) && - eckit::types::is_approximately_equal( g.y().back(), -90. + dy_2 ); - bool standard_lat = eckit::types::is_approximately_equal( g.y().front(), 90. ) && - eckit::types::is_approximately_equal( g.y().back(), -90. ); - - if( standard_lat ) { - stream << "L" << "-ny" << g.ny(); - } else if( shifted_lat ) { - stream << "S" << "-ny" << g.ny(); - } else { // I don't think we get here, but just in case, give up - give_up(); - } - } else { // global but not gaussian or regularlonlat - give_up(); - } - } else { // regional grid - if( grid::RegularGrid( grid_ ) && not grid_.projection() && structured.yspace().type() == "linear" ) { - RectangularDomain domain( grid_.domain() ); - ASSERT( domain ); - stream << "Regional"; - stream << "-south" << domain.ymin(); - stream << "-north" << domain.ymax(); - stream << "-ny" << structured.ny(); - } else { // It gets too complicated, so let's not be smart + if( grid::GaussianGrid( grid_ ) ) { + // Same cache for any global Gaussian grid + stream << "GaussianN" << grid::GaussianGrid( grid_ ).N(); + } else if( grid::RegularLonLatGrid( grid_ ) ) { + // Same cache for any global regular grid + auto g = grid::RegularLonLatGrid( grid_ ); + + const double dy_2 = 90. / double(g.ny()); + bool shifted_lat = eckit::types::is_approximately_equal( g.y().front(), 90. - dy_2 ) && + eckit::types::is_approximately_equal( g.y().back(), -90. + dy_2 ); + bool standard_lat = eckit::types::is_approximately_equal( g.y().front(), 90. ) && + eckit::types::is_approximately_equal( g.y().back(), -90. 
); + + if( standard_lat ) { + stream << "L" << "-ny" << g.ny(); + } else if( shifted_lat ) { + stream << "S" << "-ny" << g.ny(); + } else { // I don't think we get here, but just in case, give up give_up(); } + } else if ( grid::RegularGrid( grid_ ) && not grid_.projection() && structured.yspace().type() == "linear" ) { + RectangularDomain domain( grid_.domain() ); + ASSERT( domain ); + stream << "Regional"; + stream << "-south" << domain.ymin(); + stream << "-north" << domain.ymax(); + stream << "-ny" << structured.ny(); + } else { // It gets too complicated, so let's not be smart + give_up(); } stream << "-OPT" << hash( config_ ); unique_identifier_ = stream.str(); From 3b0518b279501c694a31573e1c2e28044802777b Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 9 May 2018 18:53:34 +0100 Subject: [PATCH 073/123] Warning message for transforms to unstructured or projected grids --- src/atlas/option/TransOptions.cc | 4 +++ src/atlas/option/TransOptions.h | 8 +++++ src/atlas/trans/local/TransLocal.cc | 40 +++++++++++++++++------- src/atlas/trans/local/TransLocal.h | 1 + src/tests/trans/test_trans_localcache.cc | 7 ++++- 5 files changed, 48 insertions(+), 12 deletions(-) diff --git a/src/atlas/option/TransOptions.cc b/src/atlas/option/TransOptions.cc index a80582a78..98e036acc 100644 --- a/src/atlas/option/TransOptions.cc +++ b/src/atlas/option/TransOptions.cc @@ -65,6 +65,10 @@ nproma::nproma( int nproma ) { set( "nproma", nproma ); } +warning::warning( int warning ) { + set( "warning", warning ); +} + // ---------------------------------------------------------------------------- } // namespace option diff --git a/src/atlas/option/TransOptions.h b/src/atlas/option/TransOptions.h index d9c11593d..d87bcdab3 100644 --- a/src/atlas/option/TransOptions.h +++ b/src/atlas/option/TransOptions.h @@ -109,10 +109,18 @@ class read_fft : public util::Config { // ---------------------------------------------------------------------------- class nproma : public util::Config 
{ +public: nproma( int ); }; // ---------------------------------------------------------------------------- +class warning : public util::Config { +public: + warning( int ); +}; + +// ---------------------------------------------------------------------------- + } // namespace option } // namespace atlas diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc index f47893e71..e7bb7ee4d 100644 --- a/src/atlas/trans/local/TransLocal.cc +++ b/src/atlas/trans/local/TransLocal.cc @@ -57,6 +57,8 @@ class TransParameters { bool global() const { return config_.getBool( "global", false ); } + int warning() const { return config_.getLong( "warning", 0 ); } + int fft() const { static const std::map string_to_FFT = {{"OFF", (int)option::FFT::OFF}, {"FFTW", (int)option::FFT::FFTW}}; @@ -237,7 +239,8 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma legendre_cachesize_( cache.legendre().size() ), fft_cache_( cache.fft().data() ), fft_cachesize_( cache.fft().size() ), - linalg_( linear_algebra_backend() ) + linalg_( linear_algebra_backend() ), + warning_( TransParameters(config).warning() ) { ATLAS_TRACE( "TransLocal constructor" ); double fft_threshold = 0.0; // fraction of latitudes of the full grid down to which FFT is used. @@ -569,6 +572,11 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma // unstructured grid if ( unstruct_precomp_ ) { ATLAS_TRACE( "Legendre precomputations (unstructured)" ); + + if( warning_ > 0 && grid_.size() > warning_ ) { + Log::warning() << "WARNING: Precomputations for spectral transforms could take a long time and consume a lot of memory (unstructured grid approach)!" 
<< std::endl; + } + std::vector lats( grid_.size() ); alloc_aligned( legendre_, legendre_size( truncation_ ) * grid_.size() ); int j( 0 ); @@ -971,8 +979,9 @@ void TransLocal::invtrans_fourier_reducedopt3( const int nlats, const grid::Stru void TransLocal::invtrans_unstructured_precomp( const int truncation, const int nb_fields, const int nb_vordiv_fields, const double scalar_spectra[], double gp_fields[], const eckit::Configuration& config ) const { - ATLAS_TRACE( "invtrans_uv unstructured opt3" ); - grid::UnstructuredGrid gu = grid_; + + ATLAS_TRACE( "invtrans_uv unstructured" ); + const int nlats = grid_.size(); const int size_fourier = nb_fields * 2; double* legendre; @@ -1000,9 +1009,8 @@ void TransLocal::invtrans_unstructured_precomp( const int truncation, const int // loop over all points: { ATLAS_TRACE( "Inverse Fourier Transform (NoFFT)" ); - - for ( int ip = 0; ip < grid_.size(); ip++ ) { - const PointLonLat p = gu.lonlat( ip ); + int ip = 0; + for( const PointLonLat p : grid_.lonlat() ) { const double lon = p.lon() * util::Constants::degreesToRadians(); const double lat = p.lat() * util::Constants::degreesToRadians(); { @@ -1050,6 +1058,7 @@ void TransLocal::invtrans_unstructured_precomp( const int truncation, const int } } } + ++ip; } } free_aligned( scl_fourier ); @@ -1063,8 +1072,14 @@ void TransLocal::invtrans_unstructured_precomp( const int truncation, const int void TransLocal::invtrans_unstructured( const int truncation, const int nb_fields, const int nb_vordiv_fields, const double scalar_spectra[], double gp_fields[], const eckit::Configuration& config ) const { - ATLAS_TRACE( "invtrans_uv unstructured" ); - grid::UnstructuredGrid gu = grid_; + ATLAS_TRACE( "invtrans_unstructured" ); + + int warning = warning_; + config.get("warning",warning); + if( warning > 0 && grid_.size() > warning ) { + Log::warning() << "WARNING: Spectral transforms could take a long time (unstructured grid approach)." 
<< std::endl; + } + double* zfn; alloc_aligned( zfn, ( truncation + 1 ) * ( truncation + 1 ) ); compute_zfnopt3( truncation, zfn ); @@ -1080,9 +1095,10 @@ void TransLocal::invtrans_unstructured( const int truncation, const int nb_field alloc_aligned( fouriertp, 2 * ( truncation + 1 ) ); alloc_aligned( gp_opt, nb_fields ); + // loop over all points: - for ( int ip = 0; ip < grid_.size(); ip++ ) { - const PointLonLat p = gu.lonlat( ip ); + int ip = 0; + for ( const PointLonLat p : grid_.lonlat() ) { const double lon = p.lon() * util::Constants::degreesToRadians(); const double lat = p.lat() * util::Constants::degreesToRadians(); compute_legendre_polynomials_latopt3( truncation, lat, legendre, zfn ); @@ -1140,6 +1156,7 @@ void TransLocal::invtrans_unstructured( const int truncation, const int nb_field } } } + ++ip; } free_aligned( legendre ); free_aligned( scl_fourier ); @@ -1170,7 +1187,8 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields, int nb_fields = nb_scalar_fields; // Transform - if ( grid::StructuredGrid g = grid_ ) { + if ( grid::StructuredGrid( grid_ ) && not grid_.projection() ) { + auto g = grid::StructuredGrid( grid_ ); ATLAS_TRACE( "invtrans_uv structured" ); int nlats = g.ny(); int nlons = g.nxmax(); diff --git a/src/atlas/trans/local/TransLocal.h b/src/atlas/trans/local/TransLocal.h index 8dd80359d..41d55162b 100644 --- a/src/atlas/trans/local/TransLocal.h +++ b/src/atlas/trans/local/TransLocal.h @@ -201,6 +201,7 @@ friend class LegendreCacheCreatorLocal; size_t fft_cachesize_{0}; const eckit::linalg::LinearAlgebra& linalg_; + int warning_ = 0; }; //----------------------------------------------------------------------------- diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc index e9bfb6baf..5c3ca945c 100644 --- a/src/tests/trans/test_trans_localcache.cc +++ b/src/tests/trans/test_trans_localcache.cc @@ -239,13 +239,18 @@ CASE( "test_regional_grids with projection" ) { 
("north_pole", std::vector{ 4., 54.} ) ); StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ), projection ); + Trans trans; ATLAS_TRACE_SCOPE("create without cache") - Trans( grid, truncation, option::type("local") ); + trans = Trans( grid, truncation, option::type("local") | option::warning(1) ); // Note: caching not yet implemented for unstructured and projected grids LegendreCacheCreator legendre_cache_creator( grid, truncation, option::type("local") ); ATLAS_DEBUG_VAR( legendre_cache_creator.uid() ); EXPECT( not legendre_cache_creator.supported() ); + + std::vector rspecg( trans.spectralCoefficients(), 0. ); + std::vector rgp( trans.grid().size() ); + trans.invtrans(1,rspecg.data(),rgp.data()); } CASE( "test cache creator to file" ) { From 4d2605b835b1db5ba341c79e03b32657718002a1 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 10 May 2018 08:39:31 +0100 Subject: [PATCH 074/123] cleanup --- src/atlas/trans/local/TransLocal.cc | 12 ++++++++---- src/atlas/trans/local/TransLocal.h | 2 ++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc index e7bb7ee4d..29b7b129f 100644 --- a/src/atlas/trans/local/TransLocal.cc +++ b/src/atlas/trans/local/TransLocal.cc @@ -229,6 +229,12 @@ const eckit::linalg::LinearAlgebra& linear_algebra_backend() { return eckit::linalg::LinearAlgebra::backend(); } +bool TransLocal::warning( const eckit::Configuration& config ) const { + int warning = warning_; + config.get("warning",warning); + return ( warning > 0 && grid_.size() >= warning ); +} + TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& domain, const long truncation, const eckit::Configuration& config ) : grid_( grid, domain ), @@ -573,7 +579,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma if ( unstruct_precomp_ ) { ATLAS_TRACE( "Legendre precomputations (unstructured)" ); - if( warning_ > 0 
&& grid_.size() > warning_ ) { + if( warning() ) { Log::warning() << "WARNING: Precomputations for spectral transforms could take a long time and consume a lot of memory (unstructured grid approach)!" << std::endl; } @@ -1074,9 +1080,7 @@ void TransLocal::invtrans_unstructured( const int truncation, const int nb_field const eckit::Configuration& config ) const { ATLAS_TRACE( "invtrans_unstructured" ); - int warning = warning_; - config.get("warning",warning); - if( warning > 0 && grid_.size() > warning ) { + if( warning(config) ) { Log::warning() << "WARNING: Spectral transforms could take a long time (unstructured grid approach)." << std::endl; } diff --git a/src/atlas/trans/local/TransLocal.h b/src/atlas/trans/local/TransLocal.h index 41d55162b..1a9342cbc 100644 --- a/src/atlas/trans/local/TransLocal.h +++ b/src/atlas/trans/local/TransLocal.h @@ -156,6 +156,8 @@ class TransLocal : public trans::TransImpl { const double scalar_spectra[], double gp_fields[], const eckit::Configuration& = util::NoConfig() ) const; + bool warning( const eckit::Configuration& = util::NoConfig() ) const; + friend class LegendreCacheCreatorLocal; private: From 555639cd48887f577d8256e62e6e02d64cb8d974 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 10 May 2018 11:09:08 +0100 Subject: [PATCH 075/123] Configurable defaults for Trans --- src/atlas/trans/Cache.cc | 16 ++--- src/atlas/trans/Cache.h | 8 ++- src/atlas/trans/LegendreCacheCreator.cc | 15 ++--- src/atlas/trans/Trans.cc | 86 +++++++++++++++++++----- src/atlas/trans/Trans.h | 16 +++++ src/atlas/trans/local/TransLocal.cc | 4 +- src/tests/trans/test_trans_localcache.cc | 56 ++++++--------- 7 files changed, 123 insertions(+), 78 deletions(-) diff --git a/src/atlas/trans/Cache.cc b/src/atlas/trans/Cache.cc index 14e3726d1..ce22fb2c4 100644 --- a/src/atlas/trans/Cache.cc +++ b/src/atlas/trans/Cache.cc @@ -68,10 +68,12 @@ LegendreCache::LegendreCache( const void* address, size_t size ) : } Cache::Cache(const std::shared_ptr& 
legendre) : + trans_( nullptr ), legendre_( legendre ), fft_( new EmptyCacheEntry() ) {} Cache::Cache(const std::shared_ptr& legendre, const std::shared_ptr& fft) : + trans_( nullptr ), legendre_( legendre ), fft_( fft ) {} @@ -79,20 +81,17 @@ Cache::Cache( const TransImpl* trans ) : trans_( trans ), legendre_( new EmptyCacheEntry() ), fft_( new EmptyCacheEntry() ) { - if( trans_ ) - trans_->attach(); } Cache::Cache() : + trans_( nullptr ), legendre_( new EmptyCacheEntry() ), fft_( new EmptyCacheEntry() ) {} -Cache::Cache(const Cache& other) : +Cache::Cache( const Cache& other ) : trans_( other.trans_ ), legendre_( other.legendre_ ), fft_( other.fft_ ) { - if( trans_ ) - trans_->attach(); } Cache::operator bool() const { @@ -102,13 +101,6 @@ Cache::operator bool() const { Cache::~Cache() { pthread_once( &once, init ); eckit::AutoLock lock( local_mutex ); - if( trans_ ) { - trans_->detach(); - if( trans_->owners() == 0 ) { - delete trans_; - } - trans_ = nullptr; - } } TransCache::TransCache( const Trans& trans ) : diff --git a/src/atlas/trans/Cache.h b/src/atlas/trans/Cache.h index e362d6c34..3fca5edd9 100644 --- a/src/atlas/trans/Cache.h +++ b/src/atlas/trans/Cache.h @@ -14,6 +14,7 @@ #include "eckit/filesystem/PathName.h" #include "eckit/io/Buffer.h" +#include "eckit/memory/SharedPtr.h" //----------------------------------------------------------------------------- // Forward declarations @@ -95,16 +96,17 @@ class Cache { Cache(); Cache( const Cache& other ); operator bool() const; - const TransImpl* trans() const { return trans_; } + const TransImpl* trans() const { return trans_.get(); } const TransCacheEntry& legendre() const { return *legendre_; } const TransCacheEntry& fft() const { return *fft_; } - ~Cache(); + virtual ~Cache(); protected: Cache( const std::shared_ptr& legendre ); Cache( const std::shared_ptr& legendre, const std::shared_ptr& fft ); Cache( const TransImpl* ); private: - const TransImpl* trans_ = nullptr; + eckit::SharedPtr trans_; +// const 
TransImpl* trans_ = nullptr; std::shared_ptr legendre_; std::shared_ptr fft_; }; diff --git a/src/atlas/trans/LegendreCacheCreator.cc b/src/atlas/trans/LegendreCacheCreator.cc index 8c6b402a3..a51165dd8 100644 --- a/src/atlas/trans/LegendreCacheCreator.cc +++ b/src/atlas/trans/LegendreCacheCreator.cc @@ -19,10 +19,7 @@ // For factory registration only: #if ATLAS_HAVE_TRANS -#define TRANS_DEFAULT "ifs" #include "atlas/trans/ifs/LegendreCacheCreatorIFS.h" -#else -#define TRANS_DEFAULT "local" #endif #include "atlas/trans/local/LegendreCacheCreatorLocal.h" @@ -116,16 +113,14 @@ LegendreCacheCreator::Implementation* LegendreCacheCreatorFactory::build( const static force_link static_linking; - std::string name = config.getString( "type", TRANS_DEFAULT ); + util::Config options = Trans::config(); + options.set( config ); - Log::debug() << "Looking for LegendreCacheCreatorFactory [" << name << "]" << std::endl; + std::string name = options.getString( "type" ); - if ( not config.has( "type" ) and not has( name ) ) { - name = std::string( "local" ); - Log::debug() << "Looking for LegendreCacheCreatorFactory [" << name << "]" << std::endl; - } + Log::debug() << "Looking for LegendreCacheCreatorFactory [" << name << "]" << std::endl; - return factory( name ).make( grid, truncation, config ); + return factory( name ).make( grid, truncation, options ); } LegendreCacheCreator::LegendreCacheCreator() {} diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc index 6f67b57bb..ce82ddcdf 100644 --- a/src/atlas/trans/Trans.cc +++ b/src/atlas/trans/Trans.cc @@ -23,15 +23,30 @@ #if ATLAS_HAVE_TRANS #include "atlas/trans/ifs/TransIFSNodeColumns.h" #include "atlas/trans/ifs/TransIFSStructuredColumns.h" -#define TRANS_DEFAULT "ifs" -#else -#define TRANS_DEFAULT "local" #endif #include "atlas/trans/local/TransLocal.h" // --> recommended "local" +namespace { +struct default_backend { +#if ATLAS_HAVE_TRANS + std::string value = "ifs"; +#else + std::string value = "local"; +#endif + 
static default_backend instance() { + static default_backend x; + return x; + } +private: + default_backend() = default; +}; +} + namespace atlas { namespace trans { +util::Config TransFactory::default_options_ = util::Config( "type", default_backend::instance().value ); + TransImpl::~TransImpl() {} namespace { @@ -79,7 +94,8 @@ TransFactory& factory( const std::string& name ) { } // namespace -TransFactory::TransFactory( const std::string& name ) : name_( name ) { +TransFactory::TransFactory( const std::string& name ) : + name_( name ) { pthread_once( &once, init ); eckit::AutoLock lock( local_mutex ); @@ -103,6 +119,28 @@ bool TransFactory::has( const std::string& name ) { return ( m->find( name ) != m->end() ); } +void TransFactory::backend( const std::string& backend ) { + pthread_once( &once, init ); + eckit::AutoLock lock( local_mutex ); + default_options_.set( "type", backend ); +} + +std::string TransFactory::backend() { + return default_options_.getString("type"); +} + +const eckit::Configuration& TransFactory::config() { + return default_options_; +} + +void TransFactory::config( const eckit::Configuration& config ) { + std::string type = default_options_.getString( "type" ); + default_options_ = config; + if( not config.has("type") ) { + default_options_.set( "type", type ); + } +} + void TransFactory::list( std::ostream& out ) { pthread_once( &once, init ); @@ -135,17 +173,15 @@ Trans::Implementation* TransFactory::build( const Cache& cache, const FunctionSp static force_link static_linking; + util::Config options = default_options_; + options.set( config ); + std::string suffix( "(" + gp.type() + "," + sp.type() + ")" ); - std::string name = config.getString( "type", TRANS_DEFAULT ) + suffix; + std::string name = options.getString( "type" ) + suffix; Log::debug() << "Looking for TransFactory [" << name << "]" << std::endl; - if ( not config.has( "type" ) and not has( name ) ) { - name = std::string( "local" ) + suffix; - Log::debug() << "Looking for 
TransFactory [" << name << "]" << std::endl; - } - - return factory( name ).make( cache, gp, sp, config ); + return factory( name ).make( cache, gp, sp, options ); } Trans::Implementation* TransFactory::build( const Grid& grid, int truncation, const eckit::Configuration& config ) { @@ -174,20 +210,36 @@ Trans::Implementation* TransFactory::build( const Cache& cache, const Grid& grid static force_link static_linking; - std::string name = config.getString( "type", TRANS_DEFAULT ); + util::Config options = default_options_; + options.set( config ); + + std::string name = options.getString( "type" ); Log::debug() << "Looking for TransFactory [" << name << "]" << std::endl; - if ( not config.has( "type" ) and not has( name ) ) { - name = std::string( "local" ); - Log::debug() << "Looking for TransFactory [" << name << "]" << std::endl; - } + return factory( name ).make( cache, grid, domain, truncation, options ); +} + +bool Trans::hasBackend( const std::string& backend ) { + return TransFactory::has( backend ); +} - return factory( name ).make( cache, grid, domain, truncation, config ); +void Trans::backend( const std::string& backend ) { + ASSERT( hasBackend( backend ) ); + TransFactory::backend( backend ); } +std::string Trans::backend() { + return TransFactory::backend(); +} +const eckit::Configuration& Trans::config() { + return TransFactory::config(); +} +void Trans::config( const eckit::Configuration& options ) { + TransFactory::config( options ); +} Trans::Trans() {} diff --git a/src/atlas/trans/Trans.h b/src/atlas/trans/Trans.h index 63a38817b..e4db195f8 100644 --- a/src/atlas/trans/Trans.h +++ b/src/atlas/trans/Trans.h @@ -149,8 +149,17 @@ class TransFactory { static bool has( const std::string& name ); + static void backend( const std::string& ); + + static std::string backend(); + + static void config( const eckit::Configuration& ); + + static const eckit::Configuration& config(); + private: std::string name_; + static util::Config default_options_; virtual 
Trans_t* make( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& ) { return nullptr; } @@ -203,6 +212,13 @@ class Trans { eckit::SharedPtr impl_; public: + + static bool hasBackend( const std::string& ); + static void backend( const std::string& ); + static std::string backend(); + static void config( const eckit::Configuration& ); + static const eckit::Configuration& config(); + Trans(); Trans( Implementation* ); Trans( const Trans& ); diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc index 29b7b129f..b05f216a8 100644 --- a/src/atlas/trans/local/TransLocal.cc +++ b/src/atlas/trans/local/TransLocal.cc @@ -580,7 +580,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma ATLAS_TRACE( "Legendre precomputations (unstructured)" ); if( warning() ) { - Log::warning() << "WARNING: Precomputations for spectral transforms could take a long time and consume a lot of memory (unstructured grid approach)!" << std::endl; + Log::warning() << "WARNING: Precomputations for spectral transforms could take a long time and consume a lot of memory (unstructured grid approach)! Results may contain aliasing errors." << std::endl; } std::vector lats( grid_.size() ); @@ -1081,7 +1081,7 @@ void TransLocal::invtrans_unstructured( const int truncation, const int nb_field ATLAS_TRACE( "invtrans_unstructured" ); if( warning(config) ) { - Log::warning() << "WARNING: Spectral transforms could take a long time (unstructured grid approach)." << std::endl; + Log::warning() << "WARNING: Spectral transforms could take a long time (unstructured grid approach). Results may contain aliasing errors." 
<< std::endl; } double* zfn; diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc index 5c3ca945c..e505eba46 100644 --- a/src/tests/trans/test_trans_localcache.cc +++ b/src/tests/trans/test_trans_localcache.cc @@ -25,10 +25,6 @@ #include "tests/AtlasTestEnvironment.h" -#if ATLAS_HAVE_TRANS -#include "transi/trans.h" -#endif - namespace atlas { namespace test { @@ -36,16 +32,8 @@ namespace test { struct AtlasTransEnvironment : public AtlasTestEnvironment { AtlasTransEnvironment( int argc, char* argv[] ) : AtlasTestEnvironment( argc, argv ) { -#if ATLAS_HAVE_TRANS - trans_use_mpi( mpi::comm().size() > 1 ); - trans_init(); -#endif - } - - ~AtlasTransEnvironment() { -#if ATLAS_HAVE_TRANS - trans_finalize(); -#endif + trans::Trans::backend( "local" ); + trans::Trans::config( option::warning(1) ); } }; @@ -98,7 +86,7 @@ CASE( "test_global_grids" ) { std::make_pair(Slat(n),t), }; - LegendreCacheCreator F_cache_creator( Grid(F(n)), t, option::type("local") ); + LegendreCacheCreator F_cache_creator( Grid(F(n)), t ); EXPECT( F_cache_creator.supported() ); auto F_cachefile = CacheFile("leg_"+F_cache_creator.uid()+".bin"); F_cache_creator.create( F_cachefile ); @@ -113,7 +101,7 @@ CASE( "test_global_grids" ) { ATLAS_TRACE("Case "+gridname+" T"+std::to_string(truncation)); Grid grid(gridname); - LegendreCacheCreator cache_creator( grid, truncation, option::type("local") ); + LegendreCacheCreator cache_creator( grid, truncation ); EXPECT( cache_creator.supported() ); auto cachefile = CacheFile("leg_"+cache_creator.uid()+".bin"); cache_creator.create( cachefile ); @@ -122,13 +110,13 @@ CASE( "test_global_grids" ) { } ATLAS_TRACE_SCOPE("create without cache") - Trans( grid, truncation, option::type("local") ); + Trans( grid, truncation ); Cache cache; ATLAS_TRACE_SCOPE("read cache") cache = LegendreCache( cachefile ); ATLAS_TRACE_SCOPE("create with cache") - Trans( cache, grid, truncation, option::type("local") ); + Trans( cache, grid, 
truncation ); } } @@ -160,7 +148,7 @@ CASE( "test_global_grids_with_subdomain" ) { Grid global_grid( gridname ); - LegendreCacheCreator global_cache_creator( Grid(gridname), truncation, option::type("local") ); + LegendreCacheCreator global_cache_creator( Grid(gridname), truncation ); EXPECT( global_cache_creator.supported() ); auto global_cachefile = CacheFile( "leg_" + global_cache_creator.uid() + ".bin" ); ATLAS_TRACE_SCOPE( "Creating cache " + std::string( global_cachefile ) ) @@ -174,7 +162,7 @@ CASE( "test_global_grids_with_subdomain" ) { for( auto domain : domains ) { Grid grid( gridname, domain ); ATLAS_TRACE_SCOPE("create with cache") - Trans( global_cache, global_grid, domain, truncation, option::type("local") ); + Trans( global_cache, global_grid, domain, truncation ); } } } @@ -189,7 +177,7 @@ CASE( "test_regional_grids nested_in_global" ) { ); EXPECT( grid_global.domain().global() ); - LegendreCacheCreator global_cache_creator( grid_global, truncation, option::type("local") ); + LegendreCacheCreator global_cache_creator( grid_global, truncation ); EXPECT( global_cache_creator.supported() ); auto global_cachefile = CacheFile( "leg_" + global_cache_creator.uid() + ".bin" ); ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) ) @@ -201,11 +189,11 @@ CASE( "test_regional_grids nested_in_global" ) { ATLAS_TRACE_SCOPE("create without cache") - Trans( grid_global, regional.domain(), truncation, option::type("local") ); + Trans( grid_global, regional.domain(), truncation ); ATLAS_TRACE_SCOPE("read cache") cache = LegendreCache( global_cachefile ); ATLAS_TRACE_SCOPE("create with cache") - Trans( cache, grid_global, regional.domain(), truncation, option::type("local") ); + Trans( cache, grid_global, regional.domain(), truncation ); } CASE( "test_regional_grids not nested" ) { @@ -214,7 +202,7 @@ CASE( "test_regional_grids not nested" ) { StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) ); - LegendreCacheCreator 
cache_creator( grid, truncation, option::type("local") ); + LegendreCacheCreator cache_creator( grid, truncation ); EXPECT( cache_creator.supported() ); auto cachefile = CacheFile( "leg_" + cache_creator.uid() + ".bin" ); @@ -222,11 +210,11 @@ CASE( "test_regional_grids not nested" ) { cache_creator.create( cachefile ); ATLAS_TRACE_SCOPE("create without cache") - Trans( grid, truncation, option::type("local") ); + Trans( grid, truncation ); ATLAS_TRACE_SCOPE("read cache") cache = LegendreCache( cachefile ); ATLAS_TRACE_SCOPE("create with cache") - Trans( cache, grid, truncation, option::type("local") ); + Trans( cache, grid, truncation ); } CASE( "test_regional_grids with projection" ) { @@ -241,10 +229,10 @@ CASE( "test_regional_grids with projection" ) { StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ), projection ); Trans trans; ATLAS_TRACE_SCOPE("create without cache") - trans = Trans( grid, truncation, option::type("local") | option::warning(1) ); + trans = Trans( grid, truncation ); // Note: caching not yet implemented for unstructured and projected grids - LegendreCacheCreator legendre_cache_creator( grid, truncation, option::type("local") ); + LegendreCacheCreator legendre_cache_creator( grid, truncation ); ATLAS_DEBUG_VAR( legendre_cache_creator.uid() ); EXPECT( not legendre_cache_creator.supported() ); @@ -261,14 +249,14 @@ CASE( "test cache creator to file" ) { LinearSpacing( { 90., -90.}, 181, true ) ); - LegendreCacheCreator legendre_cache_creator( grid_global, truncation, option::type("local") ); + LegendreCacheCreator legendre_cache_creator( grid_global, truncation ); auto cachefile = CacheFile( legendre_cache_creator.uid() ); ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) ) legendre_cache_creator.create( cachefile ); Cache c = legendre_cache_creator.create(); - auto trans1 = Trans( c, grid_global, truncation, option::type("local") ); - auto trans2 = Trans( c, grid_global, truncation, 
option::type("local") ); + auto trans1 = Trans( c, grid_global, truncation ); + auto trans2 = Trans( c, grid_global, truncation ); } CASE( "test cache creator in memory" ) { @@ -279,14 +267,14 @@ CASE( "test cache creator in memory" ) { LinearSpacing( { 90., -90.}, 181, true ) ); - LegendreCacheCreator legendre_cache_creator( grid_global, truncation, option::type("local") ); + LegendreCacheCreator legendre_cache_creator( grid_global, truncation ); Cache cache; ATLAS_TRACE_SCOPE( "Creating cache in memory" ) cache = legendre_cache_creator.create(); - auto trans1 = Trans( cache, grid_global, truncation, option::type("local") ); - auto trans2 = Trans( cache, grid_global, truncation, option::type("local") ); + auto trans1 = Trans( cache, grid_global, truncation ); + auto trans2 = Trans( cache, grid_global, truncation ); } } // namespace test From 0bf1dee5de24efc29afd2e9dfcd34d6f9b49e5ea Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 10 May 2018 11:38:54 +0100 Subject: [PATCH 076/123] Default options used for invtrans/dirtrans as well --- src/atlas/trans/Trans.cc | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc index ce82ddcdf..7a6a0e6ea 100644 --- a/src/atlas/trans/Trans.cc +++ b/src/atlas/trans/Trans.cc @@ -241,6 +241,14 @@ void Trans::config( const eckit::Configuration& options ) { TransFactory::config( options ); } +namespace { +util::Config options( const eckit::Configuration& config ) { + util::Config opts = Trans::config(); + opts.set(config); + return opts; +} +} + Trans::Trans() {} Trans::Trans( Implementation* impl ) : impl_( impl ) {} @@ -279,37 +287,37 @@ size_t Trans::spectralCoefficients() const { } void Trans::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const { - impl_->dirtrans( gpfield, spfield, config ); + impl_->dirtrans( gpfield, spfield, options(config) ); } void Trans::dirtrans( const 
FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config ) const { - impl_->dirtrans( gpfields, spfields, config ); + impl_->dirtrans( gpfields, spfields, options(config) ); } void Trans::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, const eckit::Configuration& config ) const { - impl_->dirtrans_wind2vordiv( gpwind, spvor, spdiv, config ); + impl_->dirtrans_wind2vordiv( gpwind, spvor, spdiv, options(config) ); } void Trans::invtrans( const Field& spfield, Field& gpfield, const eckit::Configuration& config ) const { - impl_->invtrans( spfield, gpfield, config ); + impl_->invtrans( spfield, gpfield, options(config) ); } void Trans::invtrans( const FieldSet& spfields, FieldSet& gpfields, const eckit::Configuration& config ) const { - impl_->invtrans( spfields, gpfields, config ); + impl_->invtrans( spfields, gpfields, options(config) ); } void Trans::invtrans_grad( const Field& spfield, Field& gradfield, const eckit::Configuration& config ) const { - impl_->invtrans_grad( spfield, gradfield, config ); + impl_->invtrans_grad( spfield, gradfield, options(config) ); } void Trans::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields, const eckit::Configuration& config ) const { - impl_->invtrans_grad( spfields, gradfields, config ); + impl_->invtrans_grad( spfields, gradfields, options(config) ); } void Trans::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind, const eckit::Configuration& config ) const { - impl_->invtrans_vordiv2wind( spvor, spdiv, gpwind, config ); + impl_->invtrans_vordiv2wind( spvor, spdiv, gpwind, options(config) ); } // -- IFS type fields -- @@ -330,7 +338,7 @@ void Trans::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], const eckit::Configuration& config ) const { impl_->invtrans( nb_scalar_fields, scalar_spectra, nb_vordiv_fields, vorticity_spectra, 
divergence_spectra, - gp_fields, config ); + gp_fields, options(config) ); } /*! @@ -341,7 +349,7 @@ void Trans::invtrans( const int nb_scalar_fields, const double scalar_spectra[], */ void Trans::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[], const eckit::Configuration& config ) const { - impl_->invtrans( nb_scalar_fields, scalar_spectra, gp_fields, config ); + impl_->invtrans( nb_scalar_fields, scalar_spectra, gp_fields, options(config) ); } /*! @@ -350,7 +358,7 @@ void Trans::invtrans( const int nb_scalar_fields, const double scalar_spectra[], */ void Trans::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], const eckit::Configuration& config ) const { - impl_->invtrans( nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); + impl_->invtrans( nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, options(config) ); } /*! @@ -358,7 +366,7 @@ void Trans::invtrans( const int nb_vordiv_fields, const double vorticity_spectra */ void Trans::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], const eckit::Configuration& config ) const { - impl_->dirtrans( nb_fields, scalar_fields, scalar_spectra, config ); + impl_->dirtrans( nb_fields, scalar_fields, scalar_spectra, options(config) ); } /*! 
@@ -367,7 +375,7 @@ void Trans::dirtrans( const int nb_fields, const double scalar_fields[], double */ void Trans::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], double divergence_spectra[], const eckit::Configuration& config ) const { - impl_->dirtrans( nb_fields, wind_fields, vorticity_spectra, divergence_spectra, config ); + impl_->dirtrans( nb_fields, wind_fields, vorticity_spectra, divergence_spectra, options(config) ); } } // namespace trans From 1824862c42dd5884ba55cec46fec5be6c4e6ed39 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 10 May 2018 11:40:49 +0100 Subject: [PATCH 077/123] Reduce runtime of atlas_test_transgeneral --- src/tests/trans/test_transgeneral.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index fb976bd25..e6ac605bf 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -973,7 +973,7 @@ CASE( "test_trans_domain" ) { Domain testdomain2 = RectangularDomain( {-1., 1.}, {-5., 40.} ); // Grid: (Adjust the following line if the test takes too long!) 
- Grid global_grid( "O640" ); + Grid global_grid( "O64" ); Grid g1( global_grid, testdomain1 ); //Grid g2( gridString, testdomain2 ); @@ -983,7 +983,7 @@ CASE( "test_trans_domain" ) { using LinearSpacing = grid::LinearSpacing; StructuredGrid g2( LinearSpacing( {0., 180.}, 181 ), LinearSpacing( {0., 45.}, 46 ) ); - int trc = 640; + int trc = 63; //Log::info() << "rgp1:" << std::endl; if ( eckit::PathName( "legcache.bin" ).exists() ) eckit::PathName( "legcache.bin" ).unlink(); Trace t1( Here(), "translocal1 construction" ); From 92bbedf49ba7d9696952d76e228cb4a0ba1f155d Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 10 May 2018 14:55:58 +0100 Subject: [PATCH 078/123] Warn about possible aliasing errors in Trans for regional grids --- src/atlas/grid/Grid.h | 2 ++ src/atlas/grid/detail/grid/Structured.cc | 4 ++++ src/atlas/grid/detail/grid/Structured.h | 4 ++++ src/atlas/trans/local/TransLocal.cc | 4 +++- 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/atlas/grid/Grid.h b/src/atlas/grid/Grid.h index e8d497878..0ac6aa8dc 100644 --- a/src/atlas/grid/Grid.h +++ b/src/atlas/grid/Grid.h @@ -222,6 +222,8 @@ class StructuredGrid : public Grid { bool periodic() const { return grid_->periodic(); } + const XSpace& xspace() const { return grid_->xspace(); } + const YSpace& yspace() const { return grid_->yspace(); } private: diff --git a/src/atlas/grid/detail/grid/Structured.cc b/src/atlas/grid/detail/grid/Structured.cc index a2c6f0dcb..2baa2223c 100644 --- a/src/atlas/grid/detail/grid/Structured.cc +++ b/src/atlas/grid/detail/grid/Structured.cc @@ -226,6 +226,10 @@ Structured::XSpace::Implementation::Implementation( const Spacing& spacing ) : nxmin_ = nx_[0]; } +std::string Structured::XSpace::Implementation::type() const { + return "linear"; +} + Grid::Spec Structured::XSpace::Implementation::spec() const { Grid::Spec spec; diff --git a/src/atlas/grid/detail/grid/Structured.h b/src/atlas/grid/detail/grid/Structured.h index 13be697a4..d26891f20 100644 
--- a/src/atlas/grid/detail/grid/Structured.h +++ b/src/atlas/grid/detail/grid/Structured.h @@ -229,6 +229,8 @@ class Structured : public Grid { Spec spec() const; + std::string type() const; + private: void reserve( long ny ); @@ -277,6 +279,8 @@ class Structured : public Grid { Spec spec() const { return impl_->spec(); } + std::string type() const { return impl_->type(); } + private: eckit::SharedPtr impl_; }; diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc index b05f216a8..b4520f9a9 100644 --- a/src/atlas/trans/local/TransLocal.cc +++ b/src/atlas/trans/local/TransLocal.cc @@ -338,7 +338,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma // reduce truncation towards the pole for reduced meshes: nlat0_.resize( truncation_ + 1 ); if ( no_nest ) { - for ( int j = 0; j <= truncation_; j++ ) { + for ( int j = 0; j <= truncation_; j++ ) { nlat0_[j] = 0; } } @@ -542,6 +542,8 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma #endif } if ( !useFFT_ ) { + Log::warning() << "WARNING: Spectral transform results may contain aliasing errors. This will be addressed soon." 
<< std::endl; + alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlonsMax ); #if !TRANSLOCAL_DGEMM2 { From a218b02ee90e44ebdc03daa71a24eb0d799390ad Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 10 May 2018 15:56:32 +0100 Subject: [PATCH 079/123] cleanup --- src/atlas/trans/Cache.h | 1 - src/atlas/trans/local/LegendrePolynomials.cc | 16 ++--- src/atlas/trans/local/LegendrePolynomials.h | 20 +++--- src/atlas/trans/local/TransLocal.cc | 68 ++++++++++---------- src/atlas/trans/local/TransLocal.h | 16 ++--- 5 files changed, 60 insertions(+), 61 deletions(-) diff --git a/src/atlas/trans/Cache.h b/src/atlas/trans/Cache.h index 3fca5edd9..8cd6b8097 100644 --- a/src/atlas/trans/Cache.h +++ b/src/atlas/trans/Cache.h @@ -54,7 +54,6 @@ class EmptyCacheEntry final : public TransCacheEntry { }; //----------------------------------------------------------------------------- - class TransCacheFileEntry final : public TransCacheEntry { private: eckit::Buffer buffer_; diff --git a/src/atlas/trans/local/LegendrePolynomials.cc b/src/atlas/trans/local/LegendrePolynomials.cc index 4bd0d67a1..da29d9704 100644 --- a/src/atlas/trans/local/LegendrePolynomials.cc +++ b/src/atlas/trans/local/LegendrePolynomials.cc @@ -21,7 +21,7 @@ namespace trans { //----------------------------------------------------------------------------- -void compute_zfnopt3( const size_t trc, double zfn[] ) { +void compute_zfn( const size_t trc, double zfn[] ) { auto idxzfn = [&]( int jn, int jk ) { return jk + ( trc + 1 ) * jn; }; int iodd = 0; // Compute coefficients for Taylor series in Belousov (19) and (21) @@ -45,7 +45,7 @@ void compute_zfnopt3( const size_t trc, double zfn[] ) { } -void compute_legendre_polynomials_latopt3( const size_t trc, // truncation (in) +void compute_legendre_polynomials_lat( const size_t trc, // truncation (in) const double lat, // latitude in radians (in) double legpol[], // legendre polynomials double zfn[] ) { @@ -149,7 +149,7 @@ void 
compute_legendre_polynomials_latopt3( const size_t trc, // truncation (in) } -void compute_legendre_polynomialsopt3( +void compute_legendre_polynomials( const size_t trc, // truncation (in) const int nlats, // number of latitudes const double lats[], // latitudes in radians (in) @@ -162,12 +162,12 @@ void compute_legendre_polynomialsopt3( std::vector legpol( legendre_size( trc ) ); std::vector zfn( ( trc + 1 ) * ( trc + 1 ) ); auto idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; }; - compute_zfnopt3( trc, zfn.data() ); + compute_zfn( trc, zfn.data() ); // Loop over latitudes: for ( int jlat = 0; jlat < nlats; ++jlat ) { // compute legendre polynomials for current latitude: - compute_legendre_polynomials_latopt3( trc, lats[jlat], legpol.data(), zfn.data() ); + compute_legendre_polynomials_lat( trc, lats[jlat], legpol.data(), zfn.data() ); // split polynomials into symmetric and antisymmetric parts: { @@ -204,7 +204,7 @@ void compute_legendre_polynomialsopt3( } } -void compute_legendre_polynomials_allopt3( const size_t trc, // truncation (in) +void compute_legendre_polynomials_all( const size_t trc, // truncation (in) const int nlats, // number of latitudes const double lats[], // latitudes in radians (in) double legendre[] ) // legendre polynomials for all latitudes @@ -216,12 +216,12 @@ void compute_legendre_polynomials_allopt3( const size_t trc, // truncation ( auto idxmnl = [&]( int jm, int jn, int jlat ) { return ( 2 * trc + 3 - jm ) * jm / 2 * nlats + jlat * ( trc - jm + 1 ) + jn - jm; }; - compute_zfnopt3( trc, zfn.data() ); + compute_zfn( trc, zfn.data() ); // Loop over latitudes: for ( int jlat = 0; jlat < nlats; ++jlat ) { // compute legendre polynomials for current latitude: - compute_legendre_polynomials_latopt3( trc, lats[jlat], legpol.data(), zfn.data() ); + compute_legendre_polynomials_lat( trc, lats[jlat], legpol.data(), zfn.data() ); for ( int jm = 0; jm <= trc; ++jm ) { for ( int jn = jm; jn <= trc; ++jn ) { diff 
--git a/src/atlas/trans/local/LegendrePolynomials.h b/src/atlas/trans/local/LegendrePolynomials.h index 93ebb49f7..43edbb221 100644 --- a/src/atlas/trans/local/LegendrePolynomials.h +++ b/src/atlas/trans/local/LegendrePolynomials.h @@ -32,14 +32,14 @@ namespace trans { // Ported to C++ by: // Andreas Mueller *ECMWF* // -void compute_zfnopt3( const size_t trc, double zfn[] ); +void compute_zfn( const size_t trc, double zfn[] ); -void compute_legendre_polynomials_latopt3( const size_t trc, // truncation (in) - const double lat, // latitude in radians (in) - double legpol[], // legendre polynomials - double zfn[] ); +void compute_legendre_polynomials_lat( const size_t trc, // truncation (in) + const double lat, // latitude in radians (in) + double legpol[], // legendre polynomials + double zfn[] ); -void compute_legendre_polynomialsopt3( +void compute_legendre_polynomials( const size_t trc, // truncation (in) const int nlats, // number of latitudes const double lats[], // latitudes in radians (in) @@ -48,10 +48,10 @@ void compute_legendre_polynomialsopt3( size_t leg_start_sym[], // start indices for different zonal wave numbers, symmetric part size_t leg_start_asym[] ); // start indices for different zonal wave numbers, asymmetric part -void compute_legendre_polynomials_allopt3( const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // latitudes in radians (in) - double legendre[] ); // legendre polynomials for all latitudes +void compute_legendre_polynomials_all( const size_t trc, // truncation (in) + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double legendre[] ); // legendre polynomials for all latitudes // -------------------------------------------------------------------------------------------------------------------- diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc index b4520f9a9..c34cc9f3f 100644 --- 
a/src/atlas/trans/local/TransLocal.cc +++ b/src/atlas/trans/local/TransLocal.cc @@ -455,9 +455,9 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma } ATLAS_TRACE_SCOPE( "Legendre precomputations (structured)" ) { - compute_legendre_polynomialsopt3( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, - legendre_asym_, legendre_sym_begin_.data(), - legendre_asym_begin_.data() ); + compute_legendre_polynomials( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, + legendre_asym_, legendre_sym_begin_.data(), + legendre_asym_begin_.data() ); } std::string file_path = TransParameters( config ).write_legendre(); if ( file_path.size() ) { @@ -562,7 +562,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma } #else { - ATLAS_TRACE( "opt3 precomp Fourier" ); + ATLAS_TRACE( "precomp Fourier" ); int idx = 0; for ( int jlon = 0; jlon < nlonsMax; jlon++ ) { double factor = 1.; @@ -591,7 +591,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma for ( PointLonLat p : grid_.lonlat() ) { lats[j++] = p.lat() * util::Constants::degreesToRadians(); } - compute_legendre_polynomials_allopt3( truncation_, grid_.size(), lats.data(), legendre_ ); + compute_legendre_polynomials_all( truncation_, grid_.size(), lats.data(), legendre_ ); } if ( TransParameters( config ).write_legendre().size() ) { throw eckit::NotImplemented( "Caching for unstructured grids or structured grids with projections not yet implemented", Here() ); @@ -678,7 +678,7 @@ void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spect // -------------------------------------------------------------------------------------------------------------------- -void gp_transposeopt3( const int nb_size, const int nb_fields, const double gp_tmp[], double gp_fields[] ) { +void gp_transpose( const int nb_size, const int nb_fields, const double gp_tmp[], double gp_fields[] ) { for ( int jgp = 0; jgp < nb_size; 
jgp++ ) { for ( int jfld = 0; jfld < nb_fields; jfld++ ) { gp_fields[jfld * nb_size + jgp] = gp_tmp[jgp * nb_fields + jfld]; @@ -688,9 +688,9 @@ void gp_transposeopt3( const int nb_size, const int nb_fields, const double gp_t // -------------------------------------------------------------------------------------------------------------------- -void TransLocal::invtrans_legendreopt3( const int truncation, const int nlats, const int nb_fields, - const double scalar_spectra[], double scl_fourier[], - const eckit::Configuration& config ) const { +void TransLocal::invtrans_legendre( const int truncation, const int nlats, const int nb_fields, + const double scalar_spectra[], double scl_fourier[], + const eckit::Configuration& config ) const { // Legendre transform: { Log::debug() << "Legendre dgemm: using " << nlatsLegReduced_ - nlat0_[0] << " latitudes out of " @@ -715,14 +715,14 @@ void TransLocal::invtrans_legendreopt3( const int truncation, const int nlats, c alloc_aligned( scl_fourier_sym, size_fourier ); alloc_aligned( scl_fourier_asym, size_fourier ); { - //ATLAS_TRACE( "opt3 Legendre split" ); + //ATLAS_TRACE( "Legendre split" ); int idx = 0, is = 0, ia = 0, ioff = ( 2 * truncation + 3 - jm ) * jm / 2 * nb_fields * 2; // the choice between the following two code lines determines whether // total wavenumbers are summed in an ascending or descending order. // The trans library in IFS uses descending order because it should // be more accurate (higher wavenumbers have smaller contributions). // This also needs to be changed when splitting the spectral data in - // compute_legendre_polynomialsopt3! + // compute_legendre_polynomials! 
//for ( int jn = jm; jn <= truncation_ + 1; jn++ ) { for ( int jn = truncation_ + 1; jn >= jm; jn-- ) { for ( int imag = 0; imag < n_imag; imag++ ) { @@ -772,7 +772,7 @@ void TransLocal::invtrans_legendreopt3( const int truncation, const int nlats, c } } { - //ATLAS_TRACE( "opt3 merge spheres" ); + //ATLAS_TRACE( "merge spheres" ); // northern hemisphere: for ( int jlat = 0; jlat < nlatsNH_; jlat++ ) { if ( nlatsLegReduced_ - nlat0_[jm] - nlatsNH_ + jlat >= 0 ) { @@ -841,7 +841,7 @@ void TransLocal::invtrans_legendreopt3( const int truncation, const int nlats, c // -------------------------------------------------------------------------------------------------------------------- -void TransLocal::invtrans_fourier_regularopt3( const int nlats, const int nlons, const int nb_fields, +void TransLocal::invtrans_fourier_regular( const int nlats, const int nlons, const int nb_fields, double scl_fourier[], double gp_fields[], const eckit::Configuration& config ) const { // Fourier transformation: @@ -894,38 +894,38 @@ void TransLocal::invtrans_fourier_regularopt3( const int nlats, const int nlons, // dgemm-method 2 // should be faster for small domains or large truncation // but have not found any significant speedup so far - double* gp_opt3; - alloc_aligned( gp_opt3, nb_fields * grid_.size() ); + double* gp; + alloc_aligned( gp, nb_fields * grid_.size() ); { - ATLAS_TRACE( "opt3 Fourier dgemm method 2" ); + ATLAS_TRACE( "Fourier dgemm method 2" ); eckit::linalg::Matrix A( scl_fourier, nb_fields * nlats, ( truncation_ + 1 ) * 2 ); eckit::linalg::Matrix B( fourier_, ( truncation_ + 1 ) * 2, nlons ); - eckit::linalg::Matrix C( gp_opt3, nb_fields * nlats, nlons ); + eckit::linalg::Matrix C( gp, nb_fields * nlats, nlons ); linalg_.gemm( A, B, C ); } // Transposition in grid point space: { - ATLAS_TRACE( "opt3 transposition in gp-space" ); + ATLAS_TRACE( "transposition in gp-space" ); int idx = 0; for ( int jlon = 0; jlon < nlons; jlon++ ) { for ( int jlat = 0; jlat < nlats; 
jlat++ ) { for ( int jfld = 0; jfld < nb_fields; jfld++ ) { int pos_tp = jlon + nlons * ( jlat + nlats * ( jfld ) ); //int pos = jfld + nb_fields * ( jlat + nlats * ( jlon ) ); - gp_fields[pos_tp] = gp_opt3[idx++]; // = gp_opt3[pos] + gp_fields[pos_tp] = gp[idx++]; // = gp[pos] } } } } - free_aligned( gp_opt3 ); + free_aligned( gp ); #endif } } // -------------------------------------------------------------------------------------------------------------------- -void TransLocal::invtrans_fourier_reducedopt3( const int nlats, const grid::StructuredGrid g, const int nb_fields, +void TransLocal::invtrans_fourier_reduced( const int nlats, const grid::StructuredGrid g, const int nb_fields, double scl_fourier[], double gp_fields[], const eckit::Configuration& config ) const { // Fourier transformation: @@ -1059,7 +1059,7 @@ void TransLocal::invtrans_unstructured_precomp( const int truncation, const int // Computing u,v from U,V: { if ( nb_vordiv_fields > 0 ) { - //ATLAS_TRACE( "opt3 u,v from U,V" ); + //ATLAS_TRACE( " u,v from U,V" ); double coslat = std::cos( lat ); for ( int j = 0; j < nb_fields; j++ ) { gp_fields[ip + j * grid_.size()] /= coslat; @@ -1088,7 +1088,7 @@ void TransLocal::invtrans_unstructured( const int truncation, const int nb_field double* zfn; alloc_aligned( zfn, ( truncation + 1 ) * ( truncation + 1 ) ); - compute_zfnopt3( truncation, zfn ); + compute_zfn( truncation, zfn ); int size_fourier = nb_fields * 2; double* legendre; double* scl_fourier; @@ -1107,7 +1107,7 @@ void TransLocal::invtrans_unstructured( const int truncation, const int nb_field for ( const PointLonLat p : grid_.lonlat() ) { const double lon = p.lon() * util::Constants::degreesToRadians(); const double lat = p.lat() * util::Constants::degreesToRadians(); - compute_legendre_polynomials_latopt3( truncation, lat, legendre, zfn ); + compute_legendre_polynomials_lat( truncation, lat, legendre, zfn ); // Legendre transform: { //ATLAS_TRACE( "opt Legendre dgemm" ); @@ -1155,7 +1155,7 @@ 
void TransLocal::invtrans_unstructured( const int truncation, const int nb_field // Computing u,v from U,V: { if ( nb_vordiv_fields > 0 ) { - //ATLAS_TRACE( "opt3 u,v from U,V" ); + //ATLAS_TRACE( "u,v from U,V" ); const double coslat = std::cos( lat ); for ( int j = 0; j < nb_fields; j++ ) { gp_fields[ip + j * grid_.size()] /= coslat; @@ -1203,14 +1203,14 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields, alloc_aligned( scl_fourier, size_fourier_max * ( truncation_ + 1 ) ); // Legendre transformation: - invtrans_legendreopt3( truncation, nlats, nb_scalar_fields, scalar_spectra, scl_fourier, config ); + invtrans_legendre( truncation, nlats, nb_scalar_fields, scalar_spectra, scl_fourier, config ); // Fourier transformation: if ( grid::RegularGrid( gridGlobal_ ) ) { - invtrans_fourier_regularopt3( nlats, nlons, nb_fields, scl_fourier, gp_fields, config ); + invtrans_fourier_regular( nlats, nlons, nb_fields, scl_fourier, gp_fields, config ); } else { - invtrans_fourier_reducedopt3( nlats, g, nb_fields, scl_fourier, gp_fields, config ); + invtrans_fourier_reduced( nlats, g, nb_fields, scl_fourier, gp_fields, config ); } // Computing u,v from U,V: @@ -1257,8 +1257,8 @@ void TransLocal::invtrans( const int nb_vordiv_fields, const double vorticity_sp // -------------------------------------------------------------------------------------------------------------------- -void extend_truncationopt3( const int old_truncation, const int nb_fields, const double old_spectra[], - double new_spectra[] ) { +void extend_truncation( const int old_truncation, const int nb_fields, const double old_spectra[], + double new_spectra[] ) { int k = 0, k_old = 0; for ( int m = 0; m <= old_truncation + 1; m++ ) { // zonal wavenumber for ( int n = m; n <= old_truncation + 1; n++ ) { // total wavenumber @@ -1291,10 +1291,10 @@ void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spect { ATLAS_TRACE( "extend vordiv" ); // increase truncation 
in vorticity_spectra and divergence_spectra: - extend_truncationopt3( truncation_, nb_vordiv_fields, vorticity_spectra, - vorticity_spectra_extended.data() ); - extend_truncationopt3( truncation_, nb_vordiv_fields, divergence_spectra, - divergence_spectra_extended.data() ); + extend_truncation( truncation_, nb_vordiv_fields, vorticity_spectra, + vorticity_spectra_extended.data() ); + extend_truncation( truncation_, nb_vordiv_fields, divergence_spectra, + divergence_spectra_extended.data() ); } { diff --git a/src/atlas/trans/local/TransLocal.h b/src/atlas/trans/local/TransLocal.h index 1a9342cbc..abbee7c10 100644 --- a/src/atlas/trans/local/TransLocal.h +++ b/src/atlas/trans/local/TransLocal.h @@ -133,16 +133,16 @@ class TransLocal : public trans::TransImpl { #endif }; - void invtrans_legendreopt3( const int truncation, const int nlats, const int nb_fields, - const double scalar_spectra[], double scl_fourier[], - const eckit::Configuration& config ) const; + void invtrans_legendre( const int truncation, const int nlats, const int nb_fields, + const double scalar_spectra[], double scl_fourier[], + const eckit::Configuration& config ) const; - void invtrans_fourier_regularopt3( const int nlats, const int nlons, const int nb_fields, double scl_fourier[], - double gp_fields[], const eckit::Configuration& config ) const; + void invtrans_fourier_regular( const int nlats, const int nlons, const int nb_fields, double scl_fourier[], + double gp_fields[], const eckit::Configuration& config ) const; - void invtrans_fourier_reducedopt3( const int nlats, const grid::StructuredGrid g, const int nb_fields, - double scl_fourier[], double gp_fields[], - const eckit::Configuration& config ) const; + void invtrans_fourier_reduced( const int nlats, const grid::StructuredGrid g, const int nb_fields, + double scl_fourier[], double gp_fields[], + const eckit::Configuration& config ) const; void invtrans_unstructured_precomp( const int truncation, const int nb_scalar_fields, const int 
nb_vordiv_fields, const double scalar_spectra[], double gp_fields[], From 4e0ff1abccfba811216e861484f9bb51d20bcac6 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 10 May 2018 15:57:46 +0100 Subject: [PATCH 080/123] ECKIT-326 Config::hash now implemented by eckit::Configuration::hash --- src/atlas/util/Config.cc | 10 ---------- src/atlas/util/Config.h | 2 -- src/atlas/util/Metadata.cc | 10 ---------- src/atlas/util/Metadata.h | 2 -- 4 files changed, 24 deletions(-) diff --git a/src/atlas/util/Config.cc b/src/atlas/util/Config.cc index a0802dac7..6284d7c81 100644 --- a/src/atlas/util/Config.cc +++ b/src/atlas/util/Config.cc @@ -90,16 +90,6 @@ bool Config::get( const std::string& name, std::vector& value ) const { return found; } -void Config::hash( eckit::Hash& hsh ) const { - eckit::ValueMap map = get(); - for ( eckit::ValueMap::const_iterator vit = map.begin(); vit != map.end(); ++vit ) { - hsh.add( vit->first.as() ); - /// @note below, we assume all Values translate to std::string, this needs - /// more verification - hsh.add( vit->second.as() ); - } -} - //================================================================== // ------------------------------------------------------------------ diff --git a/src/atlas/util/Config.h b/src/atlas/util/Config.h index bf498698d..da77b78a1 100644 --- a/src/atlas/util/Config.h +++ b/src/atlas/util/Config.h @@ -67,8 +67,6 @@ class Config : public eckit::LocalConfiguration { using eckit::LocalConfiguration::get; bool get( const std::string& name, std::vector& value ) const; - - void hash( eckit::Hash& ) const; }; // ------------------------------------------------------------------ diff --git a/src/atlas/util/Metadata.cc b/src/atlas/util/Metadata.cc index 829621164..c0831bb77 100644 --- a/src/atlas/util/Metadata.cc +++ b/src/atlas/util/Metadata.cc @@ -115,16 +115,6 @@ void Metadata::broadcast( Metadata& dest, const size_t root ) const { } } -void Metadata::hash( eckit::Hash& hsh ) const { - eckit::ValueMap map = 
get(); - for ( eckit::ValueMap::const_iterator vit = map.begin(); vit != map.end(); ++vit ) { - hsh.add( vit->first.as() ); - /// @note below, we assume all Values translate to std::string, this needs - /// more verification - hsh.add( vit->second.as() ); - } -} - Metadata::Metadata( const eckit::Value& value ) : eckit::LocalConfiguration( value ) {} // ------------------------------------------------------------------ diff --git a/src/atlas/util/Metadata.h b/src/atlas/util/Metadata.h index 3c60a41e8..f8f10d7da 100644 --- a/src/atlas/util/Metadata.h +++ b/src/atlas/util/Metadata.h @@ -57,8 +57,6 @@ class Metadata : public eckit::LocalConfiguration { size_t footprint() const; - void hash( eckit::Hash& ) const; - private: void throw_exception( const std::string& ) const; From 6500aace294f66b7a40c2e7b79211e8303e3cbd7 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 10 May 2018 15:59:36 +0100 Subject: [PATCH 081/123] Require eckit 0.21.0 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 787c96898..917d6da60 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,7 +31,7 @@ ecbuild_declare_project() ### eckit -ecbuild_use_package( PROJECT eckit VERSION 0.20.0 REQUIRED ) +ecbuild_use_package( PROJECT eckit VERSION 0.21.0 REQUIRED ) ecbuild_debug( " ECKIT_FEATURES : [${ECKIT_FEATURES}]" ) # options & dependencies From 61c60858bd539c5867d90e3dd3e34002417d0cbf Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 10 May 2018 16:09:22 +0100 Subject: [PATCH 082/123] Fix version suffix --- VERSION.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION.cmake b/VERSION.cmake index 62025fc5d..7cd7d9260 100644 --- a/VERSION.cmake +++ b/VERSION.cmake @@ -6,5 +6,5 @@ # granted to it by virtue of its status as an intergovernmental organisation nor # does it submit to any jurisdiction. 
-set ( ${PROJECT_NAME}_VERSION_STR "0.14.0-opt-translocal" ) +set ( ${PROJECT_NAME}_VERSION_STR "0.14.0-develop" ) From 8ccb4901fe0164dcb94e425185aa50f2f8156848 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 10 May 2018 16:36:59 +0100 Subject: [PATCH 083/123] Script to apply clang-format --- tools/apply-clang-format.sh | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100755 tools/apply-clang-format.sh diff --git a/tools/apply-clang-format.sh b/tools/apply-clang-format.sh new file mode 100755 index 000000000..51f68ba34 --- /dev/null +++ b/tools/apply-clang-format.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cd $SCRIPTDIR/../src +find . -iname *.h -o -iname *.cc | xargs clang-format -i -style=file + From 3ed0bc8e8081808962e72f69405536e4d651408a Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 10 May 2018 16:31:34 +0100 Subject: [PATCH 084/123] Apply clang-format --- src/atlas/array/SVector.h | 11 +- src/atlas/array/gridtools/GridToolsTraits.h | 2 +- src/atlas/array/helpers/ArraySlicer.h | 6 +- src/atlas/grid/Grid.cc | 2 +- src/atlas/grid/detail/grid/Grid.cc | 9 +- src/atlas/grid/detail/grid/Structured.cc | 6 +- .../interpolation/method/FiniteElement.cc | 2 +- src/atlas/mesh/actions/BuildStatistics.cc | 8 +- src/atlas/mesh/actions/ExtendNodesGlobal.cc | 18 +- .../meshgenerator/StructuredMeshGenerator.cc | 36 +-- src/atlas/option/TransOptions.cc | 3 +- src/atlas/option/TransOptions.h | 4 - src/atlas/runtime/trace/CallStack.cc | 2 +- src/atlas/runtime/trace/Nesting.cc | 5 +- src/atlas/trans/Cache.cc | 110 ++++----- src/atlas/trans/Cache.h | 18 +- src/atlas/trans/LegendreCacheCreator.cc | 6 +- src/atlas/trans/LegendreCacheCreator.h | 6 +- src/atlas/trans/Trans.cc | 56 ++--- src/atlas/trans/Trans.h | 18 +- .../trans/ifs/LegendreCacheCreatorIFS.cc | 117 +++++---- src/atlas/trans/ifs/LegendreCacheCreatorIFS.h | 4 +- src/atlas/trans/ifs/TransIFS.cc | 7 +- src/atlas/trans/ifs/TransIFS.h | 
3 +- .../trans/local/LegendreCacheCreatorLocal.cc | 154 ++++++------ .../trans/local/LegendreCacheCreatorLocal.h | 4 +- src/atlas/trans/local/LegendrePolynomials.cc | 12 +- src/atlas/trans/local/TransLocal.cc | 143 +++++------ src/atlas/trans/local/TransLocal.h | 27 +-- src/atlas/trans/local/VorDivToUVLocal.cc | 4 +- src/atlas/util/Earth.h | 2 +- src/atlas/util/Rotation.cc | 2 +- src/atlas/util/SphericalPolygon.cc | 4 +- src/atlas/util/UnitSphere.h | 4 +- src/sandbox/CMakeLists.txt | 1 - src/sandbox/fortran_modinc/CMakeLists.txt | 16 -- src/sandbox/fortran_modinc/mod1.f | 6 - src/sandbox/fortran_modinc/mod1.h | 4 - src/sandbox/fortran_modinc/mod2.f | 6 - src/sandbox/fortran_modinc/mod2.h | 4 - src/sandbox/fortran_modinc/sb_modinc.F90 | 26 -- src/tests/AtlasTestEnvironment.h | 4 +- src/tests/array/test_array.cc | 5 +- src/tests/array/test_table.cc | 2 +- src/tests/functionspace/test_pointcloud.cc | 2 +- src/tests/grid/test_field.cc | 2 +- src/tests/grid/test_grid_ptr.cc | 4 +- src/tests/grid/test_state.cc | 2 +- .../test_interpolation_finite_element.cc | 2 +- src/tests/io/test_gmsh.cc | 2 +- src/tests/io/test_pointcloud_io.cc | 2 +- src/tests/mesh/test_accumulate_facets.cc | 6 +- src/tests/mesh/test_connectivity.cc | 2 +- src/tests/mesh/test_distmesh.cc | 2 +- src/tests/mesh/test_elements.cc | 8 +- src/tests/mesh/test_halo.cc | 2 +- src/tests/parallel/test_haloexchange.cc | 4 +- src/tests/trans/test_trans_localcache.cc | 223 ++++++++---------- src/tests/trans/test_transgeneral.cc | 2 +- src/tests/util/test_earth.cc | 4 +- src/tests/util/test_indexview.cc | 2 +- 61 files changed, 539 insertions(+), 621 deletions(-) delete mode 100644 src/sandbox/fortran_modinc/CMakeLists.txt delete mode 100644 src/sandbox/fortran_modinc/mod1.f delete mode 100644 src/sandbox/fortran_modinc/mod1.h delete mode 100644 src/sandbox/fortran_modinc/mod2.f delete mode 100644 src/sandbox/fortran_modinc/mod2.h delete mode 100644 src/sandbox/fortran_modinc/sb_modinc.F90 diff --git 
a/src/atlas/array/SVector.h b/src/atlas/array/SVector.h index 3180ef4bb..cb74eebd1 100644 --- a/src/atlas/array/SVector.h +++ b/src/atlas/array/SVector.h @@ -28,7 +28,6 @@ namespace array { template class SVector { public: - ATLAS_HOST_DEVICE SVector() : data_( nullptr ), size_( 0 ), externally_allocated_( false ) {} @@ -41,13 +40,13 @@ class SVector { ATLAS_HOST_DEVICE SVector( T* data, size_t size ) : data_( data ), size_( size ) {} - SVector( size_t N ) : data_( nullptr), size_( N ), externally_allocated_( false ) { - if( N != 0 ) { + SVector( size_t N ) : data_( nullptr ), size_( N ), externally_allocated_( false ) { + if ( N != 0 ) { #if ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA - cudaError_t err = cudaMallocManaged( &data_, N * sizeof( T ) ); - if ( err != cudaSuccess ) throw eckit::AssertionFailed( "failed to allocate GPU memory" ); + cudaError_t err = cudaMallocManaged( &data_, N * sizeof( T ) ); + if ( err != cudaSuccess ) throw eckit::AssertionFailed( "failed to allocate GPU memory" ); #else - data_ = (T*)malloc( N * sizeof( T ) ); + data_ = (T*)malloc( N * sizeof( T ) ); #endif } } diff --git a/src/atlas/array/gridtools/GridToolsTraits.h b/src/atlas/array/gridtools/GridToolsTraits.h index 4aeb071ff..2be09472c 100644 --- a/src/atlas/array/gridtools/GridToolsTraits.h +++ b/src/atlas/array/gridtools/GridToolsTraits.h @@ -1,7 +1,7 @@ #pragma once -#include "gridtools/common/generic_metafunctions/is_all_integrals.hpp" #include "gridtools/common/generic_metafunctions/accumulate.hpp" +#include "gridtools/common/generic_metafunctions/is_all_integrals.hpp" #include "gridtools/storage/storage-facility.hpp" #include "atlas/array/ArrayViewDefs.h" diff --git a/src/atlas/array/helpers/ArraySlicer.h b/src/atlas/array/helpers/ArraySlicer.h index c12060851..b87c464bf 100644 --- a/src/atlas/array/helpers/ArraySlicer.h +++ b/src/atlas/array/helpers/ArraySlicer.h @@ -162,15 +162,13 @@ class ArraySlicer { return idx * view.stride( i_view++ ); } - static int offset_part( View& 
view, int& i_view, Range range ) { - return range.start() * view.stride( i_view++ ); } + static int offset_part( View& view, int& i_view, Range range ) { return range.start() * view.stride( i_view++ ); } static int offset_part( View& view, int& i_view, RangeAll range ) { return range.start() * view.stride( i_view++ ); } - static int offset_part( View& view, int& i_view, RangeTo range ) { - return range.start() * view.stride( i_view++ ); } + static int offset_part( View& view, int& i_view, RangeTo range ) { return range.start() * view.stride( i_view++ ); } static int offset_part( View& view, int& i_view, RangeFrom range ) { return range.start() * view.stride( i_view++ ); diff --git a/src/atlas/grid/Grid.cc b/src/atlas/grid/Grid.cc index 1c6a03b22..c4e04d55c 100644 --- a/src/atlas/grid/Grid.cc +++ b/src/atlas/grid/Grid.cc @@ -94,7 +94,7 @@ StructuredGrid::StructuredGrid( const XSpace& xspace, const YSpace& yspace, cons Grid( new detail::grid::Structured( xspace, yspace, projection, domain ) ), grid_( structured_grid( get() ) ) {} -StructuredGrid::StructuredGrid( const Grid& grid , const Grid::Domain& domain ) : +StructuredGrid::StructuredGrid( const Grid& grid, const Grid::Domain& domain ) : Grid( grid, domain ), grid_( structured_grid( get() ) ) {} diff --git a/src/atlas/grid/detail/grid/Grid.cc b/src/atlas/grid/detail/grid/Grid.cc index 68e937678..1b887421c 100644 --- a/src/atlas/grid/detail/grid/Grid.cc +++ b/src/atlas/grid/detail/grid/Grid.cc @@ -76,11 +76,12 @@ const Grid* Grid::create( const std::string& name, const Grid::Config& config ) // return GridBuilder::createNamed(name); } -const Grid* Grid::create( const Grid& grid, const Domain& domain) { - if( grid.type() == "structured" ) { - const Structured& g = dynamic_cast(grid); +const Grid* Grid::create( const Grid& grid, const Domain& domain ) { + if ( grid.type() == "structured" ) { + const Structured& g = dynamic_cast( grid ); return new Structured( g.name(), g.xspace(), g.yspace(), g.projection(), domain 
); - } else { + } + else { NOTIMP; } } diff --git a/src/atlas/grid/detail/grid/Structured.cc b/src/atlas/grid/detail/grid/Structured.cc index 2baa2223c..6cb838a1e 100644 --- a/src/atlas/grid/detail/grid/Structured.cc +++ b/src/atlas/grid/detail/grid/Structured.cc @@ -367,7 +367,7 @@ void Structured::crop( const Domain& dom ) { jmax = std::max( j, jmax ); } } - ASSERT(jmax >= jmin); + ASSERT( jmax >= jmin ); size_t cropped_ny = jmax - jmin + 1; std::vector cropped_y( y_.begin() + jmin, y_.begin() + jmin + cropped_ny ); @@ -445,10 +445,10 @@ void Structured::computeTruePeriodicity() { const PointLonLat Pllmax = projection().lonlat( PointXY( xmax_[j], y_[j] ) ); Point3 Pxmin; - util::UnitSphere::convertSphericalToCartesian(Pllmin, Pxmin ); + util::UnitSphere::convertSphericalToCartesian( Pllmin, Pxmin ); Point3 Pxmax; - util::UnitSphere::convertSphericalToCartesian(Pllmax, Pxmax ); + util::UnitSphere::convertSphericalToCartesian( Pllmax, Pxmax ); periodic_x_ = points_equal( Pxmin, Pxmax ); } diff --git a/src/atlas/interpolation/method/FiniteElement.cc b/src/atlas/interpolation/method/FiniteElement.cc index 166e9132c..b7b255ef4 100644 --- a/src/atlas/interpolation/method/FiniteElement.cc +++ b/src/atlas/interpolation/method/FiniteElement.cc @@ -165,7 +165,7 @@ void FiniteElement::setup( const FunctionSpace& source ) { for ( std::vector::const_iterator i = failures.begin(); i != failures.end(); ++i ) { const PointXYZ p{( *ocoords_ )( *i, 0 ), ( *ocoords_ )( *i, 1 ), ( *ocoords_ )( *i, 2 )}; // lookup point PointLonLat pll; - util::Earth::convertCartesianToSpherical(p, pll); + util::Earth::convertCartesianToSpherical( p, pll ); msg << "\t(lon,lat) = " << pll << "\n"; } diff --git a/src/atlas/mesh/actions/BuildStatistics.cc b/src/atlas/mesh/actions/BuildStatistics.cc index 4ae572550..02e45ef32 100644 --- a/src/atlas/mesh/actions/BuildStatistics.cc +++ b/src/atlas/mesh/actions/BuildStatistics.cc @@ -65,10 +65,10 @@ void quad_quality( double& eta, double& rho, const 
PointLonLat& p1, const PointL // see http://geuz.org/gmsh/doc/preprints/gmsh_quad_preprint.pdf PointXYZ xyz[4]; - util::UnitSphere::convertSphericalToCartesian(p1, xyz[0]); - util::UnitSphere::convertSphericalToCartesian(p2, xyz[1]); - util::UnitSphere::convertSphericalToCartesian(p3, xyz[2]); - util::UnitSphere::convertSphericalToCartesian(p4, xyz[3]); + util::UnitSphere::convertSphericalToCartesian( p1, xyz[0] ); + util::UnitSphere::convertSphericalToCartesian( p2, xyz[1] ); + util::UnitSphere::convertSphericalToCartesian( p3, xyz[2] ); + util::UnitSphere::convertSphericalToCartesian( p4, xyz[3] ); PointXYZ l2m1( PointXYZ::sub( xyz[1], xyz[0] ) ); PointXYZ l3m2( PointXYZ::sub( xyz[2], xyz[1] ) ); diff --git a/src/atlas/mesh/actions/ExtendNodesGlobal.cc b/src/atlas/mesh/actions/ExtendNodesGlobal.cc index d59a973e3..0d5d596de 100644 --- a/src/atlas/mesh/actions/ExtendNodesGlobal.cc +++ b/src/atlas/mesh/actions/ExtendNodesGlobal.cc @@ -10,13 +10,13 @@ #include "atlas/mesh/actions/ExtendNodesGlobal.h" -#include "eckit/exception/Exceptions.h" #include "atlas/field/Field.h" #include "atlas/grid/Grid.h" #include "atlas/mesh/Mesh.h" #include "atlas/mesh/Nodes.h" #include "atlas/util/CoordinateEnums.h" #include "atlas/util/Earth.h" +#include "eckit/exception/Exceptions.h" namespace atlas { namespace mesh { @@ -68,14 +68,14 @@ void ExtendNodesGlobal::operator()( const Grid& grid, Mesh& mesh ) const { PointXYZ pXYZ; util::Earth::convertSphericalToCartesian( pLL, pXYZ ); - xyz( n, XX ) = pXYZ.x(); - xyz( n, YY ) = pXYZ.y(); - xyz( n, ZZ ) = pXYZ.z(); - xy( n, XX ) = extended_pts[i].x(); - xy( n, YY ) = extended_pts[i].y(); - lonlat( n, LON ) = pLL.lon(); - lonlat( n, LAT ) = pLL.lat(); - gidx( n ) = n + 1; + xyz( n, XX ) = pXYZ.x(); + xyz( n, YY ) = pXYZ.y(); + xyz( n, ZZ ) = pXYZ.z(); + xy( n, XX ) = extended_pts[i].x(); + xy( n, YY ) = extended_pts[i].y(); + lonlat( n, LON ) = pLL.lon(); + lonlat( n, LAT ) = pLL.lat(); + gidx( n ) = n + 1; } } diff --git 
a/src/atlas/meshgenerator/StructuredMeshGenerator.cc b/src/atlas/meshgenerator/StructuredMeshGenerator.cc index 6f5497e69..6ee5e8936 100644 --- a/src/atlas/meshgenerator/StructuredMeshGenerator.cc +++ b/src/atlas/meshgenerator/StructuredMeshGenerator.cc @@ -78,8 +78,10 @@ StructuredMeshGenerator::StructuredMeshGenerator( const eckit::Parametrisation& if ( p.get( "unique_pole", unique_pole ) ) options.set( "unique_pole", unique_pole ); bool force_include_pole; - if ( p.get( "force_include_north_pole", force_include_pole ) ) options.set( "force_include_north_pole", force_include_pole ); - if ( p.get( "force_include_south_pole", force_include_pole ) ) options.set( "force_include_south_pole", force_include_pole ); + if ( p.get( "force_include_north_pole", force_include_pole ) ) + options.set( "force_include_north_pole", force_include_pole ); + if ( p.get( "force_include_south_pole", force_include_pole ) ) + options.set( "force_include_south_pole", force_include_pole ); bool three_dimensional; if ( p.get( "three_dimensional", three_dimensional ) || p.get( "3d", three_dimensional ) ) @@ -700,19 +702,23 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con bool has_point_at_north_pole = rg.y().front() == 90 && rg.nx().front() > 0; bool has_point_at_south_pole = rg.y().back() == -90 && rg.nx().back() > 0; - bool possible_north_pole = !has_point_at_north_pole && rg.domain().containsNorthPole() && ( mypart == 0 ); + bool possible_north_pole = !has_point_at_north_pole && rg.domain().containsNorthPole() && ( mypart == 0 ); bool possible_south_pole = !has_point_at_south_pole && rg.domain().containsSouthPole() && ( mypart == nparts - 1 ); - bool force_include_north_pole(options.has("force_include_north_pole") && options.get( "force_include_north_pole" )); - bool force_include_south_pole(options.has("force_include_south_pole") && options.get( "force_include_south_pole" )); + bool force_include_north_pole( options.has( "force_include_north_pole" ) && 
+ options.get( "force_include_north_pole" ) ); + bool force_include_south_pole( options.has( "force_include_south_pole" ) && + options.get( "force_include_south_pole" ) ); - bool include_north_pole = (possible_north_pole && options.get( "include_pole" )) || force_include_north_pole; - bool include_south_pole = (possible_south_pole && options.get( "include_pole" )) || force_include_south_pole; - bool patch_north_pole = possible_north_pole && options.get( "patch_pole" ) && rg.nx( 1 ) > 0; - bool patch_south_pole = possible_south_pole && options.get( "patch_pole" ) && rg.nx( rg.ny() - 2 ) > 0; + bool include_north_pole = + ( possible_north_pole && options.get( "include_pole" ) ) || force_include_north_pole; + bool include_south_pole = + ( possible_south_pole && options.get( "include_pole" ) ) || force_include_south_pole; + bool patch_north_pole = possible_north_pole && options.get( "patch_pole" ) && rg.nx( 1 ) > 0; + bool patch_south_pole = possible_south_pole && options.get( "patch_pole" ) && rg.nx( rg.ny() - 2 ) > 0; - int nnewnodes = (!has_point_at_north_pole && include_north_pole ? 1 : 0) - + (!has_point_at_south_pole && include_south_pole ? 1 : 0); + int nnewnodes = ( !has_point_at_north_pole && include_north_pole ? 1 : 0 ) + + ( !has_point_at_south_pole && include_south_pole ? 
1 : 0 ); if ( three_dimensional && nparts != 1 ) throw BadParameter( "Cannot generate three_dimensional mesh in parallel", Here() ); @@ -827,7 +833,7 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con } else if ( include_periodic_ghost_points ) // add periodic point { -//#warning TODO: use commented approach + //#warning TODO: use commented approach part( jnode ) = mypart; // part(jnode) = parts.at( offset_glb.at(jlat) ); ghost( jnode ) = 1; @@ -922,7 +928,7 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con lonlat( inode, LAT ) = crd[LAT]; glb_idx( inode ) = periodic_glb.at( jlat ) + 1; -//#warning TODO: use commented approach + //#warning TODO: use commented approach // part(inode) = parts.at( offset_glb.at(jlat) ); part( inode ) = mypart; // The actual part will be fixed later ghost( inode ) = 1; @@ -983,8 +989,8 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con ++jnode; } - nodes.metadata().set( "NbRealPts", size_t(nnodes - nnewnodes) ); - nodes.metadata().set( "NbVirtualPts", size_t(nnewnodes) ); + nodes.metadata().set( "NbRealPts", size_t( nnodes - nnewnodes ) ); + nodes.metadata().set( "NbVirtualPts", size_t( nnewnodes ) ); nodes.global_index().metadata().set( "human_readable", true ); nodes.global_index().metadata().set( "min", 1 ); diff --git a/src/atlas/option/TransOptions.cc b/src/atlas/option/TransOptions.cc index 98e036acc..d9aa0565e 100644 --- a/src/atlas/option/TransOptions.cc +++ b/src/atlas/option/TransOptions.cc @@ -33,7 +33,8 @@ flt::flt( bool flt ) { } fft::fft( FFT fft ) { - static const std::map FFT_to_string = { {FFT::OFF, "OFF"}, {FFT::FFT992, "FFT992"}, {FFT::FFTW, "FFTW"}}; + static const std::map FFT_to_string = { + {FFT::OFF, "OFF"}, {FFT::FFT992, "FFT992"}, {FFT::FFTW, "FFTW"}}; set( "fft", FFT_to_string.at( fft ) ); } diff --git a/src/atlas/option/TransOptions.h b/src/atlas/option/TransOptions.h index d87bcdab3..bb5c0636c 100644 --- 
a/src/atlas/option/TransOptions.h +++ b/src/atlas/option/TransOptions.h @@ -14,10 +14,6 @@ // ---------------------------------------------------------------------------- -namespace atlas { class Grid; } - -// ---------------------------------------------------------------------------- - namespace atlas { namespace option { diff --git a/src/atlas/runtime/trace/CallStack.cc b/src/atlas/runtime/trace/CallStack.cc index 41b6366a9..d441c2930 100644 --- a/src/atlas/runtime/trace/CallStack.cc +++ b/src/atlas/runtime/trace/CallStack.cc @@ -10,7 +10,7 @@ namespace runtime { namespace trace { void CallStack::push_front( const eckit::CodeLocation& loc, const std::string& id ) { - stack_.push_front( std::hash{}( loc.asString()+id ) ); + stack_.push_front( std::hash{}( loc.asString() + id ) ); } void CallStack::pop_front() { diff --git a/src/atlas/runtime/trace/Nesting.cc b/src/atlas/runtime/trace/Nesting.cc index fe46dbc8c..c01d5f2d8 100644 --- a/src/atlas/runtime/trace/Nesting.cc +++ b/src/atlas/runtime/trace/Nesting.cc @@ -36,11 +36,10 @@ class NestingState { void pop() { stack_.pop_front(); } }; -Nesting::Nesting( const eckit::CodeLocation& loc, const std::string& id ) : +Nesting::Nesting( const eckit::CodeLocation& loc, const std::string& id ) : loc_( loc ), id_( id ), - stack_( NestingState::instance().push( loc, id ) ) { -} + stack_( NestingState::instance().push( loc, id ) ) {} Nesting::~Nesting() { stop(); diff --git a/src/atlas/trans/Cache.cc b/src/atlas/trans/Cache.cc index ce22fb2c4..fa6cf1d26 100644 --- a/src/atlas/trans/Cache.cc +++ b/src/atlas/trans/Cache.cc @@ -8,116 +8,96 @@ * nor does it submit to any jurisdiction. 
*/ -#include #include "atlas/trans/Cache.h" +#include -#include "eckit/io/DataHandle.h" #include "eckit/exception/Exceptions.h" +#include "eckit/io/DataHandle.h" #include "eckit/thread/AutoLock.h" #include "eckit/thread/Mutex.h" -#include "atlas/runtime/Trace.h" #include "atlas/runtime/Log.h" +#include "atlas/runtime/Trace.h" #include "atlas/trans/Trans.h" namespace { -static eckit::Mutex* local_mutex = 0; -static pthread_once_t once = PTHREAD_ONCE_INIT; +static eckit::Mutex* local_mutex = 0; +static pthread_once_t once = PTHREAD_ONCE_INIT; static void init() { local_mutex = new eckit::Mutex(); } -} +} // namespace namespace atlas { namespace trans { -TransCacheFileEntry::TransCacheFileEntry(const eckit::PathName& path) : buffer_( path.size() ) { - ATLAS_TRACE(); - Log::debug() << "Loading cache from file " << path << std::endl; - std::unique_ptr dh( path.fileHandle() ); - dh->openForRead(); - dh->read( buffer_.data(), buffer_.size() ); - dh->close(); +TransCacheFileEntry::TransCacheFileEntry( const eckit::PathName& path ) : buffer_( path.size() ) { + ATLAS_TRACE(); + Log::debug() << "Loading cache from file " << path << std::endl; + std::unique_ptr dh( path.fileHandle() ); + dh->openForRead(); + dh->read( buffer_.data(), buffer_.size() ); + dh->close(); } -TransCacheMemoryEntry::TransCacheMemoryEntry(const void* data, size_t size) : data_(data), size_(size) { - ASSERT(data_); - ASSERT(size_); +TransCacheMemoryEntry::TransCacheMemoryEntry( const void* data, size_t size ) : data_( data ), size_( size ) { + ASSERT( data_ ); + ASSERT( size_ ); } -LegendreFFTCache::LegendreFFTCache( const void* legendre_address, size_t legendre_size, const void* fft_address, size_t fft_size ) : - Cache( std::make_shared( legendre_address, legendre_size ), - std::make_shared( fft_address, fft_size ) ) { -} +LegendreFFTCache::LegendreFFTCache( const void* legendre_address, size_t legendre_size, const void* fft_address, + size_t fft_size ) : + Cache( std::make_shared( legendre_address, 
legendre_size ), + std::make_shared( fft_address, fft_size ) ) {} LegendreFFTCache::LegendreFFTCache( const eckit::PathName& legendre_path, const eckit::PathName& fft_path ) : - Cache( std::shared_ptr( new TransCacheFileEntry( legendre_path ) ), - std::shared_ptr( new TransCacheFileEntry( fft_path ) ) ) { -} + Cache( std::shared_ptr( new TransCacheFileEntry( legendre_path ) ), + std::shared_ptr( new TransCacheFileEntry( fft_path ) ) ) {} LegendreCache::LegendreCache( const eckit::PathName& path ) : - Cache( std::shared_ptr( new TransCacheFileEntry( path ) ) ) { -} + Cache( std::shared_ptr( new TransCacheFileEntry( path ) ) ) {} -LegendreCache::LegendreCache( size_t size) : - Cache( std::make_shared( size ) ) { -} +LegendreCache::LegendreCache( size_t size ) : Cache( std::make_shared( size ) ) {} LegendreCache::LegendreCache( const void* address, size_t size ) : - Cache( std::make_shared( address, size ) ) { -} + Cache( std::make_shared( address, size ) ) {} -Cache::Cache(const std::shared_ptr& legendre) : - trans_( nullptr ), - legendre_( legendre ), - fft_( new EmptyCacheEntry() ) {} +Cache::Cache( const std::shared_ptr& legendre ) : + trans_( nullptr ), + legendre_( legendre ), + fft_( new EmptyCacheEntry() ) {} -Cache::Cache(const std::shared_ptr& legendre, const std::shared_ptr& fft) : - trans_( nullptr ), - legendre_( legendre ), - fft_( fft ) {} +Cache::Cache( const std::shared_ptr& legendre, const std::shared_ptr& fft ) : + trans_( nullptr ), + legendre_( legendre ), + fft_( fft ) {} Cache::Cache( const TransImpl* trans ) : - trans_( trans ), - legendre_( new EmptyCacheEntry() ), - fft_( new EmptyCacheEntry() ) { -} + trans_( trans ), + legendre_( new EmptyCacheEntry() ), + fft_( new EmptyCacheEntry() ) {} -Cache::Cache() : - trans_( nullptr ), - legendre_( new EmptyCacheEntry() ), - fft_( new EmptyCacheEntry() ) {} +Cache::Cache() : trans_( nullptr ), legendre_( new EmptyCacheEntry() ), fft_( new EmptyCacheEntry() ) {} -Cache::Cache( const Cache& other ) : 
- trans_( other.trans_ ), - legendre_( other.legendre_ ), - fft_( other.fft_ ) { -} +Cache::Cache( const Cache& other ) : trans_( other.trans_ ), legendre_( other.legendre_ ), fft_( other.fft_ ) {} Cache::operator bool() const { - return trans_ || bool(legendre()) ; + return trans_ || bool( legendre() ); } Cache::~Cache() { - pthread_once( &once, init ); - eckit::AutoLock lock( local_mutex ); + pthread_once( &once, init ); + eckit::AutoLock lock( local_mutex ); } -TransCache::TransCache( const Trans& trans ) : - Cache( trans.get() ) { -} +TransCache::TransCache( const Trans& trans ) : Cache( trans.get() ) {} -TransCacheOwnedMemoryEntry::TransCacheOwnedMemoryEntry(size_t size) : - size_(size) { - if( size_ ) { - data_ = std::malloc( size_ ); - } +TransCacheOwnedMemoryEntry::TransCacheOwnedMemoryEntry( size_t size ) : size_( size ) { + if ( size_ ) { data_ = std::malloc( size_ ); } } TransCacheOwnedMemoryEntry::~TransCacheOwnedMemoryEntry() { - if( size_ ) { - std::free( data_ ); - } + if ( size_ ) { std::free( data_ ); } } } // namespace trans diff --git a/src/atlas/trans/Cache.h b/src/atlas/trans/Cache.h index 8cd6b8097..318ba373c 100644 --- a/src/atlas/trans/Cache.h +++ b/src/atlas/trans/Cache.h @@ -56,7 +56,8 @@ class EmptyCacheEntry final : public TransCacheEntry { //----------------------------------------------------------------------------- class TransCacheFileEntry final : public TransCacheEntry { private: - eckit::Buffer buffer_; + eckit::Buffer buffer_; + public: TransCacheFileEntry( const eckit::PathName& path ); virtual size_t size() const override { return buffer_.size(); } @@ -67,9 +68,10 @@ class TransCacheFileEntry final : public TransCacheEntry { class TransCacheMemoryEntry final : public TransCacheEntry { public: - TransCacheMemoryEntry(const void* data, size_t size); + TransCacheMemoryEntry( const void* data, size_t size ); virtual const void* data() const override { return data_; } virtual size_t size() const override { return size_; } + private: 
const void* data_; const size_t size_; @@ -79,12 +81,13 @@ class TransCacheMemoryEntry final : public TransCacheEntry { class TransCacheOwnedMemoryEntry final : public TransCacheEntry { public: - TransCacheOwnedMemoryEntry(size_t size); + TransCacheOwnedMemoryEntry( size_t size ); ~TransCacheOwnedMemoryEntry(); virtual const void* data() const override { return data_; } virtual size_t size() const override { return size_; } + private: - void* data_ = nullptr; + void* data_ = nullptr; const size_t size_ = 0; }; @@ -99,13 +102,15 @@ class Cache { const TransCacheEntry& legendre() const { return *legendre_; } const TransCacheEntry& fft() const { return *fft_; } virtual ~Cache(); + protected: Cache( const std::shared_ptr& legendre ); Cache( const std::shared_ptr& legendre, const std::shared_ptr& fft ); Cache( const TransImpl* ); + private: eckit::SharedPtr trans_; -// const TransImpl* trans_ = nullptr; + // const TransImpl* trans_ = nullptr; std::shared_ptr legendre_; std::shared_ptr fft_; }; @@ -125,8 +130,7 @@ class LegendreCache : public Cache { class LegendreFFTCache : public Cache { public: - LegendreFFTCache( const void* legendre_address, size_t legendre_size, - const void* fft_address, size_t fft_size ); + LegendreFFTCache( const void* legendre_address, size_t legendre_size, const void* fft_address, size_t fft_size ); LegendreFFTCache( const eckit::PathName& legendre_path, const eckit::PathName& fft_path ); }; diff --git a/src/atlas/trans/LegendreCacheCreator.cc b/src/atlas/trans/LegendreCacheCreator.cc index a51165dd8..f5411e6b6 100644 --- a/src/atlas/trans/LegendreCacheCreator.cc +++ b/src/atlas/trans/LegendreCacheCreator.cc @@ -30,9 +30,9 @@ LegendreCacheCreatorImpl::~LegendreCacheCreatorImpl() {} namespace { -static eckit::Mutex* local_mutex = 0; +static eckit::Mutex* local_mutex = 0; static std::map* m = 0; -static pthread_once_t once = PTHREAD_ONCE_INIT; +static pthread_once_t once = PTHREAD_ONCE_INIT; static void init() { local_mutex = new eckit::Mutex(); 
@@ -106,7 +106,7 @@ void LegendreCacheCreatorFactory::list( std::ostream& out ) { } LegendreCacheCreator::Implementation* LegendreCacheCreatorFactory::build( const Grid& grid, int truncation, - const eckit::Configuration& config ) { + const eckit::Configuration& config ) { pthread_once( &once, init ); eckit::AutoLock lock( local_mutex ); diff --git a/src/atlas/trans/LegendreCacheCreator.h b/src/atlas/trans/LegendreCacheCreator.h index db0f71aa8..f6be334c4 100644 --- a/src/atlas/trans/LegendreCacheCreator.h +++ b/src/atlas/trans/LegendreCacheCreator.h @@ -16,8 +16,8 @@ #include "eckit/memory/Owned.h" #include "eckit/memory/SharedPtr.h" -#include "atlas/util/Config.h" #include "atlas/trans/Trans.h" +#include "atlas/util/Config.h" //----------------------------------------------------------------------------- // Forward declarations @@ -97,7 +97,9 @@ class LegendreCacheCreatorFactory { private: std::string name_; - virtual LegendreCacheCreatorImpl* make( const Grid& gp, int truncation, const eckit::Configuration& ) { return nullptr; } + virtual LegendreCacheCreatorImpl* make( const Grid& gp, int truncation, const eckit::Configuration& ) { + return nullptr; + } protected: LegendreCacheCreatorFactory( const std::string& ); diff --git a/src/atlas/trans/Trans.cc b/src/atlas/trans/Trans.cc index 7a6a0e6ea..a8e7f6f2a 100644 --- a/src/atlas/trans/Trans.cc +++ b/src/atlas/trans/Trans.cc @@ -24,7 +24,7 @@ #include "atlas/trans/ifs/TransIFSNodeColumns.h" #include "atlas/trans/ifs/TransIFSStructuredColumns.h" #endif -#include "atlas/trans/local/TransLocal.h" // --> recommended "local" +#include "atlas/trans/local/TransLocal.h" // --> recommended "local" namespace { struct default_backend { @@ -37,10 +37,11 @@ struct default_backend { static default_backend x; return x; } + private: default_backend() = default; }; -} +} // namespace namespace atlas { namespace trans { @@ -94,8 +95,7 @@ TransFactory& factory( const std::string& name ) { } // namespace -TransFactory::TransFactory( 
const std::string& name ) : - name_( name ) { +TransFactory::TransFactory( const std::string& name ) : name_( name ) { pthread_once( &once, init ); eckit::AutoLock lock( local_mutex ); @@ -126,7 +126,7 @@ void TransFactory::backend( const std::string& backend ) { } std::string TransFactory::backend() { - return default_options_.getString("type"); + return default_options_.getString( "type" ); } const eckit::Configuration& TransFactory::config() { @@ -136,9 +136,7 @@ const eckit::Configuration& TransFactory::config() { void TransFactory::config( const eckit::Configuration& config ) { std::string type = default_options_.getString( "type" ); default_options_ = config; - if( not config.has("type") ) { - default_options_.set( "type", type ); - } + if ( not config.has( "type" ) ) { default_options_.set( "type", type ); } } void TransFactory::list( std::ostream& out ) { @@ -162,7 +160,7 @@ Trans::Implementation* TransFactory::build( const FunctionSpace& gp, const Funct Trans::Implementation* TransFactory::build( const Cache& cache, const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& config ) { - if( cache.trans() ) { + if ( cache.trans() ) { Log::debug() << "Creating Trans from cache, ignoring any other arguments" << std::endl; return cache.trans(); } @@ -188,7 +186,8 @@ Trans::Implementation* TransFactory::build( const Grid& grid, int truncation, co return build( Cache(), grid, truncation, config ); } -Trans::Implementation* TransFactory::build( const Grid& grid, const Domain& domain, int truncation, const eckit::Configuration& config ) { +Trans::Implementation* TransFactory::build( const Grid& grid, const Domain& domain, int truncation, + const eckit::Configuration& config ) { return build( Cache(), grid, domain, truncation, config ); } @@ -199,7 +198,7 @@ Trans::Implementation* TransFactory::build( const Cache& cache, const Grid& grid Trans::Implementation* TransFactory::build( const Cache& cache, const Grid& grid, const Domain& domain, int 
truncation, const eckit::Configuration& config ) { - if( cache.trans() ) { + if ( cache.trans() ) { Log::debug() << "Creating Trans from cache, ignoring any other arguments" << std::endl; return cache.trans(); } @@ -237,17 +236,17 @@ const eckit::Configuration& Trans::config() { return TransFactory::config(); } -void Trans::config( const eckit::Configuration& options ) { +void Trans::config( const eckit::Configuration& options ) { TransFactory::config( options ); } namespace { util::Config options( const eckit::Configuration& config ) { util::Config opts = Trans::config(); - opts.set(config); + opts.set( config ); return opts; } -} +} // namespace Trans::Trans() {} @@ -269,7 +268,8 @@ Trans::Trans( const Cache& cache, const FunctionSpace& gp, const FunctionSpace& Trans::Trans( const Cache& cache, const Grid& grid, int truncation, const eckit::Configuration& config ) : impl_( TransFactory::build( cache, grid, truncation, config ) ) {} -Trans::Trans( const Cache& cache, const Grid& grid, const Domain& domain, int truncation, const eckit::Configuration& config ) : +Trans::Trans( const Cache& cache, const Grid& grid, const Domain& domain, int truncation, + const eckit::Configuration& config ) : impl_( TransFactory::build( cache, grid, domain, truncation, config ) ) {} Trans::Trans( const Trans& trans ) : impl_( trans.impl_ ) {} @@ -287,37 +287,37 @@ size_t Trans::spectralCoefficients() const { } void Trans::dirtrans( const Field& gpfield, Field& spfield, const eckit::Configuration& config ) const { - impl_->dirtrans( gpfield, spfield, options(config) ); + impl_->dirtrans( gpfield, spfield, options( config ) ); } void Trans::dirtrans( const FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config ) const { - impl_->dirtrans( gpfields, spfields, options(config) ); + impl_->dirtrans( gpfields, spfields, options( config ) ); } void Trans::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, const eckit::Configuration& config ) const { - 
impl_->dirtrans_wind2vordiv( gpwind, spvor, spdiv, options(config) ); + impl_->dirtrans_wind2vordiv( gpwind, spvor, spdiv, options( config ) ); } void Trans::invtrans( const Field& spfield, Field& gpfield, const eckit::Configuration& config ) const { - impl_->invtrans( spfield, gpfield, options(config) ); + impl_->invtrans( spfield, gpfield, options( config ) ); } void Trans::invtrans( const FieldSet& spfields, FieldSet& gpfields, const eckit::Configuration& config ) const { - impl_->invtrans( spfields, gpfields, options(config) ); + impl_->invtrans( spfields, gpfields, options( config ) ); } void Trans::invtrans_grad( const Field& spfield, Field& gradfield, const eckit::Configuration& config ) const { - impl_->invtrans_grad( spfield, gradfield, options(config) ); + impl_->invtrans_grad( spfield, gradfield, options( config ) ); } void Trans::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields, const eckit::Configuration& config ) const { - impl_->invtrans_grad( spfields, gradfields, options(config) ); + impl_->invtrans_grad( spfields, gradfields, options( config ) ); } void Trans::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind, const eckit::Configuration& config ) const { - impl_->invtrans_vordiv2wind( spvor, spdiv, gpwind, options(config) ); + impl_->invtrans_vordiv2wind( spvor, spdiv, gpwind, options( config ) ); } // -- IFS type fields -- @@ -338,7 +338,7 @@ void Trans::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], const eckit::Configuration& config ) const { impl_->invtrans( nb_scalar_fields, scalar_spectra, nb_vordiv_fields, vorticity_spectra, divergence_spectra, - gp_fields, options(config) ); + gp_fields, options( config ) ); } /*! 
@@ -349,7 +349,7 @@ void Trans::invtrans( const int nb_scalar_fields, const double scalar_spectra[], */ void Trans::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[], const eckit::Configuration& config ) const { - impl_->invtrans( nb_scalar_fields, scalar_spectra, gp_fields, options(config) ); + impl_->invtrans( nb_scalar_fields, scalar_spectra, gp_fields, options( config ) ); } /*! @@ -358,7 +358,7 @@ void Trans::invtrans( const int nb_scalar_fields, const double scalar_spectra[], */ void Trans::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], const eckit::Configuration& config ) const { - impl_->invtrans( nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, options(config) ); + impl_->invtrans( nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, options( config ) ); } /*! @@ -366,7 +366,7 @@ void Trans::invtrans( const int nb_vordiv_fields, const double vorticity_spectra */ void Trans::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], const eckit::Configuration& config ) const { - impl_->dirtrans( nb_fields, scalar_fields, scalar_spectra, options(config) ); + impl_->dirtrans( nb_fields, scalar_fields, scalar_spectra, options( config ) ); } /*! 
@@ -375,7 +375,7 @@ void Trans::dirtrans( const int nb_fields, const double scalar_fields[], double */ void Trans::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], double divergence_spectra[], const eckit::Configuration& config ) const { - impl_->dirtrans( nb_fields, wind_fields, vorticity_spectra, divergence_spectra, options(config) ); + impl_->dirtrans( nb_fields, wind_fields, vorticity_spectra, divergence_spectra, options( config ) ); } } // namespace trans diff --git a/src/atlas/trans/Trans.h b/src/atlas/trans/Trans.h index e4db195f8..7a8f18a12 100644 --- a/src/atlas/trans/Trans.h +++ b/src/atlas/trans/Trans.h @@ -13,8 +13,8 @@ #include "eckit/memory/Owned.h" #include "eckit/memory/SharedPtr.h" -#include "atlas/util/Config.h" #include "atlas/trans/Cache.h" +#include "atlas/util/Config.h" //----------------------------------------------------------------------------- // Forward declarations @@ -124,23 +124,25 @@ class TransImpl : public eckit::Owned { class TransFactory { protected: using Trans_t = const TransImpl; + public: /*! 
* \brief build Trans * \return TransImpl */ static Trans_t* build( const FunctionSpace& gp, const FunctionSpace& sp, - const eckit::Configuration& = util::Config() ); + const eckit::Configuration& = util::Config() ); static Trans_t* build( const Grid&, int truncation, const eckit::Configuration& = util::Config() ); static Trans_t* build( const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::Config() ); static Trans_t* build( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp, - const eckit::Configuration& = util::Config() ); + const eckit::Configuration& = util::Config() ); static Trans_t* build( const Cache&, const Grid&, int truncation, const eckit::Configuration& = util::Config() ); - static Trans_t* build( const Cache&, const Grid&, const Domain&, int truncation, const eckit::Configuration& = util::Config() ); + static Trans_t* build( const Cache&, const Grid&, const Domain&, int truncation, + const eckit::Configuration& = util::Config() ); /*! * \brief list all registered trans implementations @@ -160,7 +162,8 @@ class TransFactory { private: std::string name_; static util::Config default_options_; - virtual Trans_t* make( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp, const eckit::Configuration& ) { + virtual Trans_t* make( const Cache&, const FunctionSpace& gp, const FunctionSpace& sp, + const eckit::Configuration& ) { return nullptr; } virtual Trans_t* make( const Cache&, const Grid& gp, const Domain&, int truncation, const eckit::Configuration& ) { @@ -177,7 +180,7 @@ class TransFactory { template class TransBuilderFunctionSpace : public TransFactory { virtual Trans_t* make( const Cache& cache, const FunctionSpace& gp, const FunctionSpace& sp, - const eckit::Configuration& config ) { + const eckit::Configuration& config ) { return new T( cache, gp, sp, config ); } virtual Trans_t* make( const Cache&, const Grid&, const Domain&, int, const eckit::Configuration& ) { @@ -191,7 +194,7 @@ class 
TransBuilderFunctionSpace : public TransFactory { template class TransBuilderGrid : public TransFactory { virtual Trans_t* make( const Cache& cache, const Grid& grid, const Domain& domain, int truncation, - const eckit::Configuration& config ) { + const eckit::Configuration& config ) { return new T( cache, grid, domain, truncation, config ); } virtual Trans_t* make( const Cache&, const FunctionSpace&, const FunctionSpace&, const eckit::Configuration& ) { @@ -212,7 +215,6 @@ class Trans { eckit::SharedPtr impl_; public: - static bool hasBackend( const std::string& ); static void backend( const std::string& ); static std::string backend(); diff --git a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc index 7ac8e74b0..54b276d64 100644 --- a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc +++ b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.cc @@ -9,12 +9,12 @@ */ #include "atlas/trans/ifs/LegendreCacheCreatorIFS.h" -#include #include -#include "eckit/utils/MD5.h" +#include #include "atlas/grid.h" #include "atlas/option.h" #include "atlas/trans/Trans.h" +#include "eckit/utils/MD5.h" namespace atlas { namespace trans { @@ -26,92 +26,91 @@ static LegendreCacheCreatorBuilder builder( "ifs" ); namespace { std::string truncate( const std::string& str ) { - const int trunc = std::min(10ul,str.size()); - return str.substr( 0, trunc ); + const int trunc = std::min( 10ul, str.size() ); + return str.substr( 0, trunc ); } std::string hash( const Grid& grid ) { - eckit::MD5 h; - if( grid::StructuredGrid( grid ) && not grid.projection() ) { - auto g = grid::StructuredGrid( grid ); - h.add( g.y().data(), g.y().size() * sizeof(double) ); - } else { - grid.hash( h ); - } - return truncate( h.digest() ); + eckit::MD5 h; + if ( grid::StructuredGrid( grid ) && not grid.projection() ) { + auto g = grid::StructuredGrid( grid ); + h.add( g.y().data(), g.y().size() * sizeof( double ) ); + } + else { + grid.hash( h ); + } + return truncate( 
h.digest() ); } std::string hash( const eckit::Configuration& config ) { - eckit::MD5 h; + eckit::MD5 h; - // Add options and other unique keys - h << "flt" << config.getBool( "flt", false ); + // Add options and other unique keys + h << "flt" << config.getBool( "flt", false ); - return truncate( h.digest() ); + return truncate( h.digest() ); } -} +} // namespace std::string LegendreCacheCreatorIFS::uid() const { - if( unique_identifier_.empty() ) { - std::ostringstream stream; - stream << "ifs-T" << truncation_ << "-"; - if( grid::GaussianGrid( grid_ ) ) { - if( grid::RegularGaussianGrid( grid_ ) ) { - stream << "RegularGaussianN" << grid::GaussianGrid( grid_ ).N(); - } else { - stream << "ReducedGaussianN" << grid::GaussianGrid( grid_ ).N() << "-PL"; - stream << hash( grid_ ); - } - } else if( grid::RegularLonLatGrid( grid_ ) ) { - auto g = grid::RegularLonLatGrid( grid_ ); - if( g.standard() || g.shifted() ) { - stream << ( g.standard() ? "L" : "S" ) << g.nx() << "x" << g.ny(); - } else { - // We cannot make more assumptions on reusability for different grids - stream << "grid-" << hash( grid_ ); - } - } else { - // We cannot make more assumptions on reusability for different grids - stream << "grid-" << hash( grid_ ); + if ( unique_identifier_.empty() ) { + std::ostringstream stream; + stream << "ifs-T" << truncation_ << "-"; + if ( grid::GaussianGrid( grid_ ) ) { + if ( grid::RegularGaussianGrid( grid_ ) ) { + stream << "RegularGaussianN" << grid::GaussianGrid( grid_ ).N(); + } + else { + stream << "ReducedGaussianN" << grid::GaussianGrid( grid_ ).N() << "-PL"; + stream << hash( grid_ ); + } + } + else if ( grid::RegularLonLatGrid( grid_ ) ) { + auto g = grid::RegularLonLatGrid( grid_ ); + if ( g.standard() || g.shifted() ) { stream << ( g.standard() ? 
"L" : "S" ) << g.nx() << "x" << g.ny(); } + else { + // We cannot make more assumptions on reusability for different grids + stream << "grid-" << hash( grid_ ); + } + } + else { + // We cannot make more assumptions on reusability for different grids + stream << "grid-" << hash( grid_ ); + } + stream << "-OPT" << hash( config_ ); + unique_identifier_ = stream.str(); } - stream << "-OPT" << hash( config_ ); - unique_identifier_ = stream.str(); - } - return unique_identifier_; + return unique_identifier_; } LegendreCacheCreatorIFS::~LegendreCacheCreatorIFS() {} bool LegendreCacheCreatorIFS::supported() const { - if( grid::GaussianGrid( grid_ ) ) { - return true; - } else if( grid::RegularLonLatGrid( grid_ ) ) { - auto g = grid::RegularLonLatGrid( grid_ ); - if( g.standard() || g.shifted() ) { - return true; + if ( grid::GaussianGrid( grid_ ) ) { return true; } + else if ( grid::RegularLonLatGrid( grid_ ) ) { + auto g = grid::RegularLonLatGrid( grid_ ); + if ( g.standard() || g.shifted() ) { return true; } } - } - return false; + return false; } -LegendreCacheCreatorIFS::LegendreCacheCreatorIFS( const Grid& grid, int truncation, const eckit::Configuration& config ) : - grid_(grid), - truncation_(truncation), - config_(config) { -} +LegendreCacheCreatorIFS::LegendreCacheCreatorIFS( const Grid& grid, int truncation, + const eckit::Configuration& config ) : + grid_( grid ), + truncation_( truncation ), + config_( config ) {} void LegendreCacheCreatorIFS::create( const std::string& path ) const { - Trans( grid_, truncation_, config_ | option::type("ifs") | option::write_legendre( path ) ); + Trans( grid_, truncation_, config_ | option::type( "ifs" ) | option::write_legendre( path ) ); } Cache LegendreCacheCreatorIFS::create() const { - return TransCache( - Trans( grid_, truncation_, config_ | option::type("ifs") ) ); + return TransCache( Trans( grid_, truncation_, config_ | option::type( "ifs" ) ) ); } size_t LegendreCacheCreatorIFS::estimate() const { - return 
size_t(truncation_ * truncation_ * truncation_) / 2 * sizeof(double); + return size_t( truncation_ * truncation_ * truncation_ ) / 2 * sizeof( double ); } diff --git a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h index e6629ebea..947f72599 100644 --- a/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h +++ b/src/atlas/trans/ifs/LegendreCacheCreatorIFS.h @@ -10,8 +10,8 @@ #pragma once -#include "atlas/trans/LegendreCacheCreator.h" #include "atlas/grid/Grid.h" +#include "atlas/trans/LegendreCacheCreator.h" #include "atlas/util/Config.h" //----------------------------------------------------------------------------- @@ -31,7 +31,7 @@ class LegendreCacheCreatorIFS : public trans::LegendreCacheCreatorImpl { virtual std::string uid() const override; - virtual void create(const std::string &path) const override; + virtual void create( const std::string& path ) const override; virtual Cache create() const override; diff --git a/src/atlas/trans/ifs/TransIFS.cc b/src/atlas/trans/ifs/TransIFS.cc index 7532f237d..62a02568c 100644 --- a/src/atlas/trans/ifs/TransIFS.cc +++ b/src/atlas/trans/ifs/TransIFS.cc @@ -638,13 +638,14 @@ TransIFS::TransIFS( const Grid& grid, const eckit::Configuration& config ) : TransIFS( grid, /*grid-only*/ -1, config ) {} - -TransIFS::TransIFS( const Grid& grid, const Domain& domain, const long truncation, const eckit::Configuration& config ) : +TransIFS::TransIFS( const Grid& grid, const Domain& domain, const long truncation, + const eckit::Configuration& config ) : TransIFS( Cache(), grid, truncation, config ) { ASSERT( domain.global() ); } -TransIFS::TransIFS( const Cache& cache, const Grid& grid, const Domain& domain, const long truncation, const eckit::Configuration& config ) : +TransIFS::TransIFS( const Cache& cache, const Grid& grid, const Domain& domain, const long truncation, + const eckit::Configuration& config ) : TransIFS( cache, grid, truncation, config ) { ASSERT( domain.global() ); } diff --git 
a/src/atlas/trans/ifs/TransIFS.h b/src/atlas/trans/ifs/TransIFS.h index ee6bf8909..1ae224e2e 100644 --- a/src/atlas/trans/ifs/TransIFS.h +++ b/src/atlas/trans/ifs/TransIFS.h @@ -78,7 +78,8 @@ class TransIFS : public trans::TransImpl { TransIFS( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() ); TransIFS( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); TransIFS( const Cache&, const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() ); - TransIFS( const Cache&, const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); + TransIFS( const Cache&, const Grid&, const Domain&, const long truncation, + const eckit::Configuration& = util::NoConfig() ); virtual ~TransIFS(); operator ::Trans_t*() const { return trans(); } diff --git a/src/atlas/trans/local/LegendreCacheCreatorLocal.cc b/src/atlas/trans/local/LegendreCacheCreatorLocal.cc index 4e3488aed..54e58060c 100644 --- a/src/atlas/trans/local/LegendreCacheCreatorLocal.cc +++ b/src/atlas/trans/local/LegendreCacheCreatorLocal.cc @@ -9,14 +9,14 @@ */ #include "atlas/trans/local/LegendreCacheCreatorLocal.h" -#include #include -#include "eckit/utils/MD5.h" -#include "eckit/types/FloatCompare.h" +#include #include "atlas/grid.h" #include "atlas/option.h" #include "atlas/trans/Trans.h" #include "atlas/trans/local/TransLocal.h" +#include "eckit/types/FloatCompare.h" +#include "eckit/utils/MD5.h" namespace atlas { namespace trans { @@ -28,104 +28,112 @@ static LegendreCacheCreatorBuilder builder( "local" ) namespace { std::string truncate( const std::string& str ) { - const int trunc = std::min(10ul,str.size()); - return str.substr( 0, trunc ); + const int trunc = std::min( 10ul, str.size() ); + return str.substr( 0, trunc ); } std::string hash( const Grid& grid ) { - eckit::MD5 h; - if( grid::StructuredGrid( grid ) && not grid.projection() ) { - auto g = grid::StructuredGrid( 
grid ); - h.add( g.y().data(), g.y().size() * sizeof(double) ); - } else { - grid.hash( h ); - } - return truncate( h.digest() ); + eckit::MD5 h; + if ( grid::StructuredGrid( grid ) && not grid.projection() ) { + auto g = grid::StructuredGrid( grid ); + h.add( g.y().data(), g.y().size() * sizeof( double ) ); + } + else { + grid.hash( h ); + } + return truncate( h.digest() ); } std::string hash( const eckit::Configuration& config ) { - eckit::MD5 h; + eckit::MD5 h; - // Add options and other unique keys - h << "flt" << config.getBool( "flt", false ); + // Add options and other unique keys + h << "flt" << config.getBool( "flt", false ); - return truncate( h.digest() ); + return truncate( h.digest() ); } -} +} // namespace std::string LegendreCacheCreatorLocal::uid() const { - if( unique_identifier_.empty() ) { - std::ostringstream stream; - auto give_up = [&]() { - // We cannot make more assumptions on reusability for different grids - stream << "grid-" << hash( grid_ ); - }; - stream << "local-T" << truncation_ << "-"; - grid::StructuredGrid structured ( grid_ ); - if( grid::GaussianGrid( grid_ ) ) { - // Same cache for any global Gaussian grid - stream << "GaussianN" << grid::GaussianGrid( grid_ ).N(); - } else if( grid::RegularLonLatGrid( grid_ ) ) { - // Same cache for any global regular grid - auto g = grid::RegularLonLatGrid( grid_ ); - - const double dy_2 = 90. / double(g.ny()); - bool shifted_lat = eckit::types::is_approximately_equal( g.y().front(), 90. - dy_2 ) && - eckit::types::is_approximately_equal( g.y().back(), -90. + dy_2 ); - bool standard_lat = eckit::types::is_approximately_equal( g.y().front(), 90. ) && - eckit::types::is_approximately_equal( g.y().back(), -90. 
); - - if( standard_lat ) { - stream << "L" << "-ny" << g.ny(); - } else if( shifted_lat ) { - stream << "S" << "-ny" << g.ny(); - } else { // I don't think we get here, but just in case, give up - give_up(); - } - } else if ( grid::RegularGrid( grid_ ) && not grid_.projection() && structured.yspace().type() == "linear" ) { - RectangularDomain domain( grid_.domain() ); - ASSERT( domain ); - stream << "Regional"; - stream << "-south" << domain.ymin(); - stream << "-north" << domain.ymax(); - stream << "-ny" << structured.ny(); - } else { // It gets too complicated, so let's not be smart - give_up(); + if ( unique_identifier_.empty() ) { + std::ostringstream stream; + auto give_up = [&]() { + // We cannot make more assumptions on reusability for different grids + stream << "grid-" << hash( grid_ ); + }; + stream << "local-T" << truncation_ << "-"; + grid::StructuredGrid structured( grid_ ); + if ( grid::GaussianGrid( grid_ ) ) { + // Same cache for any global Gaussian grid + stream << "GaussianN" << grid::GaussianGrid( grid_ ).N(); + } + else if ( grid::RegularLonLatGrid( grid_ ) ) { + // Same cache for any global regular grid + auto g = grid::RegularLonLatGrid( grid_ ); + + const double dy_2 = 90. / double( g.ny() ); + bool shifted_lat = eckit::types::is_approximately_equal( g.y().front(), 90. - dy_2 ) && + eckit::types::is_approximately_equal( g.y().back(), -90. + dy_2 ); + bool standard_lat = eckit::types::is_approximately_equal( g.y().front(), 90. ) && + eckit::types::is_approximately_equal( g.y().back(), -90. 
); + + if ( standard_lat ) { + stream << "L" + << "-ny" << g.ny(); + } + else if ( shifted_lat ) { + stream << "S" + << "-ny" << g.ny(); + } + else { // I don't think we get here, but just in case, give up + give_up(); + } + } + else if ( grid::RegularGrid( grid_ ) && not grid_.projection() && structured.yspace().type() == "linear" ) { + RectangularDomain domain( grid_.domain() ); + ASSERT( domain ); + stream << "Regional"; + stream << "-south" << domain.ymin(); + stream << "-north" << domain.ymax(); + stream << "-ny" << structured.ny(); + } + else { // It gets too complicated, so let's not be smart + give_up(); + } + stream << "-OPT" << hash( config_ ); + unique_identifier_ = stream.str(); } - stream << "-OPT" << hash( config_ ); - unique_identifier_ = stream.str(); - } - return unique_identifier_; + return unique_identifier_; } LegendreCacheCreatorLocal::~LegendreCacheCreatorLocal() {} -LegendreCacheCreatorLocal::LegendreCacheCreatorLocal( const Grid& grid, int truncation, const eckit::Configuration& config ) : - grid_(grid), - truncation_(truncation), - config_(config) { -} +LegendreCacheCreatorLocal::LegendreCacheCreatorLocal( const Grid& grid, int truncation, + const eckit::Configuration& config ) : + grid_( grid ), + truncation_( truncation ), + config_( config ) {} bool LegendreCacheCreatorLocal::supported() const { - if( not grid::StructuredGrid( grid_ ) ) return false; - if( grid_.projection() ) return false; - return true; + if ( not grid::StructuredGrid( grid_ ) ) return false; + if ( grid_.projection() ) return false; + return true; } void LegendreCacheCreatorLocal::create( const std::string& path ) const { - Trans tmp( grid_, truncation_, config_ | option::type("local") | option::write_legendre( path ) ); + Trans tmp( grid_, truncation_, config_ | option::type( "local" ) | option::write_legendre( path ) ); } Cache LegendreCacheCreatorLocal::create() const { - util::Config export_legendre("export_legendre",true); - Trans tmp( grid_, truncation_, config_ 
| option::type("local") | export_legendre ); - auto impl = dynamic_cast( tmp.get() ); - return impl->export_legendre_; + util::Config export_legendre( "export_legendre", true ); + Trans tmp( grid_, truncation_, config_ | option::type( "local" ) | export_legendre ); + auto impl = dynamic_cast( tmp.get() ); + return impl->export_legendre_; } size_t LegendreCacheCreatorLocal::estimate() const { - return size_t(truncation_ * truncation_ * truncation_) / 2 * sizeof(double); + return size_t( truncation_ * truncation_ * truncation_ ) / 2 * sizeof( double ); } diff --git a/src/atlas/trans/local/LegendreCacheCreatorLocal.h b/src/atlas/trans/local/LegendreCacheCreatorLocal.h index b21237e5b..ff3d14752 100644 --- a/src/atlas/trans/local/LegendreCacheCreatorLocal.h +++ b/src/atlas/trans/local/LegendreCacheCreatorLocal.h @@ -10,8 +10,8 @@ #pragma once -#include "atlas/trans/LegendreCacheCreator.h" #include "atlas/grid/Grid.h" +#include "atlas/trans/LegendreCacheCreator.h" #include "atlas/util/Config.h" //----------------------------------------------------------------------------- @@ -31,7 +31,7 @@ class LegendreCacheCreatorLocal : public trans::LegendreCacheCreatorImpl { virtual std::string uid() const override; - virtual void create(const std::string &path) const override; + virtual void create( const std::string& path ) const override; virtual Cache create() const override; diff --git a/src/atlas/trans/local/LegendrePolynomials.cc b/src/atlas/trans/local/LegendrePolynomials.cc index da29d9704..38084bdd5 100644 --- a/src/atlas/trans/local/LegendrePolynomials.cc +++ b/src/atlas/trans/local/LegendrePolynomials.cc @@ -46,9 +46,9 @@ void compute_zfn( const size_t trc, double zfn[] ) { void compute_legendre_polynomials_lat( const size_t trc, // truncation (in) - const double lat, // latitude in radians (in) - double legpol[], // legendre polynomials - double zfn[] ) { + const double lat, // latitude in radians (in) + double legpol[], // legendre polynomials + double zfn[] ) { auto 
idxmn = [&]( int jm, int jn ) { return ( 2 * trc + 3 - jm ) * jm / 2 + jn - jm; }; auto idxzfn = [&]( int jn, int jk ) { return jk + ( trc + 1 ) * jn; }; { //ATLAS_TRACE( "compute Legendre polynomials" ); @@ -205,9 +205,9 @@ void compute_legendre_polynomials( } void compute_legendre_polynomials_all( const size_t trc, // truncation (in) - const int nlats, // number of latitudes - const double lats[], // latitudes in radians (in) - double legendre[] ) // legendre polynomials for all latitudes + const int nlats, // number of latitudes + const double lats[], // latitudes in radians (in) + double legendre[] ) // legendre polynomials for all latitudes { auto legendre_size = [&]( int truncation ) { return ( truncation + 2 ) * ( truncation + 1 ) / 2; }; std::vector legpol( legendre_size( trc ) ); diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc index c34cc9f3f..6dc294c04 100644 --- a/src/atlas/trans/local/TransLocal.cc +++ b/src/atlas/trans/local/TransLocal.cc @@ -9,8 +9,8 @@ */ #include "atlas/trans/local/TransLocal.h" -#include #include +#include #include "atlas/array.h" #include "atlas/option.h" #include "atlas/parallel/mpi/mpi.h" @@ -173,7 +173,7 @@ int num_n( const int truncation, const int m, const bool symmetric ) { void alloc_aligned( double*& ptr, size_t n ) { const size_t alignment = 64 * sizeof( double ); - ptr = (double*) aligned_alloc( alignment, sizeof( double ) * n ); + ptr = (double*)aligned_alloc( alignment, sizeof( double ) * n ); } void free_aligned( double*& ptr ) { @@ -222,17 +222,17 @@ int fourier_truncation( const int truncation, // truncation // -------------------------------------------------------------------------------------------------------------------- const eckit::linalg::LinearAlgebra& linear_algebra_backend() { - if( eckit::linalg::LinearAlgebra::hasBackend("mkl") ) { - return eckit::linalg::LinearAlgebra::getBackend("mkl"); + if ( eckit::linalg::LinearAlgebra::hasBackend( "mkl" ) ) { + return 
eckit::linalg::LinearAlgebra::getBackend( "mkl" ); } // Default backend return eckit::linalg::LinearAlgebra::backend(); } bool TransLocal::warning( const eckit::Configuration& config ) const { - int warning = warning_; - config.get("warning",warning); - return ( warning > 0 && grid_.size() >= warning ); + int warning = warning_; + config.get( "warning", warning ); + return ( warning > 0 && grid_.size() >= warning ); } TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& domain, const long truncation, @@ -246,8 +246,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma fft_cache_( cache.fft().data() ), fft_cachesize_( cache.fft().size() ), linalg_( linear_algebra_backend() ), - warning_( TransParameters(config).warning() ) -{ + warning_( TransParameters( config ).warning() ) { ATLAS_TRACE( "TransLocal constructor" ); double fft_threshold = 0.0; // fraction of latitudes of the full grid down to which FFT is used. // This threshold needs to be adjusted depending on the dgemm and FFT performance of the machine @@ -289,7 +288,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma } gridGlobal_ = grid; - if( not gridGlobal_.domain().global() ) { + if ( not gridGlobal_.domain().global() ) { if ( grid::RegularGrid( grid_ ) ) { // non-nested regular grid no_nest = true; @@ -300,7 +299,8 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma nlatsLegDomain_ = nlatsNH_; gridGlobal_ = grid_; useGlobalLeg = false; - } else { + } + else { NOTIMP; // non-nested reduced grids are not supported } @@ -338,7 +338,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma // reduce truncation towards the pole for reduced meshes: nlat0_.resize( truncation_ + 1 ); if ( no_nest ) { - for ( int j = 0; j <= truncation_; j++ ) { + for ( int j = 0; j <= truncation_; j++ ) { nlat0_[j] = 0; } } @@ -439,22 +439,22 @@ TransLocal::TransLocal( const Cache& 
cache, const Grid& grid, const Domain& doma // TODO: check this is all aligned... } else { + if ( TransParameters( config ).export_legendre() ) { + ASSERT( not cache_.legendre() ); + export_legendre_ = LegendreCache( sizeof( double ) * ( size_sym + size_asym ) ); + legendre_cachesize_ = export_legendre_.legendre().size(); + legendre_cache_ = export_legendre_.legendre().data(); + legendre_cache_ = std::malloc( legendre_cachesize_ ); + ReadCache legendre( legendre_cache_ ); + legendre_sym_ = legendre.read( size_sym ); + legendre_asym_ = legendre.read( size_asym ); + } + else { + alloc_aligned( legendre_sym_, size_sym ); + alloc_aligned( legendre_asym_, size_asym ); + } - if( TransParameters(config).export_legendre() ) { - ASSERT( not cache_.legendre() ); - export_legendre_ = LegendreCache( sizeof(double) * ( size_sym + size_asym ) ); - legendre_cachesize_ = export_legendre_.legendre().size(); - legendre_cache_ = export_legendre_.legendre().data(); - legendre_cache_ = std::malloc( legendre_cachesize_ ); - ReadCache legendre( legendre_cache_ ); - legendre_sym_ = legendre.read( size_sym ); - legendre_asym_ = legendre.read( size_asym ); - } else { - alloc_aligned( legendre_sym_, size_sym ); - alloc_aligned( legendre_asym_, size_asym ); - } - - ATLAS_TRACE_SCOPE( "Legendre precomputations (structured)" ) { + ATLAS_TRACE_SCOPE( "Legendre precomputations (structured)" ) { compute_legendre_polynomials( truncation_ + 1, nlatsLeg_, lats.data(), legendre_sym_, legendre_asym_, legendre_sym_begin_.data(), legendre_asym_begin_.data() ); @@ -531,7 +531,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma } // other FFT implementations should be added with #elif statements #else - useFFT_ = false; // no FFT implemented => default to dgemm + useFFT_ = false; // no FFT implemented => default to dgemm std::string file_path = TransParameters( config ).write_fft(); if ( file_path.size() ) { std::ofstream write( file_path ); @@ -542,7 +542,9 @@ 
TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma #endif } if ( !useFFT_ ) { - Log::warning() << "WARNING: Spectral transform results may contain aliasing errors. This will be addressed soon." << std::endl; + Log::warning() + << "WARNING: Spectral transform results may contain aliasing errors. This will be addressed soon." + << std::endl; alloc_aligned( fourier_, 2 * ( truncation_ + 1 ) * nlonsMax ); #if !TRANSLOCAL_DGEMM2 @@ -581,8 +583,10 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma if ( unstruct_precomp_ ) { ATLAS_TRACE( "Legendre precomputations (unstructured)" ); - if( warning() ) { - Log::warning() << "WARNING: Precomputations for spectral transforms could take a long time and consume a lot of memory (unstructured grid approach)! Results may contain aliasing errors." << std::endl; + if ( warning() ) { + Log::warning() << "WARNING: Precomputations for spectral transforms could take a long time and consume " + "a lot of memory (unstructured grid approach)! Results may contain aliasing errors." 
+ << std::endl; } std::vector lats( grid_.size() ); @@ -594,7 +598,8 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma compute_legendre_polynomials_all( truncation_, grid_.size(), lats.data(), legendre_ ); } if ( TransParameters( config ).write_legendre().size() ) { - throw eckit::NotImplemented( "Caching for unstructured grids or structured grids with projections not yet implemented", Here() ); + throw eckit::NotImplemented( + "Caching for unstructured grids or structured grids with projections not yet implemented", Here() ); } } } // namespace trans @@ -604,10 +609,12 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma TransLocal::TransLocal( const Grid& grid, const long truncation, const eckit::Configuration& config ) : TransLocal( Cache(), grid, grid.domain(), truncation, config ) {} -TransLocal::TransLocal( const Grid& grid, const Domain& domain, const long truncation, const eckit::Configuration& config ) : +TransLocal::TransLocal( const Grid& grid, const Domain& domain, const long truncation, + const eckit::Configuration& config ) : TransLocal( Cache(), grid, domain, truncation, config ) {} -TransLocal::TransLocal( const Cache& cache, const Grid& grid, const long truncation, const eckit::Configuration& config ) : +TransLocal::TransLocal( const Cache& cache, const Grid& grid, const long truncation, + const eckit::Configuration& config ) : TransLocal( cache, grid, grid.domain(), truncation, config ) {} // -------------------------------------------------------------------------------------------------------------------- @@ -644,8 +651,7 @@ void TransLocal::invtrans( const Field& spfield, Field& gpfield, const eckit::Co // -------------------------------------------------------------------------------------------------------------------- -void TransLocal::invtrans( const FieldSet& spfields, FieldSet& gpfields, - const eckit::Configuration& config ) const { +void TransLocal::invtrans( const 
FieldSet& spfields, FieldSet& gpfields, const eckit::Configuration& config ) const { NOTIMP; } @@ -658,21 +664,21 @@ void TransLocal::invtrans_grad( const Field& spfield, Field& gradfield, const ec // -------------------------------------------------------------------------------------------------------------------- void TransLocal::invtrans_grad( const FieldSet& spfields, FieldSet& gradfields, - const eckit::Configuration& config ) const { + const eckit::Configuration& config ) const { NOTIMP; } // -------------------------------------------------------------------------------------------------------------------- void TransLocal::invtrans_vordiv2wind( const Field& spvor, const Field& spdiv, Field& gpwind, - const eckit::Configuration& config ) const { + const eckit::Configuration& config ) const { NOTIMP; } // -------------------------------------------------------------------------------------------------------------------- void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { + const eckit::Configuration& config ) const { invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields, config ); } @@ -841,9 +847,8 @@ void TransLocal::invtrans_legendre( const int truncation, const int nlats, const // -------------------------------------------------------------------------------------------------------------------- -void TransLocal::invtrans_fourier_regular( const int nlats, const int nlons, const int nb_fields, - double scl_fourier[], double gp_fields[], - const eckit::Configuration& config ) const { +void TransLocal::invtrans_fourier_regular( const int nlats, const int nlons, const int nb_fields, double scl_fourier[], + double gp_fields[], const eckit::Configuration& config ) const { // Fourier transformation: if ( useFFT_ ) { #if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2 @@ -926,8 +931,8 @@ void TransLocal::invtrans_fourier_regular( const int nlats, const 
int nlons, con // -------------------------------------------------------------------------------------------------------------------- void TransLocal::invtrans_fourier_reduced( const int nlats, const grid::StructuredGrid g, const int nb_fields, - double scl_fourier[], double gp_fields[], - const eckit::Configuration& config ) const { + double scl_fourier[], double gp_fields[], + const eckit::Configuration& config ) const { // Fourier transformation: int nlonsMax = g.nxmax(); if ( useFFT_ ) { @@ -984,14 +989,13 @@ void TransLocal::invtrans_fourier_reduced( const int nlats, const grid::Structur // -------------------------------------------------------------------------------------------------------------------- -void TransLocal::invtrans_unstructured_precomp( const int truncation, const int nb_fields, - const int nb_vordiv_fields, const double scalar_spectra[], - double gp_fields[], const eckit::Configuration& config ) const { - +void TransLocal::invtrans_unstructured_precomp( const int truncation, const int nb_fields, const int nb_vordiv_fields, + const double scalar_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { ATLAS_TRACE( "invtrans_uv unstructured" ); - const int nlats = grid_.size(); - const int size_fourier = nb_fields * 2; + const int nlats = grid_.size(); + const int size_fourier = nb_fields * 2; double* legendre; double* scl_fourier; double* scl_fourier_tp; @@ -1018,7 +1022,7 @@ void TransLocal::invtrans_unstructured_precomp( const int truncation, const int { ATLAS_TRACE( "Inverse Fourier Transform (NoFFT)" ); int ip = 0; - for( const PointLonLat p : grid_.lonlat() ) { + for ( const PointLonLat p : grid_.lonlat() ) { const double lon = p.lon() * util::Constants::degreesToRadians(); const double lat = p.lat() * util::Constants::degreesToRadians(); { @@ -1078,12 +1082,14 @@ void TransLocal::invtrans_unstructured_precomp( const int truncation, const int // 
-------------------------------------------------------------------------------------------------------------------- void TransLocal::invtrans_unstructured( const int truncation, const int nb_fields, const int nb_vordiv_fields, - const double scalar_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { + const double scalar_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { ATLAS_TRACE( "invtrans_unstructured" ); - if( warning(config) ) { - Log::warning() << "WARNING: Spectral transforms could take a long time (unstructured grid approach). Results may contain aliasing errors." << std::endl; + if ( warning( config ) ) { + Log::warning() << "WARNING: Spectral transforms could take a long time (unstructured grid approach). Results " + "may contain aliasing errors." + << std::endl; } double* zfn; @@ -1187,8 +1193,8 @@ void TransLocal::invtrans_unstructured( const int truncation, const int nb_field // Andreas Mueller *ECMWF* // void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, - const double scalar_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { + const double scalar_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { if ( nb_scalar_fields > 0 ) { int nb_fields = nb_scalar_fields; @@ -1250,8 +1256,8 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields, // -------------------------------------------------------------------------------------------------------------------- void TransLocal::invtrans( const int nb_vordiv_fields, const double vorticity_spectra[], - const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { + const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { invtrans( 0, nullptr, nb_vordiv_fields, vorticity_spectra, divergence_spectra, gp_fields, config ); } @@ -1277,8 +1283,8 
@@ void extend_truncation( const int old_truncation, const int nb_fields, const dou // -------------------------------------------------------------------------------------------------------------------- void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, - const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], - const eckit::Configuration& config ) const { + const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], + const eckit::Configuration& config ) const { ATLAS_TRACE( "TransLocal::invtrans" ); int nb_gp = grid_.size(); int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; @@ -1291,10 +1297,8 @@ void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spect { ATLAS_TRACE( "extend vordiv" ); // increase truncation in vorticity_spectra and divergence_spectra: - extend_truncation( truncation_, nb_vordiv_fields, vorticity_spectra, - vorticity_spectra_extended.data() ); - extend_truncation( truncation_, nb_vordiv_fields, divergence_spectra, - divergence_spectra_extended.data() ); + extend_truncation( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() ); + extend_truncation( truncation_, nb_vordiv_fields, divergence_spectra, divergence_spectra_extended.data() ); } { @@ -1326,8 +1330,7 @@ void TransLocal::dirtrans( const Field& gpfield, Field& spfield, const eckit::Co // -------------------------------------------------------------------------------------------------------------------- -void TransLocal::dirtrans( const FieldSet& gpfields, FieldSet& spfields, - const eckit::Configuration& config ) const { +void TransLocal::dirtrans( const FieldSet& gpfields, FieldSet& spfields, const eckit::Configuration& config ) const { NOTIMP; // Not implemented and not planned. // Use the TransIFS implementation instead. 
@@ -1336,7 +1339,7 @@ void TransLocal::dirtrans( const FieldSet& gpfields, FieldSet& spfields, // -------------------------------------------------------------------------------------------------------------------- void TransLocal::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& spdiv, - const eckit::Configuration& config ) const { + const eckit::Configuration& config ) const { NOTIMP; // Not implemented and not planned. // Use the TransIFS implementation instead. @@ -1345,7 +1348,7 @@ void TransLocal::dirtrans_wind2vordiv( const Field& gpwind, Field& spvor, Field& // -------------------------------------------------------------------------------------------------------------------- void TransLocal::dirtrans( const int nb_fields, const double scalar_fields[], double scalar_spectra[], - const eckit::Configuration& ) const { + const eckit::Configuration& ) const { NOTIMP; // Not implemented and not planned. // Use the TransIFS implementation instead. @@ -1354,7 +1357,7 @@ void TransLocal::dirtrans( const int nb_fields, const double scalar_fields[], do // -------------------------------------------------------------------------------------------------------------------- void TransLocal::dirtrans( const int nb_fields, const double wind_fields[], double vorticity_spectra[], - double divergence_spectra[], const eckit::Configuration& ) const { + double divergence_spectra[], const eckit::Configuration& ) const { NOTIMP; // Not implemented and not planned. // Use the TransIFS implementation instead. 
diff --git a/src/atlas/trans/local/TransLocal.h b/src/atlas/trans/local/TransLocal.h index abbee7c10..29005083f 100644 --- a/src/atlas/trans/local/TransLocal.h +++ b/src/atlas/trans/local/TransLocal.h @@ -41,12 +41,12 @@ namespace atlas { namespace trans { class LegendreCacheCreatorLocal; -int fourier_truncation( const int truncation, // truncation - const int nx, // number of longitudes - const int nxmax, // maximum nx - const int ndgl, // number of latitudes - const double lat, // latitude in radian - const bool regular ); // regular grid +int fourier_truncation( const int truncation, // truncation + const int nx, // number of longitudes + const int nxmax, // maximum nx + const int ndgl, // number of latitudes + const double lat, // latitude in radian + const bool regular ); // regular grid //----------------------------------------------------------------------------- @@ -66,10 +66,9 @@ class TransLocal : public trans::TransImpl { public: TransLocal( const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() ); TransLocal( const Grid&, const Domain&, const long truncation, const eckit::Configuration& = util::NoConfig() ); - TransLocal( const Cache&, const Grid&, const long truncation, - const eckit::Configuration& = util::NoConfig() ); + TransLocal( const Cache&, const Grid&, const long truncation, const eckit::Configuration& = util::NoConfig() ); TransLocal( const Cache&, const Grid&, const Domain&, const long truncation, - const eckit::Configuration& = util::NoConfig() ); + const eckit::Configuration& = util::NoConfig() ); virtual ~TransLocal(); @@ -133,16 +132,14 @@ class TransLocal : public trans::TransImpl { #endif }; - void invtrans_legendre( const int truncation, const int nlats, const int nb_fields, - const double scalar_spectra[], double scl_fourier[], - const eckit::Configuration& config ) const; + void invtrans_legendre( const int truncation, const int nlats, const int nb_fields, const double scalar_spectra[], + double 
scl_fourier[], const eckit::Configuration& config ) const; void invtrans_fourier_regular( const int nlats, const int nlons, const int nb_fields, double scl_fourier[], double gp_fields[], const eckit::Configuration& config ) const; void invtrans_fourier_reduced( const int nlats, const grid::StructuredGrid g, const int nb_fields, - double scl_fourier[], double gp_fields[], - const eckit::Configuration& config ) const; + double scl_fourier[], double gp_fields[], const eckit::Configuration& config ) const; void invtrans_unstructured_precomp( const int truncation, const int nb_scalar_fields, const int nb_vordiv_fields, const double scalar_spectra[], double gp_fields[], @@ -158,7 +155,7 @@ class TransLocal : public trans::TransImpl { bool warning( const eckit::Configuration& = util::NoConfig() ) const; -friend class LegendreCacheCreatorLocal; + friend class LegendreCacheCreatorLocal; private: Grid grid_; diff --git a/src/atlas/trans/local/VorDivToUVLocal.cc b/src/atlas/trans/local/VorDivToUVLocal.cc index 9d23a9db7..647fe6382 100644 --- a/src/atlas/trans/local/VorDivToUVLocal.cc +++ b/src/atlas/trans/local/VorDivToUVLocal.cc @@ -165,8 +165,8 @@ void vd2uvopt3( const int truncation, // truncation } void VorDivToUVLocal::execute( const int nb_coeff, const int nb_fields, const double vorticity[], - const double divergence[], double U[], double V[], - const eckit::Configuration& config ) const { + const double divergence[], double U[], double V[], + const eckit::Configuration& config ) const { for ( int jm = 0; jm <= truncation_; ++jm ) { vd2uvopt3( truncation_, jm, nb_fields, vorticity, divergence, U, V, config ); } diff --git a/src/atlas/util/Earth.h b/src/atlas/util/Earth.h index 43b69c5b9..9e73a08b0 100644 --- a/src/atlas/util/Earth.h +++ b/src/atlas/util/Earth.h @@ -34,7 +34,7 @@ struct DatumWGS84SemiMajorAxis { //------------------------------------------------------------------------------------------------------ -typedef eckit::geometry::SphereT< DatumIFS > Earth; 
+typedef eckit::geometry::SphereT Earth; //------------------------------------------------------------------------------------------------------ diff --git a/src/atlas/util/Rotation.cc b/src/atlas/util/Rotation.cc index 4fe6fad65..1ea3ed83c 100644 --- a/src/atlas/util/Rotation.cc +++ b/src/atlas/util/Rotation.cc @@ -13,10 +13,10 @@ #include #include -#include "eckit/config/Parametrisation.h" #include "atlas/util/Constants.h" #include "atlas/util/CoordinateEnums.h" #include "atlas/util/UnitSphere.h" +#include "eckit/config/Parametrisation.h" // Temporary option to activate implementation by RMI during ESCAPE #define OLD_IMPLEMENTATION 0 diff --git a/src/atlas/util/SphericalPolygon.cc b/src/atlas/util/SphericalPolygon.cc index 20cf0abdb..2b1c036e7 100644 --- a/src/atlas/util/SphericalPolygon.cc +++ b/src/atlas/util/SphericalPolygon.cc @@ -50,9 +50,7 @@ bool SphericalPolygon::contains( const PointLonLat& P ) const { const double lat = util::Earth::greatCircleLatitudeGivenLongitude( A, B, P.lon() ); ASSERT( !std::isnan( lat ) ); - if ( eckit::types::is_approximately_equal( P.lat(), lat ) ) { - return true; - } + if ( eckit::types::is_approximately_equal( P.lat(), lat ) ) { return true; } wn += ( P.lat() > lat ? -1 : 1 ) * ( APB ? -1 : 1 ); } diff --git a/src/atlas/util/UnitSphere.h b/src/atlas/util/UnitSphere.h index 3a49f230d..de3889222 100644 --- a/src/atlas/util/UnitSphere.h +++ b/src/atlas/util/UnitSphere.h @@ -23,5 +23,5 @@ using eckit::geometry::UnitSphere; //------------------------------------------------------------------------------------------------------ -} // namespace util -} // namespace atlas +} // namespace util +} // namespace atlas diff --git a/src/sandbox/CMakeLists.txt b/src/sandbox/CMakeLists.txt index f4714d0e4..bc5e1748c 100644 --- a/src/sandbox/CMakeLists.txt +++ b/src/sandbox/CMakeLists.txt @@ -7,7 +7,6 @@ # does it submit to any jurisdiction. 
add_subdirectory( fortran_submodule ) -add_subdirectory( fortran_modinc ) add_subdirectory( fortran_object ) add_subdirectory( example_fortran ) add_subdirectory( fortran_acc_fields ) diff --git a/src/sandbox/fortran_modinc/CMakeLists.txt b/src/sandbox/fortran_modinc/CMakeLists.txt deleted file mode 100644 index 7384f276c..000000000 --- a/src/sandbox/fortran_modinc/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ - -if( CMAKE_Fortran_COMPILER_LOADED ) - -add_custom_target( atlas_sandbox_fortran_modinc_includes SOURCES - mod1.h - mod1.f - mod2.h - mod2.f -) - -ecbuild_add_library( TARGET atlas_sandbox_fortran_modinc - CONDITION ON - SOURCES sb_modinc.F90 -) - -endif() diff --git a/src/sandbox/fortran_modinc/mod1.f b/src/sandbox/fortran_modinc/mod1.f deleted file mode 100644 index 9c5b402e3..000000000 --- a/src/sandbox/fortran_modinc/mod1.f +++ /dev/null @@ -1,6 +0,0 @@ -! (C) Copyright 2013-2015 ECMWF. - -subroutine do_something_with_T2(v2) - type(T2) :: v2 - v2%cpp_object_ptr = 2 -end subroutine diff --git a/src/sandbox/fortran_modinc/mod1.h b/src/sandbox/fortran_modinc/mod1.h deleted file mode 100644 index d6e3d6e3a..000000000 --- a/src/sandbox/fortran_modinc/mod1.h +++ /dev/null @@ -1,4 +0,0 @@ -! (C) Copyright 2013 ECMWF. - -type, extends( fckit_object ), public :: T1 -end type diff --git a/src/sandbox/fortran_modinc/mod2.f b/src/sandbox/fortran_modinc/mod2.f deleted file mode 100644 index 20b0a4587..000000000 --- a/src/sandbox/fortran_modinc/mod2.f +++ /dev/null @@ -1,6 +0,0 @@ -! (C) Copyright 2013-2015 ECMWF. - -subroutine do_something_with_T1(v1) - type(T1) :: v1 - v1%cpp_object_ptr = 1 -end subroutine diff --git a/src/sandbox/fortran_modinc/mod2.h b/src/sandbox/fortran_modinc/mod2.h deleted file mode 100644 index be0fca4a2..000000000 --- a/src/sandbox/fortran_modinc/mod2.h +++ /dev/null @@ -1,4 +0,0 @@ -! (C) Copyright 2013 ECMWF. 
- -type, extends( fckit_object ), public :: T2 -end type diff --git a/src/sandbox/fortran_modinc/sb_modinc.F90 b/src/sandbox/fortran_modinc/sb_modinc.F90 deleted file mode 100644 index 28ae9f7e7..000000000 --- a/src/sandbox/fortran_modinc/sb_modinc.F90 +++ /dev/null @@ -1,26 +0,0 @@ -! (C) Copyright 2013-2015 ECMWF. - -#include "atlas/atlas_f.h" - -module sb_mod - -type, public :: fckit_object - integer,public :: cpp_object_ptr -end type - -#include "mod1.h" -#include "mod2.h" -contains -#include "mod1.f" -#include "mod2.f" -end module sb_mod - - -program sb_program -use sb_mod -type(T1) :: v1 -type(T2) :: v2 -integer :: res -res = v1%cpp_object_ptr + v2%cpp_object_ptr -end program sb_program - diff --git a/src/tests/AtlasTestEnvironment.h b/src/tests/AtlasTestEnvironment.h index 9d529329d..9da6d7e3a 100644 --- a/src/tests/AtlasTestEnvironment.h +++ b/src/tests/AtlasTestEnvironment.h @@ -50,8 +50,8 @@ namespace test { if ( atlas::test::barrier_timeout( atlas::test::ATLAS_MPI_BARRIER_TIMEOUT() ) ) { \ atlas::Log::warning() << "\nWARNING: Test \"" << description \ << "\" failed with MPI deadlock. (${ATLAS_MPI_BARRIER_TIMEOUT}=" \ - << atlas::test::ATLAS_MPI_BARRIER_TIMEOUT() \ - << ").\nCalling MPI_Abort..." << std::endl; \ + << atlas::test::ATLAS_MPI_BARRIER_TIMEOUT() << ").\nCalling MPI_Abort..." 
\ + << std::endl; \ eckit::mpi::comm().abort(); \ } \ } \ diff --git a/src/tests/array/test_array.cc b/src/tests/array/test_array.cc index a093e56c2..4b406762e 100644 --- a/src/tests/array/test_array.cc +++ b/src/tests/array/test_array.cc @@ -570,10 +570,11 @@ CASE( "test_wrap" ) { CASE( "test_acc_map" ) { Array* ds = Array::create( 2, 3, 4 ); - if( ATLAS_HAVE_ACC ) { + if ( ATLAS_HAVE_ACC ) { EXPECT( ds->accMap() == true ); EXPECT( ds->accMap() == true ); - } else { + } + else { EXPECT( ds->accMap() == false ); } } diff --git a/src/tests/array/test_table.cc b/src/tests/array/test_table.cc index 4f5b81f16..6e94710a9 100644 --- a/src/tests/array/test_table.cc +++ b/src/tests/array/test_table.cc @@ -9,8 +9,8 @@ */ #include "atlas/array/Table.h" -#include "atlas/runtime/Log.h" #include "atlas/library/defines.h" +#include "atlas/runtime/Log.h" #include "tests/AtlasTestEnvironment.h" using namespace atlas::array; diff --git a/src/tests/functionspace/test_pointcloud.cc b/src/tests/functionspace/test_pointcloud.cc index 636a01257..c01fc6d99 100644 --- a/src/tests/functionspace/test_pointcloud.cc +++ b/src/tests/functionspace/test_pointcloud.cc @@ -8,8 +8,8 @@ * nor does it submit to any jurisdiction. 
*/ -#include "atlas/functionspace/PointCloud.h" #include "atlas/array.h" +#include "atlas/functionspace/PointCloud.h" #include "tests/AtlasTestEnvironment.h" diff --git a/src/tests/grid/test_field.cc b/src/tests/grid/test_field.cc index 0f52d9760..30bc597c2 100644 --- a/src/tests/grid/test_field.cc +++ b/src/tests/grid/test_field.cc @@ -12,7 +12,6 @@ #include "eckit/runtime/Tool.h" #include "eckit/value/CompositeParams.h" -#include "atlas/runtime/Log.h" #include "atlas/array/DataType.h" #include "atlas/array/MakeView.h" #include "atlas/field/FieldSet.h" @@ -24,6 +23,7 @@ #include "atlas/mesh/Nodes.h" #include "atlas/meshgenerator/DelaunayMeshGenerator.h" #include "atlas/parallel/mpi/mpi.h" +#include "atlas/runtime/Log.h" #include "tests/AtlasTestEnvironment.h" diff --git a/src/tests/grid/test_grid_ptr.cc b/src/tests/grid/test_grid_ptr.cc index 224c3a9fd..4cf73042d 100644 --- a/src/tests/grid/test_grid_ptr.cc +++ b/src/tests/grid/test_grid_ptr.cc @@ -13,11 +13,11 @@ #include #include "atlas/grid/Grid.h" -#include "atlas/runtime/Log.h" -#include "atlas/util/Config.h" #include "atlas/mesh/Mesh.h" #include "atlas/meshgenerator/StructuredMeshGenerator.h" #include "atlas/output/Gmsh.h" +#include "atlas/runtime/Log.h" +#include "atlas/util/Config.h" #include "tests/AtlasTestEnvironment.h" diff --git a/src/tests/grid/test_state.cc b/src/tests/grid/test_state.cc index e828014d6..9a61d5c2a 100644 --- a/src/tests/grid/test_state.cc +++ b/src/tests/grid/test_state.cc @@ -16,7 +16,6 @@ #include "eckit/parser/JSON.h" #include "eckit/parser/JSONParser.h" -#include "atlas/library/config.h" #include "atlas/array/ArrayView.h" #include "atlas/array/DataType.h" #include "atlas/array/MakeView.h" @@ -24,6 +23,7 @@ #include "atlas/field/State.h" #include "atlas/grid/Grid.h" #include "atlas/library/Library.h" +#include "atlas/library/config.h" #include "atlas/mesh/Mesh.h" #include "atlas/runtime/Log.h" diff --git a/src/tests/interpolation/test_interpolation_finite_element.cc 
b/src/tests/interpolation/test_interpolation_finite_element.cc index dd7718829..2a3c614a0 100644 --- a/src/tests/interpolation/test_interpolation_finite_element.cc +++ b/src/tests/interpolation/test_interpolation_finite_element.cc @@ -12,9 +12,9 @@ #include "eckit/types/FloatCompare.h" -#include "atlas/functionspace/PointCloud.h" #include "atlas/array.h" #include "atlas/functionspace.h" +#include "atlas/functionspace/PointCloud.h" #include "atlas/grid.h" #include "atlas/interpolation.h" #include "atlas/mesh.h" diff --git a/src/tests/io/test_gmsh.cc b/src/tests/io/test_gmsh.cc index bf96399ff..016b96cc5 100644 --- a/src/tests/io/test_gmsh.cc +++ b/src/tests/io/test_gmsh.cc @@ -12,8 +12,8 @@ #include "atlas/output/Gmsh.h" #include "atlas/output/Output.h" -#include "tests/TestMeshes.h" #include "tests/AtlasTestEnvironment.h" +#include "tests/TestMeshes.h" namespace atlas { namespace test { diff --git a/src/tests/io/test_pointcloud_io.cc b/src/tests/io/test_pointcloud_io.cc index 990b3d6ed..56c86dace 100644 --- a/src/tests/io/test_pointcloud_io.cc +++ b/src/tests/io/test_pointcloud_io.cc @@ -15,7 +15,6 @@ #include "eckit/memory/ScopedPtr.h" #include "eckit/types/FloatCompare.h" -#include "atlas/library/config.h" #include "atlas/array/MakeView.h" #include "atlas/field/Field.h" #include "atlas/field/FieldSet.h" @@ -23,6 +22,7 @@ #include "atlas/functionspace/NodeColumns.h" #include "atlas/grid/Grid.h" #include "atlas/grid/detail/grid/Unstructured.h" +#include "atlas/library/config.h" #include "atlas/mesh/Mesh.h" #include "atlas/mesh/Nodes.h" #include "atlas/output/detail/PointCloudIO.h" diff --git a/src/tests/mesh/test_accumulate_facets.cc b/src/tests/mesh/test_accumulate_facets.cc index ea80c4bc5..d02eecce6 100644 --- a/src/tests/mesh/test_accumulate_facets.cc +++ b/src/tests/mesh/test_accumulate_facets.cc @@ -8,13 +8,13 @@ * nor does it submit to any jurisdiction. 
*/ -#include "atlas/library/config.h" -#include "atlas/library/Library.h" -#include "atlas/mesh/detail/AccumulateFacets.h" #include "atlas/grid/Grid.h" +#include "atlas/library/Library.h" +#include "atlas/library/config.h" #include "atlas/mesh/HybridElements.h" #include "atlas/mesh/Mesh.h" #include "atlas/mesh/actions/BuildEdges.h" +#include "atlas/mesh/detail/AccumulateFacets.h" #include "atlas/meshgenerator/StructuredMeshGenerator.h" #include "atlas/util/Unique.h" diff --git a/src/tests/mesh/test_connectivity.cc b/src/tests/mesh/test_connectivity.cc index ce887a8be..81356dd94 100644 --- a/src/tests/mesh/test_connectivity.cc +++ b/src/tests/mesh/test_connectivity.cc @@ -8,10 +8,10 @@ * nor does it submit to any jurisdiction. */ +#include "atlas/library/defines.h" #include "atlas/mesh/Connectivity.h" #include "atlas/runtime/Log.h" #include "atlas/runtime/Trace.h" -#include "atlas/library/defines.h" #include "tests/AtlasTestEnvironment.h" diff --git a/src/tests/mesh/test_distmesh.cc b/src/tests/mesh/test_distmesh.cc index 92c1e81e6..e1bf2bed3 100644 --- a/src/tests/mesh/test_distmesh.cc +++ b/src/tests/mesh/test_distmesh.cc @@ -31,8 +31,8 @@ #include "atlas/runtime/Log.h" #include "atlas/util/CoordinateEnums.h" -#include "tests/TestMeshes.h" #include "tests/AtlasTestEnvironment.h" +#include "tests/TestMeshes.h" using namespace atlas; using namespace atlas::output; diff --git a/src/tests/mesh/test_elements.cc b/src/tests/mesh/test_elements.cc index 8d6c296e0..ef66b3ef1 100644 --- a/src/tests/mesh/test_elements.cc +++ b/src/tests/mesh/test_elements.cc @@ -15,17 +15,17 @@ #include "eckit/exception/Exceptions.h" #include "eckit/memory/ScopedPtr.h" -#include "atlas/library/config.h" #include "atlas/field/Field.h" +#include "atlas/grid/Grid.h" #include "atlas/library/Library.h" +#include "atlas/library/config.h" #include "atlas/mesh/Connectivity.h" #include "atlas/mesh/ElementType.h" #include "atlas/mesh/Elements.h" -#include "atlas/mesh/Nodes.h" -#include 
"atlas/runtime/Log.h" -#include "atlas/grid/Grid.h" #include "atlas/mesh/Mesh.h" +#include "atlas/mesh/Nodes.h" #include "atlas/meshgenerator/StructuredMeshGenerator.h" +#include "atlas/runtime/Log.h" #include "tests/AtlasTestEnvironment.h" diff --git a/src/tests/mesh/test_halo.cc b/src/tests/mesh/test_halo.cc index 8a5731158..c08c9f3dc 100644 --- a/src/tests/mesh/test_halo.cc +++ b/src/tests/mesh/test_halo.cc @@ -32,8 +32,8 @@ #include "atlas/util/MicroDeg.h" #include "atlas/util/Unique.h" -#include "tests/TestMeshes.h" #include "tests/AtlasTestEnvironment.h" +#include "tests/TestMeshes.h" using namespace atlas::output; using namespace atlas::util; diff --git a/src/tests/parallel/test_haloexchange.cc b/src/tests/parallel/test_haloexchange.cc index de2feb678..cd7d1b7c8 100644 --- a/src/tests/parallel/test_haloexchange.cc +++ b/src/tests/parallel/test_haloexchange.cc @@ -43,12 +43,12 @@ size_t eval_idx( size_t pos, std::array& strides, FirstDim first ) template size_t eval_idx( size_t pos, std::array& strides, FirstDim first, SecondDim second ) { - return first * strides[pos] + eval_idx( pos+1, strides, second ); + return first * strides[pos] + eval_idx( pos + 1, strides, second ); } template size_t eval_idx( size_t pos, std::array& strides, FirstDim first, SecondDim second, ThirdDim third ) { - return first * strides[pos] + eval_idx( pos+1, strides, second, third ); + return first * strides[pos] + eval_idx( pos + 1, strides, second, third ); } template diff --git a/src/tests/trans/test_trans_localcache.cc b/src/tests/trans/test_trans_localcache.cc index e505eba46..6220706ec 100644 --- a/src/tests/trans/test_trans_localcache.cc +++ b/src/tests/trans/test_trans_localcache.cc @@ -14,13 +14,13 @@ #include "eckit/utils/MD5.h" #include "atlas/grid.h" -#include "atlas/option.h" #include "atlas/library/Library.h" #include "atlas/meshgenerator/StructuredMeshGenerator.h" +#include "atlas/option.h" #include "atlas/parallel/mpi/mpi.h" #include "atlas/runtime/Trace.h" -#include 
"atlas/trans/Trans.h" #include "atlas/trans/LegendreCacheCreator.h" +#include "atlas/trans/Trans.h" #include "atlas/util/Constants.h" #include "tests/AtlasTestEnvironment.h" @@ -33,23 +33,23 @@ namespace test { struct AtlasTransEnvironment : public AtlasTestEnvironment { AtlasTransEnvironment( int argc, char* argv[] ) : AtlasTestEnvironment( argc, argv ) { trans::Trans::backend( "local" ); - trans::Trans::config( option::warning(1) ); + trans::Trans::config( option::warning( 1 ) ); } }; -using trans::Trans; +using grid::GaussianGrid; +using grid::StructuredGrid; +using trans::Cache; using trans::LegendreCache; using trans::LegendreCacheCreator; -using trans::Cache; -using grid::StructuredGrid; -using grid::GaussianGrid; -using XSpace = StructuredGrid::XSpace; -using YSpace = StructuredGrid::YSpace; +using trans::Trans; +using XSpace = StructuredGrid::XSpace; +using YSpace = StructuredGrid::YSpace; using LinearSpacing = grid::LinearSpacing; -eckit::PathName CacheFile(const std::string& path) { - eckit::PathName cachefile(path); - if( cachefile.exists() ) cachefile.unlink(); +eckit::PathName CacheFile( const std::string& path ) { + eckit::PathName cachefile( path ); + if ( cachefile.exists() ) cachefile.unlink(); return cachefile; } @@ -58,178 +58,171 @@ std::string hash( const trans::Cache& c ) { } std::string hash( const eckit::PathName& f ) { - return hash( LegendreCache(f) ); + return hash( LegendreCache( f ) ); } -std::string F(int n) { return "F" +std::to_string(n); } -std::string O(int n) { return "O" +std::to_string(n); } -std::string N(int n) { return "N" +std::to_string(n); } -std::string L(int n) { return "L" +std::to_string(n); } -std::string S(int n) { return "S" +std::to_string(n); } -std::string Slon(int n) { return "Slon"+std::to_string(n); } -std::string Slat(int n) { return "Slat"+std::to_string(n); } +std::string F( int n ) { + return "F" + std::to_string( n ); +} +std::string O( int n ) { + return "O" + std::to_string( n ); +} +std::string N( int 
n ) { + return "N" + std::to_string( n ); +} +std::string L( int n ) { + return "L" + std::to_string( n ); +} +std::string S( int n ) { + return "S" + std::to_string( n ); +} +std::string Slon( int n ) { + return "Slon" + std::to_string( n ); +} +std::string Slat( int n ) { + return "Slat" + std::to_string( n ); +} //----------------------------------------------------------------------------- CASE( "test_global_grids" ) { // auto resolutions = { 32, 64, 160, 320, 640 }; - auto resolutions = { 32, 64 }; - for( int n : resolutions ) { - int t = n-1; + auto resolutions = {32, 64}; + for ( int n : resolutions ) { + int t = n - 1; auto cases = { - std::make_pair(F(n),t), - std::make_pair(O(n),t), - std::make_pair(N(n),t), - std::make_pair(L(n),t), - std::make_pair(S(n),t), - std::make_pair(Slon(n),t), - std::make_pair(Slat(n),t), + std::make_pair( F( n ), t ), std::make_pair( O( n ), t ), std::make_pair( N( n ), t ), + std::make_pair( L( n ), t ), std::make_pair( S( n ), t ), std::make_pair( Slon( n ), t ), + std::make_pair( Slat( n ), t ), }; - LegendreCacheCreator F_cache_creator( Grid(F(n)), t ); + LegendreCacheCreator F_cache_creator( Grid( F( n ) ), t ); EXPECT( F_cache_creator.supported() ); - auto F_cachefile = CacheFile("leg_"+F_cache_creator.uid()+".bin"); + auto F_cachefile = CacheFile( "leg_" + F_cache_creator.uid() + ".bin" ); F_cache_creator.create( F_cachefile ); - Cache F_cache = LegendreCache( F_cachefile ); - auto F_cache_hash = hash(F_cache); + Cache F_cache = LegendreCache( F_cachefile ); + auto F_cache_hash = hash( F_cache ); - for( auto _case : cases ) - { + for ( auto _case : cases ) { auto gridname = _case.first; auto truncation = _case.second; - Log::info() << "Case "+gridname+" T"+std::to_string(truncation) << std::endl; - ATLAS_TRACE("Case "+gridname+" T"+std::to_string(truncation)); - Grid grid(gridname); + Log::info() << "Case " + gridname + " T" + std::to_string( truncation ) << std::endl; + ATLAS_TRACE( "Case " + gridname + " T" + 
std::to_string( truncation ) ); + Grid grid( gridname ); LegendreCacheCreator cache_creator( grid, truncation ); EXPECT( cache_creator.supported() ); - auto cachefile = CacheFile("leg_"+cache_creator.uid()+".bin"); + auto cachefile = CacheFile( "leg_" + cache_creator.uid() + ".bin" ); cache_creator.create( cachefile ); - if( GaussianGrid(grid) ) { - EXPECT( hash(cachefile) == F_cache_hash ); - } + if ( GaussianGrid( grid ) ) { EXPECT( hash( cachefile ) == F_cache_hash ); } - ATLAS_TRACE_SCOPE("create without cache") - Trans( grid, truncation ); + ATLAS_TRACE_SCOPE( "create without cache" ) + Trans( grid, truncation ); Cache cache; - ATLAS_TRACE_SCOPE("read cache") - cache = LegendreCache( cachefile ); - ATLAS_TRACE_SCOPE("create with cache") - Trans( cache, grid, truncation ); - + ATLAS_TRACE_SCOPE( "read cache" ) + cache = LegendreCache( cachefile ); + ATLAS_TRACE_SCOPE( "create with cache" ) + Trans( cache, grid, truncation ); } } } CASE( "test_global_grids_with_subdomain" ) { - int n = 64; - int t = n-1; - auto cases = { - std::make_pair(F(n),t), - std::make_pair(O(n),t), - std::make_pair(N(n),t), - std::make_pair(L(n),t), - std::make_pair(S(n),t), - std::make_pair(Slon(n),t), - std::make_pair(Slat(n),t) - }; + int n = 64; + int t = n - 1; + auto cases = {std::make_pair( F( n ), t ), std::make_pair( O( n ), t ), std::make_pair( N( n ), t ), + std::make_pair( L( n ), t ), std::make_pair( S( n ), t ), std::make_pair( Slon( n ), t ), + std::make_pair( Slat( n ), t )}; auto domains = std::vector{ - ZonalBandDomain ( {-10., 5.} ), + ZonalBandDomain( {-10., 5.} ), RectangularDomain( {-1., 1.}, {50., 55.} ), RectangularDomain( {-1., 1.}, {-5., 40.} ), }; - for( auto _case : cases ) - { + for ( auto _case : cases ) { auto gridname = _case.first; auto truncation = _case.second; - ATLAS_TRACE("Case "+gridname+" T"+std::to_string(truncation)); + ATLAS_TRACE( "Case " + gridname + " T" + std::to_string( truncation ) ); Grid global_grid( gridname ); - LegendreCacheCreator 
global_cache_creator( Grid(gridname), truncation ); + LegendreCacheCreator global_cache_creator( Grid( gridname ), truncation ); EXPECT( global_cache_creator.supported() ); auto global_cachefile = CacheFile( "leg_" + global_cache_creator.uid() + ".bin" ); ATLAS_TRACE_SCOPE( "Creating cache " + std::string( global_cachefile ) ) - global_cache_creator.create( global_cachefile ); + global_cache_creator.create( global_cachefile ); Cache global_cache; - ATLAS_TRACE_SCOPE("read cache") - global_cache = LegendreCache( global_cachefile ); - auto global_hash = hash(global_cache); + ATLAS_TRACE_SCOPE( "read cache" ) + global_cache = LegendreCache( global_cachefile ); + auto global_hash = hash( global_cache ); - for( auto domain : domains ) { + for ( auto domain : domains ) { Grid grid( gridname, domain ); - ATLAS_TRACE_SCOPE("create with cache") - Trans( global_cache, global_grid, domain, truncation ); + ATLAS_TRACE_SCOPE( "create with cache" ) + Trans( global_cache, global_grid, domain, truncation ); } } } CASE( "test_regional_grids nested_in_global" ) { - auto cachefile = CacheFile("regional_lonlat.bin"); + auto cachefile = CacheFile( "regional_lonlat.bin" ); auto truncation = 89; Cache cache; - StructuredGrid grid_global( - LinearSpacing( { 0., 360.}, 360, false ), - LinearSpacing( { 90., -90.}, 181, true ) - ); + StructuredGrid grid_global( LinearSpacing( {0., 360.}, 360, false ), LinearSpacing( {90., -90.}, 181, true ) ); EXPECT( grid_global.domain().global() ); LegendreCacheCreator global_cache_creator( grid_global, truncation ); EXPECT( global_cache_creator.supported() ); auto global_cachefile = CacheFile( "leg_" + global_cache_creator.uid() + ".bin" ); - ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) ) - global_cache_creator.create( global_cachefile ); + ATLAS_TRACE_SCOPE( "Creating cache " + std::string( cachefile ) ) + global_cache_creator.create( global_cachefile ); + StructuredGrid regional( LinearSpacing( {0., 180.}, 181 ), LinearSpacing( {0., 45.}, 
46 ) ); - StructuredGrid regional( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) ); - - ATLAS_TRACE_SCOPE("create without cache") - Trans( grid_global, regional.domain(), truncation ); - ATLAS_TRACE_SCOPE("read cache") - cache = LegendreCache( global_cachefile ); - ATLAS_TRACE_SCOPE("create with cache") - Trans( cache, grid_global, regional.domain(), truncation ); + ATLAS_TRACE_SCOPE( "create without cache" ) + Trans( grid_global, regional.domain(), truncation ); + ATLAS_TRACE_SCOPE( "read cache" ) + cache = LegendreCache( global_cachefile ); + ATLAS_TRACE_SCOPE( "create with cache" ) + Trans( cache, grid_global, regional.domain(), truncation ); } CASE( "test_regional_grids not nested" ) { auto truncation = 89; Cache cache; - StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ) ); + StructuredGrid grid( LinearSpacing( {0., 180.}, 181 ), LinearSpacing( {0., 45.}, 46 ) ); LegendreCacheCreator cache_creator( grid, truncation ); EXPECT( cache_creator.supported() ); auto cachefile = CacheFile( "leg_" + cache_creator.uid() + ".bin" ); - ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) ) - cache_creator.create( cachefile ); + ATLAS_TRACE_SCOPE( "Creating cache " + std::string( cachefile ) ) + cache_creator.create( cachefile ); - ATLAS_TRACE_SCOPE("create without cache") - Trans( grid, truncation ); - ATLAS_TRACE_SCOPE("read cache") - cache = LegendreCache( cachefile ); - ATLAS_TRACE_SCOPE("create with cache") - Trans( cache, grid, truncation ); + ATLAS_TRACE_SCOPE( "create without cache" ) + Trans( grid, truncation ); + ATLAS_TRACE_SCOPE( "read cache" ) + cache = LegendreCache( cachefile ); + ATLAS_TRACE_SCOPE( "create with cache" ) + Trans( cache, grid, truncation ); } CASE( "test_regional_grids with projection" ) { - auto cachefile = CacheFile("cache-regional.bin"); + auto cachefile = CacheFile( "cache-regional.bin" ); auto truncation = 89; Cache cache; - Projection projection( util::Config - ( "type", 
"rotated_lonlat") - ("north_pole", std::vector{ 4., 54.} ) ); + Projection projection( util::Config( "type", "rotated_lonlat" )( "north_pole", std::vector{4., 54.} ) ); - StructuredGrid grid( LinearSpacing( {0.,180.}, 181 ), LinearSpacing( {0.,45.}, 46 ), projection ); + StructuredGrid grid( LinearSpacing( {0., 180.}, 181 ), LinearSpacing( {0., 45.}, 46 ), projection ); Trans trans; - ATLAS_TRACE_SCOPE("create without cache") - trans = Trans( grid, truncation ); + ATLAS_TRACE_SCOPE( "create without cache" ) + trans = Trans( grid, truncation ); // Note: caching not yet implemented for unstructured and projected grids LegendreCacheCreator legendre_cache_creator( grid, truncation ); @@ -238,40 +231,32 @@ CASE( "test_regional_grids with projection" ) { std::vector rspecg( trans.spectralCoefficients(), 0. ); std::vector rgp( trans.grid().size() ); - trans.invtrans(1,rspecg.data(),rgp.data()); + trans.invtrans( 1, rspecg.data(), rgp.data() ); } CASE( "test cache creator to file" ) { - auto truncation = 89; - StructuredGrid grid_global( - LinearSpacing( { 0., 360.}, 360, false ), - LinearSpacing( { 90., -90.}, 181, true ) - ); + StructuredGrid grid_global( LinearSpacing( {0., 360.}, 360, false ), LinearSpacing( {90., -90.}, 181, true ) ); LegendreCacheCreator legendre_cache_creator( grid_global, truncation ); auto cachefile = CacheFile( legendre_cache_creator.uid() ); - ATLAS_TRACE_SCOPE( "Creating cache "+std::string(cachefile) ) - legendre_cache_creator.create( cachefile ); + ATLAS_TRACE_SCOPE( "Creating cache " + std::string( cachefile ) ) + legendre_cache_creator.create( cachefile ); - Cache c = legendre_cache_creator.create(); + Cache c = legendre_cache_creator.create(); auto trans1 = Trans( c, grid_global, truncation ); auto trans2 = Trans( c, grid_global, truncation ); } CASE( "test cache creator in memory" ) { - auto truncation = 89; - StructuredGrid grid_global( - LinearSpacing( { 0., 360.}, 360, false ), - LinearSpacing( { 90., -90.}, 181, true ) - ); + 
StructuredGrid grid_global( LinearSpacing( {0., 360.}, 360, false ), LinearSpacing( {90., -90.}, 181, true ) ); LegendreCacheCreator legendre_cache_creator( grid_global, truncation ); Cache cache; ATLAS_TRACE_SCOPE( "Creating cache in memory" ) - cache = legendre_cache_creator.create(); + cache = legendre_cache_creator.create(); auto trans1 = Trans( cache, grid_global, truncation ); auto trans2 = Trans( cache, grid_global, truncation ); diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index e6ac605bf..c775fc9f0 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -29,9 +29,9 @@ #include "atlas/parallel/mpi/mpi.h" #include "atlas/runtime/Trace.h" #include "atlas/trans/Trans.h" +#include "atlas/trans/local/TransLocal.h" #include "atlas/util/Constants.h" #include "atlas/util/Earth.h" -#include "atlas/trans/local/TransLocal.h" #include "tests/AtlasTestEnvironment.h" diff --git a/src/tests/util/test_earth.cc b/src/tests/util/test_earth.cc index 3013aaa27..7ca1f7474 100644 --- a/src/tests/util/test_earth.cc +++ b/src/tests/util/test_earth.cc @@ -135,8 +135,8 @@ CASE( "test_earth_lon_135" ) { CASE( "test_earth_lon_225" ) { const PointLonLat p1[2] = {{225., 0.}, {-135., 0.}}; PointXYZ p2[2]; - Earth::convertSphericalToCartesian( p1[0], p2[0]); - Earth::convertSphericalToCartesian( p1[1], p2[1]); + Earth::convertSphericalToCartesian( p1[0], p2[0] ); + Earth::convertSphericalToCartesian( p1[1], p2[1] ); EXPECT( eckit::types::is_approximately_equal( p2[0].x(), -L ) ); EXPECT( eckit::types::is_approximately_equal( p2[0].y(), -L ) ); diff --git a/src/tests/util/test_indexview.cc b/src/tests/util/test_indexview.cc index a204e2f00..c62cfa00c 100644 --- a/src/tests/util/test_indexview.cc +++ b/src/tests/util/test_indexview.cc @@ -12,8 +12,8 @@ #include "atlas/array/ArrayView.h" #include "atlas/array/IndexView.h" #include "atlas/array/MakeView.h" -#include "atlas/parallel/mpi/mpi.h" #include 
"atlas/library/defines.h" +#include "atlas/parallel/mpi/mpi.h" #include "tests/AtlasTestEnvironment.h" From 51fd87f52b5aa067137548cdfc70e792f2f6ac1f Mon Sep 17 00:00:00 2001 From: Tiago Quintino Date: Thu, 10 May 2018 17:19:20 +0100 Subject: [PATCH 085/123] Fix Bamboo Intel compiler cmake requirements --- bamboo/INTEL-env.sh | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/bamboo/INTEL-env.sh b/bamboo/INTEL-env.sh index b9fb01587..6715f9e8c 100644 --- a/bamboo/INTEL-env.sh +++ b/bamboo/INTEL-env.sh @@ -1,11 +1,16 @@ -# Initialise module environment if it is not +#!/bin/bash + +# initialise module environment if it is not if [[ ! $(command -v module > /dev/null 2>&1) ]]; then . /usr/local/apps/module/init/bash fi -# unload modules not available for intel + +module unload grib_api module unload eccodes module unload emos module unload fftw module unload libemos -module switch gnu intel/16.0.3 +module load cmake/3.10.2 + +module switch gnu intel/16.0.3 \ No newline at end of file From 7b9ccddc555d40ac17294917f63fb5a67438dd21 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 10 May 2018 17:25:46 +0100 Subject: [PATCH 086/123] Add code coverage with travis and codecov --- .travis.yml | 15 ++++++++++++++- README.md | 1 + 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 0247644e7..ec4ac30bf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -54,7 +54,7 @@ matrix: - CACHE_NAME=linux-gcc7-mpich - CXX_COMPILER='g++-7' C_COMPILER='gcc-7' Fortran_COMPILER='gfortran-7' - MPI='mpich' - - ATLAS_CMAKE_OPTIONS="-DCMAKE_BUILD_TYPE=DEBUG" + - ATLAS_CMAKE_OPTIONS="-DCMAKE_BUILD_TYPE=DEBUG -DENABLE_GPROF=ON" addons: apt: sources: ['ubuntu-toolchain-r-test'] @@ -233,6 +233,19 @@ script: ################################################################# - ctest +after_success: + + - | + if [[ "${TRAVIS_OS_NAME}" == "linux" ]]; then + # Creating report + cd ${ATLAS_BUILD_DIR} + lcov --directory . 
--capture --output-file coverage.info # capture coverage info + lcov --remove coverage.info '/usr/*' --output-file coverage.info # filter out system + lcov --list coverage.info #debug info + # Uploading report to CodeCov + bash <(curl -s https://codecov.io/bash) -t 9c489980-d292-499c-8615-af02df3b20d1 || echo "Codecov did not collect coverage reports" + fi + after_failure: - cd ${ATLAS_BUILD_DIR} diff --git a/README.md b/README.md index f0a4014ea..e93a28e98 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ Atlas [![travis master](https://img.shields.io/travis/ecmwf/atlas/master.svg?label=master&logo=travis)](http://travis-ci.org/ecmwf/atlas "master") [![travis develop](https://img.shields.io/travis/ecmwf/atlas/develop.svg?label=develop&logo=travis)](http://travis-ci.org/ecmwf/atlas "develop") +[![codecov](https://codecov.io/gh/ecmwf/atlas/branch/develop/graph/badge.svg)](https://codecov.io/gh/ecmwf/atlas) Project home: https://software.ecmwf.int/wiki/display/ATLAS Contact: Willem Deconinck (willem.deconinck@ecmwf.int) From 63b7adba61fdd5c49b57dfcde1796fbf25d2a017 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 10 May 2018 17:52:02 +0100 Subject: [PATCH 087/123] aligned_alloc not supported on MacOSX --- src/atlas/trans/local/TransLocal.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc index 6dc294c04..fbd2dbc65 100644 --- a/src/atlas/trans/local/TransLocal.cc +++ b/src/atlas/trans/local/TransLocal.cc @@ -173,7 +173,7 @@ int num_n( const int truncation, const int m, const bool symmetric ) { void alloc_aligned( double*& ptr, size_t n ) { const size_t alignment = 64 * sizeof( double ); - ptr = (double*)aligned_alloc( alignment, sizeof( double ) * n ); + posix_memalign( (void**)&ptr, alignment, sizeof( double ) * n ); } void free_aligned( double*& ptr ) { From a46dd171e240ad2b01b7e09bfbf90f3407e2985b Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 10 
May 2018 18:07:46 +0100 Subject: [PATCH 088/123] Disable atlas_test_transgeneral if FFTW is not found --- src/tests/trans/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tests/trans/CMakeLists.txt b/src/tests/trans/CMakeLists.txt index e9cbad59f..8d0f283b8 100644 --- a/src/tests/trans/CMakeLists.txt +++ b/src/tests/trans/CMakeLists.txt @@ -49,6 +49,7 @@ ecbuild_add_test( TARGET atlas_test_transgeneral SOURCES test_transgeneral.cc LIBS atlas ENVIRONMENT ATLAS_TRACE_REPORT=1 + CONDITION ATLAS_HAVE_FFTW ) ecbuild_add_test( TARGET atlas_test_trans_localcache From 2bf70d596a699997eefe4b00b8bedfbb56342f21 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Fri, 11 May 2018 09:48:50 +0100 Subject: [PATCH 089/123] travis: install fftw --- .travis.yml | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/.travis.yml b/.travis.yml index ec4ac30bf..d9389979c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -46,7 +46,7 @@ matrix: addons: apt: sources: ['ubuntu-toolchain-r-test'] - packages: ['g++-5', 'gcc-5', 'gfortran-5'] + packages: ['g++-5', 'gcc-5', 'gfortran-5', 'libfftw3-dev'] - os: linux compiler: gcc @@ -55,10 +55,11 @@ matrix: - CXX_COMPILER='g++-7' C_COMPILER='gcc-7' Fortran_COMPILER='gfortran-7' - MPI='mpich' - ATLAS_CMAKE_OPTIONS="-DCMAKE_BUILD_TYPE=DEBUG -DENABLE_GPROF=ON" + - COVERAGE=ON addons: apt: sources: ['ubuntu-toolchain-r-test'] - packages: ['g++-7', 'gcc-7', 'gfortran-7'] + packages: ['g++-7', 'gcc-7', 'gfortran-7', 'libfftw3-dev', 'lcov'] - os: linux compiler: gcc @@ -143,16 +144,6 @@ install: source ${DEPS_DIR}/pgi/env.sh fi - - ################################################################# - # Install CGAL - ################################################################# - - | - ### Install CGAL - if [[ "${TRAVIS_OS_NAME}" == "osx" ]]; then - brew upgrade cgal || brew install cgal - fi - ################################################################# # Install MPI 
################################################################# @@ -179,6 +170,24 @@ install: fi cmake --version + ################################################################# + # Install FFTW + ################################################################# + - | + ### Install FFTW + if [[ "${TRAVIS_OS_NAME}" == "osx" ]]; then + brew upgrade fftw || brew install fftw + fi + + ################################################################# + # Install CGAL + ################################################################# + - | + ### Install CGAL + if [[ "${TRAVIS_OS_NAME}" == "osx" ]]; then + brew upgrade cgal || brew install cgal + fi + ################################################################# # Install ecbuild ################################################################# @@ -236,7 +245,7 @@ script: after_success: - | - if [[ "${TRAVIS_OS_NAME}" == "linux" ]]; then + if [[ "${COVERAGE}" == "ON" ]]; then # Creating report cd ${ATLAS_BUILD_DIR} lcov --directory . 
--capture --output-file coverage.info # capture coverage info From 045c539a117bd14b9f2c12615e367023aac51b5c Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Fri, 11 May 2018 15:05:41 +0100 Subject: [PATCH 090/123] travis: always verbose ctest (for now) --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index d9389979c..f02f88aa1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -240,7 +240,7 @@ script: ################################################################# # Test Atlas ################################################################# - - ctest + - ctest -VV after_success: From 8e708e33aa3c9bbc0c8d688b02af4386e78a6988 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Fri, 11 May 2018 18:22:18 +0100 Subject: [PATCH 091/123] ATLAS-158 Possible bug exposed for TransLocal::invtrans to regional grid --- src/atlas/trans/local/TransLocal.cc | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc index fbd2dbc65..bbb417962 100644 --- a/src/atlas/trans/local/TransLocal.cc +++ b/src/atlas/trans/local/TransLocal.cc @@ -893,7 +893,27 @@ void TransLocal::invtrans_fourier_regular( const int nlats, const int nlons, con eckit::linalg::Matrix A( fourier_, nlons, ( truncation_ + 1 ) * 2 ); eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats ); eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats ); + +// BUG ATLAS-159: valgrind warns here, saying that B(1,:) is uninitialised +// if workaround above labeled ATLAS-159 is not applied. 
+// +// for( int i=0; i Date: Fri, 11 May 2018 18:23:34 +0100 Subject: [PATCH 092/123] grid::RegularGaussian(N,domain) constructor --- src/atlas/grid/Grid.cc | 4 ++++ src/atlas/grid/Grid.h | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/atlas/grid/Grid.cc b/src/atlas/grid/Grid.cc index c4e04d55c..a0c18922e 100644 --- a/src/atlas/grid/Grid.cc +++ b/src/atlas/grid/Grid.cc @@ -12,6 +12,7 @@ #include #include +#include #include "eckit/config/Parametrisation.h" #include "eckit/exception/Exceptions.h" @@ -104,5 +105,8 @@ ReducedGaussianGrid::ReducedGaussianGrid( const std::vector& nx, const Dom ReducedGaussianGrid::ReducedGaussianGrid( const std::initializer_list& nx ) : ReducedGaussianGrid( std::vector( nx ) ) {} +RegularGaussianGrid::RegularGaussianGrid( int N, const Grid::Domain& domain ) : + RegularGaussianGrid::grid_t( "F" + std::to_string( N ), domain ) {} + } // namespace grid } // namespace atlas diff --git a/src/atlas/grid/Grid.h b/src/atlas/grid/Grid.h index 0ac6aa8dc..9f45b14b6 100644 --- a/src/atlas/grid/Grid.h +++ b/src/atlas/grid/Grid.h @@ -302,7 +302,7 @@ class ReducedGaussianGrid : public Gaussian { public: using grid_t::grid_t; ReducedGaussianGrid( const std::initializer_list& pl ); - ReducedGaussianGrid( const std::vector& pl, const Domain& domain = Domain() ); + ReducedGaussianGrid( const std::vector& pl, const Domain& = Domain() ); operator bool() const { return valid(); } @@ -316,6 +316,7 @@ class RegularGaussianGrid : public Gaussian { public: using grid_t::grid_t; + RegularGaussianGrid( int N , const Domain& = Domain() ); inline double lon( size_t i ) const { return x( i ); } From d4a7a104cd46bf4867cbf37ebf57bca2f2a31a10 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 23 May 2018 17:43:04 +0000 Subject: [PATCH 093/123] FFTW include must be public --- src/atlas/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt index 
9bf883d18..b31ed29cb 100644 --- a/src/atlas/CMakeLists.txt +++ b/src/atlas/CMakeLists.txt @@ -559,8 +559,9 @@ ecbuild_add_library( TARGET atlas PRIVATE_INCLUDES "${CGAL_INCLUDE_DIRS}" "${TRANSI_INCLUDE_DIRS}" - "${MPI_CXX_INCLUDE_DIRS}" - "${FFTW_INCLUDES}" + + PUBLIC_INCLUDES "${FFTW_INCLUDES}" + LIBS eckit_geometry eckit_linalg From 30c9d88f044c1912add061d5cabc91346ac28934 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 30 May 2018 12:09:27 +0100 Subject: [PATCH 094/123] ATLAS-160 Create atlas_atest_mgrids executable to chase problems --- src/atlas/grid/Partitioner.h | 2 +- src/tests/CMakeLists.txt | 1 + src/tests/acceptance_tests/CMakeLists.txt | 4 + src/tests/acceptance_tests/atest_mgrids.cc | 99 ++++++++++++++++++++++ 4 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 src/tests/acceptance_tests/CMakeLists.txt create mode 100644 src/tests/acceptance_tests/atest_mgrids.cc diff --git a/src/atlas/grid/Partitioner.h b/src/atlas/grid/Partitioner.h index 388bbb914..6ba0b0683 100644 --- a/src/atlas/grid/Partitioner.h +++ b/src/atlas/grid/Partitioner.h @@ -63,7 +63,7 @@ class MatchingMeshPartitioner : public Partitioner { public: MatchingMeshPartitioner(); - MatchingMeshPartitioner( const Mesh& mesh, const Config& config ); + MatchingMeshPartitioner( const Mesh& mesh, const Config& config = util::NoConfig() ); }; // ------------------------------------------------------------------ diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 851fd3f6b..8596b6405 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -80,3 +80,4 @@ add_subdirectory( numerics ) add_subdirectory( trans ) add_subdirectory( interpolation ) +add_subdirectory( acceptance_tests ) diff --git a/src/tests/acceptance_tests/CMakeLists.txt b/src/tests/acceptance_tests/CMakeLists.txt new file mode 100644 index 000000000..149843ed4 --- /dev/null +++ b/src/tests/acceptance_tests/CMakeLists.txt @@ -0,0 +1,4 @@ +ecbuild_add_executable( TARGET 
atlas_atest_mgrids + SOURCES atest_mgrids.cc + LIBS atlas + ) diff --git a/src/tests/acceptance_tests/atest_mgrids.cc b/src/tests/acceptance_tests/atest_mgrids.cc new file mode 100644 index 000000000..948a4a9e7 --- /dev/null +++ b/src/tests/acceptance_tests/atest_mgrids.cc @@ -0,0 +1,99 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "atlas/grid.h" +#include "atlas/mesh.h" +#include "atlas/functionspace.h" +#include "atlas/field.h" +#include "atlas/meshgenerator.h" +#include "atlas/option.h" +#include "atlas/parallel/mpi/mpi.h" +#include "atlas/runtime/AtlasTool.h" +#include "atlas/runtime/Log.h" +#include "atlas/util/Config.h" +#include "atlas/output/Gmsh.h" +#include "atlas/numerics/fvm/Method.h" +#include "atlas/interpolation/Interpolation.h" + +#include "atlas/mesh/actions/BuildHalo.h" + +using namespace atlas; + +//------------------------------------------------------------------------------ + +class Program : public AtlasTool { + virtual void execute( const Args& args ); +public: + Program( int argc, char** argv ); + }; + +//----------------------------------------------------------------------------- + +Program::Program( int argc, char** argv ) : AtlasTool( argc, argv ) { + add_option( new SimpleOption( "gridA", "grid A" ) ); + add_option( new SimpleOption( "gridB", "grid B" ) ); + add_option( new SimpleOption( "ghost", "Output ghost elements" ) ); + add_option( new SimpleOption( "haloA", "Halo size" ) ); + add_option( new SimpleOption( "haloB", "Halo size" ) ); +} + 
+//----------------------------------------------------------------------------- + +void Program::execute( const Args& args ) { + + auto ghost = util::Config("ghost",args.getBool("ghost",false)); + auto haloA = option::halo( args.getLong("haloA",1) ); + auto haloB = option::halo( args.getLong("haloB",1) ); + + auto gridA = Grid( args.getString("gridA") ); + auto gridB = Grid( args.getString("gridB") ); + + auto meshgenerator = MeshGenerator( "structured" ); + + auto distA = grid::Distribution( gridA, grid::Partitioner( "trans" ) ); + + auto meshA = meshgenerator.generate( gridA, distA ); + + numerics::fvm::Method fvmA(meshA,haloA); + auto gmshA = output::Gmsh( "meshA.msh", ghost ); + gmshA.write(meshA); + + + auto distB = grid::Distribution( gridB, grid::MatchingMeshPartitioner( meshA ) ); + + auto meshB = meshgenerator.generate( gridB, distB ); + + numerics::fvm::Method fvmB(meshB,haloB); + + // Field fieldB = fvmB.node_columns().createField(); + + output::Gmsh gmshB( "meshB.msh", ghost ); + gmshB.write(meshB); + // gmshB.write(fieldB); + + Interpolation AtoB( option::type("finite-element"), fvmA.node_columns(), fvmB.node_columns() ); + Interpolation BtoA( option::type("finite-element"), fvmB.node_columns(), fvmA.node_columns() ); + +} + +//------------------------------------------------------------------------------ + +int main( int argc, char** argv ) { + Program tool( argc, argv ); + return tool.start(); +} From abbe5a6a55a817642b7691de8e3a48f1c1c43150 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 30 May 2018 14:11:10 +0100 Subject: [PATCH 095/123] ATLAS-160 Relax too strict sanity check --- src/atlas/mesh/actions/BuildParallelFields.cc | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/atlas/mesh/actions/BuildParallelFields.cc b/src/atlas/mesh/actions/BuildParallelFields.cc index cca09905b..20d7cb732 100644 --- a/src/atlas/mesh/actions/BuildParallelFields.cc +++ 
b/src/atlas/mesh/actions/BuildParallelFields.cc @@ -503,15 +503,18 @@ Field& build_edges_partition( Mesh& mesh ) { bool edge_partition_is_same_as_one_of_nodes = ( p == pn1 || p == pn2 ); if ( edge_is_partition_boundary ) { if ( not edge_partition_is_same_as_one_of_nodes ) { - if ( elem1 != edge_to_elem.missing_value() ) { - Log::error() << EDGE( jedge ) << " [p" << p << "] is not correct elem1[p" << elem_part( elem1 ) - << "]" << std::endl; + // If this is a ghost edge, we could trust it. + if( edge_part(jedge) == mypart ) { + if ( elem1 != edge_to_elem.missing_value() ) { + Log::error() << "[" << mypart << "] " << EDGE( jedge ) << " [p" << p << "] is not correct elem1[p" << elem_part( elem1 ) + << "]" << std::endl; + } + else { + Log::error() << "[" << mypart << "] " << EDGE( jedge ) << " [p" << p << "] is not correct elem2[p" << elem_part( elem2 ) + << "]" << std::endl; + } + insane = 1; } - else { - Log::error() << EDGE( jedge ) << " [p" << p << "] is not correct elem2[p" << elem_part( elem2 ) - << "]" << std::endl; - } - insane = 1; } } else { From deab74c0c808a812ac92b6f2e565fd358ba0da01 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 31 May 2018 09:33:10 +0100 Subject: [PATCH 096/123] ATLAS-160 Gmsh writer was converting gidx_t to int --- src/atlas/output/detail/GmshIO.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/atlas/output/detail/GmshIO.cc b/src/atlas/output/detail/GmshIO.cc index f3f4cb25e..c8c2a60e4 100644 --- a/src/atlas/output/detail/GmshIO.cc +++ b/src/atlas/output/detail/GmshIO.cc @@ -753,13 +753,13 @@ void GmshIO::write( const Mesh& mesh, const PathName& file_path ) const { file << nb_nodes << "\n"; double xyz[3] = {0., 0., 0.}; for ( size_t n = 0; n < nb_nodes; ++n ) { - int g = glb_idx( n ); + gidx_t g = glb_idx( n ); for ( size_t d = 0; d < surfdim; ++d ) xyz[d] = coords( n, d ); if ( binary ) { - file.write( reinterpret_cast( &g ), sizeof( int ) ); + file.write( reinterpret_cast( &g ), sizeof( gidx_t ) 
); file.write( reinterpret_cast( &xyz ), sizeof( double ) * 3 ); } else { From aff3f98ad36f864f669ba31f0cd14e2003a9d416 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 31 May 2018 09:45:27 +0100 Subject: [PATCH 097/123] ATLAS-160 Fix edge partition numbers for certain boundary edges --- src/atlas/mesh/actions/BuildParallelFields.cc | 35 ++++++++----------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/src/atlas/mesh/actions/BuildParallelFields.cc b/src/atlas/mesh/actions/BuildParallelFields.cc index 20d7cb732..120a4f86a 100644 --- a/src/atlas/mesh/actions/BuildParallelFields.cc +++ b/src/atlas/mesh/actions/BuildParallelFields.cc @@ -358,6 +358,7 @@ Field& build_edges_partition( Mesh& mesh ) { array::ArrayView node_gidx = array::make_view( nodes.global_index() ); array::ArrayView elem_part = array::make_view( mesh.cells().partition() ); + array::ArrayView elem_halo = array::make_view( mesh.cells().halo() ); auto check_flags = [&]( idx_t jedge, int flag ) { idx_t ip1 = edge_nodes( jedge, 0 ); @@ -412,6 +413,9 @@ Field& build_edges_partition( Mesh& mesh ) { // if( not domain_bdry(jedge) ) { bdry_edges.push_back( edge_glb_idx( jedge ) ); p = elem_part( elem1 ); + if( pn1 != p && pn2 == pn1 && elem_halo( elem1 ) > 0 ) { + p = pn1; + } // } } else if ( p != elem_part( elem1 ) && p != elem_part( elem2 ) ) { @@ -503,18 +507,15 @@ Field& build_edges_partition( Mesh& mesh ) { bool edge_partition_is_same_as_one_of_nodes = ( p == pn1 || p == pn2 ); if ( edge_is_partition_boundary ) { if ( not edge_partition_is_same_as_one_of_nodes ) { - // If this is a ghost edge, we could trust it. 
- if( edge_part(jedge) == mypart ) { - if ( elem1 != edge_to_elem.missing_value() ) { - Log::error() << "[" << mypart << "] " << EDGE( jedge ) << " [p" << p << "] is not correct elem1[p" << elem_part( elem1 ) - << "]" << std::endl; - } - else { - Log::error() << "[" << mypart << "] " << EDGE( jedge ) << " [p" << p << "] is not correct elem2[p" << elem_part( elem2 ) - << "]" << std::endl; - } - insane = 1; + if ( elem1 != edge_to_elem.missing_value() ) { + Log::error() << "[" << mypart << "] " << EDGE( jedge ) << " [p" << p << "] is not correct elem1[p" << elem_part( elem1 ) + << "]" << std::endl; + } + else { + Log::error() << "[" << mypart << "] " << EDGE( jedge ) << " [p" << p << "] is not correct elem2[p" << elem_part( elem2 ) + << "]" << std::endl; } + insane = 1; } } else { @@ -541,12 +542,6 @@ Field& build_edges_partition( Mesh& mesh ) { // DEBUG_VAR( " the part is " << edge_part(jedge) ); //#endif // } - // /// TODO: Make sure that the edge-partition is at least one of the - // partition numbers of the - // /// neighbouring elements. - // /// Because of this problem, the size of the halo should be set to 2 - // instead of 1!!! - // /// This will be addressed with JIRA issue ATLAS-12 return edges.partition(); } @@ -670,13 +665,13 @@ Field& build_edges_remote_idx( Mesh& mesh ) { else { std::stringstream msg; #ifdef DEBUGGING_PARFIELDS - msg << "Edge(" << recv_edge( jedge, 2 ) << "[p" << recv_edge( jedge, 4 ) << "] " - << recv_edge( jedge, 3 ) << "[p" << recv_edge( jedge, 5 ) << "])"; + msg << "Edge(" << recv_edge[ jedge * varsize + 2 ] << "[p" << recv_edge[ jedge * varsize + 4 ] << "] " + << recv_edge[ jedge *varsize + 3 ] << "[p" << recv_edge[ jedge * varsize + 5 ] << "])"; #else msg << "Edge with uid " << recv_uid; #endif msg << " requested by rank [" << jpart << "]"; - msg << " that should be owned is not found. This could be because no " + msg << " that should be owned by " << mpi::comm().rank() << " is not found. 
This could be because no " "halo was built."; // throw eckit::SeriousBug(msg.str(),Here()); Log::warning() << msg.str() << " @ " << Here() << std::endl; From fba1f3f6c6c54098b7370adfd9c28e41e24df52b Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 31 May 2018 09:56:28 +0100 Subject: [PATCH 098/123] ATLAS-160 Fix wrong global index calculations in periodic region --- src/atlas/mesh/HybridElements.cc | 2 + src/atlas/mesh/HybridElements.h | 3 ++ src/atlas/mesh/actions/BuildHalo.cc | 64 +++++++++++++++++++++-------- src/atlas/mesh/actions/BuildHalo.h | 4 +- 4 files changed, 54 insertions(+), 19 deletions(-) diff --git a/src/atlas/mesh/HybridElements.cc b/src/atlas/mesh/HybridElements.cc index ad898f6f0..91c345438 100644 --- a/src/atlas/mesh/HybridElements.cc +++ b/src/atlas/mesh/HybridElements.cc @@ -49,6 +49,7 @@ static void set_uninitialized_fields_to_zero( HybridElements& elems, size_t begi IndexView remote_index = make_indexview( elems.remote_index() ); ArrayView partition = make_view( elems.partition() ); ArrayView halo = make_view( elems.halo() ); + ArrayView flags = make_view( elems.flags() ); ArrayView patch = make_view( elems.field( "patch" ) ); for ( size_t j = begin; j < elems.size(); ++j ) { @@ -68,6 +69,7 @@ HybridElements::HybridElements() : size_( 0 ), elements_size_(), elements_begin_ add( Field( "remote_idx", make_datatype(), make_shape( size() ) ) ); add( Field( "partition", make_datatype(), make_shape( size() ) ) ); add( Field( "halo", make_datatype(), make_shape( size() ) ) ); + add( Field( "flags", make_datatype(), make_shape( size() ) ) ); add( Field( "patch", make_datatype(), make_shape( size() ) ) ); set_uninitialized_fields_to_zero( *this, 0 ); diff --git a/src/atlas/mesh/HybridElements.h b/src/atlas/mesh/HybridElements.h index 59369dbea..3e5f9b5f9 100644 --- a/src/atlas/mesh/HybridElements.h +++ b/src/atlas/mesh/HybridElements.h @@ -119,6 +119,9 @@ class HybridElements : public eckit::Owned { const Field& halo() const { return field( 
"halo" ); } Field& halo() { return field( "halo" ); } + const Field& flags() const { return field( "flags" ); } + Field& flags() { return field( "flags" ); } + // -- Modifiers /// @brief Add a new element type with given number of elements diff --git a/src/atlas/mesh/actions/BuildHalo.cc b/src/atlas/mesh/actions/BuildHalo.cc index 776901bad..acb5fdf6d 100644 --- a/src/atlas/mesh/actions/BuildHalo.cc +++ b/src/atlas/mesh/actions/BuildHalo.cc @@ -194,14 +194,25 @@ void make_cells_global_index_human_readable( const mesh::actions::BuildHalo& bui if ( do_all ) { cells_to_edit.resize( cells_glb_idx.size() ); - for ( size_t i = 0; i < cells_glb_idx.size(); ++i ) + for ( size_t i = 0; i < cells_glb_idx.size(); ++i ) { cells_to_edit[i] = i; + } } else { + size_t nb_cells_to_edit(0); + for( const auto& new_cells : build_halo.periodic_cells_local_index_ ) { + nb_cells_to_edit += new_cells.size(); + } + cells_to_edit.resize( nb_cells_to_edit ); + int c{ 0 }; + int i{ 0 }; + for ( int t = 0; t < cells.nb_types(); ++t ) { + for ( idx_t p : build_halo.periodic_cells_local_index_[t] ) { + cells_to_edit[i++] = c + p; + } + c += cells.elements( t ).size(); + } glb_idx_max = cells.global_index().metadata().getLong( "max", 0 ); - cells_to_edit.resize( build_halo.periodic_cells_local_index_.size() ); - for ( size_t i = 0; i < cells_to_edit.size(); ++i ) - cells_to_edit[i] = build_halo.periodic_cells_local_index_[i]; } std::vector glb_idx( cells_to_edit.size() ); @@ -497,6 +508,8 @@ class BuildHaloHelper { std::vector> elem_part; + std::vector> elem_flags; + std::vector> elem_type; Buffers( Mesh& mesh ) { @@ -511,6 +524,7 @@ class BuildHaloHelper { elem_nodes_id.resize( mpi_size ); elem_nodes_displs.resize( mpi_size ); elem_part.resize( mpi_size ); + elem_flags.resize( mpi_size ); elem_type.resize( mpi_size ); } @@ -556,6 +570,7 @@ class BuildHaloHelper { comm.allToAll( send.elem_nodes_id, recv.elem_nodes_id ); comm.allToAll( send.elem_part, recv.elem_part ); comm.allToAll( 
send.elem_type, recv.elem_type ); + comm.allToAll( send.elem_flags, recv.elem_flags ); comm.allToAll( send.elem_nodes_displs, recv.elem_nodes_displs ); } } @@ -577,6 +592,7 @@ class BuildHaloHelper { array::ArrayView ghost; mesh::HybridElements::Connectivity* elem_nodes; array::ArrayView elem_part; + array::ArrayView elem_flags; array::ArrayView elem_glb_idx; std::vector bdry_nodes; @@ -598,6 +614,7 @@ class BuildHaloHelper { ghost( array::make_view( mesh.nodes().ghost() ) ), elem_nodes( &mesh.cells().node_connectivity() ), elem_part( array::make_view( mesh.cells().partition() ) ), + elem_flags( array::make_view( mesh.cells().flags() ) ), elem_glb_idx( array::make_view( mesh.cells().global_index() ) ), compute_uid( mesh ) { halo = 0; @@ -618,6 +635,7 @@ class BuildHaloHelper { elem_nodes = &mesh.cells().node_connectivity(); elem_part = array::make_view( mesh.cells().partition() ); + elem_flags = array::make_view( mesh.cells().flags() ); elem_glb_idx = array::make_view( mesh.cells().global_index() ); } @@ -665,6 +683,7 @@ class BuildHaloHelper { buf.elem_glb_idx[p].resize( nb_elems ); buf.elem_part[p].resize( nb_elems ); + buf.elem_flags[p].resize( nb_elems, Topology::NONE ); buf.elem_type[p].resize( nb_elems ); buf.elem_nodes_id[p].resize( nb_elem_nodes ); buf.elem_nodes_displs[p].resize( nb_elems ); @@ -675,6 +694,7 @@ class BuildHaloHelper { buf.elem_glb_idx[p][jelem] = elem_glb_idx( ielem ); buf.elem_part[p][jelem] = elem_part( ielem ); + Topology::set( buf.elem_flags[p][jelem], elem_flags( ielem ) ); buf.elem_type[p][jelem] = mesh.cells().type_idx( ielem ); for ( size_t jnode = 0; jnode < elem_nodes->cols( ielem ); ++jnode ) buf.elem_nodes_id[p][jelemnode++] = compute_uid( ( *elem_nodes )( ielem, jnode ) ); @@ -728,6 +748,7 @@ class BuildHaloHelper { buf.elem_glb_idx[p].resize( nb_elems ); buf.elem_part[p].resize( nb_elems ); + buf.elem_flags[p].resize( nb_elems, Topology::NONE ); buf.elem_type[p].resize( nb_elems ); buf.elem_nodes_id[p].resize( nb_elem_nodes 
); buf.elem_nodes_displs[p].resize( nb_elems ); @@ -736,6 +757,7 @@ class BuildHaloHelper { buf.elem_nodes_displs[p][jelem] = jelemnode; size_t ielem = elems[jelem]; buf.elem_part[p][jelem] = elem_part( ielem ); + Topology::set( buf.elem_flags[p][jelem], elem_flags( ielem ) | newflags ); buf.elem_type[p][jelem] = mesh.cells().type_idx( ielem ); std::vector crds( elem_nodes->cols( ielem ) * 2 ); for ( size_t jnode = 0; jnode < elem_nodes->cols( ielem ); ++jnode ) { @@ -751,7 +773,7 @@ class BuildHaloHelper { } } - void add_nodes( Buffers& buf, bool periodic ) { + void add_nodes( Buffers& buf ) { ATLAS_TRACE(); const size_t mpi_size = mpi::comm().size(); @@ -827,7 +849,10 @@ class BuildHaloHelper { lonlat( loc_idx, XX ) = pll.lon(); lonlat( loc_idx, YY ) = pll.lat(); - if ( periodic ) status.new_periodic_ghost_points.push_back( loc_idx ); + if ( Topology::check( flags( loc_idx ), Topology::PERIODIC ) and not + Topology::check( flags( loc_idx ), Topology::BC ) ) { + status.new_periodic_ghost_points.push_back( loc_idx ); + } // make sure new node was not already there { @@ -849,7 +874,7 @@ class BuildHaloHelper { } } - void add_elements( Buffers& buf, bool periodic ) { + void add_elements( Buffers& buf ) { ATLAS_TRACE(); const size_t mpi_size = mpi::comm().size(); @@ -924,6 +949,7 @@ class BuildHaloHelper { auto elem_type_glb_idx = elements.view( mesh.cells().global_index() ); auto elem_type_part = elements.view( mesh.cells().partition() ); auto elem_type_halo = elements.view( mesh.cells().halo() ); + auto elem_type_flags = elements.view( mesh.cells().flags() ); auto elem_type_patch = elements.view( mesh.cells().field( "patch" ) ); // Copy information in new elements @@ -936,21 +962,24 @@ class BuildHaloHelper { elem_type_part( loc_idx ) = buf.elem_part[jpart][jelem]; elem_type_halo( loc_idx ) = halo + 1; elem_type_patch( loc_idx ) = 0; - for ( size_t n = 0; n < node_connectivity.cols(); ++n ) + elem_type_flags( loc_idx ) = buf.elem_flags[jpart][jelem]; + for ( size_t 
n = 0; n < node_connectivity.cols(); ++n ) { node_connectivity.set( loc_idx, n, uid2node[buf.elem_nodes_id[jpart][buf.elem_nodes_displs[jpart][jelem] + n]] ); + } - if ( periodic ) { status.new_periodic_ghost_cells[t].push_back( old_size + new_elem ); } - + if( Topology::check( elem_type_flags( loc_idx ), Topology::PERIODIC ) ) { + status.new_periodic_ghost_cells[t].push_back( old_size + new_elem ); + } ++new_elem; } } } } - void add_buffers( Buffers& buf, bool periodic = false ) { - add_nodes( buf, periodic ); - add_elements( buf, periodic ); + void add_buffers( Buffers& buf ) { + add_nodes( buf ); + add_elements( buf ); update(); } }; @@ -1196,9 +1225,11 @@ void increase_halo_periodic( BuildHaloHelper& helper, const PeriodicPoints& peri #ifdef DEBUG_OUTPUT Log::debug() << "recv: \n" << recvmesh << std::endl; #endif - helper.add_buffers( recvmesh, /* periodic = */ true ); + helper.add_buffers( recvmesh ); } +BuildHalo::BuildHalo(Mesh& mesh) : mesh_( mesh ), periodic_cells_local_index_( mesh.cells().nb_types() ) {} + void BuildHalo::operator()( int nb_elems ) { ATLAS_TRACE( "BuildHalo" ); @@ -1240,12 +1271,10 @@ void BuildHalo::operator()( int nb_elems ) { for ( idx_t p : helper.status.new_periodic_ghost_points ) { periodic_points_local_index_.push_back( p ); } - int c( 0 ); for ( int t = 0; t < mesh_.cells().nb_types(); ++t ) { for ( idx_t p : helper.status.new_periodic_ghost_cells[t] ) { - periodic_cells_local_index_.push_back( c + p ); + periodic_cells_local_index_[t].push_back( p ); } - c += mesh_.cells().elements( t ).size(); } std::stringstream ss; @@ -1268,6 +1297,7 @@ void BuildHalo::operator()( int nb_elems ) { make_nodes_global_index_human_readable( *this, mesh_.nodes(), /*do_all*/ false ); + make_cells_global_index_human_readable( *this, mesh_.cells(), /*do_all*/ false ); // renumber_nodes_glb_idx (mesh_.nodes()); diff --git a/src/atlas/mesh/actions/BuildHalo.h b/src/atlas/mesh/actions/BuildHalo.h index 99839b60f..542af321f 100644 --- 
a/src/atlas/mesh/actions/BuildHalo.h +++ b/src/atlas/mesh/actions/BuildHalo.h @@ -23,12 +23,12 @@ namespace actions { class BuildHalo { public: - BuildHalo( Mesh& mesh ) : mesh_( mesh ) {} + BuildHalo( Mesh& mesh ); void operator()( int nb_elems ); public: std::vector periodic_points_local_index_; - std::vector periodic_cells_local_index_; + std::vector> periodic_cells_local_index_; private: Mesh& mesh_; From 206f676e55e88401b7c6f7876714a5b031f07790 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 31 May 2018 10:50:31 +0100 Subject: [PATCH 099/123] ATLAS-160 Remove cells.field("patch") in favour of cells.field("flags") --- src/atlas/mesh/Elements.h | 3 +++ src/atlas/mesh/HybridElements.cc | 4 +--- src/atlas/mesh/Nodes.h | 3 ++- src/atlas/mesh/PartitionPolygon.cc | 11 ++++++++--- src/atlas/mesh/actions/BuildDualMesh.cc | 7 ++++++- src/atlas/mesh/actions/BuildEdges.cc | 11 +++++++---- src/atlas/mesh/actions/BuildHalo.cc | 8 +++++--- src/atlas/mesh/detail/AccumulateFacets.cc | 7 ++++++- src/atlas/meshgenerator/StructuredMeshGenerator.cc | 11 ++++------- 9 files changed, 42 insertions(+), 23 deletions(-) diff --git a/src/atlas/mesh/Elements.h b/src/atlas/mesh/Elements.h index 12024510f..b765f00ea 100644 --- a/src/atlas/mesh/Elements.h +++ b/src/atlas/mesh/Elements.h @@ -116,6 +116,9 @@ class Elements : public eckit::Owned { const Field& halo() const { return hybrid_elements_->halo(); } Field& halo() { return hybrid_elements_->halo(); } + const Field& flags() const { return hybrid_elements_->flags(); } + Field& flags() { return hybrid_elements_->flags(); } + template array::LocalView view( const Field& ) const; diff --git a/src/atlas/mesh/HybridElements.cc b/src/atlas/mesh/HybridElements.cc index 91c345438..d663321d2 100644 --- a/src/atlas/mesh/HybridElements.cc +++ b/src/atlas/mesh/HybridElements.cc @@ -50,14 +50,13 @@ static void set_uninitialized_fields_to_zero( HybridElements& elems, size_t begi ArrayView partition = make_view( elems.partition() ); 
ArrayView halo = make_view( elems.halo() ); ArrayView flags = make_view( elems.flags() ); - ArrayView patch = make_view( elems.field( "patch" ) ); for ( size_t j = begin; j < elems.size(); ++j ) { global_index( j ) = 0; remote_index( j ) = 0; partition( j ) = 0; halo( j ) = 0; - patch( j ) = 0; + flags( j ) = 0; } } } // namespace @@ -70,7 +69,6 @@ HybridElements::HybridElements() : size_( 0 ), elements_size_(), elements_begin_ add( Field( "partition", make_datatype(), make_shape( size() ) ) ); add( Field( "halo", make_datatype(), make_shape( size() ) ) ); add( Field( "flags", make_datatype(), make_shape( size() ) ) ); - add( Field( "patch", make_datatype(), make_shape( size() ) ) ); set_uninitialized_fields_to_zero( *this, 0 ); node_connectivity_ = &add( new Connectivity( "node" ) ); diff --git a/src/atlas/mesh/Nodes.h b/src/atlas/mesh/Nodes.h index 4c4713fb9..46e0d4abb 100644 --- a/src/atlas/mesh/Nodes.h +++ b/src/atlas/mesh/Nodes.h @@ -47,7 +47,8 @@ class Nodes : public eckit::Owned { WEST = ( 1 << 4 ), EAST = ( 1 << 5 ), NORTH = ( 1 << 6 ), - SOUTH = ( 1 << 7 ) + SOUTH = ( 1 << 7 ), + PATCH = ( 1 << 8 ) }; }; diff --git a/src/atlas/mesh/PartitionPolygon.cc b/src/atlas/mesh/PartitionPolygon.cc index aadf2e8b3..c18c3ebe7 100644 --- a/src/atlas/mesh/PartitionPolygon.cc +++ b/src/atlas/mesh/PartitionPolygon.cc @@ -27,13 +27,18 @@ util::Polygon::edge_set_t compute_edges( const detail::MeshImpl& mesh, size_t ha const Elements& elements = mesh.cells().elements( t ); const BlockConnectivity& conn = elements.node_connectivity(); - auto field_patch = elements.view( elements.field( "patch" ) ); - auto field_halo = elements.view( elements.field( "halo" ) ); + auto field_flags = elements.view( elements.flags() ); + auto field_halo = elements.view( elements.halo() ); + + auto patch = [&field_flags]( size_t e ) { + using Topology = atlas::mesh::Nodes::Topology; + return Topology::check( field_flags( e ), Topology::PATCH ); + }; const size_t nb_nodes = elements.nb_nodes(); for 
( size_t j = 0; j < elements.size(); ++j ) { - if ( field_patch( j ) == 0 && field_halo( j ) <= halo ) { + if ( patch( j ) == 0 && field_halo( j ) <= halo ) { for ( size_t k = 0; k < nb_nodes; ++k ) { util::Polygon::edge_t edge( conn( j, k ), conn( j, ( k + 1 ) % nb_nodes ) ); if ( !edges.erase( edge.reverse() ) ) { edges.insert( edge ); } diff --git a/src/atlas/mesh/actions/BuildDualMesh.cc b/src/atlas/mesh/actions/BuildDualMesh.cc index 52ed5edb3..2fe9fb693 100644 --- a/src/atlas/mesh/actions/BuildDualMesh.cc +++ b/src/atlas/mesh/actions/BuildDualMesh.cc @@ -167,7 +167,12 @@ void add_median_dual_volume_contribution_cells( const mesh::HybridElements& cell const array::ArrayView edge_centroids = array::make_view( edges.field( "centroids_xy" ) ); const mesh::HybridElements::Connectivity& cell_edge_connectivity = cells.edge_connectivity(); const mesh::HybridElements::Connectivity& edge_node_connectivity = edges.node_connectivity(); - auto patch = array::make_view( cells.field( "patch" ) ); + auto field_flags = array::make_view( cells.flags() ); + + auto patch = [&field_flags]( size_t e ) { + using Topology = atlas::mesh::Nodes::Topology; + return Topology::check( field_flags( e ), Topology::PATCH ); + }; // special ordering for bit-identical results size_t nb_cells = cells.size(); diff --git a/src/atlas/mesh/actions/BuildEdges.cc b/src/atlas/mesh/actions/BuildEdges.cc index e4ea06ddc..d06296268 100644 --- a/src/atlas/mesh/actions/BuildEdges.cc +++ b/src/atlas/mesh/actions/BuildEdges.cc @@ -113,13 +113,16 @@ void build_element_to_edge_connectivity( Mesh& mesh ) { } } + // Verify that all edges have been found + auto field_flags = array::make_view( mesh.cells().flags() ); + auto patch = [&field_flags]( size_t e ) { + using Topology = atlas::mesh::Nodes::Topology; + return Topology::check( field_flags( e ), Topology::PATCH ); + }; + for ( size_t jcell = 0; jcell < mesh.cells().size(); ++jcell ) { - // If this is a patched element (over the pole), there were no edges - 
// created, so skip the check. - auto patch = array::make_view( mesh.cells().field( "patch" ) ); if ( patch( jcell ) ) continue; - for ( size_t jcol = 0; jcol < cell_edge_connectivity.cols( jcell ); ++jcol ) { if ( cell_edge_connectivity( jcell, jcol ) == cell_edge_connectivity.missing_value() ) { const array::ArrayView gidx = array::make_view( mesh.nodes().global_index() ); diff --git a/src/atlas/mesh/actions/BuildHalo.cc b/src/atlas/mesh/actions/BuildHalo.cc index acb5fdf6d..3fb385ceb 100644 --- a/src/atlas/mesh/actions/BuildHalo.cc +++ b/src/atlas/mesh/actions/BuildHalo.cc @@ -307,7 +307,11 @@ void build_lookup_node2elem( const Mesh& mesh, Node2Elem& node2elem ) { } const mesh::HybridElements::Connectivity& elem_nodes = mesh.cells().node_connectivity(); - auto patched = array::make_view( mesh.cells().field( "patch" ) ); + auto field_flags = array::make_view( mesh.cells().flags() ); + auto patched = [&field_flags]( size_t e ) { + using Topology = atlas::mesh::Nodes::Topology; + return Topology::check( field_flags( e ), Topology::PATCH ); + }; size_t nb_elems = mesh.cells().size(); for ( size_t elem = 0; elem < nb_elems; ++elem ) { @@ -950,7 +954,6 @@ class BuildHaloHelper { auto elem_type_part = elements.view( mesh.cells().partition() ); auto elem_type_halo = elements.view( mesh.cells().halo() ); auto elem_type_flags = elements.view( mesh.cells().flags() ); - auto elem_type_patch = elements.view( mesh.cells().field( "patch" ) ); // Copy information in new elements size_t new_elem( 0 ); @@ -961,7 +964,6 @@ class BuildHaloHelper { elem_type_glb_idx( loc_idx ) = std::abs( buf.elem_glb_idx[jpart][jelem] ); elem_type_part( loc_idx ) = buf.elem_part[jpart][jelem]; elem_type_halo( loc_idx ) = halo + 1; - elem_type_patch( loc_idx ) = 0; elem_type_flags( loc_idx ) = buf.elem_flags[jpart][jelem]; for ( size_t n = 0; n < node_connectivity.cols(); ++n ) { node_connectivity.set( diff --git a/src/atlas/mesh/detail/AccumulateFacets.cc b/src/atlas/mesh/detail/AccumulateFacets.cc 
index 827a1e3ed..5262377ca 100644 --- a/src/atlas/mesh/detail/AccumulateFacets.cc +++ b/src/atlas/mesh/detail/AccumulateFacets.cc @@ -37,7 +37,12 @@ void accumulate_facets( const mesh::HybridElements& cells, const mesh::Nodes& no for ( size_t t = 0; t < cells.nb_types(); ++t ) { const mesh::Elements& elements = cells.elements( t ); const mesh::BlockConnectivity& elem_nodes = elements.node_connectivity(); - auto patch = elements.view( elements.field( "patch" ) ); + auto elem_flags = elements.view( elements.flags() ); + + auto patch = [&elem_flags]( size_t e ) { + using Topology = atlas::mesh::Nodes::Topology; + return Topology::check( elem_flags( e ), Topology::PATCH ); + }; size_t nb_elems = elements.size(); size_t nb_nodes_in_facet = 2; diff --git a/src/atlas/meshgenerator/StructuredMeshGenerator.cc b/src/atlas/meshgenerator/StructuredMeshGenerator.cc index 6ee5e8936..028e90a1e 100644 --- a/src/atlas/meshgenerator/StructuredMeshGenerator.cc +++ b/src/atlas/meshgenerator/StructuredMeshGenerator.cc @@ -1005,12 +1005,12 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con mesh::HybridElements::Connectivity& node_connectivity = mesh.cells().node_connectivity(); array::ArrayView cells_glb_idx = array::make_view( mesh.cells().global_index() ); array::ArrayView cells_part = array::make_view( mesh.cells().partition() ); - array::ArrayView cells_patch = array::make_view( mesh.cells().field( "patch" ) ); + array::ArrayView cells_flags = array::make_view( mesh.cells().flags() ); /* * label all patch cells a non-patch */ - cells_patch.assign( 0 ); + cells_flags.assign( 0 ); /* * Fill in connectivity tables with global node indices first @@ -1050,7 +1050,6 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con node_connectivity.set( jcell, quad_nodes ); cells_glb_idx( jcell ) = jcell + 1; cells_part( jcell ) = mypart; - cells_patch( jcell ) = 0; } else // This is a triag { @@ -1088,7 +1087,6 @@ void 
StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con node_connectivity.set( jcell, triag_nodes ); cells_glb_idx( jcell ) = jcell + 1; cells_part( jcell ) = mypart; - cells_patch( jcell ) = 0; } } } @@ -1107,7 +1105,6 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con node_connectivity.set( jcell, triag_nodes ); cells_glb_idx( jcell ) = jcell + 1; cells_part( jcell ) = mypart; - cells_patch( jcell ) = 0; } } else if ( patch_north_pole ) { @@ -1140,7 +1137,7 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con cells_glb_idx( jcell ) = jcell + 1; cells_part( jcell ) = mypart; - cells_patch( jcell ) = 1; // mark cell as "patch" + Topology::set( cells_flags( jcell ), Topology::PATCH ); if ( jbackward == jforward + 2 ) break; @@ -1203,7 +1200,7 @@ void StructuredMeshGenerator::generate_mesh( const grid::StructuredGrid& rg, con cells_glb_idx( jcell ) = jcell + 1; cells_part( jcell ) = mypart; - cells_patch( jcell ) = 1; // mark cell as "patch" + Topology::set( cells_flags( jcell ), Topology::PATCH ); if ( jbackward == jforward + 2 ) break; From af1d543a95a37b0cec353776718f4e3dd9611d13 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 31 May 2018 10:51:08 +0100 Subject: [PATCH 100/123] Reenable writing of field --- src/tests/acceptance_tests/atest_mgrids.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/tests/acceptance_tests/atest_mgrids.cc b/src/tests/acceptance_tests/atest_mgrids.cc index 948a4a9e7..70287a218 100644 --- a/src/tests/acceptance_tests/atest_mgrids.cc +++ b/src/tests/acceptance_tests/atest_mgrids.cc @@ -55,14 +55,14 @@ Program::Program( int argc, char** argv ) : AtlasTool( argc, argv ) { //----------------------------------------------------------------------------- void Program::execute( const Args& args ) { - + auto ghost = util::Config("ghost",args.getBool("ghost",false)); auto haloA = option::halo( args.getLong("haloA",1) ); 
auto haloB = option::halo( args.getLong("haloB",1) ); - + auto gridA = Grid( args.getString("gridA") ); auto gridB = Grid( args.getString("gridB") ); - + auto meshgenerator = MeshGenerator( "structured" ); auto distA = grid::Distribution( gridA, grid::Partitioner( "trans" ) ); @@ -73,18 +73,17 @@ void Program::execute( const Args& args ) { auto gmshA = output::Gmsh( "meshA.msh", ghost ); gmshA.write(meshA); - auto distB = grid::Distribution( gridB, grid::MatchingMeshPartitioner( meshA ) ); auto meshB = meshgenerator.generate( gridB, distB ); numerics::fvm::Method fvmB(meshB,haloB); - // Field fieldB = fvmB.node_columns().createField(); + Field fieldB = fvmB.node_columns().createField(); output::Gmsh gmshB( "meshB.msh", ghost ); gmshB.write(meshB); - // gmshB.write(fieldB); + gmshB.write(fieldB); Interpolation AtoB( option::type("finite-element"), fvmA.node_columns(), fvmB.node_columns() ); Interpolation BtoA( option::type("finite-element"), fvmB.node_columns(), fvmA.node_columns() ); From 0e910aa24b9a96191e3e6979fb044862757409cf Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 31 May 2018 09:33:45 +0100 Subject: [PATCH 101/123] Move some output to trace channel --- .../detail/partitioner/MatchingMeshPartitionerBruteForce.cc | 2 +- .../detail/partitioner/MatchingMeshPartitionerLonLatPolygon.cc | 2 +- .../partitioner/MatchingMeshPartitionerSphericalPolygon.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerBruteForce.cc b/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerBruteForce.cc index efe07b5c9..594405720 100644 --- a/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerBruteForce.cc +++ b/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerBruteForce.cc @@ -100,7 +100,7 @@ void MatchingMeshPartitionerBruteForce::partition( const Grid& grid, int partiti } { - eckit::ProgressTimer timer( "Partitioning target", grid.size(), "point", double( 10 ), 
atlas::Log::info() ); + eckit::ProgressTimer timer( "Partitioning target", grid.size(), "point", double( 10 ), atlas::Log::trace() ); for ( size_t i = 0; i < grid.size(); ++i, ++timer ) { partitioning[i] = -1; const PointLonLat& P( coordinates[i] ); diff --git a/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerLonLatPolygon.cc b/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerLonLatPolygon.cc index af30c791e..0fc38d583 100644 --- a/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerLonLatPolygon.cc +++ b/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerLonLatPolygon.cc @@ -47,7 +47,7 @@ void MatchingMeshPartitionerLonLatPolygon::partition( const Grid& grid, int part const util::LonLatPolygon poly( prePartitionedMesh_.polygon( 0 ), prePartitionedMesh_.nodes().lonlat() ); { - eckit::ProgressTimer timer( "Partitioning", grid.size(), "point", double( 10 ), atlas::Log::info() ); + eckit::ProgressTimer timer( "Partitioning", grid.size(), "point", double( 10 ), atlas::Log::trace() ); size_t i = 0; for ( const PointXY Pxy : grid.xy() ) { diff --git a/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerSphericalPolygon.cc b/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerSphericalPolygon.cc index a4e42abaa..39d52178f 100644 --- a/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerSphericalPolygon.cc +++ b/src/atlas/grid/detail/partitioner/MatchingMeshPartitionerSphericalPolygon.cc @@ -46,7 +46,7 @@ void MatchingMeshPartitionerSphericalPolygon::partition( const Grid& grid, int p const util::SphericalPolygon poly( prePartitionedMesh_.polygon( 0 ), prePartitionedMesh_.nodes().lonlat() ); { - eckit::ProgressTimer timer( "Partitioning", grid.size(), "point", double( 10 ), atlas::Log::info() ); + eckit::ProgressTimer timer( "Partitioning", grid.size(), "point", double( 10 ), atlas::Log::trace() ); size_t i = 0; for ( const PointXY Pxy : grid.xy() ) { From cca0973a1bdc619d2458cf1a2bc50912d9669975 Mon Sep 17 00:00:00 2001 From: 
Willem Deconinck Date: Thu, 31 May 2018 17:52:33 +0100 Subject: [PATCH 102/123] ATLAS-161 Fix parallel FiniteElement interpolation --- .../interpolation/method/FiniteElement.cc | 7 ++- src/atlas/mesh/actions/BuildCellCentres.cc | 60 +++++++++++++------ src/atlas/mesh/actions/BuildCellCentres.h | 7 ++- 3 files changed, 54 insertions(+), 20 deletions(-) diff --git a/src/atlas/interpolation/method/FiniteElement.cc b/src/atlas/interpolation/method/FiniteElement.cc index b7b255ef4..9a70c4a15 100644 --- a/src/atlas/interpolation/method/FiniteElement.cc +++ b/src/atlas/interpolation/method/FiniteElement.cc @@ -88,7 +88,10 @@ void FiniteElement::setup( const FunctionSpace& source ) { Field source_xyz = mesh::actions::BuildXYZField( "xyz" )( meshSource ); // generate barycenters of each triangle & insert them on a kd-tree - Field cell_centres = mesh::actions::BuildCellCentres( "centre" )( meshSource ); + util::Config config; + config.set("name", "centre "); + config.set("flatten_virtual_elements", false ); + Field cell_centres = mesh::actions::BuildCellCentres( config )( meshSource ); eckit::ScopedPtr eTree( create_element_kdtree( cell_centres ) ); @@ -150,6 +153,7 @@ void FiniteElement::setup( const FunctionSpace& source ) { "---------------------\n"; PointLonLat pll; util::Earth::convertCartesianToSpherical( p, pll ); + if( pll.lon() < 0 ) pll.lon() += 360.; Log::debug() << "Failed to project point (lon,lat)=" << pll << '\n'; Log::debug() << failures_log.str(); } @@ -166,6 +170,7 @@ void FiniteElement::setup( const FunctionSpace& source ) { const PointXYZ p{( *ocoords_ )( *i, 0 ), ( *ocoords_ )( *i, 1 ), ( *ocoords_ )( *i, 2 )}; // lookup point PointLonLat pll; util::Earth::convertCartesianToSpherical( p, pll ); + if( pll.lon() < 0 ) pll.lon() += 360.; msg << "\t(lon,lat) = " << pll << "\n"; } diff --git a/src/atlas/mesh/actions/BuildCellCentres.cc b/src/atlas/mesh/actions/BuildCellCentres.cc index 71ff4dd80..2c2e1f773 100644 --- 
a/src/atlas/mesh/actions/BuildCellCentres.cc +++ b/src/atlas/mesh/actions/BuildCellCentres.cc @@ -26,10 +26,25 @@ namespace actions { //---------------------------------------------------------------------------------------------------------------------- -BuildCellCentres::BuildCellCentres( const std::string& field_name ) : field_name_( field_name ) {} +BuildCellCentres::BuildCellCentres( const std::string& field_name, bool force_recompute ) : + field_name_( field_name ), + force_recompute_( force_recompute ), + flatten_virtual_elements_( true ) { +} + +BuildCellCentres::BuildCellCentres( eckit::Configuration& config ) : + field_name_( config.getString( "name", "centre" ) ), + force_recompute_( config.getBool( "force_recompute", false ) ), + flatten_virtual_elements_( config.getBool( "flatten_virtual_elements", true) ) { +} Field& BuildCellCentres::operator()( Mesh& mesh ) const { + bool recompute = force_recompute_; if ( !mesh.cells().has_field( field_name_ ) ) { + mesh.cells().add( Field( field_name_, array::make_datatype(), array::make_shape( mesh.cells().size(), 3 ) ) ); + recompute = true; + } + if ( recompute ) { mesh::Nodes& nodes = mesh.nodes(); array::ArrayView coords = array::make_view( nodes.field( "xyz" ) ); @@ -37,8 +52,7 @@ Field& BuildCellCentres::operator()( Mesh& mesh ) const { if ( nodes.metadata().has( "NbRealPts" ) ) { firstVirtualPoint = nodes.metadata().get( "NbRealPts" ); } size_t nb_cells = mesh.cells().size(); - array::ArrayView centroids = array::make_view( mesh.cells().add( - Field( field_name_, array::make_datatype(), array::make_shape( nb_cells, 3 ) ) ) ); + auto centroids = array::make_view( mesh.cells().field( field_name_ ) ); const mesh::HybridElements::Connectivity& cell_node_connectivity = mesh.cells().node_connectivity(); for ( size_t e = 0; e < nb_cells; ++e ) { @@ -68,23 +82,33 @@ Field& BuildCellCentres::operator()( Mesh& mesh ) const { int nb_unique_nodes = int( nb_cell_nodes ) - nb_equal_nodes; if ( nb_unique_nodes < 3 ) { 
continue; } - // calculate centroid by averaging coordinates (uses only "real" nodes) - size_t nb_real_nodes = 0; - for ( size_t n = 0; n < nb_cell_nodes; ++n ) { - const size_t i = size_t( cell_node_connectivity( e, n ) ); - if ( i < firstVirtualPoint ) { - ++nb_real_nodes; - centroids( e, XX ) += coords( i, XX ); - centroids( e, YY ) += coords( i, YY ); - centroids( e, ZZ ) += coords( i, ZZ ); + if( flatten_virtual_elements_ ) { + // calculate centroid by averaging coordinates (uses only "real" nodes) + size_t nb_real_nodes = 0; + for ( size_t n = 0; n < nb_cell_nodes; ++n ) { + const size_t i = size_t( cell_node_connectivity( e, n ) ); + if ( i < firstVirtualPoint ) { + ++nb_real_nodes; + centroids( e, XX ) += coords( i, XX ); + centroids( e, YY ) += coords( i, YY ); + centroids( e, ZZ ) += coords( i, ZZ ); + } } - } - if ( nb_real_nodes > 1 ) { - const double average_coefficient = 1. / static_cast( nb_real_nodes ); - centroids( e, XX ) *= average_coefficient; - centroids( e, YY ) *= average_coefficient; - centroids( e, ZZ ) *= average_coefficient; + if ( nb_real_nodes > 1 ) { + const double average_coefficient = 1. 
/ static_cast( nb_real_nodes ); + centroids( e, XX ) *= average_coefficient; + centroids( e, YY ) *= average_coefficient; + centroids( e, ZZ ) *= average_coefficient; + } + } else { + const double average_coefficient = 1./ static_cast( nb_cell_nodes ); + for ( size_t n = 0; n < nb_cell_nodes; ++n ) { + const size_t i = size_t( cell_node_connectivity( e, n ) ); + for ( size_t d=0; d<3; ++d ) { + centroids( e, d ) += coords( i, d ) * average_coefficient; + } + } } } } diff --git a/src/atlas/mesh/actions/BuildCellCentres.h b/src/atlas/mesh/actions/BuildCellCentres.h index 0df60152b..6f4182cd3 100644 --- a/src/atlas/mesh/actions/BuildCellCentres.h +++ b/src/atlas/mesh/actions/BuildCellCentres.h @@ -12,6 +12,8 @@ #include +#include "atlas/util/Config.h" + namespace atlas { class Mesh; @@ -23,13 +25,16 @@ namespace actions { /// Generates the cell centres on each cell class BuildCellCentres { public: - BuildCellCentres( const std::string& field_name = "centre" ); + BuildCellCentres( const std::string& field_name = "centre", bool force_recompute = false ); + BuildCellCentres( eckit::Configuration& ); /// @note Correct only for Linear Triangles and Quadrilaterals Field& operator()( Mesh& ) const; private: std::string field_name_; + bool force_recompute_; + bool flatten_virtual_elements_; }; } // namespace actions From 90648c8a8bdd6dfe0cd8b43f6c12d8406c189a5f Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 31 May 2018 17:01:13 +0000 Subject: [PATCH 103/123] ATLAS-161 Add more options to atlas_atest_mgrids --- src/tests/acceptance_tests/atest_mgrids.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/tests/acceptance_tests/atest_mgrids.cc b/src/tests/acceptance_tests/atest_mgrids.cc index 70287a218..c71df090f 100644 --- a/src/tests/acceptance_tests/atest_mgrids.cc +++ b/src/tests/acceptance_tests/atest_mgrids.cc @@ -50,6 +50,8 @@ Program::Program( int argc, char** argv ) : AtlasTool( argc, argv ) { add_option( new SimpleOption( 
"ghost", "Output ghost elements" ) ); add_option( new SimpleOption( "haloA", "Halo size" ) ); add_option( new SimpleOption( "haloB", "Halo size" ) ); + add_option( new SimpleOption( "no-forward", "no forward interpolation" ) ); + add_option( new SimpleOption( "no-backward", "no backward interpolation" ) ); } //----------------------------------------------------------------------------- @@ -85,9 +87,12 @@ void Program::execute( const Args& args ) { gmshB.write(meshB); gmshB.write(fieldB); - Interpolation AtoB( option::type("finite-element"), fvmA.node_columns(), fvmB.node_columns() ); - Interpolation BtoA( option::type("finite-element"), fvmB.node_columns(), fvmA.node_columns() ); - + if( not args.getBool("no-forward",false) ) { + Interpolation AtoB( option::type("finite-element"), fvmA.node_columns(), fvmB.node_columns() ); + } + if( not args.getBool("no-backward",false) ) { + Interpolation BtoA( option::type("finite-element"), fvmB.node_columns(), fvmA.node_columns() ); + } } //------------------------------------------------------------------------------ From fc3b8c43970da00fe9743c57a5815c9f5ccbdce4 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 30 May 2018 18:56:47 +0100 Subject: [PATCH 104/123] some cleanup --- src/atlas/trans/local/TransLocal.cc | 47 ++++++++++++------------ src/atlas/trans/local/VorDivToUVLocal.cc | 32 ++++++++-------- src/tests/trans/test_transgeneral.cc | 13 ++++--- 3 files changed, 47 insertions(+), 45 deletions(-) diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc index bbb417962..d8d945dc7 100644 --- a/src/atlas/trans/local/TransLocal.cc +++ b/src/atlas/trans/local/TransLocal.cc @@ -529,7 +529,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma // write.close(); // } } - // other FFT implementations should be added with #elif statements + // other FFT implementations should be added with #elif statements #else useFFT_ = false; // no FFT implemented => 
default to dgemm std::string file_path = TransParameters( config ).write_fft(); @@ -894,26 +894,25 @@ void TransLocal::invtrans_fourier_regular( const int nlats, const int nlons, con eckit::linalg::Matrix B( scl_fourier, ( truncation_ + 1 ) * 2, nb_fields * nlats ); eckit::linalg::Matrix C( gp_fields, nlons, nb_fields * nlats ); -// BUG ATLAS-159: valgrind warns here, saying that B(1,:) is uninitialised -// if workaround above labeled ATLAS-159 is not applied. -// -// for( int i=0; i builder( "local" ); // -------------------------------------------------------------------------------------------------------------------- // Routine to copy spectral data into internal storage form of IFS trans // Ported to C++ by: Andreas Mueller *ECMWF* -void prfi1bopt3( const int truncation, - const int km, // zonal wavenumber - const int nb_fields, // number of fields - const double rspec[], // spectral data - double pia[] ) // spectral components in data layout of trans library +void prfi1b( const int truncation, + const int km, // zonal wavenumber + const int nb_fields, // number of fields + const double rspec[], // spectral data + double pia[] ) // spectral components in data layout of trans library { int ilcm = truncation + 1 - km, ioff = ( 2 * truncation - km + 3 ) * km, nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2; @@ -58,14 +58,14 @@ void prfi1bopt3( const int truncation, // ECMWF Research Department documentation of the IFS // Temperton, 1991, MWR 119 p1303 // Ported to C++ by: Andreas Mueller *ECMWF* -void vd2uvopt3( const int truncation, // truncation - const int km, // zonal wavenumber - const int nb_vordiv_fields, // number of vorticity and divergence fields - const double vorticity_spectra[], // spectral data of vorticity - const double divergence_spectra[], // spectral data of divergence - double U[], // spectral data of U - double V[], // spectral data of V - const eckit::Configuration& config ) { +void vd2uv( const int truncation, // truncation + const int 
km, // zonal wavenumber + const int nb_vordiv_fields, // number of vorticity and divergence fields + const double vorticity_spectra[], // spectral data of vorticity + const double divergence_spectra[], // spectral data of divergence + double U[], // spectral data of U + double V[], // spectral data of V + const eckit::Configuration& config ) { int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2; // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991] @@ -112,8 +112,8 @@ void vd2uvopt3( const int truncation, // truncation std::vector rdiv( 2 * nb_vordiv_fields * nlei1 ); std::vector ru( 2 * nb_vordiv_fields * nlei1 ); std::vector rv( 2 * nb_vordiv_fields * nlei1 ); - prfi1bopt3( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() ); - prfi1bopt3( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() ); + prfi1b( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() ); + prfi1b( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() ); // compute eq.(2.12) and (2.13) in [Temperton 1991]: if ( km == 0 ) { @@ -168,7 +168,7 @@ void VorDivToUVLocal::execute( const int nb_coeff, const int nb_fields, const do const double divergence[], double U[], double V[], const eckit::Configuration& config ) const { for ( int jm = 0; jm <= truncation_; ++jm ) { - vd2uvopt3( truncation_, jm, nb_fields, vorticity, divergence, U, V, config ); + vd2uv( truncation_, jm, nb_fields, vorticity, divergence, U, V, config ); } } diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index c775fc9f0..2b6ccc1a9 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -970,18 +970,18 @@ CASE( "test_trans_domain" ) { //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} ); Domain testdomain1 = ZonalBandDomain( {-10., 5.} ); //Domain testdomain1 = RectangularDomain( {-1., 1.}, {50., 55.} ); - Domain testdomain2 = RectangularDomain( {-1., 1.}, {-5., 40.} ); + 
Domain testdomain2 = RectangularDomain( {-10., 10.}, {-5., 40.} ); // Grid: (Adjust the following line if the test takes too long!) Grid global_grid( "O64" ); Grid g1( global_grid, testdomain1 ); - //Grid g2( gridString, testdomain2 ); + Grid g2( global_grid, testdomain2 ); bool fourierTrc1 = true; bool fourierTrc2 = false; using grid::StructuredGrid; using LinearSpacing = grid::LinearSpacing; - StructuredGrid g2( LinearSpacing( {0., 180.}, 181 ), LinearSpacing( {0., 45.}, 46 ) ); + //StructuredGrid g2( LinearSpacing( {0., 180.}, 181 ), LinearSpacing( {0., 45.}, 46 ) ); int trc = 63; //Log::info() << "rgp1:" << std::endl; @@ -994,9 +994,12 @@ CASE( "test_trans_domain" ) { trans::Cache cache; ATLAS_TRACE_SCOPE( "Read cache" ) cache = trans::LegendreCache( "legcache.bin" ); Trace t2( Here(), "translocal2 construction" ); + trans::Trans transLocal2( cache, global_grid, g2.domain(), trc, + option::type( "local" ) | option::write_legendre( "legcache2.bin" ) ); + //trans::Trans transLocal2( cache, g2, trc, option::type( "local" ) ); //trans::Trans transLocal2( cache, g2, trc, // option::type( "local" ) | option::no_fft() ); - trans::Trans transLocal2( g2, trc, option::type( "local" ) ); + //trans::Trans transLocal2( g2, trc, option::type( "local" ) ); t2.stop(); double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 @@ -1293,7 +1296,7 @@ CASE( "test_trans_invtrans" ) { } #endif - //----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- #if 0 CASE( "test_trans_fourier_truncation" ) { From 00568966a14448cde9c9c00f529a6ee3fc0dc683 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 31 May 2018 19:08:25 +0100 Subject: [PATCH 105/123] fixed vd2uv for reduced grids and improved its performance by moving loop over wavenumbers inside vd2uv --- src/atlas/trans/local/TransLocal.cc | 2 +- src/atlas/trans/local/VorDivToUVLocal.cc | 141 
++++++++++++----------- src/tests/trans/test_transgeneral.cc | 8 +- 3 files changed, 77 insertions(+), 74 deletions(-) diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc index d8d945dc7..604114dec 100644 --- a/src/atlas/trans/local/TransLocal.cc +++ b/src/atlas/trans/local/TransLocal.cc @@ -1255,7 +1255,7 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields, int idx = 0; for ( int jfld = 0; jfld < nb_fields; jfld++ ) { for ( int jlat = 0; jlat < g.ny(); jlat++ ) { - for ( int jlon = 0; jlon < g.nxmax(); jlon++ ) { + for ( int jlon = 0; jlon < g.nx( jlat ); jlon++ ) { gp_fields[idx] /= coslats[jlat]; idx++; } diff --git a/src/atlas/trans/local/VorDivToUVLocal.cc b/src/atlas/trans/local/VorDivToUVLocal.cc index dd171b718..798f28f5e 100644 --- a/src/atlas/trans/local/VorDivToUVLocal.cc +++ b/src/atlas/trans/local/VorDivToUVLocal.cc @@ -12,6 +12,7 @@ #include // for std::sqrt #include "atlas/functionspace/Spectral.h" #include "atlas/runtime/Log.h" +#include "atlas/runtime/Trace.h" #include "atlas/util/Earth.h" using atlas::FunctionSpace; @@ -59,18 +60,20 @@ void prfi1b( const int truncation, // Temperton, 1991, MWR 119 p1303 // Ported to C++ by: Andreas Mueller *ECMWF* void vd2uv( const int truncation, // truncation - const int km, // zonal wavenumber const int nb_vordiv_fields, // number of vorticity and divergence fields const double vorticity_spectra[], // spectral data of vorticity const double divergence_spectra[], // spectral data of divergence double U[], // spectral data of U double V[], // spectral data of V const eckit::Configuration& config ) { + ATLAS_TRACE( "vd2uv" ); + std::vector repsnm( ( truncation + 1 ) * ( truncation + 6 ) / 2 ); + int idx = 0; int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2; + double ra = util::Earth::radius(); + std::vector rlapin( truncation + 3 ); // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991] - std::vector repsnm( ( truncation + 1 ) * ( 
truncation + 6 ) / 2 ); - int idx = 0; for ( int jm = 0; jm <= truncation; ++jm ) { for ( int jn = jm; jn <= truncation + 2; ++jn, ++idx ) { repsnm[idx] = std::sqrt( ( jn * jn - jm * jm ) / ( 4. * jn * jn - 1. ) ); @@ -79,8 +82,6 @@ void vd2uv( const int truncation, // truncation repsnm[0] = 0.; // rlapin: constant factor from eq.(2.2) and (2.3) in [Temperton 1991] - double ra = util::Earth::radius(); - std::vector rlapin( truncation + 3 ); for ( int jn = 1; jn <= truncation + 2; ++jn ) { rlapin[jn] = -ra * ra / ( jn * ( jn + 1. ) ); } @@ -90,76 +91,78 @@ void vd2uv( const int truncation, // truncation std::vector zepsnm( truncation + 6 ); std::vector zlapin( truncation + 6 ); std::vector zn( truncation + 6 ); - for ( int jn = km - 1; jn <= truncation + 2; ++jn ) { - int ij = truncation + 3 - jn; - if ( jn >= 0 ) { - zlapin[ij] = rlapin[jn]; - if ( jn < km ) { zepsnm[ij] = 0.; } + for ( int km = 0; km <= truncation; ++km ) { + for ( int jn = km - 1; jn <= truncation + 2; ++jn ) { + int ij = truncation + 3 - jn; + if ( jn >= 0 ) { + zlapin[ij] = rlapin[jn]; + if ( jn < km ) { zepsnm[ij] = 0.; } + else { + zepsnm[ij] = repsnm[jn + ( 2 * truncation - km + 5 ) * km / 2]; + } + } else { - zepsnm[ij] = repsnm[jn + ( 2 * truncation - km + 5 ) * km / 2]; + zlapin[ij] = 0.; + zepsnm[ij] = 0.; } + zn[ij] = jn; } - else { - zlapin[ij] = 0.; - zepsnm[ij] = 0.; - } - zn[ij] = jn; - } - zn[0] = truncation + 3; - - // copy spectral data into internal trans storage: - std::vector rvor( 2 * nb_vordiv_fields * nlei1 ); - std::vector rdiv( 2 * nb_vordiv_fields * nlei1 ); - std::vector ru( 2 * nb_vordiv_fields * nlei1 ); - std::vector rv( 2 * nb_vordiv_fields * nlei1 ); - prfi1b( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() ); - prfi1b( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() ); - - // compute eq.(2.12) and (2.13) in [Temperton 1991]: - if ( km == 0 ) { - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - int ir = 2 * jfld * nlei1 - 
1; - for ( int ji = 2; ji < truncation + 4 - km; ++ji ) { - double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1]; - double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1]; - ru[ir + ji] = +psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1]; - rv[ir + ji] = -psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1]; + zn[0] = truncation + 3; + + // copy spectral data into internal trans storage: + std::vector rvor( 2 * nb_vordiv_fields * nlei1 ); + std::vector rdiv( 2 * nb_vordiv_fields * nlei1 ); + std::vector ru( 2 * nb_vordiv_fields * nlei1 ); + std::vector rv( 2 * nb_vordiv_fields * nlei1 ); + prfi1b( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() ); + prfi1b( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() ); + + // compute eq.(2.12) and (2.13) in [Temperton 1991]: + if ( km == 0 ) { + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + int ir = 2 * jfld * nlei1 - 1; + for ( int ji = 2; ji < truncation + 4 - km; ++ji ) { + double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1]; + double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1]; + ru[ir + ji] = +psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1]; + rv[ir + ji] = -psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1]; + } } } - } - else { - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - int ir = 2 * jfld * nlei1 - 1, ii = ir + nlei1; - for ( int ji = 2; ji < truncation + 4 - km; ++ji ) { - double chiIm = km * zlapin[ji]; - double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1]; - double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1]; - ru[ir + ji] = -chiIm * rdiv[ii + ji] + psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1]; - ru[ii + ji] = +chiIm * rdiv[ir + ji] + psiM1 * rvor[ii + ji + 1] - psiP1 * rvor[ii + ji - 1]; - rv[ir + ji] = -chiIm * rvor[ii + ji] - psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1]; - rv[ii + ji] = +chiIm * rvor[ir + ji] - psiM1 * rdiv[ii + ji + 1] + psiP1 * rdiv[ii + ji - 1]; + else { + for ( int 
jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + int ir = 2 * jfld * nlei1 - 1, ii = ir + nlei1; + for ( int ji = 2; ji < truncation + 4 - km; ++ji ) { + double chiIm = km * zlapin[ji]; + double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1]; + double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1]; + ru[ir + ji] = -chiIm * rdiv[ii + ji] + psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1]; + ru[ii + ji] = +chiIm * rdiv[ir + ji] + psiM1 * rvor[ii + ji + 1] - psiP1 * rvor[ii + ji - 1]; + rv[ir + ji] = -chiIm * rvor[ii + ji] - psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1]; + rv[ii + ji] = +chiIm * rvor[ir + ji] - psiM1 * rdiv[ii + ji + 1] + psiP1 * rdiv[ii + ji - 1]; + } } } - } - // copy data from internal storage back to external spectral data: - int ilcm = truncation - km; - int ioff = ( 2 * truncation - km + 3 ) * km; - // ioff: start index of zonal wavenumber km in spectral data - double za_r = 1. / util::Earth::radius(); - for ( int j = 0; j <= ilcm; ++j ) { - // ilcm-j = total wavenumber - int inm = ioff + ( ilcm - j ) * 2; - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - int ir = 2 * jfld * nlei1, ii = ir + nlei1; - int idx = inm * nb_vordiv_fields + jfld; - // real part: - U[idx] = ru[ir + j + 2] * za_r; - V[idx] = rv[ir + j + 2] * za_r; - idx += nb_vordiv_fields; - // imaginary part: - U[idx] = ru[ii + j + 2] * za_r; - V[idx] = rv[ii + j + 2] * za_r; + // copy data from internal storage back to external spectral data: + int ilcm = truncation - km; + int ioff = ( 2 * truncation - km + 3 ) * km; + // ioff: start index of zonal wavenumber km in spectral data + double za_r = 1. 
/ util::Earth::radius(); + for ( int j = 0; j <= ilcm; ++j ) { + // ilcm-j = total wavenumber + int inm = ioff + ( ilcm - j ) * 2; + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + int ir = 2 * jfld * nlei1, ii = ir + nlei1; + int idx = inm * nb_vordiv_fields + jfld; + // real part: + U[idx] = ru[ir + j + 2] * za_r; + V[idx] = rv[ir + j + 2] * za_r; + idx += nb_vordiv_fields; + // imaginary part: + U[idx] = ru[ii + j + 2] * za_r; + V[idx] = rv[ii + j + 2] * za_r; + } } } } @@ -167,9 +170,7 @@ void vd2uv( const int truncation, // truncation void VorDivToUVLocal::execute( const int nb_coeff, const int nb_fields, const double vorticity[], const double divergence[], double U[], double V[], const eckit::Configuration& config ) const { - for ( int jm = 0; jm <= truncation_; ++jm ) { - vd2uv( truncation_, jm, nb_fields, vorticity, divergence, U, V, config ); - } + vd2uv( truncation_, nb_fields, vorticity, divergence, U, V, config ); } VorDivToUVLocal::VorDivToUVLocal( const int truncation, const eckit::Configuration& config ) : diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 2b6ccc1a9..c9c413ff5 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -976,6 +976,8 @@ CASE( "test_trans_domain" ) { Grid global_grid( "O64" ); Grid g1( global_grid, testdomain1 ); Grid g2( global_grid, testdomain2 ); + //Grid g1( global_grid ); + //Grid g2( global_grid ); bool fourierTrc1 = true; bool fourierTrc2 = false; @@ -1006,7 +1008,7 @@ CASE( "test_trans_domain" ) { functionspace::Spectral spectral( trc ); - int nb_scalar = 1, nb_vordiv = 0; + int nb_scalar = 1, nb_vordiv = 1; int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; std::vector sp( 2 * N * nb_scalar ); std::vector vor( 2 * N * nb_vordiv ); @@ -1018,8 +1020,8 @@ CASE( "test_trans_domain" ) { std::vector rgp2_analytic( g2.size() ); int icase = 0; - for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) { // 
vorticity, divergence, scalar - for ( int ivar_out = 2; ivar_out < 3; ivar_out++ ) { // u, v, scalar + for ( int ivar_in = 0; ivar_in < 3; ivar_in++ ) { // vorticity, divergence, scalar + for ( int ivar_out = 0; ivar_out < 3; ivar_out++ ) { // u, v, scalar int nb_fld = 1; if ( ivar_out == 2 ) { tolerance = 1.e-13; From 98cfd1e55dea2ed4a3011a3816533b90f9cba05a Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 5 Jun 2018 17:06:37 +0100 Subject: [PATCH 106/123] some clean-up. Unstructured grids are currently not working (ATLAS-162) --- src/atlas/trans/local/TransLocal.cc | 2 +- src/atlas/trans/local/VorDivToUVLocal.cc | 144 +++++---- src/tests/trans/test_transgeneral.cc | 371 +---------------------- 3 files changed, 87 insertions(+), 430 deletions(-) diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc index 604114dec..cdd20ad80 100644 --- a/src/atlas/trans/local/TransLocal.cc +++ b/src/atlas/trans/local/TransLocal.cc @@ -1253,7 +1253,7 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields, coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); } int idx = 0; - for ( int jfld = 0; jfld < nb_fields; jfld++ ) { + for ( int jfld = 0; jfld < nb_vordiv_fields; jfld++ ) { for ( int jlat = 0; jlat < g.ny(); jlat++ ) { for ( int jlon = 0; jlon < g.nx( jlat ); jlon++ ) { gp_fields[idx] /= coslats[jlat]; diff --git a/src/atlas/trans/local/VorDivToUVLocal.cc b/src/atlas/trans/local/VorDivToUVLocal.cc index 798f28f5e..56bf78c99 100644 --- a/src/atlas/trans/local/VorDivToUVLocal.cc +++ b/src/atlas/trans/local/VorDivToUVLocal.cc @@ -66,102 +66,116 @@ void vd2uv( const int truncation, // truncation double U[], // spectral data of U double V[], // spectral data of V const eckit::Configuration& config ) { - ATLAS_TRACE( "vd2uv" ); std::vector repsnm( ( truncation + 1 ) * ( truncation + 6 ) / 2 ); int idx = 0; int nlei1 = truncation + 4 + ( truncation + 4 + 1 ) % 2; double ra = 
util::Earth::radius(); std::vector rlapin( truncation + 3 ); - // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991] - for ( int jm = 0; jm <= truncation; ++jm ) { - for ( int jn = jm; jn <= truncation + 2; ++jn, ++idx ) { - repsnm[idx] = std::sqrt( ( jn * jn - jm * jm ) / ( 4. * jn * jn - 1. ) ); + { + //ATLAS_TRACE( "general setup" ); + // repsnm: epsilon from eq.(2.12) and (2.13) in [Temperton 1991] + for ( int jm = 0; jm <= truncation; ++jm ) { + for ( int jn = jm; jn <= truncation + 2; ++jn, ++idx ) { + repsnm[idx] = std::sqrt( ( jn * jn - jm * jm ) / ( 4. * jn * jn - 1. ) ); + } } - } - repsnm[0] = 0.; + repsnm[0] = 0.; - // rlapin: constant factor from eq.(2.2) and (2.3) in [Temperton 1991] - for ( int jn = 1; jn <= truncation + 2; ++jn ) { - rlapin[jn] = -ra * ra / ( jn * ( jn + 1. ) ); + // rlapin: constant factor from eq.(2.2) and (2.3) in [Temperton 1991] + for ( int jn = 1; jn <= truncation + 2; ++jn ) { + rlapin[jn] = -ra * ra / ( jn * ( jn + 1. ) ); + } + rlapin[0] = 0.; } - rlapin[0] = 0.; // inverse the order of repsnm and rlapin for improved accuracy std::vector zepsnm( truncation + 6 ); std::vector zlapin( truncation + 6 ); std::vector zn( truncation + 6 ); for ( int km = 0; km <= truncation; ++km ) { - for ( int jn = km - 1; jn <= truncation + 2; ++jn ) { - int ij = truncation + 3 - jn; - if ( jn >= 0 ) { - zlapin[ij] = rlapin[jn]; - if ( jn < km ) { zepsnm[ij] = 0.; } + { + //ATLAS_TRACE( "current wavenumber setup" ); + for ( int jn = km - 1; jn <= truncation + 2; ++jn ) { + int ij = truncation + 3 - jn; + if ( jn >= 0 ) { + zlapin[ij] = rlapin[jn]; + if ( jn < km ) { zepsnm[ij] = 0.; } + else { + zepsnm[ij] = repsnm[jn + ( 2 * truncation - km + 5 ) * km / 2]; + } + } else { - zepsnm[ij] = repsnm[jn + ( 2 * truncation - km + 5 ) * km / 2]; + zlapin[ij] = 0.; + zepsnm[ij] = 0.; } + zn[ij] = jn; } - else { - zlapin[ij] = 0.; - zepsnm[ij] = 0.; - } - zn[ij] = jn; + zn[0] = truncation + 3; } - zn[0] = truncation + 3; // copy spectral data 
into internal trans storage: std::vector rvor( 2 * nb_vordiv_fields * nlei1 ); std::vector rdiv( 2 * nb_vordiv_fields * nlei1 ); std::vector ru( 2 * nb_vordiv_fields * nlei1 ); std::vector rv( 2 * nb_vordiv_fields * nlei1 ); - prfi1b( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() ); - prfi1b( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() ); - - // compute eq.(2.12) and (2.13) in [Temperton 1991]: - if ( km == 0 ) { - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - int ir = 2 * jfld * nlei1 - 1; - for ( int ji = 2; ji < truncation + 4 - km; ++ji ) { - double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1]; - double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1]; - ru[ir + ji] = +psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1]; - rv[ir + ji] = -psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1]; + { + //ATLAS_TRACE( "copy data to internal storage" ); + prfi1b( truncation, km, nb_vordiv_fields, vorticity_spectra, rvor.data() ); + prfi1b( truncation, km, nb_vordiv_fields, divergence_spectra, rdiv.data() ); + } + + { + //ATLAS_TRACE( "actual computation" ); + // compute eq.(2.12) and (2.13) in [Temperton 1991]: + if ( km == 0 ) { + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + int ir = 2 * jfld * nlei1 - 1; + for ( int ji = 2; ji < truncation + 4 - km; ++ji ) { + double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1]; + double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1]; + ru[ir + ji] = +psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1]; + rv[ir + ji] = -psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1]; + } } } - } - else { - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - int ir = 2 * jfld * nlei1 - 1, ii = ir + nlei1; - for ( int ji = 2; ji < truncation + 4 - km; ++ji ) { - double chiIm = km * zlapin[ji]; - double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1]; - double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1]; - ru[ir + ji] = -chiIm * rdiv[ii + ji] 
+ psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1]; - ru[ii + ji] = +chiIm * rdiv[ir + ji] + psiM1 * rvor[ii + ji + 1] - psiP1 * rvor[ii + ji - 1]; - rv[ir + ji] = -chiIm * rvor[ii + ji] - psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1]; - rv[ii + ji] = +chiIm * rvor[ir + ji] - psiM1 * rdiv[ii + ji + 1] + psiP1 * rdiv[ii + ji - 1]; + else { + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + int ir = 2 * jfld * nlei1 - 1, ii = ir + nlei1; + for ( int ji = 2; ji < truncation + 4 - km; ++ji ) { + double chiIm = km * zlapin[ji]; + double psiM1 = zn[ji + 1] * zepsnm[ji] * zlapin[ji + 1]; + double psiP1 = zn[ji - 2] * zepsnm[ji - 1] * zlapin[ji - 1]; + ru[ir + ji] = -chiIm * rdiv[ii + ji] + psiM1 * rvor[ir + ji + 1] - psiP1 * rvor[ir + ji - 1]; + ru[ii + ji] = +chiIm * rdiv[ir + ji] + psiM1 * rvor[ii + ji + 1] - psiP1 * rvor[ii + ji - 1]; + rv[ir + ji] = -chiIm * rvor[ii + ji] - psiM1 * rdiv[ir + ji + 1] + psiP1 * rdiv[ir + ji - 1]; + rv[ii + ji] = +chiIm * rvor[ir + ji] - psiM1 * rdiv[ii + ji + 1] + psiP1 * rdiv[ii + ji - 1]; + } } } } - // copy data from internal storage back to external spectral data: - int ilcm = truncation - km; - int ioff = ( 2 * truncation - km + 3 ) * km; - // ioff: start index of zonal wavenumber km in spectral data - double za_r = 1. 
/ util::Earth::radius(); - for ( int j = 0; j <= ilcm; ++j ) { - // ilcm-j = total wavenumber - int inm = ioff + ( ilcm - j ) * 2; - for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { - int ir = 2 * jfld * nlei1, ii = ir + nlei1; - int idx = inm * nb_vordiv_fields + jfld; - // real part: - U[idx] = ru[ir + j + 2] * za_r; - V[idx] = rv[ir + j + 2] * za_r; - idx += nb_vordiv_fields; - // imaginary part: - U[idx] = ru[ii + j + 2] * za_r; - V[idx] = rv[ii + j + 2] * za_r; + { + //ATLAS_TRACE( "copy data back to external storage" ); + // copy data from internal storage back to external spectral data: + int ilcm = truncation - km; + int ioff = ( 2 * truncation - km + 3 ) * km; + // ioff: start index of zonal wavenumber km in spectral data + double za_r = 1. / util::Earth::radius(); + for ( int j = 0; j <= ilcm; ++j ) { + // ilcm-j = total wavenumber + int inm = ioff + ( ilcm - j ) * 2; + for ( int jfld = 0; jfld < nb_vordiv_fields; ++jfld ) { + int ir = 2 * jfld * nlei1, ii = ir + nlei1; + int idx = inm * nb_vordiv_fields + jfld; + // real part: + U[idx] = ru[ir + j + 2] * za_r; + V[idx] = rv[ir + j + 2] * za_r; + idx += nb_vordiv_fields; + // imaginary part: + U[idx] = ru[ii + j + 2] * za_r; + V[idx] = rv[ii + j + 2] * za_r; + } } } } diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index c9c413ff5..da74a9980 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -65,157 +65,6 @@ struct AtlasTransEnvironment : public AtlasTestEnvironment { } }; -//----------------------------------------------------------------------------- - -#if 0 -void compute_legendre( const size_t trc, // truncation (in) - const double& lat, // latitude in radians (in) - array::ArrayView& zlfpol ) // values of - // associated - // Legendre - // functions, size - // (trc+1)*trc/2 - // (out) -{ - trans::compute_legendre_polynomials( trc, lat, zlfpol.data() ); -} - 
-//----------------------------------------------------------------------------- - -void legendre_transform( const size_t trc, // truncation (in) - const size_t trcFT, // truncation for Fourier transformation (in) - array::ArrayView& rlegReal, // values of associated Legendre - // functions, size (trc+1)*trc/2 - // (out) - array::ArrayView& rlegImag, // values of associated Legendre - // functions, size (trc+1)*trc/2 - // (out) - const array::ArrayView& zlfpol, // values of associated Legendre - // functions, size (trc+1)*trc/2 - // (in) - const double rspecg[] ) // spectral data, size (trc+1)*trc (in) -{ - trans::invtrans_legendre( trc, trcFT, trc, zlfpol.data(), 1, rspecg, rlegReal.data(), rlegImag.data() ); -} - -//----------------------------------------------------------------------------- - -double fourier_transform( const size_t trcFT, array::ArrayView& rlegReal, // values of associated Legendre - // functions, size (trc+1)*trc/2 - // (out) - array::ArrayView& rlegImag, // values of associated Legendre - // functions, size (trc+1)*trc/2 - // (out) - const double lon ) // radians -{ - double gp[1]; - trans::invtrans_fourier( trcFT, lon, 1, rlegReal.data(), rlegImag.data(), gp ); - return gp[0]; -} - -//----------------------------------------------------------------------------- -// Routine to compute the spectral transform by using a local Fourier -// transformation -// for a single point -// -// Author: -// Andreas Mueller *ECMWF* -// -double spectral_transform_point( const size_t trc, // truncation (in) - const size_t trcFT, // truncation for Fourier transformation (in) - const double lon, // longitude in radians (in) - const double lat, // latitude in radians (in) - const double rspecg[] ) // spectral data, size (trc+1)*trc (in) -{ - int N = ( trc + 2 ) * ( trc + 1 ) / 2; - ATLAS_TRACE(); - atlas::array::ArrayT zlfpol_( N ); - atlas::array::ArrayView zlfpol = make_view( zlfpol_ ); - - atlas::array::ArrayT rlegReal_( trcFT + 1 ); - atlas::array::ArrayView 
rlegReal = make_view( rlegReal_ ); - - atlas::array::ArrayT rlegImag_( trcFT + 1 ); - atlas::array::ArrayView rlegImag = make_view( rlegImag_ ); - - // Legendre transform: - compute_legendre( trc, lat, zlfpol ); - legendre_transform( trc, trcFT, rlegReal, rlegImag, zlfpol, rspecg ); - - // Fourier transform: - return fourier_transform( trcFT, rlegReal, rlegImag, lon ); -} - -//----------------------------------------------------------------------------- -// Routine to compute the spectral transform by using a local Fourier -// transformation -// for a grid (same latitude for all longitudes, allows to compute Legendre -// functions -// once for all longitudes) -// -// Author: -// Andreas Mueller *ECMWF* -// -void spectral_transform_grid( const size_t trc, // truncation (in) - const size_t trcFT, // truncation for Fourier transformation (in) - const Grid grid, // call with something like Grid("O32") - const double rspecg[], // spectral data, size (trc+1)*trc (in) - double rgp[], // resulting grid point data (out) - const bool pointwise ) // use point function for unstructured mesh for - // testing purposes -{ - std::ostream& out = Log::info(); // just for debugging - int N = ( trc + 2 ) * ( trc + 1 ) / 2; - ATLAS_TRACE(); - atlas::array::ArrayT zlfpol_( N ); - atlas::array::ArrayView zlfpol = make_view( zlfpol_ ); - - atlas::array::ArrayT rlegReal_( trcFT + 1 ); - atlas::array::ArrayView rlegReal = make_view( rlegReal_ ); - - atlas::array::ArrayT rlegImag_( trcFT + 1 ); - atlas::array::ArrayView rlegImag = make_view( rlegImag_ ); - - int idx = 0; - - if ( grid::StructuredGrid( grid ) ) { - grid::StructuredGrid g( grid ); - for ( size_t j = 0; j < g.ny(); ++j ) { - double lat = g.y( j ) * util::Constants::degreesToRadians(); - - // Legendre transform: - compute_legendre( trc, lat, zlfpol ); - legendre_transform( trc, trcFT, rlegReal, rlegImag, zlfpol, rspecg ); - - for ( size_t i = 0; i < g.nx( j ); ++i ) { - double lon = g.x( i, j ) * 
util::Constants::degreesToRadians(); - // Fourier transform: - rgp[idx++] = fourier_transform( trcFT, rlegReal, rlegImag, lon ); - } - } - } - else { - for ( PointXY p : grid.xy() ) { - double lon = p.x() * util::Constants::degreesToRadians(); - double lat = p.y() * util::Constants::degreesToRadians(); - if ( pointwise ) { - // alternative for testing: use spectral_transform_point function: - rgp[idx++] = spectral_transform_point( trc, trcFT, lon, lat, rspecg ); - } - else { - // Legendre transform: - compute_legendre( trc, lat, zlfpol ); - legendre_transform( trc, trcFT, rlegReal, rlegImag, zlfpol, rspecg ); - - // Fourier transform: - rgp[idx++] = fourier_transform( trcFT, rlegReal, rlegImag, lon ); - } - } - } - - EXPECT( idx == grid.size() ); -} -#endif //----------------------------------------------------------------------------- // Routine to compute the spherical harmonics analytically at one point // (up to wave number 3) @@ -527,203 +376,8 @@ double compute_rms( const size_t N, // length of the arrays return rms; } -#if 0 -//----------------------------------------------------------------------------- -// Routine to test the spectral transform by comparing it with the analytically -// derived spherical harmonics -// -// Author: -// Andreas Mueller *ECMWF* -// -double spectral_transform_test( double trc, // truncation - double n, // total wave number (implemented so far for n<4 - double m, // zonal wave number (implemented so far for m<4, m zlfpol_(N); - atlas::array::ArrayView zlfpol = make_view(zlfpol_); - - double lat = std::acos(0.99312859918509488); - compute_legendre(trc, lat, zlfpol); -} -#endif -//----------------------------------------------------------------------------- -#if 0 -#if 1 -CASE( "test_transgeneral_point" ) { - std::ostream& out = Log::info(); - Log::info() << "test_transgeneral_point" << std::endl; - double tolerance = 2.e-15; - // test spectral transform up to wave number 3 by comparing - // the result with the analytically 
computed spherical harmonics - - Grid g = grid::UnstructuredGrid( {{50., 20.}, {30., -20.}, {179., -89.}, {-101., 70.}} ); - - int trc = 47; // truncation - - double rms = 0.; - for ( int m = 0; m <= 3; m++ ) { // zonal wavenumber - for ( int n = m; n <= 3; n++ ) { // total wavenumber - for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part - rms = spectral_transform_test( trc, n, m, imag, g, true ); - EXPECT( rms < tolerance ); - } - } - } -} -#endif //----------------------------------------------------------------------------- #if 1 -CASE( "test_transgeneral_unstructured" ) { - std::ostream& out = Log::info(); - Log::info() << "test_transgeneral_unstructured" << std::endl; - double tolerance = 2.e-15; - // test spectral transform up to wave number 3 by comparing - // the result with the analytically computed spherical harmonics - - Grid g = grid::UnstructuredGrid( new std::vector{{50., 20.}, {30., -20.}, {179., -89.}, {-101., 70.}} ); - - int trc = 47; // truncation - - double rms = 0.; - for ( int m = 0; m <= 3; m++ ) { // zonal wavenumber - for ( int n = m; n <= 3; n++ ) { // total wavenumber - for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part - rms = spectral_transform_test( trc, n, m, imag, g, false ); - EXPECT( rms < tolerance ); - } - } - } -} - -//----------------------------------------------------------------------------- - -CASE( "test_transgeneral_structured" ) { - std::ostream& out = Log::info(); - Log::info() << "test_transgeneral_structured" << std::endl; - double tolerance = 2.e-15; - // test spectral transform up to wave number 3 by comparing - // the result with the analytically computed spherical harmonics - - std::string grid_uid( "O10" ); - grid::StructuredGrid g( grid_uid ); - - int trc = 47; // truncation - - double rms = 0.; - for ( int m = 0; m <= 3; m++ ) { // zonal wavenumber - for ( int n = m; n <= 3; n++ ) { // total wavenumber - for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part - 
rms = spectral_transform_test( trc, n, m, imag, g, false ); - EXPECT( rms < tolerance ); - } - } - } -} - -//----------------------------------------------------------------------------- - -CASE( "test_transgeneral_with_translib" ) { - Log::info() << "test_transgeneral_with_translib" << std::endl; - // test transgeneral by comparing its result with the trans library - // this test is based on the test_nomesh case in test_trans.cc - - std::ostream& out = Log::info(); - double tolerance = 1.e-13; - Grid g( "F24" ); - grid::StructuredGrid gs( g ); - int trc = 47; -#if ATLAS_HAVE_TRANS - trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) ); -#endif - functionspace::StructuredColumns gridpoints( g ); - functionspace::Spectral spectral( trc ); - Field spf = spectral.createField( option::name( "spf" ) ); - Field gpf = gridpoints.createField( option::name( "gpf" ) ); - - int N = ( trc + 2 ) * ( trc + 1 ) / 2; - std::vector rspecg( 2 * N ); - std::vector rgp( g.size() ); - std::vector rgp_analytic( g.size() ); - - int k = 0; - for ( int m = 0; m <= trc; m++ ) { // zonal wavenumber - for ( int n = m; n <= trc; n++ ) { // total wavenumber - for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part - - if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., 2, 2 ) == 0. ) { - array::ArrayView sp = array::make_view( spf ); - sp.assign( 0. 
); - sp( k ) = 1.; - spectral_transform_grid_analytic( trc, trc, n, m, imag, g, // - rspecg.data(), rgp_analytic.data(), 2, 2 ); - - // compute spectral transform with the general transform: - spectral_transform_grid( trc, trc, g, sp.data(), rgp.data(), false ); - array::ArrayView gp = array::make_view( gpf ); - - double rms_gen = compute_rms( g.size(), rgp.data(), rgp_analytic.data() ); - - if ( rms_gen >= tolerance ) { - ATLAS_DEBUG_VAR( rms_gen ); - ATLAS_DEBUG_VAR( tolerance ); - } - EXPECT( rms_gen < tolerance ); -#if ATLAS_HAVE_TRANS - EXPECT_NO_THROW( transIFS.invtrans( spf, gpf ) ); - double rms_trans = compute_rms( g.size(), gp.data(), rgp.data() ); - EXPECT( rms_trans < tolerance ); -#endif - } - k++; - } - } - } -} -#endif -#endif -//----------------------------------------------------------------------------- -#if 0 CASE( "test_trans_vordiv_with_translib" ) { Log::info() << "test_trans_vordiv_with_translib" << std::endl; // test transgeneral by comparing its result with the trans library @@ -743,8 +397,8 @@ CASE( "test_trans_vordiv_with_translib" ) { trans::Trans transIFS( g, trc, util::Config( "type", "ifs" ) ); double rav = 0.; // compute average rms error of trans library in rav #endif - trans::Trans transLocal1( g, trc, util::Config( "type", "localopt2" ) ); - trans::Trans transLocal2( g, trc, util::Config( "type", "Local" ) ); + trans::Trans transLocal1( g, trc, util::Config( "type", "local" ) ); + trans::Trans transLocal2( g, trc, util::Config( "type", "local" ) ); double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 functionspace::Spectral spectral( trc ); @@ -1150,7 +804,9 @@ CASE( "test_trans_unstructured" ) { //Domain testdomain = RectangularDomain( {20., 25.}, {40., 60.} ); Domain testdomain = RectangularDomain( {0., 90.}, {0., 90.} ); // Grid: (Adjust the following line if the test takes too long!) 
- Grid g( "F120", testdomain ); + Grid grid_global( "F120" ); + Grid g( grid_global, testdomain ); + int trc = 120; grid::StructuredGrid gs( g ); std::vector pts( g.size() ); int idx( 0 ); @@ -1166,7 +822,6 @@ CASE( "test_trans_unstructured" ) { } Grid gu = grid::UnstructuredGrid( new std::vector( &pts[0], &pts[idx] ) ); Log::info() << "gu: size=" << gu.size() << std::endl; - int trc = 120; double rav1 = 0., rav2 = 0.; // compute average rms errors of transLocal1 and transLocal2 int nb_scalar = 1, nb_vordiv = 1; @@ -1181,8 +836,8 @@ CASE( "test_trans_unstructured" ) { std::vector rgp_analytic1( g.size() ); std::vector rgp_analytic2( gu.size() ); - trans::Trans transLocal1( g, trc, util::Config( "type", "Local" ) ); - trans::Trans transLocal2( gu, trc, util::Config( "type", "Local" ) ); + trans::Trans transLocal1( grid_global, testdomain, trc, option::type( "local" ) ); + trans::Trans transLocal2( gu, trc, util::Config( "type", "local" ) ); int icase = 0; for ( int ivar_in = 2; ivar_in < 3; ivar_in++ ) { // vorticity, divergence, scalar @@ -1285,18 +940,6 @@ CASE( "test_trans_unstructured" ) { Log::info() << "average RMS error of transLocal2: " << rav2 << std::endl; } #endif -#if 0 -CASE( "test_trans_invtrans" ) { - trans::Trans trans( Grid( "O64" ), 63, util::Config( "type", "local" ) ); - - std::vector rspec( trans.spectralCoefficients() ); - std::vector rgp( trans.grid().size() ); - - // TODO: rspec needs proper initial data - - trans.invtrans( 1, rspec.data(), rgp.data() ); -} -#endif //----------------------------------------------------------------------------- From 63660bf1d9e0c499ac62bef53f0dd21ceb1f3a5e Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 6 Jun 2018 11:43:35 +0100 Subject: [PATCH 107/123] Avoid inclusion of fftw3.h in TransLocal.h --- src/atlas/trans/local/TransLocal.cc | 65 +++++++++++++++--------- src/atlas/trans/local/TransLocal.h | 16 +++--- src/atlas_f/trans/atlas_Trans_module.F90 | 2 + 3 files changed, 51 insertions(+), 32 
deletions(-) diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc index cdd20ad80..cdcceaa19 100644 --- a/src/atlas/trans/local/TransLocal.cc +++ b/src/atlas/trans/local/TransLocal.cc @@ -26,6 +26,11 @@ #include "eckit/log/Bytes.h" #include "eckit/parser/JSON.h" +#include "atlas/library/defines.h" +#if ATLAS_HAVE_FFTW +#include +#endif + namespace atlas { namespace trans { @@ -217,6 +222,17 @@ int fourier_truncation( const int truncation, // truncation return trc; } +namespace detail { +struct FFTW_Data { +#if ATLAS_HAVE_FFTW + fftw_complex* in; + double* out; + std::vector plans; +#endif +}; +} + + // -------------------------------------------------------------------------------------------------------------------- // Class TransLocal // -------------------------------------------------------------------------------------------------------------------- @@ -245,6 +261,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma legendre_cachesize_( cache.legendre().size() ), fft_cache_( cache.fft().data() ), fft_cachesize_( cache.fft().size() ), + fftw_( new detail::FFTW_Data ), linalg_( linear_algebra_backend() ), warning_( TransParameters( config ).warning() ) { ATLAS_TRACE( "TransLocal constructor" ); @@ -478,8 +495,8 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma { ATLAS_TRACE( "Fourier precomputations (FFTW)" ); int num_complex = ( nlonsMaxGlobal_ / 2 ) + 1; - fft_in_ = fftw_alloc_complex( nlats * num_complex ); - fft_out_ = fftw_alloc_real( nlats * nlonsMaxGlobal_ ); + fftw_->in = fftw_alloc_complex( nlats * num_complex ); + fftw_->out = fftw_alloc_real( nlats * nlonsMaxGlobal_ ); if ( fft_cache_ ) { Log::debug() << "Import FFTW wisdom from cache" << std::endl; @@ -498,16 +515,16 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma // read.close(); // if ( wisdomString.length() > 0 ) { fftw_import_wisdom_from_string( 
&wisdomString[0u] ); } if ( grid::RegularGrid( gridGlobal_ ) ) { - plans_.resize( 1 ); - plans_[0] = fftw_plan_many_dft_c2r( 1, &nlonsMaxGlobal_, nlats, fft_in_, NULL, 1, num_complex, - fft_out_, NULL, 1, nlonsMaxGlobal_, FFTW_ESTIMATE ); + fftw_->plans.resize( 1 ); + fftw_->plans[0] = fftw_plan_many_dft_c2r( 1, &nlonsMaxGlobal_, nlats, fftw_->in, NULL, 1, num_complex, + fftw_->out, NULL, 1, nlonsMaxGlobal_, FFTW_ESTIMATE ); } else { - plans_.resize( nlatsLegDomain_ ); + fftw_->plans.resize( nlatsLegDomain_ ); for ( int j = 0; j < nlatsLegDomain_; j++ ) { int nlonsGlobalj = gs_global.nx( jlatMinLeg_ + j ); //ASSERT( nlonsGlobalj > 0 && nlonsGlobalj <= nlonsMaxGlobal_ ); - plans_[j] = fftw_plan_dft_c2r_1d( nlonsGlobalj, fft_in_, fft_out_, FFTW_ESTIMATE ); + fftw_->plans[j] = fftw_plan_dft_c2r_1d( nlonsGlobalj, fftw_->in, fftw_->out, FFTW_ESTIMATE ); } } std::string file_path = TransParameters( config ).write_fft(); @@ -627,11 +644,11 @@ TransLocal::~TransLocal() { } if ( useFFT_ ) { #if ATLAS_HAVE_FFTW && !TRANSLOCAL_DGEMM2 - for ( int j = 0; j < plans_.size(); j++ ) { - fftw_destroy_plan( plans_[j] ); + for ( int j = 0; j < fftw_->plans.size(); j++ ) { + fftw_destroy_plan( fftw_->plans[j] ); } - fftw_free( fft_in_ ); - fftw_free( fft_out_ ); + fftw_free( fftw_->in ); + fftw_free( fftw_->out ); #endif } else { @@ -859,25 +876,25 @@ void TransLocal::invtrans_fourier_regular( const int nlats, const int nlons, con for ( int jfld = 0; jfld < nb_fields; jfld++ ) { int idx = 0; for ( int jlat = 0; jlat < nlats; jlat++ ) { - fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0, nb_fields, nlats )]; + fftw_->in[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0, nb_fields, nlats )]; for ( int jm = 1; jm < num_complex; jm++, idx++ ) { for ( int imag = 0; imag < 2; imag++ ) { if ( jm <= truncation_ ) { - fft_in_[idx][imag] = + fftw_->in[idx][imag] = scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )]; } else { - fft_in_[idx][imag] = 0.; + 
fftw_->in[idx][imag] = 0.; } } } } - fftw_execute_dft_c2r( plans_[0], fft_in_, fft_out_ ); + fftw_execute_dft_c2r( fftw_->plans[0], fftw_->in, fftw_->out ); for ( int jlat = 0; jlat < nlats; jlat++ ) { for ( int jlon = 0; jlon < nlons; jlon++ ) { int j = jlon + jlonMin_[0]; if ( j >= nlonsMaxGlobal_ ) { j -= nlonsMaxGlobal_; } - gp_fields[jlon + nlons * ( jlat + nlats * jfld )] = fft_out_[j + nlonsMaxGlobal_ * jlat]; + gp_fields[jlon + nlons * ( jlat + nlats * jfld )] = fftw_->out[j + nlonsMaxGlobal_ * jlat]; } } } @@ -965,18 +982,18 @@ void TransLocal::invtrans_fourier_reduced( const int nlats, const grid::Structur int idx = 0; //Log::info() << jlat << "in:" << std::endl; int num_complex = ( nlonsGlobal_[jlat] / 2 ) + 1; - fft_in_[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0, nb_fields, nlats )]; - //Log::info() << fft_in_[0][0] << " "; + fftw_->in[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0, nb_fields, nlats )]; + //Log::info() << fftw_->in[0][0] << " "; for ( int jm = 1; jm < num_complex; jm++, idx++ ) { for ( int imag = 0; imag < 2; imag++ ) { if ( jm <= truncation_ ) { - fft_in_[idx][imag] = + fftw_->in[idx][imag] = scl_fourier[posMethod( jfld, imag, jlat, jm, nb_fields, nlats )]; } else { - fft_in_[idx][imag] = 0.; + fftw_->in[idx][imag] = 0.; } - //Log::info() << fft_in_[idx][imag] << " "; + //Log::info() << fftw_->in[idx][imag] << " "; } } //Log::info() << std::endl; @@ -984,13 +1001,13 @@ void TransLocal::invtrans_fourier_reduced( const int nlats, const grid::Structur int jplan = nlatsLegDomain_ - nlatsNH_ + jlat; if ( jplan >= nlatsLegDomain_ ) { jplan = nlats - 1 + nlatsLegDomain_ - nlatsSH_ - jlat; }; //ASSERT( jplan < nlatsLeg_ && jplan >= 0 ); - fftw_execute_dft_c2r( plans_[jplan], fft_in_, fft_out_ ); + fftw_execute_dft_c2r( fftw_->plans[jplan], fftw_->in, fftw_->out ); for ( int jlon = 0; jlon < g.nx( jlat ); jlon++ ) { int j = jlon + jlonMin_[jlat]; if ( j >= nlonsGlobal_[jlat] ) { j -= nlonsGlobal_[jlat]; } - //Log::info() << 
fft_out_[j] << " "; + //Log::info() << fftw_->out[j] << " "; ASSERT( j < nlonsMaxGlobal_ ); - gp_fields[jgp++] = fft_out_[j]; + gp_fields[jgp++] = fftw_->out[j]; } //Log::info() << std::endl; } diff --git a/src/atlas/trans/local/TransLocal.h b/src/atlas/trans/local/TransLocal.h index 29005083f..014045ca2 100644 --- a/src/atlas/trans/local/TransLocal.h +++ b/src/atlas/trans/local/TransLocal.h @@ -11,13 +11,11 @@ #pragma once #include +#include #include "atlas/array.h" #include "atlas/grid/Grid.h" #include "atlas/trans/Trans.h" -#if ATLAS_HAVE_FFTW -#include -#endif #define TRANSLOCAL_DGEMM2 0 @@ -40,6 +38,10 @@ class FieldSet; namespace atlas { namespace trans { +namespace detail { +struct FFTW_Data; +} + class LegendreCacheCreatorLocal; int fourier_truncation( const int truncation, // truncation const int nx, // number of longitudes @@ -186,11 +188,9 @@ class TransLocal : public trans::TransImpl { std::vector legendre_begin_; std::vector legendre_sym_begin_; std::vector legendre_asym_begin_; -#if ATLAS_HAVE_FFTW - fftw_complex* fft_in_; - double* fft_out_; - std::vector plans_; -#endif + + + std::unique_ptr fftw_; Cache cache_; Cache export_legendre_; diff --git a/src/atlas_f/trans/atlas_Trans_module.F90 b/src/atlas_f/trans/atlas_Trans_module.F90 index 25ee8ee34..549d473ac 100644 --- a/src/atlas_f/trans/atlas_Trans_module.F90 +++ b/src/atlas_f/trans/atlas_Trans_module.F90 @@ -207,6 +207,7 @@ function nb_gridpoints_global( this ) end function function grid( this ) + use, intrinsic :: iso_c_binding, only: c_null_ptr use atlas_trans_c_binding class(atlas_Trans) :: this type(atlas_Grid) :: grid @@ -216,6 +217,7 @@ function grid( this ) #else THROW_ERROR FCKIT_SUPPRESS_UNUSED( this ) + grid = atlas_Grid( c_null_ptr ) FCKIT_SUPPRESS_UNUSED( grid ) #endif end function From 7aaf2dcf18030b35892f9ac979b3b19f68a1b1ed Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 6 Jun 2018 14:27:52 +0100 Subject: [PATCH 108/123] work in progress: merging all spectral data into one 
array with vdoption --- src/atlas/trans/local/TransLocal.cc | 93 +++++++++++++++++++--------- src/tests/trans/test_transgeneral.cc | 20 +++--- 2 files changed, 76 insertions(+), 37 deletions(-) diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc index cdcceaa19..bd9b923f1 100644 --- a/src/atlas/trans/local/TransLocal.cc +++ b/src/atlas/trans/local/TransLocal.cc @@ -1327,38 +1327,75 @@ void extend_truncation( const int old_truncation, const int nb_fields, const dou void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], const eckit::Configuration& config ) const { - ATLAS_TRACE( "TransLocal::invtrans" ); - int nb_gp = grid_.size(); - int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; - if ( nb_vordiv_fields > 0 ) { - std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector U_ext( nb_vordiv_spec_ext, 0. ); - std::vector V_ext( nb_vordiv_spec_ext, 0. 
); - + if ( config.getBool( "vdoption", false ) ) { + // collect all spectral data into one array "all_spectra": + ATLAS_TRACE( "TransLocal::invtrans" ); + int nb_all_fields = 2 * nb_vordiv_fields + nb_scalar_fields; + int nb_all_size = 2 * legendre_size( truncation_ ) * nb_all_fields; + std::vector all_spectra( nb_all_size ); + int k = 0, i = 0, j = 0, l = 0; { - ATLAS_TRACE( "extend vordiv" ); - // increase truncation in vorticity_spectra and divergence_spectra: - extend_truncation( truncation_, nb_vordiv_fields, vorticity_spectra, vorticity_spectra_extended.data() ); - extend_truncation( truncation_, nb_vordiv_fields, divergence_spectra, divergence_spectra_extended.data() ); + ATLAS_TRACE( "merge all spectra" ); + for ( int m = 0; m <= truncation_; m++ ) { // zonal wavenumber + for ( int n = m; n <= truncation_; n++ ) { // total wavenumber + for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part + for ( int jfld = 0; jfld < nb_vordiv_fields; jfld++ ) { // vorticity fields + all_spectra[k++] = vorticity_spectra[i++]; + } + for ( int jfld = 0; jfld < nb_vordiv_fields; jfld++ ) { // divergence fields + all_spectra[k++] = divergence_spectra[j++]; + } + for ( int jfld = 0; jfld < nb_scalar_fields; jfld++ ) { // scalar fields + all_spectra[k++] = scalar_spectra[l++]; + } + } + } + } } + int nb_vordiv_size = 2 * legendre_size( truncation_ ) * nb_vordiv_fields; + int nb_scalar_size = 2 * legendre_size( truncation_ ) * nb_scalar_fields; + ASSERT( k == nb_all_size ); + ASSERT( i == nb_vordiv_size ); + ASSERT( j == nb_vordiv_size ); + ASSERT( l == nb_scalar_size ); + invtrans_uv( truncation_, nb_all_fields, 0, all_spectra.data(), gp_fields, config ); + } + else { + ATLAS_TRACE( "TransLocal::invtrans" ); + int nb_gp = grid_.size(); + int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; + if ( nb_vordiv_fields > 0 ) { + std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. 
); + std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); + std::vector U_ext( nb_vordiv_spec_ext, 0. ); + std::vector V_ext( nb_vordiv_spec_ext, 0. ); - { - ATLAS_TRACE( "vordiv to UV" ); - // call vd2uv to compute u and v in spectral space - trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "local" ) ); - vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), - divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); - } + { + ATLAS_TRACE( "extend vordiv" ); + // increase truncation in vorticity_spectra and divergence_spectra: + extend_truncation( truncation_, nb_vordiv_fields, vorticity_spectra, + vorticity_spectra_extended.data() ); + extend_truncation( truncation_, nb_vordiv_fields, divergence_spectra, + divergence_spectra_extended.data() ); + } - // perform spectral transform to compute all fields in grid point space - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), - gp_fields + nb_gp * nb_vordiv_fields, config ); - } - if ( nb_scalar_fields > 0 ) { - invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields, - config ); + { + ATLAS_TRACE( "vordiv to UV" ); + // call vd2uv to compute u and v in spectral space + trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "local" ) ); + vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), + divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); + } + + // perform spectral transform to compute all fields in grid point space + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); + invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), + gp_fields + nb_gp * nb_vordiv_fields, config ); + } + if ( nb_scalar_fields > 0 ) 
{ + invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields, + config ); + } } } diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index da74a9980..85bc1bb53 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -377,7 +377,7 @@ double compute_rms( const size_t N, // length of the arrays } //----------------------------------------------------------------------------- -#if 1 +#if 0 CASE( "test_trans_vordiv_with_translib" ) { Log::info() << "test_trans_vordiv_with_translib" << std::endl; // test transgeneral by comparing its result with the trans library @@ -619,27 +619,29 @@ CASE( "test_trans_domain" ) { std::ostream& out = Log::info(); double tolerance = 1.e-13; + util::Config vd2uvoption( "vdoption", true ); //Domain testdomain = ZonalBandDomain( {-90., 90.} ); //Domain testdomain = ZonalBandDomain( {-.5, .5} ); //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} ); - Domain testdomain1 = ZonalBandDomain( {-10., 5.} ); + //Domain testdomain1 = ZonalBandDomain( {-10., 5.} ); + Domain testdomain1 = RectangularDomain( {-1., 1.}, {-.5, 0.} ); //Domain testdomain1 = RectangularDomain( {-1., 1.}, {50., 55.} ); - Domain testdomain2 = RectangularDomain( {-10., 10.}, {-5., 40.} ); + Domain testdomain2 = RectangularDomain( {-1., 1.}, {-.5, 0.} ); // Grid: (Adjust the following line if the test takes too long!) 
- Grid global_grid( "O64" ); + Grid global_grid( "O512" ); Grid g1( global_grid, testdomain1 ); Grid g2( global_grid, testdomain2 ); //Grid g1( global_grid ); //Grid g2( global_grid ); bool fourierTrc1 = true; - bool fourierTrc2 = false; + bool fourierTrc2 = true; using grid::StructuredGrid; using LinearSpacing = grid::LinearSpacing; //StructuredGrid g2( LinearSpacing( {0., 180.}, 181 ), LinearSpacing( {0., 45.}, 46 ) ); - int trc = 63; + int trc = 511; //Log::info() << "rgp1:" << std::endl; if ( eckit::PathName( "legcache.bin" ).exists() ) eckit::PathName( "legcache.bin" ).unlink(); Trace t1( Here(), "translocal1 construction" ); @@ -692,7 +694,7 @@ CASE( "test_trans_domain" ) { for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. && - icase < 1000 ) { + icase < 10 ) { auto start = std::chrono::system_clock::now(); for ( int j = 0; j < 2 * N * nb_scalar; j++ ) { sp[j] = 0.; @@ -727,7 +729,7 @@ CASE( "test_trans_domain" ) { //Log::info() << std::endl << "rgp1:"; ATLAS_TRACE_SCOPE( "translocal1" ) EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), - div.data(), rgp1.data() ) ); + div.data(), rgp1.data(), vd2uvoption ) ); //Log::info() << std::endl << "rgp2:"; ATLAS_TRACE_SCOPE( "translocal2" ) @@ -737,7 +739,7 @@ CASE( "test_trans_domain" ) { int pos = ( ivar_out * nb_vordiv + jfld ); double rms_gen1 = - compute_rms( g1.size(), rgp1.data() + pos * g1.size(), rgp1_analytic.data() ); + 0.; //compute_rms( g1.size(), rgp1.data() + pos * g1.size(), rgp1_analytic.data() ); double rms_gen2 = compute_rms( g2.size(), rgp2.data() + pos * g2.size(), rgp2_analytic.data() ); From 0bfbd5d7bbfb1947625dde9901228619c36956b0 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 7 Jun 2018 16:55:58 +0100 Subject: [PATCH 109/123] added option to merge U,V,scalars before transform with old vd2uv; this should be faster because the size of the dgemm 
calls is bigger --- src/atlas/trans/local/TransLocal.cc | 68 +++++++++++++++++++++------- src/atlas/trans/local/TransLocal.h | 5 +- src/tests/trans/test_transgeneral.cc | 12 ++--- 3 files changed, 60 insertions(+), 25 deletions(-) diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc index bd9b923f1..38a89fdd3 100644 --- a/src/atlas/trans/local/TransLocal.cc +++ b/src/atlas/trans/local/TransLocal.cc @@ -712,7 +712,7 @@ void gp_transpose( const int nb_size, const int nb_fields, const double gp_tmp[] // -------------------------------------------------------------------------------------------------------------------- void TransLocal::invtrans_legendre( const int truncation, const int nlats, const int nb_fields, - const double scalar_spectra[], double scl_fourier[], + const int nb_vordiv_fields, const double scalar_spectra[], double scl_fourier[], const eckit::Configuration& config ) const { // Legendre transform: { @@ -858,8 +858,8 @@ void TransLocal::invtrans_legendre( const int truncation, const int nlats, const } } } - } - } + } // namespace trans + } // namespace atlas } // -------------------------------------------------------------------------------------------------------------------- @@ -1101,7 +1101,7 @@ void TransLocal::invtrans_unstructured_precomp( const int truncation, const int if ( nb_vordiv_fields > 0 ) { //ATLAS_TRACE( " u,v from U,V" ); double coslat = std::cos( lat ); - for ( int j = 0; j < nb_fields; j++ ) { + for ( int j = 0; j < 2 * nb_vordiv_fields && j < nb_fields; j++ ) { gp_fields[ip + j * grid_.size()] /= coslat; } } @@ -1199,7 +1199,7 @@ void TransLocal::invtrans_unstructured( const int truncation, const int nb_field if ( nb_vordiv_fields > 0 ) { //ATLAS_TRACE( "u,v from U,V" ); const double coslat = std::cos( lat ); - for ( int j = 0; j < nb_fields; j++ ) { + for ( int j = 0; j < 2 * nb_vordiv_fields && j < nb_fields; j++ ) { gp_fields[ip + j * grid_.size()] /= coslat; } } @@ -1251,7 +1251,8 @@ void 
TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields, // ATLAS-159 workaround end // Legendre transformation: - invtrans_legendre( truncation, nlats, nb_scalar_fields, scalar_spectra, scl_fourier, config ); + invtrans_legendre( truncation, nlats, nb_scalar_fields, nb_vordiv_fields, scalar_spectra, scl_fourier, + config ); // Fourier transformation: if ( grid::RegularGrid( gridGlobal_ ) ) { @@ -1270,7 +1271,7 @@ void TransLocal::invtrans_uv( const int truncation, const int nb_scalar_fields, coslats[j] = std::cos( g.y( j ) * util::Constants::degreesToRadians() ); } int idx = 0; - for ( int jfld = 0; jfld < nb_vordiv_fields; jfld++ ) { + for ( int jfld = 0; jfld < 2 * nb_vordiv_fields && jfld < nb_fields; jfld++ ) { for ( int jlat = 0; jlat < g.ny(); jlat++ ) { for ( int jlon = 0; jlon < g.nx( jlat ); jlon++ ) { gp_fields[idx] /= coslats[jlat]; @@ -1327,38 +1328,71 @@ void extend_truncation( const int old_truncation, const int nb_fields, const dou void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], const eckit::Configuration& config ) const { - if ( config.getBool( "vdoption", false ) ) { + if ( config.getBool( "mergeBeforeTransform", false ) ) { // collect all spectral data into one array "all_spectra": ATLAS_TRACE( "TransLocal::invtrans" ); + int nb_gp = grid_.size(); + int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; + std::vector U_ext; + std::vector V_ext; + std::vector scalar_ext; + if ( nb_vordiv_fields > 0 ) { + std::vector vorticity_spectra_extended( nb_vordiv_spec_ext ); + std::vector divergence_spectra_extended( nb_vordiv_spec_ext ); + U_ext.resize( nb_vordiv_spec_ext ); + V_ext.resize( nb_vordiv_spec_ext ); + + { + ATLAS_TRACE( "extend vordiv" ); + // increase truncation in vorticity_spectra and divergence_spectra: + extend_truncation( truncation_, 
nb_vordiv_fields, vorticity_spectra, + vorticity_spectra_extended.data() ); + extend_truncation( truncation_, nb_vordiv_fields, divergence_spectra, + divergence_spectra_extended.data() ); + } + + { + ATLAS_TRACE( "vordiv to UV" ); + // call vd2uv to compute u and v in spectral space + trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "local" ) ); + vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), + divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); + } + } + if ( nb_scalar_fields > 0 ) { + int nb_scalar_ext = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; + scalar_ext.resize( nb_scalar_ext ); + extend_truncation( truncation_, nb_scalar_fields, scalar_spectra, scalar_ext.data() ); + } int nb_all_fields = 2 * nb_vordiv_fields + nb_scalar_fields; - int nb_all_size = 2 * legendre_size( truncation_ ) * nb_all_fields; + int nb_all_size = 2 * legendre_size( truncation_ + 1 ) * nb_all_fields; std::vector all_spectra( nb_all_size ); int k = 0, i = 0, j = 0, l = 0; { ATLAS_TRACE( "merge all spectra" ); - for ( int m = 0; m <= truncation_; m++ ) { // zonal wavenumber - for ( int n = m; n <= truncation_; n++ ) { // total wavenumber + for ( int m = 0; m <= truncation_ + 1; m++ ) { // zonal wavenumber + for ( int n = m; n <= truncation_ + 1; n++ ) { // total wavenumber for ( int imag = 0; imag < 2; imag++ ) { // imaginary/real part for ( int jfld = 0; jfld < nb_vordiv_fields; jfld++ ) { // vorticity fields - all_spectra[k++] = vorticity_spectra[i++]; + all_spectra[k++] = U_ext[i++]; } for ( int jfld = 0; jfld < nb_vordiv_fields; jfld++ ) { // divergence fields - all_spectra[k++] = divergence_spectra[j++]; + all_spectra[k++] = V_ext[j++]; } for ( int jfld = 0; jfld < nb_scalar_fields; jfld++ ) { // scalar fields - all_spectra[k++] = scalar_spectra[l++]; + all_spectra[k++] = scalar_ext[l++]; } } } } } - int nb_vordiv_size = 2 * legendre_size( truncation_ ) * nb_vordiv_fields; - int 
nb_scalar_size = 2 * legendre_size( truncation_ ) * nb_scalar_fields; + int nb_vordiv_size = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; + int nb_scalar_size = 2 * legendre_size( truncation_ + 1 ) * nb_scalar_fields; ASSERT( k == nb_all_size ); ASSERT( i == nb_vordiv_size ); ASSERT( j == nb_vordiv_size ); ASSERT( l == nb_scalar_size ); - invtrans_uv( truncation_, nb_all_fields, 0, all_spectra.data(), gp_fields, config ); + invtrans_uv( truncation_ + 1, nb_all_fields, nb_vordiv_fields, all_spectra.data(), gp_fields, config ); } else { ATLAS_TRACE( "TransLocal::invtrans" ); diff --git a/src/atlas/trans/local/TransLocal.h b/src/atlas/trans/local/TransLocal.h index 014045ca2..9d3d69140 100644 --- a/src/atlas/trans/local/TransLocal.h +++ b/src/atlas/trans/local/TransLocal.h @@ -134,8 +134,9 @@ class TransLocal : public trans::TransImpl { #endif }; - void invtrans_legendre( const int truncation, const int nlats, const int nb_fields, const double scalar_spectra[], - double scl_fourier[], const eckit::Configuration& config ) const; + void invtrans_legendre( const int truncation, const int nlats, const int nb_fields, const int nb_vordiv_fields, + const double scalar_spectra[], double scl_fourier[], + const eckit::Configuration& config ) const; void invtrans_fourier_regular( const int nlats, const int nlons, const int nb_fields, double scl_fourier[], double gp_fields[], const eckit::Configuration& config ) const; diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 85bc1bb53..678d223b2 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -619,7 +619,6 @@ CASE( "test_trans_domain" ) { std::ostream& out = Log::info(); double tolerance = 1.e-13; - util::Config vd2uvoption( "vdoption", true ); //Domain testdomain = ZonalBandDomain( {-90., 90.} ); //Domain testdomain = ZonalBandDomain( {-.5, .5} ); //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} ); @@ -664,7 +663,7 @@ 
CASE( "test_trans_domain" ) { functionspace::Spectral spectral( trc ); - int nb_scalar = 1, nb_vordiv = 1; + int nb_scalar = 0, nb_vordiv = 1; int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; std::vector sp( 2 * N * nb_scalar ); std::vector vor( 2 * N * nb_vordiv ); @@ -676,8 +675,8 @@ CASE( "test_trans_domain" ) { std::vector rgp2_analytic( g2.size() ); int icase = 0; - for ( int ivar_in = 0; ivar_in < 3; ivar_in++ ) { // vorticity, divergence, scalar - for ( int ivar_out = 0; ivar_out < 3; ivar_out++ ) { // u, v, scalar + for ( int ivar_in = 0; ivar_in < 2; ivar_in++ ) { // vorticity, divergence, scalar + for ( int ivar_out = 0; ivar_out < 2; ivar_out++ ) { // u, v, scalar int nb_fld = 1; if ( ivar_out == 2 ) { tolerance = 1.e-13; @@ -729,7 +728,8 @@ CASE( "test_trans_domain" ) { //Log::info() << std::endl << "rgp1:"; ATLAS_TRACE_SCOPE( "translocal1" ) EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), - div.data(), rgp1.data(), vd2uvoption ) ); + div.data(), rgp1.data(), + util::Config( "mergeBeforeTransform", true ) ) ); //Log::info() << std::endl << "rgp2:"; ATLAS_TRACE_SCOPE( "translocal2" ) @@ -739,7 +739,7 @@ CASE( "test_trans_domain" ) { int pos = ( ivar_out * nb_vordiv + jfld ); double rms_gen1 = - 0.; //compute_rms( g1.size(), rgp1.data() + pos * g1.size(), rgp1_analytic.data() ); + compute_rms( g1.size(), rgp1.data() + pos * g1.size(), rgp1_analytic.data() ); double rms_gen2 = compute_rms( g2.size(), rgp2.data() + pos * g2.size(), rgp2_analytic.data() ); From 2c6ce9bd9fa23ac0b7116125b8511c3135f04ca5 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 7 Jun 2018 17:22:05 +0100 Subject: [PATCH 110/123] merging spectral U and V before the transform is now the default --- src/atlas/trans/local/TransLocal.cc | 35 ++-------------------------- src/tests/trans/test_transgeneral.cc | 27 +++++++++++---------- 2 files changed, 15 insertions(+), 47 deletions(-) diff --git 
a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc index 38a89fdd3..1e3520557 100644 --- a/src/atlas/trans/local/TransLocal.cc +++ b/src/atlas/trans/local/TransLocal.cc @@ -1328,10 +1328,10 @@ void extend_truncation( const int old_truncation, const int nb_fields, const dou void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spectra[], const int nb_vordiv_fields, const double vorticity_spectra[], const double divergence_spectra[], double gp_fields[], const eckit::Configuration& config ) const { - if ( config.getBool( "mergeBeforeTransform", false ) ) { + int nb_gp = grid_.size(); + if ( nb_vordiv_fields > 0 ) { // collect all spectral data into one array "all_spectra": ATLAS_TRACE( "TransLocal::invtrans" ); - int nb_gp = grid_.size(); int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; std::vector U_ext; std::vector V_ext; @@ -1395,37 +1395,6 @@ void TransLocal::invtrans( const int nb_scalar_fields, const double scalar_spect invtrans_uv( truncation_ + 1, nb_all_fields, nb_vordiv_fields, all_spectra.data(), gp_fields, config ); } else { - ATLAS_TRACE( "TransLocal::invtrans" ); - int nb_gp = grid_.size(); - int nb_vordiv_spec_ext = 2 * legendre_size( truncation_ + 1 ) * nb_vordiv_fields; - if ( nb_vordiv_fields > 0 ) { - std::vector vorticity_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector divergence_spectra_extended( nb_vordiv_spec_ext, 0. ); - std::vector U_ext( nb_vordiv_spec_ext, 0. ); - std::vector V_ext( nb_vordiv_spec_ext, 0. 
); - - { - ATLAS_TRACE( "extend vordiv" ); - // increase truncation in vorticity_spectra and divergence_spectra: - extend_truncation( truncation_, nb_vordiv_fields, vorticity_spectra, - vorticity_spectra_extended.data() ); - extend_truncation( truncation_, nb_vordiv_fields, divergence_spectra, - divergence_spectra_extended.data() ); - } - - { - ATLAS_TRACE( "vordiv to UV" ); - // call vd2uv to compute u and v in spectral space - trans::VorDivToUV vordiv_to_UV_ext( truncation_ + 1, option::type( "local" ) ); - vordiv_to_UV_ext.execute( nb_vordiv_spec_ext, nb_vordiv_fields, vorticity_spectra_extended.data(), - divergence_spectra_extended.data(), U_ext.data(), V_ext.data() ); - } - - // perform spectral transform to compute all fields in grid point space - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, U_ext.data(), gp_fields, config ); - invtrans_uv( truncation_ + 1, nb_vordiv_fields, nb_vordiv_fields, V_ext.data(), - gp_fields + nb_gp * nb_vordiv_fields, config ); - } if ( nb_scalar_fields > 0 ) { invtrans_uv( truncation_, nb_scalar_fields, 0, scalar_spectra, gp_fields + 2 * nb_gp * nb_vordiv_fields, config ); diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index 678d223b2..f0ed2f679 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -377,7 +377,7 @@ double compute_rms( const size_t N, // length of the arrays } //----------------------------------------------------------------------------- -#if 0 +#if 1 CASE( "test_trans_vordiv_with_translib" ) { Log::info() << "test_trans_vordiv_with_translib" << std::endl; // test transgeneral by comparing its result with the trans library @@ -387,7 +387,7 @@ CASE( "test_trans_vordiv_with_translib" ) { double tolerance = 1.e-13; // Grid: (Adjust the following line if the test takes too long!) 
- Grid g( "F120" ); + Grid g( "F64" ); grid::StructuredGrid gs( g ); int ndgl = gs.ny(); @@ -623,15 +623,15 @@ CASE( "test_trans_domain" ) { //Domain testdomain = ZonalBandDomain( {-.5, .5} ); //Domain testdomain = RectangularDomain( {0., 30.}, {-.05, .05} ); //Domain testdomain1 = ZonalBandDomain( {-10., 5.} ); - Domain testdomain1 = RectangularDomain( {-1., 1.}, {-.5, 0.} ); + Domain testdomain1 = RectangularDomain( {-5., 5.}, {-2.5, 0.} ); //Domain testdomain1 = RectangularDomain( {-1., 1.}, {50., 55.} ); - Domain testdomain2 = RectangularDomain( {-1., 1.}, {-.5, 0.} ); + Domain testdomain2 = RectangularDomain( {-5., 5.}, {-2.5, 0.} ); // Grid: (Adjust the following line if the test takes too long!) - Grid global_grid( "O512" ); - Grid g1( global_grid, testdomain1 ); + Grid global_grid( "O64" ); + //Grid g1( global_grid, testdomain1 ); Grid g2( global_grid, testdomain2 ); - //Grid g1( global_grid ); + Grid g1( global_grid ); //Grid g2( global_grid ); bool fourierTrc1 = true; @@ -640,7 +640,7 @@ CASE( "test_trans_domain" ) { using LinearSpacing = grid::LinearSpacing; //StructuredGrid g2( LinearSpacing( {0., 180.}, 181 ), LinearSpacing( {0., 45.}, 46 ) ); - int trc = 511; + int trc = 63; //Log::info() << "rgp1:" << std::endl; if ( eckit::PathName( "legcache.bin" ).exists() ) eckit::PathName( "legcache.bin" ).unlink(); Trace t1( Here(), "translocal1 construction" ); @@ -663,7 +663,7 @@ CASE( "test_trans_domain" ) { functionspace::Spectral spectral( trc ); - int nb_scalar = 0, nb_vordiv = 1; + int nb_scalar = 1, nb_vordiv = 1; int N = ( trc + 2 ) * ( trc + 1 ) / 2, nb_all = nb_scalar + 2 * nb_vordiv; std::vector sp( 2 * N * nb_scalar ); std::vector vor( 2 * N * nb_vordiv ); @@ -675,8 +675,8 @@ CASE( "test_trans_domain" ) { std::vector rgp2_analytic( g2.size() ); int icase = 0; - for ( int ivar_in = 0; ivar_in < 2; ivar_in++ ) { // vorticity, divergence, scalar - for ( int ivar_out = 0; ivar_out < 2; ivar_out++ ) { // u, v, scalar + for ( int ivar_in = 0; ivar_in < 
3; ivar_in++ ) { // vorticity, divergence, scalar + for ( int ivar_out = 0; ivar_out < 3; ivar_out++ ) { // u, v, scalar int nb_fld = 1; if ( ivar_out == 2 ) { tolerance = 1.e-13; @@ -693,7 +693,7 @@ CASE( "test_trans_domain" ) { for ( int imag = 0; imag <= 1; imag++ ) { // real and imaginary part if ( sphericalharmonics_analytic_point( n, m, true, 0., 0., ivar_in, ivar_in ) == 0. && - icase < 10 ) { + icase < 1000 ) { auto start = std::chrono::system_clock::now(); for ( int j = 0; j < 2 * N * nb_scalar; j++ ) { sp[j] = 0.; @@ -728,8 +728,7 @@ CASE( "test_trans_domain" ) { //Log::info() << std::endl << "rgp1:"; ATLAS_TRACE_SCOPE( "translocal1" ) EXPECT_NO_THROW( transLocal1.invtrans( nb_scalar, sp.data(), nb_vordiv, vor.data(), - div.data(), rgp1.data(), - util::Config( "mergeBeforeTransform", true ) ) ); + div.data(), rgp1.data() ) ); //Log::info() << std::endl << "rgp2:"; ATLAS_TRACE_SCOPE( "translocal2" ) From 72384e79bf75c668cab77147bf61d199af9c431b Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Fri, 8 Jun 2018 13:04:49 +0100 Subject: [PATCH 111/123] ATLAS-163 Introduce bit reproducibility for parallel interpolation --- src/atlas/interpolation/Interpolation.h | 1 + .../interpolation/method/FiniteElement.cc | 176 +++++++++++++++++- .../interpolation/method/FiniteElement.h | 6 + .../interpolation/method/KNearestNeighbours.h | 2 + src/atlas/interpolation/method/Method.cc | 2 + src/atlas/interpolation/method/Method.h | 10 +- .../interpolation/method/NearestNeighbour.h | 2 + .../atlas-parallel-interpolation.cc | 8 + 8 files changed, 197 insertions(+), 10 deletions(-) diff --git a/src/atlas/interpolation/Interpolation.h b/src/atlas/interpolation/Interpolation.h index 0ff0e6aa4..8575159d7 100644 --- a/src/atlas/interpolation/Interpolation.h +++ b/src/atlas/interpolation/Interpolation.h @@ -39,6 +39,7 @@ class Interpolation { operator bool() const { return implementation_; } + void print( std::ostream& out ) const { implementation_->print(out); } private: 
eckit::SharedPtr implementation_; }; diff --git a/src/atlas/interpolation/method/FiniteElement.cc b/src/atlas/interpolation/method/FiniteElement.cc index 9a70c4a15..e5f433b5e 100644 --- a/src/atlas/interpolation/method/FiniteElement.cc +++ b/src/atlas/interpolation/method/FiniteElement.cc @@ -17,6 +17,7 @@ #include "eckit/log/ProgressTimer.h" #include "eckit/log/Seconds.h" #include "eckit/mpi/Comm.h" +#include "eckit/exception/Exceptions.h" #include "atlas/functionspace/NodeColumns.h" #include "atlas/functionspace/PointCloud.h" @@ -32,6 +33,9 @@ #include "atlas/util/CoordinateEnums.h" #include "atlas/util/Earth.h" #include "atlas/util/Point.h" +#include "atlas/parallel/mpi/Buffer.h" +#include "atlas/parallel/GatherScatter.h" + namespace atlas { namespace interpolation { @@ -49,6 +53,9 @@ static const double parametricEpsilon = 1e-15; void FiniteElement::setup( const FunctionSpace& source, const FunctionSpace& target ) { ATLAS_TRACE( "atlas::interpolation::method::FiniteElement::setup()" ); + source_ = source; + target_ = target; + if ( functionspace::NodeColumns tgt = target ) { Mesh meshTarget = tgt.mesh(); @@ -78,6 +85,89 @@ void FiniteElement::setup( const FunctionSpace& source, const FunctionSpace& tar setup( source ); } +struct Stencil { + enum { max_stencil_size = 4 }; + Stencil() { + g = -1; + size = 0; + } + void add( gidx_t tgt, gidx_t src, double weight ) { + if( g >= 0 ) { + ASSERT( tgt == g ); + } + g = tgt; + size_t i=size; + source[i] = src; + weights[i] = weight; + ++size; + } + gidx_t g; + std::array source; + std::array weights; + size_t size; +}; + +void FiniteElement::print(std::ostream& out) const +{ + functionspace::NodeColumns src (source_); + functionspace::NodeColumns tgt (target_); + if( not tgt ) NOTIMP; + auto gidx_src = array::make_view( src.nodes().global_index() ); + + ASSERT( tgt.nodes().size() == matrix_.rows() ); + + + auto field_stencil_points_loc = tgt.createField( option::variables(Stencil::max_stencil_size) ); + auto 
field_stencil_weigths_loc = tgt.createField( option::variables(Stencil::max_stencil_size) ); + auto field_stencil_size_loc = tgt.createField(); + + auto stencil_points_loc = array::make_view( field_stencil_points_loc ); + auto stencil_weights_loc = array::make_view( field_stencil_weigths_loc ); + auto stencil_size_loc = array::make_view( field_stencil_size_loc ); + stencil_size_loc.assign(0); + + for( Matrix::const_iterator it = matrix_.begin(); it!=matrix_.end(); ++it ) { + int p = it.row(); + int& i = stencil_size_loc( p ); + stencil_points_loc( p, i ) = gidx_src( it.col() ); + stencil_weights_loc( p, i ) = *it; + ++i; + } + + + size_t global_size = tgt.gather().glb_dof(); + + auto field_stencil_points_glb = tgt.createField( option::variables(Stencil::max_stencil_size) | option::global(0) ); + auto field_stencil_weights_glb = tgt.createField( option::variables(Stencil::max_stencil_size) | option::global(0) ); + auto field_stencil_size_glb = tgt.createField( option::global(0) ); + + + + auto stencil_points_glb = array::make_view( field_stencil_points_glb ); + auto stencil_weights_glb = array::make_view( field_stencil_weights_glb ); + auto stencil_size_glb = array::make_view( field_stencil_size_glb ); + + tgt.gather().gather( stencil_size_loc, stencil_size_glb ); + tgt.gather().gather( stencil_points_loc, stencil_points_glb ); + tgt.gather().gather( stencil_weights_loc, stencil_weights_glb ); + + if( mpi::comm().rank() == 0 ) { + for( idx_t i=0; i( array::make_view( source_xyz ) ) ); ocoords_.reset( new array::ArrayView( array::make_view( target_xyz_ ) ) ); + igidx_.reset( new array::ArrayView( array::make_view( src.nodes().global_index() ) ) ); connectivity_ = &meshSource.cells().node_connectivity(); @@ -183,17 +274,26 @@ void FiniteElement::setup( const FunctionSpace& source ) { matrix_.swap( A ); } +struct ElementEdge { + std::array idx; + void swap() { + idx_t tmp = idx[0]; + idx[0] = idx[1]; + idx[1] = tmp; + } +}; + Method::Triplets 
FiniteElement::projectPointToElements( size_t ip, const ElemIndex3::NodeList& elems, std::ostream& failures_log ) const { ASSERT( elems.begin() != elems.end() ); const size_t inp_points = icoords_->shape( 0 ); - size_t idx[4]; - double w[4]; + std::array idx; + std::array w; Triplets triplets; Ray ray( PointXYZ{( *ocoords_ )( ip, 0 ), ( *ocoords_ )( ip, 1 ), ( *ocoords_ )( ip, 2 )} ); - + ElementEdge edge; for ( ElemIndex3::NodeList::const_iterator itc = elems.begin(); itc != elems.end(); ++itc ) { const size_t elem_id = ( *itc ).value().payload(); ASSERT( elem_id < connectivity_->rows() ); @@ -206,6 +306,48 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd ASSERT( idx[i] < inp_points ); } + auto on_triag_edge = [&](ElementEdge& edge) { + if( w[0] < 1.e-15 ) { + edge.idx[0] = 1; + edge.idx[1] = 2; + return true; + } + if( w[1] < 1.e-15 ) { + edge.idx[0] = 0; + edge.idx[1] = 2; + return true; + } + if( w[2] < 1.e-15 ) { + edge.idx[0] = 0; + edge.idx[1] = 1; + return true; + } + return false; + }; + + auto on_quad_edge = [&](ElementEdge& edge) { + if( w[0] < 1.e-15 && w[1] < 1.e-15 ) { + edge.idx[0] = 2; + edge.idx[1] = 3; + return true; + } + if( w[1] < 1.e-15 && w[2] < 1.e-15 ) { + edge.idx[0] = 0; + edge.idx[1] = 3; + return true; + } + if( w[2] < 1.e-15 && w[3] < 1.e-15 ) { + edge.idx[0] = 0; + edge.idx[1] = 1; + return true; + } + if( w[3] < 1.e-15 && w[0] < 1.e-15 ) { + edge.idx[0] = 1; + edge.idx[1] = 2; + return true; + } + return false; + }; if ( nb_cols == 3 ) { /* triangle */ element::Triag3D triag( @@ -227,8 +369,18 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd w[1] = is.u; w[2] = is.v; - for ( size_t i = 0; i < 3; ++i ) { - triplets.push_back( Triplet( ip, idx[i], w[i] ) ); + if( on_triag_edge( edge) ) { + if( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) { + edge.swap(); + } + for( size_t i = 0; i < 2; ++i ) { + triplets.push_back( Triplet( ip, 
idx[edge.idx[i]], w[edge.idx[i]] ) ); + } + } + else { + for ( size_t i = 0; i < 3; ++i ) { + triplets.push_back( Triplet( ip, idx[i], w[i] ) ); + } } break; // stop looking for elements @@ -256,8 +408,18 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd w[2] = is.u * is.v; w[3] = ( 1. - is.u ) * is.v; - for ( size_t i = 0; i < 4; ++i ) { - triplets.push_back( Triplet( ip, idx[i], w[i] ) ); + if( on_quad_edge( edge ) ) { + if( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) { + edge.swap(); + } + for( size_t i = 0; i < 2; ++i ) { + triplets.push_back( Triplet( ip, idx[edge.idx[i]], w[edge.idx[i]] ) ); + } + } + else { + for ( size_t i = 0; i < 4; ++i ) { + triplets.push_back( Triplet( ip, idx[i], w[i] ) ); + } } break; // stop looking for elements } diff --git a/src/atlas/interpolation/method/FiniteElement.h b/src/atlas/interpolation/method/FiniteElement.h index bbadf0289..7d0c8b4b2 100644 --- a/src/atlas/interpolation/method/FiniteElement.h +++ b/src/atlas/interpolation/method/FiniteElement.h @@ -33,6 +33,8 @@ class FiniteElement : public Method { virtual void setup( const FunctionSpace& source, const FunctionSpace& target ) override; + virtual void print( std::ostream& ) const override; + protected: /** * @brief Create an interpolant sparse matrix relating two (pre-partitioned) @@ -59,9 +61,13 @@ class FiniteElement : public Method { mesh::MultiBlockConnectivity* connectivity_; std::unique_ptr> icoords_; std::unique_ptr> ocoords_; + std::unique_ptr> igidx_; Field target_xyz_; Field target_ghost_; + + FunctionSpace source_; + FunctionSpace target_; }; } // namespace method diff --git a/src/atlas/interpolation/method/KNearestNeighbours.h b/src/atlas/interpolation/method/KNearestNeighbours.h index 9f412512e..ab20d5779 100644 --- a/src/atlas/interpolation/method/KNearestNeighbours.h +++ b/src/atlas/interpolation/method/KNearestNeighbours.h @@ -30,6 +30,8 @@ class KNearestNeighbours : public KNearestNeighboursBase 
{ */ virtual void setup( const FunctionSpace& source, const FunctionSpace& target ) override; + virtual void print( std::ostream& ) const override {} + protected: size_t k_; }; diff --git a/src/atlas/interpolation/method/Method.cc b/src/atlas/interpolation/method/Method.cc index 05365cc52..c4d46dd93 100644 --- a/src/atlas/interpolation/method/Method.cc +++ b/src/atlas/interpolation/method/Method.cc @@ -22,8 +22,10 @@ #include "atlas/field/Field.h" #include "atlas/field/FieldSet.h" +#include "atlas/functionspace/NodeColumns.h" #include "atlas/runtime/Log.h" #include "atlas/runtime/Trace.h" +#include "atlas/mesh/Nodes.h" // for static linking #include "FiniteElement.h" diff --git a/src/atlas/interpolation/method/Method.h b/src/atlas/interpolation/method/Method.h index 3cee4e597..34d62965a 100644 --- a/src/atlas/interpolation/method/Method.h +++ b/src/atlas/interpolation/method/Method.h @@ -12,6 +12,7 @@ #include #include +#include #include "eckit/config/Configuration.h" #include "eckit/linalg/SparseMatrix.h" @@ -44,10 +45,12 @@ class Method : public eckit::Owned { virtual void execute( const FieldSet& source, FieldSet& target ) const; virtual void execute( const Field& source, Field& target ) const; + virtual void print( std::ostream& ) const = 0; + protected: - typedef eckit::linalg::Triplet Triplet; - typedef std::vector Triplets; - typedef eckit::linalg::SparseMatrix Matrix; + using Triplet = eckit::linalg::Triplet; + using Triplets = std::vector; + using Matrix = eckit::linalg::SparseMatrix; static void normalise( Triplets& triplets ); @@ -58,6 +61,7 @@ class Method : public eckit::Owned { // so do not expose here, even though only linear operators are now // implemented. 
Matrix matrix_; + }; struct MethodFactory { diff --git a/src/atlas/interpolation/method/NearestNeighbour.h b/src/atlas/interpolation/method/NearestNeighbour.h index 9b311c499..829e8647a 100644 --- a/src/atlas/interpolation/method/NearestNeighbour.h +++ b/src/atlas/interpolation/method/NearestNeighbour.h @@ -21,6 +21,8 @@ class NearestNeighbour : public KNearestNeighboursBase { NearestNeighbour( const Config& config ) : KNearestNeighboursBase( config ) {} virtual ~NearestNeighbour() {} + virtual void print( std::ostream& ) const override {} + protected: /** * @brief Create an interpolant sparse matrix relating two (pre-partitioned) diff --git a/src/sandbox/interpolation/atlas-parallel-interpolation.cc b/src/sandbox/interpolation/atlas-parallel-interpolation.cc index 67a5d3dfe..6a4efe4ef 100644 --- a/src/sandbox/interpolation/atlas-parallel-interpolation.cc +++ b/src/sandbox/interpolation/atlas-parallel-interpolation.cc @@ -70,6 +70,10 @@ class AtlasParallelInterpolation : public AtlasTool { add_option( new SimpleOption( "target-mesh-generator-angle", "target mesh generator angle option (default 0.)" ) ); add_option( new SimpleOption( "target-mesh-halo", "target mesh halo size (default 1)" ) ); + add_option( new SimpleOption( "forward-interpolator-output", + "Output forward interpolator's points and weights" ) ); + add_option( new SimpleOption( "backward-interpolator-output", + "Output backward interpolator's points and weights" ) ); } }; @@ -148,6 +152,10 @@ void AtlasParallelInterpolation::execute( const AtlasTool::Args& args ) { Interpolation( option::type( interpolation_method ), tgt_functionspace, src_functionspace ); } + if( args.getBool( "forward-interpolator-output", false ) ) { + interpolator_forward.print( Log::info() ); + } + // Create source FunctionSpace and fields FieldSet src_fields; From bd0c9725d7169b36ec547c3f286d9d1b8f055a46 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Fri, 8 Jun 2018 14:43:27 +0100 Subject: [PATCH 112/123] ATLAS-163 
Introduce bit reproducibility for parallel interpolation for coinciding meshes --- .../interpolation/method/FiniteElement.cc | 48 ++++++++++++++----- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/src/atlas/interpolation/method/FiniteElement.cc b/src/atlas/interpolation/method/FiniteElement.cc index e5f433b5e..610ea53ba 100644 --- a/src/atlas/interpolation/method/FiniteElement.cc +++ b/src/atlas/interpolation/method/FiniteElement.cc @@ -294,6 +294,7 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd Triplets triplets; Ray ray( PointXYZ{( *ocoords_ )( ip, 0 ), ( *ocoords_ )( ip, 1 ), ( *ocoords_ )( ip, 2 )} ); ElementEdge edge; + idx_t single_point; for ( ElemIndex3::NodeList::const_iterator itc = elems.begin(); itc != elems.end(); ++itc ) { const size_t elem_id = ( *itc ).value().payload(); ASSERT( elem_id < connectivity_->rows() ); @@ -306,7 +307,7 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd ASSERT( idx[i] < inp_points ); } - auto on_triag_edge = [&](ElementEdge& edge) { + auto on_triag_edge = [&]() { if( w[0] < 1.e-15 ) { edge.idx[0] = 1; edge.idx[1] = 2; @@ -325,7 +326,7 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd return false; }; - auto on_quad_edge = [&](ElementEdge& edge) { + auto on_quad_edge = [&]() { if( w[0] < 1.e-15 && w[1] < 1.e-15 ) { edge.idx[0] = 2; edge.idx[1] = 3; @@ -348,6 +349,19 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd } return false; }; + + auto on_single_point = [&]() { + if( w[edge.idx[0]] < 1.e-15 ) { + single_point = edge.idx[1]; + return true; + } + if( w[edge.idx[1]] < 1.e-15 ) { + single_point = edge.idx[0]; + return true; + } + return false; + }; + if ( nb_cols == 3 ) { /* triangle */ element::Triag3D triag( @@ -369,12 +383,17 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd w[1] = is.u; w[2] = is.v; - if( on_triag_edge( edge) ) { - 
if( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) { - edge.swap(); + if( on_triag_edge() ) { + if( on_single_point() ) { + triplets.push_back( Triplet( ip, idx[single_point], w[single_point] ) ); } - for( size_t i = 0; i < 2; ++i ) { - triplets.push_back( Triplet( ip, idx[edge.idx[i]], w[edge.idx[i]] ) ); + else { + if( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) { + edge.swap(); + } + for( size_t i = 0; i < 2; ++i ) { + triplets.push_back( Triplet( ip, idx[edge.idx[i]], w[edge.idx[i]] ) ); + } } } else { @@ -408,12 +427,17 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd w[2] = is.u * is.v; w[3] = ( 1. - is.u ) * is.v; - if( on_quad_edge( edge ) ) { - if( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) { - edge.swap(); + if( on_quad_edge() ) { + if( on_single_point() ) { + triplets.push_back( Triplet( ip, idx[single_point], w[single_point] ) ); } - for( size_t i = 0; i < 2; ++i ) { - triplets.push_back( Triplet( ip, idx[edge.idx[i]], w[edge.idx[i]] ) ); + else { + if( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) { + edge.swap(); + } + for( size_t i = 0; i < 2; ++i ) { + triplets.push_back( Triplet( ip, idx[edge.idx[i]], w[edge.idx[i]] ) ); + } } } else { From caa760fddd7480e545ac9bbdc7963d5dea7461b9 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Fri, 8 Jun 2018 16:32:03 +0100 Subject: [PATCH 113/123] ATLAS-163 Recompute weights on edges --- src/atlas/interpolation/Interpolation.cc | 10 +++++- src/atlas/interpolation/element/Quad3D.h | 7 ++++ src/atlas/interpolation/element/Triag3D.h | 6 ++++ .../interpolation/method/FiniteElement.cc | 33 +++++++++++++------ 4 files changed, 45 insertions(+), 11 deletions(-) diff --git a/src/atlas/interpolation/Interpolation.cc b/src/atlas/interpolation/Interpolation.cc index 579211174..2247d867f 100644 --- a/src/atlas/interpolation/Interpolation.cc +++ b/src/atlas/interpolation/Interpolation.cc @@ 
-24,7 +24,15 @@ Interpolation::Interpolation( const Config& config, const FunctionSpace& source, Implementation* impl = interpolation::MethodFactory::build( type, config ); impl->setup( source, target ); return impl; - }() ) {} + }() ) { + + std::string path; + if( config.get( "output", path ) ) { + std::ofstream file( path ); + print( file ); + } + + } Interpolation::Interpolation( const Interpolation& other ) : implementation_( other.implementation_ ) {} diff --git a/src/atlas/interpolation/element/Quad3D.h b/src/atlas/interpolation/element/Quad3D.h index 6c5dd1753..122c97324 100644 --- a/src/atlas/interpolation/element/Quad3D.h +++ b/src/atlas/interpolation/element/Quad3D.h @@ -52,6 +52,13 @@ class Quad3D { return s; } + const Vector3D& p(int i) { + if(i==0) return v00; + if(i==1) return v10; + if(i==2) return v11; + if(i==3) return v01; + } + private: // members Vector3D v00; // aka v0 Vector3D v10; // aka v1 diff --git a/src/atlas/interpolation/element/Triag3D.h b/src/atlas/interpolation/element/Triag3D.h index 1bd758c23..d28fdfc31 100644 --- a/src/atlas/interpolation/element/Triag3D.h +++ b/src/atlas/interpolation/element/Triag3D.h @@ -55,6 +55,12 @@ class Triag3D { return s; } + const Vector3D& p(int i) { + if(i==0) return v0; + if(i==1) return v1; + if(i==2) return v2; + } + private: // members Vector3D v0; Vector3D v1; diff --git a/src/atlas/interpolation/method/FiniteElement.cc b/src/atlas/interpolation/method/FiniteElement.cc index 610ea53ba..7e94d64bb 100644 --- a/src/atlas/interpolation/method/FiniteElement.cc +++ b/src/atlas/interpolation/method/FiniteElement.cc @@ -9,6 +9,7 @@ */ #include +#include #include "atlas/interpolation/method/FiniteElement.h" @@ -152,6 +153,7 @@ void FiniteElement::print(std::ostream& out) const tgt.gather().gather( stencil_weights_loc, stencil_weights_glb ); if( mpi::comm().rank() == 0 ) { + int precision = std::numeric_limits::max_digits10; for( idx_t i=0; i Date: Mon, 18 Jun 2018 15:56:00 +0100 Subject: [PATCH 114/123] 
ATLAS-164 Array uses raw C pointer rather than std::vector and is initialised with signaling_NaN for debug builds --- CMakeLists.txt | 20 ++++++- src/CMakeLists.txt | 6 ++ src/atlas/array/native/NativeDataStore.h | 73 ++++++++++++++++-------- src/atlas/library/defines.h.in | 1 + src/atlas/output/detail/PointCloudIO.cc | 51 +++++++++++++---- src/tests/io/test_pointcloud_io.cc | 6 +- 6 files changed, 115 insertions(+), 42 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 917d6da60..bd0231591 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -191,15 +191,29 @@ set( ATLAS_BITS_GLOBAL 64 ) ### Bounds checking if( ${CMAKE_BUILD_TYPE} MATCHES "Debug" ) set( DEFAULT_BOUNDSCHECKING ON ) + set( DEFAULT_INIT_SNAN ON ) else() set( DEFAULT_BOUNDSCHECKING OFF ) + set( DEFAULT_INIT_SNAN OFF ) endif() + ecbuild_add_option( FEATURE BOUNDSCHECKING DEFAULT ${DEFAULT_BOUNDSCHECKING} DESCRIPTION "Bounds checking for atlas::ArrayView and atlas::IndexView" ) -if( ${CMAKE_BUILD_TYPE} MATCHES "Debug" AND NOT ATLAS_HAVE_BOUNDSCHECKING ) - ecbuild_info( "Turning BOUNDSCHECKING ON for Debug build" ) - set( ATLAS_HAVE_BOUNDSCHECKING 1 ) + +ecbuild_add_option( FEATURE INIT_SNAN + DEFAULT ${DEFAULT_INIT_SNAN} + DESCRIPTION "Initialise atlas arrays with signaling_NaN (real types) or other invalid values (other types)" ) + +if( ${CMAKE_BUILD_TYPE} MATCHES "Debug" ) + if( NOT ATLAS_HAVE_INIT_SNAN ) + ecbuild_info( "Turning INIT_SNAN ON for Debug build" ) + set( ATLAS_HAVE_INIT_SNAN 1 ) + endif() + if( NOT ATLAS_HAVE_BOUNDSCHECKING ) + ecbuild_info( "Turning BOUNDSCHECKING ON for Debug build" ) + set( ATLAS_HAVE_BOUNDSCHECKING 1 ) + endif() endif() ### sandbox diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 98a5e1dd0..8dd72c231 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -48,6 +48,12 @@ else() set( ATLAS_HAVE_BOUNDSCHECKING 0 ) endif() +if( ATLAS_HAVE_INIT_SNAN ) + set( ATLAS_HAVE_INIT_SNAN 1 ) +else() + set( ATLAS_HAVE_INIT_SNAN 0 ) +endif() + if( 
ATLAS_HAVE_GRIDTOOLS_STORAGE ) set( ATLAS_HAVE_GRIDTOOLS_STORAGE 1 ) else() diff --git a/src/atlas/array/native/NativeDataStore.h b/src/atlas/array/native/NativeDataStore.h index f85c76de9..cfc29cc9d 100644 --- a/src/atlas/array/native/NativeDataStore.h +++ b/src/atlas/array/native/NativeDataStore.h @@ -10,6 +10,9 @@ #pragma once + +#include // std::fill +#include // std::numeric_limits::signaling_NaN #include "atlas/array/ArrayUtil.h" #include "atlas/library/config.h" @@ -19,35 +22,57 @@ namespace atlas { namespace array { namespace native { +template +static constexpr Value invalid_value() { + return std::numeric_limits::has_signaling_NaN ? std::numeric_limits::signaling_NaN() : + std::numeric_limits::has_quiet_NaN ? std::numeric_limits::quiet_NaN() : + std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : + std::numeric_limits::max(); +} + +#if ATLAS_INIT_SNAN +template< typename Value > +void initialise( Value array[], size_t size ) { + std::fill_n( array, size, invalid_value() ); +} +#else +template< typename Value > void initialise( Value[], size_t ) {} +#endif + template class DataStore : public ArrayDataStore { public: - DataStore( size_t size ) : data_store_( size ) {} + DataStore( size_t size ) : data_store_( new Value[size] ), size_( size ) { + initialise( data_store_, size_ ); + } - void cloneToDevice() const {} + virtual ~DataStore() override { delete[] data_store_; } - void cloneFromDevice() const {} + virtual void cloneToDevice() const override {} - bool valid() const { return true; } + virtual void cloneFromDevice() const override {} - void syncHostDevice() const {} + virtual bool valid() const override { return true; } - bool hostNeedsUpdate() const { return false; } + virtual void syncHostDevice() const override {} - bool deviceNeedsUpdate() const { return false; } + virtual bool hostNeedsUpdate() const override { return false; } - void reactivateDeviceWriteViews() const {} + virtual bool deviceNeedsUpdate() const override { return 
false; } - void reactivateHostWriteViews() const {} + virtual void reactivateDeviceWriteViews() const override {} - void* voidDataStore() { return static_cast( &data_store_.front() ); } + virtual void reactivateHostWriteViews() const override {} - void* voidHostData() { return static_cast( &data_store_.front() ); } + virtual void* voidDataStore() override { return static_cast( data_store_ ); } - void* voidDeviceData() { return static_cast( &data_store_.front() ); } + virtual void* voidHostData() override { return static_cast( data_store_ ); } + + virtual void* voidDeviceData() override { return static_cast( data_store_ ); } private: - std::vector data_store_; + Value* data_store_; + size_t size_; }; //------------------------------------------------------------------------------ @@ -57,27 +82,27 @@ class WrappedDataStore : public ArrayDataStore { public: WrappedDataStore( Value* data_store ) : data_store_( data_store ) {} - void cloneToDevice() const {} + virtual void cloneToDevice() const override {} - void cloneFromDevice() const {} + virtual void cloneFromDevice() const override {} - bool valid() const { return true; } + virtual bool valid() const override { return true; } - void syncHostDevice() const {} + virtual void syncHostDevice() const override {} - bool hostNeedsUpdate() const { return true; } + virtual bool hostNeedsUpdate() const override { return true; } - bool deviceNeedsUpdate() const { return false; } + virtual bool deviceNeedsUpdate() const override { return false; } - void reactivateDeviceWriteViews() const {} + virtual void reactivateDeviceWriteViews() const override {} - void reactivateHostWriteViews() const {} + virtual void reactivateHostWriteViews() const override {} - void* voidDataStore() { return static_cast( data_store_ ); } + virtual void* voidDataStore() override { return static_cast( data_store_ ); } - void* voidHostData() { return static_cast( data_store_ ); } + virtual void* voidHostData() override { return static_cast( data_store_ 
); } - void* voidDeviceData() { return static_cast( data_store_ ); } + virtual void* voidDeviceData() override { return static_cast( data_store_ ); } private: Value* data_store_; diff --git a/src/atlas/library/defines.h.in b/src/atlas/library/defines.h.in index e644fc73e..e8050e1fe 100644 --- a/src/atlas/library/defines.h.in +++ b/src/atlas/library/defines.h.in @@ -14,6 +14,7 @@ #define ATLAS_BITS_GLOBAL @ATLAS_BITS_GLOBAL@ #define ATLAS_ARRAYVIEW_BOUNDS_CHECKING @ATLAS_HAVE_BOUNDSCHECKING@ #define ATLAS_INDEXVIEW_BOUNDS_CHECKING @ATLAS_HAVE_BOUNDSCHECKING@ +#define ATLAS_INIT_SNAN @ATLAS_HAVE_INIT_SNAN@ #define ATLAS_HAVE_GRIDTOOLS_STORAGE @ATLAS_HAVE_GRIDTOOLS_STORAGE@ #define ATLAS_GRIDTOOLS_STORAGE_BACKEND_HOST @ATLAS_GRIDTOOLS_STORAGE_BACKEND_HOST@ #define ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA @ATLAS_GRIDTOOLS_STORAGE_BACKEND_CUDA@ diff --git a/src/atlas/output/detail/PointCloudIO.cc b/src/atlas/output/detail/PointCloudIO.cc index d164b57a6..29da1f424 100644 --- a/src/atlas/output/detail/PointCloudIO.cc +++ b/src/atlas/output/detail/PointCloudIO.cc @@ -53,6 +53,8 @@ std::string sanitize_field_name( const std::string& s ) { Mesh PointCloudIO::read( const eckit::PathName& path, std::vector& vfnames ) { const std::string msg( "PointCloudIO::read: " ); + Log::debug() << "PointCloudIO reading " << path << std::endl; + Mesh mesh; vfnames.clear(); @@ -82,14 +84,17 @@ Mesh PointCloudIO::read( const eckit::PathName& path, std::vector& << ")"; throw eckit::BadParameter( errmsg.str(), Here() ); } - if ( nb_pts == 0 ) throw eckit::BadValue( msg + "invalid number of points (failed: nb_pts>0)" ); - if ( nb_columns < 2 ) throw eckit::BadValue( msg + "invalid number of columns (failed: nb_columns>=2)" ); + if ( nb_pts == 0 ) throw eckit::BadValue( msg + " invalid number of points (failed: nb_pts>0)" ); + if ( nb_columns < 2 ) throw eckit::BadValue( msg + " invalid number of columns (failed: nb_columns>=2)" ); mesh.nodes().resize( nb_pts ); mesh::Nodes& nodes = mesh.nodes(); 
array::ArrayView xy = array::make_view( nodes.xy() ); - + array::ArrayView lonlat = array::make_view( nodes.lonlat() ); + array::ArrayView glb_idx = array::make_view( nodes.global_index() ); + array::ArrayView part = array::make_view( nodes.partition() ); + part.assign(0); // header, part 2: // determine columns' labels // (check end of first line for possible column labels, starting from @@ -97,9 +102,10 @@ Mesh PointCloudIO::read( const eckit::PathName& path, std::vector& vfnames.resize( nb_columns ); for ( size_t j = 0; j < nb_columns; ++j ) { - oss.str( "column_" ); - oss << ( j + 1 ); - vfnames[j] = ( iss && iss >> line ) ? sanitize_field_name( line ) : oss.str(); + std::stringstream name; + name.str( "column_" ); + name << ( j + 1 ); + vfnames[j] = ( iss && iss >> line ) ? sanitize_field_name( line ) : name.str(); } // (preallocate data, and define fields without the first two columns @@ -109,7 +115,7 @@ Mesh PointCloudIO::read( const eckit::PathName& path, std::vector& std::vector> fields; for ( size_t j = 0; j < nb_fld; ++j ) { - fields.push_back( array::make_view( + fields.emplace_back( array::make_view( nodes.add( Field( vfnames[j], array::make_datatype(), array::make_shape( nb_pts ) ) ) ) ); } @@ -121,18 +127,25 @@ Mesh PointCloudIO::read( const eckit::PathName& path, std::vector& iss.str( line ); // NOTE always expects (lon,lat) order, maybe make it configurable? 
- iss >> xy( i, XX ) >> xy( i, YY ); - ; + PointXY pxy; + iss >> pxy.x() >>pxy.y(); + + xy(i,XX) = pxy.x(); + xy(i,YY) = pxy.y(); + lonlat(i,LON) = pxy.x(); + lonlat(i,LAT) = pxy.y(); + glb_idx(i) = i+1; + for ( j = 0; iss && j < nb_fld; ++j ) iss >> fields[j]( i ); if ( j < nb_fld ) { - oss << "invalid number of fields in data section, on line " << ( i + 1 ) << ", read " << j + oss << " Invalid number of fields in data section, on line " << ( i + 1 ) << ", read " << j << " fields, expected " << nb_fld << "."; throw eckit::BadValue( msg + oss.str() ); } } if ( i < nb_pts ) { - oss << "invalid number of lines in data section, read " << ( i ) << " lines, expected " << nb_pts << "."; + oss << " Invalid number of lines in data section, read " << ( i ) << " lines, expected " << nb_pts << "."; throw eckit::BadValue( msg + oss.str() ); } @@ -150,6 +163,8 @@ Mesh PointCloudIO::read( const eckit::PathName& path ) { void PointCloudIO::write( const eckit::PathName& path, const Mesh& mesh ) { const std::string msg( "PointCloudIO::write: " ); + Log::debug() << "PointCloudIO writing " << path << std::endl; + // operate in mesh function space, creating transversing data structures // @warning: several copy operations here @@ -200,13 +215,15 @@ void PointCloudIO::write( const eckit::PathName& path, const FieldSet& fieldset, const functionspace::NodeColumns& function_space ) { const std::string msg( "PointCloudIO::write: " ); + Log::debug() << "PointCloudIO writing " << path << std::endl; + // operate in field sets with same grid and consistent size(s), creating // transversing data structures // @warning: several copy operations here ASSERT( fieldset.size() ); - array::ArrayView lonlat = array::make_view( function_space.nodes().lonlat() ); + array::ArrayView lonlat = array::make_view( function_space.nodes().xy() ); if ( !lonlat.size() ) throw eckit::BadParameter( msg + "invalid number of points (failed: nb_pts>0)" ); // get the fields (sanitized) names and values @@ -247,6 +264,9 
@@ void PointCloudIO::write( const eckit::PathName& path, const FieldSet& fieldset, } void PointCloudIO::write( const eckit::PathName& path, const std::vector& pts ) { + + Log::debug() << "PointCloudIO writing " << path << std::endl; + std::ofstream f( path.asString().c_str() ); if ( !f.is_open() ) throw eckit::CantOpenFile( path.asString() ); @@ -262,6 +282,9 @@ void PointCloudIO::write( const eckit::PathName& path, const std::vector& lon, const std::vector& lat, const std::vector*>& vfvalues, const std::vector& vfnames ) { + + Log::debug() << "PointCloudIO writing " << path << std::endl; + const std::string msg( "PointCloudIO::write: " ); const size_t Npts( lon.size() ), Nfld( vfvalues.size() ); if ( Npts != lat.size() ) throw eckit::BadParameter( msg + "number of points inconsistent (failed: #lon == #lat)" ); @@ -295,6 +318,10 @@ void PointCloudIO::write( const eckit::PathName& path, const std::vector void PointCloudIO::write( const eckit::PathName& path, const int& nb_pts, const double* lon, const double* lat, const int& nb_fld, const double** afvalues, const char** afnames ) { + + Log::debug() << "PointCloudIO writing " << path << std::endl; + + const std::string msg( "PointCloudIO::write: " ); const size_t Npts( nb_pts > 0 ? nb_pts : 0 ), Nfld( nb_fld > 0 && afvalues && afnames ? 
nb_fld : 0 ); diff --git a/src/tests/io/test_pointcloud_io.cc b/src/tests/io/test_pointcloud_io.cc index 56c86dace..e45996df1 100644 --- a/src/tests/io/test_pointcloud_io.cc +++ b/src/tests/io/test_pointcloud_io.cc @@ -326,6 +326,8 @@ CASE( "write_read_write_field" ) { for ( size_t i = 0; i < field_data.size(); ++i ) { EXPECT( eckit::types::is_approximately_equal( funny_formula( i ), field_data( i ), 0.001 ) ); // 0.001% relative error + EXPECT( eckit::types::is_approximately_equal( funny_formula( i ), field_data( i ), + 0.001 ) ); // 0.001% relative error } // PART 4 @@ -338,6 +340,7 @@ CASE( "write_read_write_field" ) { EXPECT_NO_THROW( fieldset.add( field ) ); functionspace::NodeColumns functionspace( mesh ); + EXPECT_NO_THROW( output::detail::PointCloudIO::write( "pointcloud_FieldSet.txt", fieldset, functionspace ) ); EXPECT_NO_THROW( output::detail::PointCloudIO::write( "pointcloud_Grid.txt", mesh ) ); @@ -350,9 +353,6 @@ CASE( "write_read_write_field" ) { EXPECT( grid_from_FieldSet ); EXPECT( grid_from_Grid ); - // (guarantee different grid, to make checks useful) - EXPECT( grid != grid_from_FieldSet ); - EXPECT( grid != grid_from_Grid ); // PART 5 // compare reading of reference data to: From 67c202a8ec58d2c29ec50087ad5b842433aac572 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Mon, 18 Jun 2018 16:45:38 +0100 Subject: [PATCH 115/123] ATLAS-165 Fix intents in Fortran functions --- .../atlas_functionspace_NodeColumns_module.F90 | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/atlas_f/functionspace/atlas_functionspace_NodeColumns_module.F90 b/src/atlas_f/functionspace/atlas_functionspace_NodeColumns_module.F90 index 1cbc9d74d..b8bc99614 100644 --- a/src/atlas_f/functionspace/atlas_functionspace_NodeColumns_module.F90 +++ b/src/atlas_f/functionspace/atlas_functionspace_NodeColumns_module.F90 @@ -1846,8 +1846,8 @@ subroutine minloc_per_level(this,field,minimum,location) use atlas_functionspace_NodeColumns_c_binding 
class(atlas_functionspace_NodeColumns), intent(in) :: this type(atlas_Field), intent(in) :: field - type(atlas_Field), intent(out) :: minimum - type(atlas_Field), intent(out) :: location + type(atlas_Field), intent(inout) :: minimum + type(atlas_Field), intent(inout) :: location call atlas__NodesFunctionSpace__minloc_per_level(this%c_ptr(),field%c_ptr(),minimum%c_ptr(),location%c_ptr()) end subroutine @@ -1857,8 +1857,8 @@ subroutine maxloc_per_level(this,field,maximum,location) use atlas_functionspace_NodeColumns_c_binding class(atlas_functionspace_NodeColumns), intent(in) :: this type(atlas_Field), intent(in) :: field - type(atlas_Field), intent(out) :: maximum - type(atlas_Field), intent(out) :: location + type(atlas_Field), intent(inout) :: maximum + type(atlas_Field), intent(inout) :: location call atlas__NodesFunctionSpace__maxloc_per_level(this%c_ptr(),field%c_ptr(),maximum%c_ptr(),location%c_ptr()) end subroutine @@ -1868,7 +1868,7 @@ subroutine minimum_per_level(this,field,minimum) use atlas_functionspace_NodeColumns_c_binding class(atlas_functionspace_NodeColumns), intent(in) :: this type(atlas_Field), intent(in) :: field - type(atlas_Field), intent(out) :: minimum + type(atlas_Field), intent(inout) :: minimum call atlas__NodesFunctionSpace__min_per_level(this%c_ptr(),field%c_ptr(),minimum%c_ptr()) end subroutine @@ -1878,7 +1878,7 @@ subroutine maximum_per_level(this,field,maximum) use atlas_functionspace_NodeColumns_c_binding class(atlas_functionspace_NodeColumns), intent(in) :: this type(atlas_Field), intent(in) :: field - type(atlas_Field), intent(out) :: maximum + type(atlas_Field), intent(inout) :: maximum call atlas__NodesFunctionSpace__max_per_level(this%c_ptr(),field%c_ptr(),maximum%c_ptr()) end subroutine @@ -1889,7 +1889,7 @@ subroutine sum_per_level(this,field,sum,N) use, intrinsic :: iso_c_binding, only : c_int class(atlas_functionspace_NodeColumns), intent(in) :: this type(atlas_Field), intent(in) :: field - type(atlas_Field), intent(out) :: 
sum + type(atlas_Field), intent(inout) :: sum integer(c_int), intent(out), optional :: N integer(c_int) :: opt_N call atlas__NodesFunctionSpace__sum_per_level(this%c_ptr(),field%c_ptr(),sum%c_ptr(),opt_N) @@ -1903,7 +1903,7 @@ subroutine order_independent_sum_per_level(this,field,sum,N) use, intrinsic :: iso_c_binding, only : c_int class(atlas_functionspace_NodeColumns), intent(in) :: this type(atlas_Field), intent(in) :: field - type(atlas_Field), intent(out) :: sum + type(atlas_Field), intent(inout) :: sum integer(c_int), intent(out), optional :: N integer(c_int) :: opt_N call atlas__NodesFunctionSpace__oisum_per_level(this%c_ptr(),field%c_ptr(),sum%c_ptr(),opt_N) @@ -1917,7 +1917,7 @@ subroutine mean_per_level(this,field,mean,N) use, intrinsic :: iso_c_binding, only : c_int class(atlas_functionspace_NodeColumns), intent(in) :: this type(atlas_Field), intent(in) :: field - type(atlas_Field), intent(out) :: mean + type(atlas_Field), intent(inout) :: mean integer(c_int), intent(out), optional :: N integer(c_int) :: opt_N call atlas__NodesFunctionSpace__mean_per_level(this%c_ptr(),field%c_ptr(),mean%c_ptr(),opt_N) From c5bd183c2b84ca04ce8204dfee53fcb151c789dc Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Mon, 18 Jun 2018 16:48:20 +0100 Subject: [PATCH 116/123] Apply clang-format --- src/atlas/array/native/NativeDataStore.h | 23 +-- src/atlas/grid/Grid.cc | 2 +- src/atlas/grid/Grid.h | 2 +- src/atlas/interpolation/Interpolation.h | 3 +- .../interpolation/method/FiniteElement.cc | 143 +++++++++--------- src/atlas/interpolation/method/Method.cc | 2 +- src/atlas/interpolation/method/Method.h | 3 +- src/atlas/mesh/actions/BuildCellCentres.cc | 22 +-- src/atlas/mesh/actions/BuildDualMesh.cc | 2 +- src/atlas/mesh/actions/BuildEdges.cc | 2 +- src/atlas/mesh/actions/BuildHalo.cc | 26 ++-- src/atlas/mesh/actions/BuildParallelFields.cc | 19 ++- src/atlas/output/detail/PointCloudIO.cc | 25 ++- src/atlas/trans/local/TransLocal.cc | 15 +- 
src/atlas/trans/local/TransLocal.h | 2 +- .../atlas-parallel-interpolation.cc | 4 +- src/tests/acceptance_tests/atest_mgrids.cc | 64 ++++---- src/tests/trans/test_transgeneral.cc | 2 +- 18 files changed, 177 insertions(+), 184 deletions(-) diff --git a/src/atlas/array/native/NativeDataStore.h b/src/atlas/array/native/NativeDataStore.h index cfc29cc9d..d11dd7f49 100644 --- a/src/atlas/array/native/NativeDataStore.h +++ b/src/atlas/array/native/NativeDataStore.h @@ -11,8 +11,8 @@ #pragma once -#include // std::fill -#include // std::numeric_limits::signaling_NaN +#include // std::fill +#include // std::numeric_limits::signaling_NaN #include "atlas/array/ArrayUtil.h" #include "atlas/library/config.h" @@ -24,27 +24,28 @@ namespace native { template static constexpr Value invalid_value() { - return std::numeric_limits::has_signaling_NaN ? std::numeric_limits::signaling_NaN() : - std::numeric_limits::has_quiet_NaN ? std::numeric_limits::quiet_NaN() : - std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : - std::numeric_limits::max(); + return std::numeric_limits::has_signaling_NaN + ? std::numeric_limits::signaling_NaN() + : std::numeric_limits::has_quiet_NaN + ? std::numeric_limits::quiet_NaN() + : std::numeric_limits::has_infinity ? 
std::numeric_limits::infinity() + : std::numeric_limits::max(); } #if ATLAS_INIT_SNAN -template< typename Value > +template void initialise( Value array[], size_t size ) { std::fill_n( array, size, invalid_value() ); } #else -template< typename Value > void initialise( Value[], size_t ) {} +template +void initialise( Value[], size_t ) {} #endif template class DataStore : public ArrayDataStore { public: - DataStore( size_t size ) : data_store_( new Value[size] ), size_( size ) { - initialise( data_store_, size_ ); - } + DataStore( size_t size ) : data_store_( new Value[size] ), size_( size ) { initialise( data_store_, size_ ); } virtual ~DataStore() override { delete[] data_store_; } diff --git a/src/atlas/grid/Grid.cc b/src/atlas/grid/Grid.cc index a0c18922e..2a130116b 100644 --- a/src/atlas/grid/Grid.cc +++ b/src/atlas/grid/Grid.cc @@ -11,8 +11,8 @@ #include "atlas/grid/Grid.h" #include -#include #include +#include #include "eckit/config/Parametrisation.h" #include "eckit/exception/Exceptions.h" diff --git a/src/atlas/grid/Grid.h b/src/atlas/grid/Grid.h index 9f45b14b6..69916b108 100644 --- a/src/atlas/grid/Grid.h +++ b/src/atlas/grid/Grid.h @@ -316,7 +316,7 @@ class RegularGaussianGrid : public Gaussian { public: using grid_t::grid_t; - RegularGaussianGrid( int N , const Domain& = Domain() ); + RegularGaussianGrid( int N, const Domain& = Domain() ); inline double lon( size_t i ) const { return x( i ); } diff --git a/src/atlas/interpolation/Interpolation.h b/src/atlas/interpolation/Interpolation.h index 8575159d7..2e9468c8f 100644 --- a/src/atlas/interpolation/Interpolation.h +++ b/src/atlas/interpolation/Interpolation.h @@ -39,7 +39,8 @@ class Interpolation { operator bool() const { return implementation_; } - void print( std::ostream& out ) const { implementation_->print(out); } + void print( std::ostream& out ) const { implementation_->print( out ); } + private: eckit::SharedPtr implementation_; }; diff --git a/src/atlas/interpolation/method/FiniteElement.cc 
b/src/atlas/interpolation/method/FiniteElement.cc index e5f433b5e..975c06a36 100644 --- a/src/atlas/interpolation/method/FiniteElement.cc +++ b/src/atlas/interpolation/method/FiniteElement.cc @@ -12,12 +12,12 @@ #include "atlas/interpolation/method/FiniteElement.h" +#include "eckit/exception/Exceptions.h" #include "eckit/geometry/Point3.h" #include "eckit/log/Plural.h" #include "eckit/log/ProgressTimer.h" #include "eckit/log/Seconds.h" #include "eckit/mpi/Comm.h" -#include "eckit/exception/Exceptions.h" #include "atlas/functionspace/NodeColumns.h" #include "atlas/functionspace/PointCloud.h" @@ -28,13 +28,13 @@ #include "atlas/mesh/Nodes.h" #include "atlas/mesh/actions/BuildCellCentres.h" #include "atlas/mesh/actions/BuildXYZField.h" +#include "atlas/parallel/GatherScatter.h" +#include "atlas/parallel/mpi/Buffer.h" #include "atlas/runtime/Log.h" #include "atlas/runtime/Trace.h" #include "atlas/util/CoordinateEnums.h" #include "atlas/util/Earth.h" #include "atlas/util/Point.h" -#include "atlas/parallel/mpi/Buffer.h" -#include "atlas/parallel/GatherScatter.h" namespace atlas { @@ -86,50 +86,50 @@ void FiniteElement::setup( const FunctionSpace& source, const FunctionSpace& tar } struct Stencil { - enum { max_stencil_size = 4 }; + enum + { + max_stencil_size = 4 + }; Stencil() { - g = -1; + g = -1; size = 0; } void add( gidx_t tgt, gidx_t src, double weight ) { - if( g >= 0 ) { - ASSERT( tgt == g ); - } - g = tgt; - size_t i=size; - source[i] = src; + if ( g >= 0 ) { ASSERT( tgt == g ); } + g = tgt; + size_t i = size; + source[i] = src; weights[i] = weight; ++size; } gidx_t g; - std::array source; - std::array weights; + std::array source; + std::array weights; size_t size; }; -void FiniteElement::print(std::ostream& out) const -{ - functionspace::NodeColumns src (source_); - functionspace::NodeColumns tgt (target_); - if( not tgt ) NOTIMP; - auto gidx_src = array::make_view( src.nodes().global_index() ); +void FiniteElement::print( std::ostream& out ) const { + 
functionspace::NodeColumns src( source_ ); + functionspace::NodeColumns tgt( target_ ); + if ( not tgt ) NOTIMP; + auto gidx_src = array::make_view( src.nodes().global_index() ); ASSERT( tgt.nodes().size() == matrix_.rows() ); - auto field_stencil_points_loc = tgt.createField( option::variables(Stencil::max_stencil_size) ); - auto field_stencil_weigths_loc = tgt.createField( option::variables(Stencil::max_stencil_size) ); - auto field_stencil_size_loc = tgt.createField(); + auto field_stencil_points_loc = tgt.createField( option::variables( Stencil::max_stencil_size ) ); + auto field_stencil_weigths_loc = tgt.createField( option::variables( Stencil::max_stencil_size ) ); + auto field_stencil_size_loc = tgt.createField(); - auto stencil_points_loc = array::make_view( field_stencil_points_loc ); - auto stencil_weights_loc = array::make_view( field_stencil_weigths_loc ); - auto stencil_size_loc = array::make_view( field_stencil_size_loc ); - stencil_size_loc.assign(0); + auto stencil_points_loc = array::make_view( field_stencil_points_loc ); + auto stencil_weights_loc = array::make_view( field_stencil_weigths_loc ); + auto stencil_size_loc = array::make_view( field_stencil_size_loc ); + stencil_size_loc.assign( 0 ); - for( Matrix::const_iterator it = matrix_.begin(); it!=matrix_.end(); ++it ) { - int p = it.row(); - int& i = stencil_size_loc( p ); - stencil_points_loc( p, i ) = gidx_src( it.col() ); + for ( Matrix::const_iterator it = matrix_.begin(); it != matrix_.end(); ++it ) { + int p = it.row(); + int& i = stencil_size_loc( p ); + stencil_points_loc( p, i ) = gidx_src( it.col() ); stencil_weights_loc( p, i ) = *it; ++i; } @@ -137,31 +137,32 @@ void FiniteElement::print(std::ostream& out) const size_t global_size = tgt.gather().glb_dof(); - auto field_stencil_points_glb = tgt.createField( option::variables(Stencil::max_stencil_size) | option::global(0) ); - auto field_stencil_weights_glb = tgt.createField( option::variables(Stencil::max_stencil_size) | 
option::global(0) ); - auto field_stencil_size_glb = tgt.createField( option::global(0) ); - + auto field_stencil_points_glb = + tgt.createField( option::variables( Stencil::max_stencil_size ) | option::global( 0 ) ); + auto field_stencil_weights_glb = + tgt.createField( option::variables( Stencil::max_stencil_size ) | option::global( 0 ) ); + auto field_stencil_size_glb = tgt.createField( option::global( 0 ) ); - auto stencil_points_glb = array::make_view( field_stencil_points_glb ); - auto stencil_weights_glb = array::make_view( field_stencil_weights_glb ); - auto stencil_size_glb = array::make_view( field_stencil_size_glb ); + auto stencil_points_glb = array::make_view( field_stencil_points_glb ); + auto stencil_weights_glb = array::make_view( field_stencil_weights_glb ); + auto stencil_size_glb = array::make_view( field_stencil_size_glb ); tgt.gather().gather( stencil_size_loc, stencil_size_glb ); tgt.gather().gather( stencil_points_loc, stencil_points_glb ); tgt.gather().gather( stencil_weights_loc, stencil_weights_glb ); - if( mpi::comm().rank() == 0 ) { - for( idx_t i=0; i eTree( create_element_kdtree( cell_centres ) ); @@ -244,7 +245,7 @@ void FiniteElement::setup( const FunctionSpace& source ) { "---------------------\n"; PointLonLat pll; util::Earth::convertCartesianToSpherical( p, pll ); - if( pll.lon() < 0 ) pll.lon() += 360.; + if ( pll.lon() < 0 ) pll.lon() += 360.; Log::debug() << "Failed to project point (lon,lat)=" << pll << '\n'; Log::debug() << failures_log.str(); } @@ -261,7 +262,7 @@ void FiniteElement::setup( const FunctionSpace& source ) { const PointXYZ p{( *ocoords_ )( *i, 0 ), ( *ocoords_ )( *i, 1 ), ( *ocoords_ )( *i, 2 )}; // lookup point PointLonLat pll; util::Earth::convertCartesianToSpherical( p, pll ); - if( pll.lon() < 0 ) pll.lon() += 360.; + if ( pll.lon() < 0 ) pll.lon() += 360.; msg << "\t(lon,lat) = " << pll << "\n"; } @@ -275,11 +276,11 @@ void FiniteElement::setup( const FunctionSpace& source ) { } struct ElementEdge { - 
std::array idx; + std::array idx; void swap() { idx_t tmp = idx[0]; - idx[0] = idx[1]; - idx[1] = tmp; + idx[0] = idx[1]; + idx[1] = tmp; } }; @@ -288,8 +289,8 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd ASSERT( elems.begin() != elems.end() ); const size_t inp_points = icoords_->shape( 0 ); - std::array idx; - std::array w; + std::array idx; + std::array w; Triplets triplets; Ray ray( PointXYZ{( *ocoords_ )( ip, 0 ), ( *ocoords_ )( ip, 1 ), ( *ocoords_ )( ip, 2 )} ); @@ -306,18 +307,18 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd ASSERT( idx[i] < inp_points ); } - auto on_triag_edge = [&](ElementEdge& edge) { - if( w[0] < 1.e-15 ) { + auto on_triag_edge = [&]( ElementEdge& edge ) { + if ( w[0] < 1.e-15 ) { edge.idx[0] = 1; edge.idx[1] = 2; return true; } - if( w[1] < 1.e-15 ) { + if ( w[1] < 1.e-15 ) { edge.idx[0] = 0; edge.idx[1] = 2; return true; } - if( w[2] < 1.e-15 ) { + if ( w[2] < 1.e-15 ) { edge.idx[0] = 0; edge.idx[1] = 1; return true; @@ -325,23 +326,23 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd return false; }; - auto on_quad_edge = [&](ElementEdge& edge) { - if( w[0] < 1.e-15 && w[1] < 1.e-15 ) { + auto on_quad_edge = [&]( ElementEdge& edge ) { + if ( w[0] < 1.e-15 && w[1] < 1.e-15 ) { edge.idx[0] = 2; edge.idx[1] = 3; return true; } - if( w[1] < 1.e-15 && w[2] < 1.e-15 ) { + if ( w[1] < 1.e-15 && w[2] < 1.e-15 ) { edge.idx[0] = 0; edge.idx[1] = 3; return true; } - if( w[2] < 1.e-15 && w[3] < 1.e-15 ) { + if ( w[2] < 1.e-15 && w[3] < 1.e-15 ) { edge.idx[0] = 0; edge.idx[1] = 1; return true; } - if( w[3] < 1.e-15 && w[0] < 1.e-15 ) { + if ( w[3] < 1.e-15 && w[0] < 1.e-15 ) { edge.idx[0] = 1; edge.idx[1] = 2; return true; @@ -369,11 +370,9 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd w[1] = is.u; w[2] = is.v; - if( on_triag_edge( edge) ) { - if( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( 
idx[edge.idx[0]] ) ) { - edge.swap(); - } - for( size_t i = 0; i < 2; ++i ) { + if ( on_triag_edge( edge ) ) { + if ( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) { edge.swap(); } + for ( size_t i = 0; i < 2; ++i ) { triplets.push_back( Triplet( ip, idx[edge.idx[i]], w[edge.idx[i]] ) ); } } @@ -408,11 +407,9 @@ Method::Triplets FiniteElement::projectPointToElements( size_t ip, const ElemInd w[2] = is.u * is.v; w[3] = ( 1. - is.u ) * is.v; - if( on_quad_edge( edge ) ) { - if( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) { - edge.swap(); - } - for( size_t i = 0; i < 2; ++i ) { + if ( on_quad_edge( edge ) ) { + if ( ( *igidx_ )( idx[edge.idx[1]] ) < ( *igidx_ )( idx[edge.idx[0]] ) ) { edge.swap(); } + for ( size_t i = 0; i < 2; ++i ) { triplets.push_back( Triplet( ip, idx[edge.idx[i]], w[edge.idx[i]] ) ); } } diff --git a/src/atlas/interpolation/method/Method.cc b/src/atlas/interpolation/method/Method.cc index c4d46dd93..f51c31134 100644 --- a/src/atlas/interpolation/method/Method.cc +++ b/src/atlas/interpolation/method/Method.cc @@ -23,9 +23,9 @@ #include "atlas/field/Field.h" #include "atlas/field/FieldSet.h" #include "atlas/functionspace/NodeColumns.h" +#include "atlas/mesh/Nodes.h" #include "atlas/runtime/Log.h" #include "atlas/runtime/Trace.h" -#include "atlas/mesh/Nodes.h" // for static linking #include "FiniteElement.h" diff --git a/src/atlas/interpolation/method/Method.h b/src/atlas/interpolation/method/Method.h index 34d62965a..0f57609f5 100644 --- a/src/atlas/interpolation/method/Method.h +++ b/src/atlas/interpolation/method/Method.h @@ -10,9 +10,9 @@ #pragma once +#include #include #include -#include #include "eckit/config/Configuration.h" #include "eckit/linalg/SparseMatrix.h" @@ -61,7 +61,6 @@ class Method : public eckit::Owned { // so do not expose here, even though only linear operators are now // implemented. 
Matrix matrix_; - }; struct MethodFactory { diff --git a/src/atlas/mesh/actions/BuildCellCentres.cc b/src/atlas/mesh/actions/BuildCellCentres.cc index 2c2e1f773..2d23c36e9 100644 --- a/src/atlas/mesh/actions/BuildCellCentres.cc +++ b/src/atlas/mesh/actions/BuildCellCentres.cc @@ -29,19 +29,18 @@ namespace actions { BuildCellCentres::BuildCellCentres( const std::string& field_name, bool force_recompute ) : field_name_( field_name ), force_recompute_( force_recompute ), - flatten_virtual_elements_( true ) { -} + flatten_virtual_elements_( true ) {} BuildCellCentres::BuildCellCentres( eckit::Configuration& config ) : field_name_( config.getString( "name", "centre" ) ), force_recompute_( config.getBool( "force_recompute", false ) ), - flatten_virtual_elements_( config.getBool( "flatten_virtual_elements", true) ) { -} + flatten_virtual_elements_( config.getBool( "flatten_virtual_elements", true ) ) {} Field& BuildCellCentres::operator()( Mesh& mesh ) const { bool recompute = force_recompute_; if ( !mesh.cells().has_field( field_name_ ) ) { - mesh.cells().add( Field( field_name_, array::make_datatype(), array::make_shape( mesh.cells().size(), 3 ) ) ); + mesh.cells().add( + Field( field_name_, array::make_datatype(), array::make_shape( mesh.cells().size(), 3 ) ) ); recompute = true; } if ( recompute ) { @@ -51,8 +50,8 @@ Field& BuildCellCentres::operator()( Mesh& mesh ) const { size_t firstVirtualPoint = std::numeric_limits::max(); if ( nodes.metadata().has( "NbRealPts" ) ) { firstVirtualPoint = nodes.metadata().get( "NbRealPts" ); } - size_t nb_cells = mesh.cells().size(); - auto centroids = array::make_view( mesh.cells().field( field_name_ ) ); + size_t nb_cells = mesh.cells().size(); + auto centroids = array::make_view( mesh.cells().field( field_name_ ) ); const mesh::HybridElements::Connectivity& cell_node_connectivity = mesh.cells().node_connectivity(); for ( size_t e = 0; e < nb_cells; ++e ) { @@ -82,7 +81,7 @@ Field& BuildCellCentres::operator()( Mesh& mesh ) const 
{ int nb_unique_nodes = int( nb_cell_nodes ) - nb_equal_nodes; if ( nb_unique_nodes < 3 ) { continue; } - if( flatten_virtual_elements_ ) { + if ( flatten_virtual_elements_ ) { // calculate centroid by averaging coordinates (uses only "real" nodes) size_t nb_real_nodes = 0; for ( size_t n = 0; n < nb_cell_nodes; ++n ) { @@ -101,11 +100,12 @@ Field& BuildCellCentres::operator()( Mesh& mesh ) const { centroids( e, YY ) *= average_coefficient; centroids( e, ZZ ) *= average_coefficient; } - } else { - const double average_coefficient = 1./ static_cast( nb_cell_nodes ); + } + else { + const double average_coefficient = 1. / static_cast( nb_cell_nodes ); for ( size_t n = 0; n < nb_cell_nodes; ++n ) { const size_t i = size_t( cell_node_connectivity( e, n ) ); - for ( size_t d=0; d<3; ++d ) { + for ( size_t d = 0; d < 3; ++d ) { centroids( e, d ) += coords( i, d ) * average_coefficient; } } diff --git a/src/atlas/mesh/actions/BuildDualMesh.cc b/src/atlas/mesh/actions/BuildDualMesh.cc index 2fe9fb693..68543f879 100644 --- a/src/atlas/mesh/actions/BuildDualMesh.cc +++ b/src/atlas/mesh/actions/BuildDualMesh.cc @@ -167,7 +167,7 @@ void add_median_dual_volume_contribution_cells( const mesh::HybridElements& cell const array::ArrayView edge_centroids = array::make_view( edges.field( "centroids_xy" ) ); const mesh::HybridElements::Connectivity& cell_edge_connectivity = cells.edge_connectivity(); const mesh::HybridElements::Connectivity& edge_node_connectivity = edges.node_connectivity(); - auto field_flags = array::make_view( cells.flags() ); + auto field_flags = array::make_view( cells.flags() ); auto patch = [&field_flags]( size_t e ) { using Topology = atlas::mesh::Nodes::Topology; diff --git a/src/atlas/mesh/actions/BuildEdges.cc b/src/atlas/mesh/actions/BuildEdges.cc index d06296268..5a6067ce2 100644 --- a/src/atlas/mesh/actions/BuildEdges.cc +++ b/src/atlas/mesh/actions/BuildEdges.cc @@ -116,7 +116,7 @@ void build_element_to_edge_connectivity( Mesh& mesh ) { // Verify that 
all edges have been found auto field_flags = array::make_view( mesh.cells().flags() ); - auto patch = [&field_flags]( size_t e ) { + auto patch = [&field_flags]( size_t e ) { using Topology = atlas::mesh::Nodes::Topology; return Topology::check( field_flags( e ), Topology::PATCH ); }; diff --git a/src/atlas/mesh/actions/BuildHalo.cc b/src/atlas/mesh/actions/BuildHalo.cc index 3fb385ceb..deaefea4a 100644 --- a/src/atlas/mesh/actions/BuildHalo.cc +++ b/src/atlas/mesh/actions/BuildHalo.cc @@ -199,16 +199,16 @@ void make_cells_global_index_human_readable( const mesh::actions::BuildHalo& bui } } else { - size_t nb_cells_to_edit(0); - for( const auto& new_cells : build_halo.periodic_cells_local_index_ ) { + size_t nb_cells_to_edit( 0 ); + for ( const auto& new_cells : build_halo.periodic_cells_local_index_ ) { nb_cells_to_edit += new_cells.size(); } cells_to_edit.resize( nb_cells_to_edit ); - int c{ 0 }; - int i{ 0 }; + int c{0}; + int i{0}; for ( int t = 0; t < cells.nb_types(); ++t ) { for ( idx_t p : build_halo.periodic_cells_local_index_[t] ) { - cells_to_edit[i++] = c + p; + cells_to_edit[i++] = c + p; } c += cells.elements( t ).size(); } @@ -308,7 +308,7 @@ void build_lookup_node2elem( const Mesh& mesh, Node2Elem& node2elem ) { const mesh::HybridElements::Connectivity& elem_nodes = mesh.cells().node_connectivity(); auto field_flags = array::make_view( mesh.cells().flags() ); - auto patched = [&field_flags]( size_t e ) { + auto patched = [&field_flags]( size_t e ) { using Topology = atlas::mesh::Nodes::Topology; return Topology::check( field_flags( e ), Topology::PATCH ); }; @@ -699,7 +699,7 @@ class BuildHaloHelper { buf.elem_glb_idx[p][jelem] = elem_glb_idx( ielem ); buf.elem_part[p][jelem] = elem_part( ielem ); Topology::set( buf.elem_flags[p][jelem], elem_flags( ielem ) ); - buf.elem_type[p][jelem] = mesh.cells().type_idx( ielem ); + buf.elem_type[p][jelem] = mesh.cells().type_idx( ielem ); for ( size_t jnode = 0; jnode < elem_nodes->cols( ielem ); ++jnode ) 
buf.elem_nodes_id[p][jelemnode++] = compute_uid( ( *elem_nodes )( ielem, jnode ) ); } @@ -762,7 +762,7 @@ class BuildHaloHelper { size_t ielem = elems[jelem]; buf.elem_part[p][jelem] = elem_part( ielem ); Topology::set( buf.elem_flags[p][jelem], elem_flags( ielem ) | newflags ); - buf.elem_type[p][jelem] = mesh.cells().type_idx( ielem ); + buf.elem_type[p][jelem] = mesh.cells().type_idx( ielem ); std::vector crds( elem_nodes->cols( ielem ) * 2 ); for ( size_t jnode = 0; jnode < elem_nodes->cols( ielem ); ++jnode ) { double crd[] = {xy( ( *elem_nodes )( ielem, jnode ), XX ), xy( ( *elem_nodes )( ielem, jnode ), YY )}; @@ -853,8 +853,8 @@ class BuildHaloHelper { lonlat( loc_idx, XX ) = pll.lon(); lonlat( loc_idx, YY ) = pll.lat(); - if ( Topology::check( flags( loc_idx ), Topology::PERIODIC ) and not - Topology::check( flags( loc_idx ), Topology::BC ) ) { + if ( Topology::check( flags( loc_idx ), Topology::PERIODIC ) and + not Topology::check( flags( loc_idx ), Topology::BC ) ) { status.new_periodic_ghost_points.push_back( loc_idx ); } @@ -970,7 +970,7 @@ class BuildHaloHelper { loc_idx, n, uid2node[buf.elem_nodes_id[jpart][buf.elem_nodes_displs[jpart][jelem] + n]] ); } - if( Topology::check( elem_type_flags( loc_idx ), Topology::PERIODIC ) ) { + if ( Topology::check( elem_type_flags( loc_idx ), Topology::PERIODIC ) ) { status.new_periodic_ghost_cells[t].push_back( old_size + new_elem ); } ++new_elem; @@ -1230,7 +1230,7 @@ void increase_halo_periodic( BuildHaloHelper& helper, const PeriodicPoints& peri helper.add_buffers( recvmesh ); } -BuildHalo::BuildHalo(Mesh& mesh) : mesh_( mesh ), periodic_cells_local_index_( mesh.cells().nb_types() ) {} +BuildHalo::BuildHalo( Mesh& mesh ) : mesh_( mesh ), periodic_cells_local_index_( mesh.cells().nb_types() ) {} void BuildHalo::operator()( int nb_elems ) { ATLAS_TRACE( "BuildHalo" ); @@ -1275,7 +1275,7 @@ void BuildHalo::operator()( int nb_elems ) { } for ( int t = 0; t < mesh_.cells().nb_types(); ++t ) { for ( idx_t p : 
helper.status.new_periodic_ghost_cells[t] ) { - periodic_cells_local_index_[t].push_back( p ); + periodic_cells_local_index_[t].push_back( p ); } } diff --git a/src/atlas/mesh/actions/BuildParallelFields.cc b/src/atlas/mesh/actions/BuildParallelFields.cc index 120a4f86a..be297b962 100644 --- a/src/atlas/mesh/actions/BuildParallelFields.cc +++ b/src/atlas/mesh/actions/BuildParallelFields.cc @@ -413,9 +413,7 @@ Field& build_edges_partition( Mesh& mesh ) { // if( not domain_bdry(jedge) ) { bdry_edges.push_back( edge_glb_idx( jedge ) ); p = elem_part( elem1 ); - if( pn1 != p && pn2 == pn1 && elem_halo( elem1 ) > 0 ) { - p = pn1; - } + if ( pn1 != p && pn2 == pn1 && elem_halo( elem1 ) > 0 ) { p = pn1; } // } } else if ( p != elem_part( elem1 ) && p != elem_part( elem2 ) ) { @@ -508,12 +506,12 @@ Field& build_edges_partition( Mesh& mesh ) { if ( edge_is_partition_boundary ) { if ( not edge_partition_is_same_as_one_of_nodes ) { if ( elem1 != edge_to_elem.missing_value() ) { - Log::error() << "[" << mypart << "] " << EDGE( jedge ) << " [p" << p << "] is not correct elem1[p" << elem_part( elem1 ) - << "]" << std::endl; + Log::error() << "[" << mypart << "] " << EDGE( jedge ) << " [p" << p << "] is not correct elem1[p" + << elem_part( elem1 ) << "]" << std::endl; } else { - Log::error() << "[" << mypart << "] " << EDGE( jedge ) << " [p" << p << "] is not correct elem2[p" << elem_part( elem2 ) - << "]" << std::endl; + Log::error() << "[" << mypart << "] " << EDGE( jedge ) << " [p" << p << "] is not correct elem2[p" + << elem_part( elem2 ) << "]" << std::endl; } insane = 1; } @@ -665,13 +663,14 @@ Field& build_edges_remote_idx( Mesh& mesh ) { else { std::stringstream msg; #ifdef DEBUGGING_PARFIELDS - msg << "Edge(" << recv_edge[ jedge * varsize + 2 ] << "[p" << recv_edge[ jedge * varsize + 4 ] << "] " - << recv_edge[ jedge *varsize + 3 ] << "[p" << recv_edge[ jedge * varsize + 5 ] << "])"; + msg << "Edge(" << recv_edge[jedge * varsize + 2] << "[p" << recv_edge[jedge * varsize 
+ 4] << "] " + << recv_edge[jedge * varsize + 3] << "[p" << recv_edge[jedge * varsize + 5] << "])"; #else msg << "Edge with uid " << recv_uid; #endif msg << " requested by rank [" << jpart << "]"; - msg << " that should be owned by " << mpi::comm().rank() << " is not found. This could be because no " + msg << " that should be owned by " << mpi::comm().rank() + << " is not found. This could be because no " "halo was built."; // throw eckit::SeriousBug(msg.str(),Here()); Log::warning() << msg.str() << " @ " << Here() << std::endl; diff --git a/src/atlas/output/detail/PointCloudIO.cc b/src/atlas/output/detail/PointCloudIO.cc index 29da1f424..1e24441d7 100644 --- a/src/atlas/output/detail/PointCloudIO.cc +++ b/src/atlas/output/detail/PointCloudIO.cc @@ -89,12 +89,12 @@ Mesh PointCloudIO::read( const eckit::PathName& path, std::vector& mesh.nodes().resize( nb_pts ); - mesh::Nodes& nodes = mesh.nodes(); - array::ArrayView xy = array::make_view( nodes.xy() ); - array::ArrayView lonlat = array::make_view( nodes.lonlat() ); + mesh::Nodes& nodes = mesh.nodes(); + array::ArrayView xy = array::make_view( nodes.xy() ); + array::ArrayView lonlat = array::make_view( nodes.lonlat() ); array::ArrayView glb_idx = array::make_view( nodes.global_index() ); - array::ArrayView part = array::make_view( nodes.partition() ); - part.assign(0); + array::ArrayView part = array::make_view( nodes.partition() ); + part.assign( 0 ); // header, part 2: // determine columns' labels // (check end of first line for possible column labels, starting from @@ -128,13 +128,13 @@ Mesh PointCloudIO::read( const eckit::PathName& path, std::vector& // NOTE always expects (lon,lat) order, maybe make it configurable? 
PointXY pxy; - iss >> pxy.x() >>pxy.y(); + iss >> pxy.x() >> pxy.y(); - xy(i,XX) = pxy.x(); - xy(i,YY) = pxy.y(); - lonlat(i,LON) = pxy.x(); - lonlat(i,LAT) = pxy.y(); - glb_idx(i) = i+1; + xy( i, XX ) = pxy.x(); + xy( i, YY ) = pxy.y(); + lonlat( i, LON ) = pxy.x(); + lonlat( i, LAT ) = pxy.y(); + glb_idx( i ) = i + 1; for ( j = 0; iss && j < nb_fld; ++j ) iss >> fields[j]( i ); @@ -264,7 +264,6 @@ void PointCloudIO::write( const eckit::PathName& path, const FieldSet& fieldset, } void PointCloudIO::write( const eckit::PathName& path, const std::vector& pts ) { - Log::debug() << "PointCloudIO writing " << path << std::endl; std::ofstream f( path.asString().c_str() ); @@ -282,7 +281,6 @@ void PointCloudIO::write( const eckit::PathName& path, const std::vector& lon, const std::vector& lat, const std::vector*>& vfvalues, const std::vector& vfnames ) { - Log::debug() << "PointCloudIO writing " << path << std::endl; const std::string msg( "PointCloudIO::write: " ); @@ -318,7 +316,6 @@ void PointCloudIO::write( const eckit::PathName& path, const std::vector void PointCloudIO::write( const eckit::PathName& path, const int& nb_pts, const double* lon, const double* lat, const int& nb_fld, const double** afvalues, const char** afnames ) { - Log::debug() << "PointCloudIO writing " << path << std::endl; diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc index 1e3520557..03b4a0802 100644 --- a/src/atlas/trans/local/TransLocal.cc +++ b/src/atlas/trans/local/TransLocal.cc @@ -230,7 +230,7 @@ struct FFTW_Data { std::vector plans; #endif }; -} +} // namespace detail // -------------------------------------------------------------------------------------------------------------------- @@ -495,8 +495,8 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma { ATLAS_TRACE( "Fourier precomputations (FFTW)" ); int num_complex = ( nlonsMaxGlobal_ / 2 ) + 1; - fftw_->in = fftw_alloc_complex( nlats * num_complex ); - 
fftw_->out = fftw_alloc_real( nlats * nlonsMaxGlobal_ ); + fftw_->in = fftw_alloc_complex( nlats * num_complex ); + fftw_->out = fftw_alloc_real( nlats * nlonsMaxGlobal_ ); if ( fft_cache_ ) { Log::debug() << "Import FFTW wisdom from cache" << std::endl; @@ -516,8 +516,9 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma // if ( wisdomString.length() > 0 ) { fftw_import_wisdom_from_string( &wisdomString[0u] ); } if ( grid::RegularGrid( gridGlobal_ ) ) { fftw_->plans.resize( 1 ); - fftw_->plans[0] = fftw_plan_many_dft_c2r( 1, &nlonsMaxGlobal_, nlats, fftw_->in, NULL, 1, num_complex, - fftw_->out, NULL, 1, nlonsMaxGlobal_, FFTW_ESTIMATE ); + fftw_->plans[0] = + fftw_plan_many_dft_c2r( 1, &nlonsMaxGlobal_, nlats, fftw_->in, NULL, 1, num_complex, fftw_->out, + NULL, 1, nlonsMaxGlobal_, FFTW_ESTIMATE ); } else { fftw_->plans.resize( nlatsLegDomain_ ); @@ -546,7 +547,7 @@ TransLocal::TransLocal( const Cache& cache, const Grid& grid, const Domain& doma // write.close(); // } } - // other FFT implementations should be added with #elif statements + // other FFT implementations should be added with #elif statements #else useFFT_ = false; // no FFT implemented => default to dgemm std::string file_path = TransParameters( config ).write_fft(); @@ -981,7 +982,7 @@ void TransLocal::invtrans_fourier_reduced( const int nlats, const grid::Structur for ( int jlat = 0; jlat < nlats; jlat++ ) { int idx = 0; //Log::info() << jlat << "in:" << std::endl; - int num_complex = ( nlonsGlobal_[jlat] / 2 ) + 1; + int num_complex = ( nlonsGlobal_[jlat] / 2 ) + 1; fftw_->in[idx++][0] = scl_fourier[posMethod( jfld, 0, jlat, 0, nb_fields, nlats )]; //Log::info() << fftw_->in[0][0] << " "; for ( int jm = 1; jm < num_complex; jm++, idx++ ) { diff --git a/src/atlas/trans/local/TransLocal.h b/src/atlas/trans/local/TransLocal.h index 9d3d69140..4e66473ee 100644 --- a/src/atlas/trans/local/TransLocal.h +++ b/src/atlas/trans/local/TransLocal.h @@ -10,8 +10,8 @@ #pragma 
once -#include #include +#include #include "atlas/array.h" #include "atlas/grid/Grid.h" diff --git a/src/sandbox/interpolation/atlas-parallel-interpolation.cc b/src/sandbox/interpolation/atlas-parallel-interpolation.cc index 6a4efe4ef..18bad95c6 100644 --- a/src/sandbox/interpolation/atlas-parallel-interpolation.cc +++ b/src/sandbox/interpolation/atlas-parallel-interpolation.cc @@ -152,9 +152,7 @@ void AtlasParallelInterpolation::execute( const AtlasTool::Args& args ) { Interpolation( option::type( interpolation_method ), tgt_functionspace, src_functionspace ); } - if( args.getBool( "forward-interpolator-output", false ) ) { - interpolator_forward.print( Log::info() ); - } + if ( args.getBool( "forward-interpolator-output", false ) ) { interpolator_forward.print( Log::info() ); } // Create source FunctionSpace and fields diff --git a/src/tests/acceptance_tests/atest_mgrids.cc b/src/tests/acceptance_tests/atest_mgrids.cc index c71df090f..837b782f5 100644 --- a/src/tests/acceptance_tests/atest_mgrids.cc +++ b/src/tests/acceptance_tests/atest_mgrids.cc @@ -16,19 +16,19 @@ #include #include +#include "atlas/field.h" +#include "atlas/functionspace.h" #include "atlas/grid.h" +#include "atlas/interpolation/Interpolation.h" #include "atlas/mesh.h" -#include "atlas/functionspace.h" -#include "atlas/field.h" #include "atlas/meshgenerator.h" +#include "atlas/numerics/fvm/Method.h" #include "atlas/option.h" +#include "atlas/output/Gmsh.h" #include "atlas/parallel/mpi/mpi.h" #include "atlas/runtime/AtlasTool.h" #include "atlas/runtime/Log.h" #include "atlas/util/Config.h" -#include "atlas/output/Gmsh.h" -#include "atlas/numerics/fvm/Method.h" -#include "atlas/interpolation/Interpolation.h" #include "atlas/mesh/actions/BuildHalo.h" @@ -38,9 +38,10 @@ using namespace atlas; class Program : public AtlasTool { virtual void execute( const Args& args ); + public: Program( int argc, char** argv ); - }; +}; //----------------------------------------------------------------------------- 
@@ -50,49 +51,48 @@ Program::Program( int argc, char** argv ) : AtlasTool( argc, argv ) { add_option( new SimpleOption( "ghost", "Output ghost elements" ) ); add_option( new SimpleOption( "haloA", "Halo size" ) ); add_option( new SimpleOption( "haloB", "Halo size" ) ); - add_option( new SimpleOption( "no-forward", "no forward interpolation" ) ); + add_option( new SimpleOption( "no-forward", "no forward interpolation" ) ); add_option( new SimpleOption( "no-backward", "no backward interpolation" ) ); } //----------------------------------------------------------------------------- void Program::execute( const Args& args ) { + auto ghost = util::Config( "ghost", args.getBool( "ghost", false ) ); + auto haloA = option::halo( args.getLong( "haloA", 1 ) ); + auto haloB = option::halo( args.getLong( "haloB", 1 ) ); - auto ghost = util::Config("ghost",args.getBool("ghost",false)); - auto haloA = option::halo( args.getLong("haloA",1) ); - auto haloB = option::halo( args.getLong("haloB",1) ); - - auto gridA = Grid( args.getString("gridA") ); - auto gridB = Grid( args.getString("gridB") ); + auto gridA = Grid( args.getString( "gridA" ) ); + auto gridB = Grid( args.getString( "gridB" ) ); - auto meshgenerator = MeshGenerator( "structured" ); + auto meshgenerator = MeshGenerator( "structured" ); - auto distA = grid::Distribution( gridA, grid::Partitioner( "trans" ) ); + auto distA = grid::Distribution( gridA, grid::Partitioner( "trans" ) ); - auto meshA = meshgenerator.generate( gridA, distA ); + auto meshA = meshgenerator.generate( gridA, distA ); - numerics::fvm::Method fvmA(meshA,haloA); - auto gmshA = output::Gmsh( "meshA.msh", ghost ); - gmshA.write(meshA); + numerics::fvm::Method fvmA( meshA, haloA ); + auto gmshA = output::Gmsh( "meshA.msh", ghost ); + gmshA.write( meshA ); - auto distB = grid::Distribution( gridB, grid::MatchingMeshPartitioner( meshA ) ); + auto distB = grid::Distribution( gridB, grid::MatchingMeshPartitioner( meshA ) ); - auto meshB = 
meshgenerator.generate( gridB, distB ); + auto meshB = meshgenerator.generate( gridB, distB ); - numerics::fvm::Method fvmB(meshB,haloB); + numerics::fvm::Method fvmB( meshB, haloB ); - Field fieldB = fvmB.node_columns().createField(); + Field fieldB = fvmB.node_columns().createField(); - output::Gmsh gmshB( "meshB.msh", ghost ); - gmshB.write(meshB); - gmshB.write(fieldB); + output::Gmsh gmshB( "meshB.msh", ghost ); + gmshB.write( meshB ); + gmshB.write( fieldB ); - if( not args.getBool("no-forward",false) ) { - Interpolation AtoB( option::type("finite-element"), fvmA.node_columns(), fvmB.node_columns() ); - } - if( not args.getBool("no-backward",false) ) { - Interpolation BtoA( option::type("finite-element"), fvmB.node_columns(), fvmA.node_columns() ); - } + if ( not args.getBool( "no-forward", false ) ) { + Interpolation AtoB( option::type( "finite-element" ), fvmA.node_columns(), fvmB.node_columns() ); + } + if ( not args.getBool( "no-backward", false ) ) { + Interpolation BtoA( option::type( "finite-element" ), fvmB.node_columns(), fvmA.node_columns() ); + } } //------------------------------------------------------------------------------ diff --git a/src/tests/trans/test_transgeneral.cc b/src/tests/trans/test_transgeneral.cc index f0ed2f679..e2ccea7de 100644 --- a/src/tests/trans/test_transgeneral.cc +++ b/src/tests/trans/test_transgeneral.cc @@ -942,7 +942,7 @@ CASE( "test_trans_unstructured" ) { } #endif -//----------------------------------------------------------------------------- + //----------------------------------------------------------------------------- #if 0 CASE( "test_trans_fourier_truncation" ) { From 279bae10d6b9382085a1e270fb5211e7363c9357 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Tue, 19 Jun 2018 17:43:58 +0100 Subject: [PATCH 117/123] Fix a wrong template type --- src/atlas/functionspace/NodeColumns.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/atlas/functionspace/NodeColumns.cc 
b/src/atlas/functionspace/NodeColumns.cc index 8ffff7276..1e34143ec 100644 --- a/src/atlas/functionspace/NodeColumns.cc +++ b/src/atlas/functionspace/NodeColumns.cc @@ -1593,7 +1593,7 @@ void dispatch_minimum_and_location_per_level( const NodeColumns& fs, const Field } } - array::ArrayT glb_idx_private( glb_idx.shape( 0 ), glb_idx.shape( 1 ) ); + array::ArrayT glb_idx_private( glb_idx.shape( 0 ), glb_idx.shape( 1 ) ); array::ArrayView glb_idx_private_view = array::make_view( glb_idx_private ); const size_t npts = arr.shape( 0 ); atlas_omp_for( size_t n = 0; n < npts; ++n ) { @@ -1692,7 +1692,7 @@ void dispatch_maximum_and_location_per_level( const NodeColumns& fs, const Field } } - array::ArrayT glb_idx_private( glb_idx.shape( 0 ), glb_idx.shape( 1 ) ); + array::ArrayT glb_idx_private( glb_idx.shape( 0 ), glb_idx.shape( 1 ) ); array::ArrayView glb_idx_private_view = array::make_view( glb_idx_private ); const size_t npts = arr.shape( 0 ); atlas_omp_for( size_t n = 0; n < npts; ++n ) { From 3d23761f6824e4f15867ab6eb4b206d7dcba629e Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Tue, 19 Jun 2018 18:29:38 +0100 Subject: [PATCH 118/123] Version 0.15.0 --- CHANGELOG.md | 10 ++++++++++ VERSION.cmake | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 386b4815a..f352289ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,15 @@ This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html ## [Unreleased] +## [0.15.0] - 2018-06-19 +### Changed +- Native Array data storage uses now a raw C pointer instead of std::vector +- Significant performance improvements to Spherical harmonics transforms + +### Fixed +- Various bugs related to parallel halos +- Bit reproducibility for parallel interpolation + ## [0.14.0] - 2018-03-22 ### Added - Spherical Harmonics transforms can receive a cache memory handle @@ -30,6 +39,7 @@ This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html 
## 0.13.0 - 2018-02-16 [Unreleased]: https://github.com/ecmwf/atlas/compare/master...develop +[0.15.0]: https://github.com/ecmwf/atlas/compare/0.14.0...0.15.0 [0.14.0]: https://github.com/ecmwf/atlas/compare/0.13.2...0.14.0 [0.13.2]: https://github.com/ecmwf/atlas/compare/0.13.1...0.13.2 [0.13.1]: https://github.com/ecmwf/atlas/compare/0.13.0...0.13.1 diff --git a/VERSION.cmake b/VERSION.cmake index 7cd7d9260..eed3fc5f0 100644 --- a/VERSION.cmake +++ b/VERSION.cmake @@ -6,5 +6,5 @@ # granted to it by virtue of its status as an intergovernmental organisation nor # does it submit to any jurisdiction. -set ( ${PROJECT_NAME}_VERSION_STR "0.14.0-develop" ) +set ( ${PROJECT_NAME}_VERSION_STR "0.15.0" ) From 1ce7e7037f50f622d89674e6f585c11a316b4c4d Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Tue, 19 Jun 2018 19:06:48 +0100 Subject: [PATCH 119/123] Fix various warnings shown by pgi/18.4 -- still failing tests for pgi --- src/apps/atlas-benchmark.cc | 7 ------ src/atlas/domain/detail/ZonalBandDomain.cc | 2 ++ src/atlas/field/detail/FieldImpl.cc | 2 -- src/atlas/functionspace/EdgeColumns.cc | 24 ------------------- src/atlas/functionspace/NodeColumns.cc | 1 - src/atlas/functionspace/PointCloud.cc | 1 - src/atlas/functionspace/StructuredColumns.cc | 1 - src/atlas/grid/detail/grid/GridBuilder.cc | 9 +++++-- src/atlas/grid/detail/grid/Structured.h | 3 ++- src/atlas/grid/detail/grid/Unstructured.h | 2 ++ src/atlas/interpolation/element/Quad3D.h | 1 + src/atlas/interpolation/element/Triag3D.h | 1 + src/atlas/mesh/ElementType.cc | 1 - src/atlas/mesh/actions/BuildDualMesh.cc | 4 ---- src/atlas/numerics/fvm/Nabla.cc | 1 - src/atlas/output/detail/GmshIO.cc | 5 ++-- src/atlas/parallel/Checksum.h | 1 - src/atlas/parallel/GatherScatter.cc | 4 ---- src/atlas/runtime/trace/Timings.cc | 1 - src/atlas/trans/local/TransLocal.cc | 1 - .../functionspace/test_structuredcolumns.cc | 1 - 21 files changed, 17 insertions(+), 56 deletions(-) diff --git a/src/apps/atlas-benchmark.cc 
b/src/apps/atlas-benchmark.cc index f9ac36505..8186332e5 100644 --- a/src/apps/atlas-benchmark.cc +++ b/src/apps/atlas-benchmark.cc @@ -89,13 +89,6 @@ using namespace atlas::functionspace; using namespace atlas::meshgenerator; using atlas::AtlasTool; -namespace { -void usage( const std::string& tool ) { - Log::info() << "Usage: " << tool << " [OPTIONS]..." << std::endl; -} - -} // namespace - //---------------------------------------------------------------------------------------------------------------------- struct TimerStats { diff --git a/src/atlas/domain/detail/ZonalBandDomain.cc b/src/atlas/domain/detail/ZonalBandDomain.cc index 0f9d57eb1..509e70ba6 100644 --- a/src/atlas/domain/detail/ZonalBandDomain.cc +++ b/src/atlas/domain/detail/ZonalBandDomain.cc @@ -20,9 +20,11 @@ static std::array get_interval_y( const eckit::Parametrisation& param return {ymin, ymax}; } +/* constexpr std::array interval_x() { return {0., 360.}; } +*/ } // namespace constexpr char ZonalBandDomain::units_[]; diff --git a/src/atlas/field/detail/FieldImpl.cc b/src/atlas/field/detail/FieldImpl.cc index 67f7c015f..19500835c 100644 --- a/src/atlas/field/detail/FieldImpl.cc +++ b/src/atlas/field/detail/FieldImpl.cc @@ -40,8 +40,6 @@ FieldImpl* FieldImpl::create( const eckit::Parametrisation& params ) { throw eckit::Exception( "Could not find parameter 'creator' " "in Parametrisation for call to FieldImpl::create()" ); - - return 0; } FieldImpl* FieldImpl::create( const std::string& name, array::DataType datatype, const array::ArrayShape& shape ) { diff --git a/src/atlas/functionspace/EdgeColumns.cc b/src/atlas/functionspace/EdgeColumns.cc index 8c35306e9..e90d6d78f 100644 --- a/src/atlas/functionspace/EdgeColumns.cc +++ b/src/atlas/functionspace/EdgeColumns.cc @@ -526,30 +526,6 @@ const parallel::Checksum& EdgeColumns::checksum() const { return *checksum_; } -namespace { -void reverse_copy( const int variables[], const int size, std::vector& reverse ) { - int r = size; - for ( int i = 0; i 
< size; ++i ) { - reverse[--r] = static_cast( variables[i] ); - } -} - -void copy( const int variables[], const int size, std::vector& cpy ) { - for ( int i = 0; i < size; ++i ) { - cpy[i] = static_cast( variables[i] ); - } -} - -std::vector variables_to_vector( const int variables[], const int size, bool fortran_ordering ) { - std::vector vec( size ); - if ( fortran_ordering ) - reverse_copy( variables, size, vec ); - else - copy( variables, size, vec ); - return vec; -} -} // namespace - //------------------------------------------------------------------------------ //------------------------------------------------------------------------------ //------------------------------------------------------------------------------ diff --git a/src/atlas/functionspace/NodeColumns.cc b/src/atlas/functionspace/NodeColumns.cc index 1e34143ec..d73e60f53 100644 --- a/src/atlas/functionspace/NodeColumns.cc +++ b/src/atlas/functionspace/NodeColumns.cc @@ -428,7 +428,6 @@ void NodeColumns::haloExchange( FieldSet& fieldset, bool on_device ) const { break; default: throw eckit::Exception( "Rank not supported", Here() ); - break; } } } diff --git a/src/atlas/functionspace/PointCloud.cc b/src/atlas/functionspace/PointCloud.cc index 084f42eef..48793d6ba 100644 --- a/src/atlas/functionspace/PointCloud.cc +++ b/src/atlas/functionspace/PointCloud.cc @@ -39,7 +39,6 @@ const Field& PointCloud::ghost() const { Field PointCloud::createField( const eckit::Configuration& options ) const { NOTIMP; - return Field(); } Field PointCloud::createField( const Field& other, const eckit::Configuration& config ) const { diff --git a/src/atlas/functionspace/StructuredColumns.cc b/src/atlas/functionspace/StructuredColumns.cc index ac199f8a3..2997b2e14 100644 --- a/src/atlas/functionspace/StructuredColumns.cc +++ b/src/atlas/functionspace/StructuredColumns.cc @@ -774,7 +774,6 @@ void StructuredColumns::haloExchange( FieldSet& fieldset ) const { break; default: throw eckit::Exception( "Rank not 
supported", Here() ); - break; } } } diff --git a/src/atlas/grid/detail/grid/GridBuilder.cc b/src/atlas/grid/detail/grid/GridBuilder.cc index 957084d84..859ff7ec2 100644 --- a/src/atlas/grid/detail/grid/GridBuilder.cc +++ b/src/atlas/grid/detail/grid/GridBuilder.cc @@ -70,14 +70,21 @@ int regex_match_impl( const std::string& string, const std::string& regex, std:: class Regex { public: Regex( const std::string& regex, bool use_case = true ) : regex_( regex ), use_case_( use_case ) {} +/* + // unused bool match( const std::string& string ) const { std::vector substr; return regex_match_impl( string, regex_, substr, false, use_case_ ); } +*/ bool match( const std::string& string, std::vector& substr ) const { return regex_match_impl( string, regex_, substr, true, use_case_ ); } + +/* + // unused operator std::string() const { return regex_; } +*/ private: std::string regex_; @@ -183,8 +190,6 @@ const Grid::Implementation* GridBuilder::create( const Grid::Config& config ) co else { throw eckit::BadParameter( "name or type in configuration don't exist", Here() ); } - - return nullptr; } bool GridBuilder::match( const std::string& string, std::vector& matches, int& id ) const { diff --git a/src/atlas/grid/detail/grid/Structured.h b/src/atlas/grid/detail/grid/Structured.h index d26891f20..4cb402be8 100644 --- a/src/atlas/grid/detail/grid/Structured.h +++ b/src/atlas/grid/detail/grid/Structured.h @@ -101,7 +101,7 @@ class Structured : public Grid { virtual bool next( PointXY& xy ) { NOTIMP; - +#if 0 if ( j_ < grid_.ny() && i_ < grid_.nx( j_ ) ) { xy = grid_.xy( i_++, j_ ); @@ -112,6 +112,7 @@ class Structured : public Grid { return true; } return false; +#endif } virtual const PointXY operator*() const { return grid_.xy( i_, j_ ); } diff --git a/src/atlas/grid/detail/grid/Unstructured.h b/src/atlas/grid/detail/grid/Unstructured.h index ab9c5c427..135b6d726 100644 --- a/src/atlas/grid/detail/grid/Unstructured.h +++ b/src/atlas/grid/detail/grid/Unstructured.h @@ -82,6 +82,7 
@@ class Unstructured : public Grid { virtual bool next( PointXY& xy ) { NOTIMP; +#if 0 if ( n_ != grid_.points_->size() ) { xy = grid_.xy( n_++ ); return true; @@ -89,6 +90,7 @@ class Unstructured : public Grid { else { return false; } +#endif } virtual const PointXY operator*() const { return grid_.xy( n_ ); } diff --git a/src/atlas/interpolation/element/Quad3D.h b/src/atlas/interpolation/element/Quad3D.h index 36d0b4597..e6af6527a 100644 --- a/src/atlas/interpolation/element/Quad3D.h +++ b/src/atlas/interpolation/element/Quad3D.h @@ -57,6 +57,7 @@ class Quad3D { if ( i == 1 ) return v10; if ( i == 2 ) return v11; if ( i == 3 ) return v01; + return Vector3D(); } private: // members diff --git a/src/atlas/interpolation/element/Triag3D.h b/src/atlas/interpolation/element/Triag3D.h index 8b1ed3fa8..1f7455e7c 100644 --- a/src/atlas/interpolation/element/Triag3D.h +++ b/src/atlas/interpolation/element/Triag3D.h @@ -59,6 +59,7 @@ class Triag3D { if ( i == 0 ) return v0; if ( i == 1 ) return v1; if ( i == 2 ) return v2; + return Vector3D(); } private: // members diff --git a/src/atlas/mesh/ElementType.cc b/src/atlas/mesh/ElementType.cc index d4ea081e7..09407a8c0 100644 --- a/src/atlas/mesh/ElementType.cc +++ b/src/atlas/mesh/ElementType.cc @@ -19,7 +19,6 @@ namespace mesh { ElementType* ElementType::create( const std::string& ) { NOTIMP; - return 0; } ElementType::ElementType() {} diff --git a/src/atlas/mesh/actions/BuildDualMesh.cc b/src/atlas/mesh/actions/BuildDualMesh.cc index 68543f879..af13bb413 100644 --- a/src/atlas/mesh/actions/BuildDualMesh.cc +++ b/src/atlas/mesh/actions/BuildDualMesh.cc @@ -76,10 +76,6 @@ struct Node { bool operator<( const Node& other ) const { return ( g < other.g ); } }; -inline double sqr( double a ) { - return a * a; -} - } // namespace array::Array* build_centroids_xy( const mesh::HybridElements&, const Field& xy ); diff --git a/src/atlas/numerics/fvm/Nabla.cc b/src/atlas/numerics/fvm/Nabla.cc index 95f244a90..676127c8e 100644 --- 
a/src/atlas/numerics/fvm/Nabla.cc +++ b/src/atlas/numerics/fvm/Nabla.cc @@ -72,7 +72,6 @@ void Nabla::gradient( const Field& field, Field& grad_field ) const { else { return gradient_of_scalar( field, grad_field ); } - throw eckit::SeriousBug( "Cannot figure out if field is a scalar or vector field", Here() ); } void Nabla::gradient_of_scalar( const Field& scalar_field, Field& grad_field ) const { diff --git a/src/atlas/output/detail/GmshIO.cc b/src/atlas/output/detail/GmshIO.cc index c8c2a60e4..456bafead 100644 --- a/src/atlas/output/detail/GmshIO.cc +++ b/src/atlas/output/detail/GmshIO.cc @@ -502,7 +502,6 @@ mesh::ElementType* make_element_type( int type ) { if ( type == TRIAG ) return new mesh::temporary::Triangle(); if ( type == LINE ) return new mesh::temporary::Line(); throw eckit::SeriousBug( "Element type not supported", Here() ); - return 0; } } // namespace @@ -956,7 +955,7 @@ void GmshIO::write_delegate( const FieldSet& fieldset, const functionspace::Node bool binary( !options.get( "ascii" ) ); if ( binary ) mode |= std::ios_base::binary; bool gather = options.has( "gather" ) ? options.get( "gather" ) : false; - GmshFile file( file_path, mode, gather ? -1 : atlas::mpi::comm().rank() ); + GmshFile file( file_path, mode, gather ? -1 : int(atlas::mpi::comm().rank()) ); // Header if ( is_new_file ) { write_header_ascii( file ); } @@ -995,7 +994,7 @@ void GmshIO::write_delegate( const FieldSet& fieldset, const functionspace::Stru bool gather = options.has( "gather" ) ? options.get( "gather" ) : false; - GmshFile file( file_path, mode, gather ? -1 : atlas::mpi::comm().rank() ); + GmshFile file( file_path, mode, gather ? 
-1 : int(atlas::mpi::comm().rank()) ); // Header if ( is_new_file ) write_header_ascii( file ); diff --git a/src/atlas/parallel/Checksum.h b/src/atlas/parallel/Checksum.h index db5289a40..111f716bc 100644 --- a/src/atlas/parallel/Checksum.h +++ b/src/atlas/parallel/Checksum.h @@ -149,7 +149,6 @@ std::string Checksum::execute( const array::ArrayView& lfield Log::error() << "lfield.shape(0) = " << lfield.shape( 0 ); NOTIMP; // Need to implement with parallel ranks > 1 } - return std::string( "" ); } // ------------------------------------------------------------------ diff --git a/src/atlas/parallel/GatherScatter.cc b/src/atlas/parallel/GatherScatter.cc index 47242218d..e0e91069e 100644 --- a/src/atlas/parallel/GatherScatter.cc +++ b/src/atlas/parallel/GatherScatter.cc @@ -59,10 +59,6 @@ struct Node { bool operator==( const Node& other ) const { return ( g == other.g ); } }; -bool operator<( const gidx_t g, const Node& n ) { - return ( g < n.g ); -} - } // namespace GatherScatter::GatherScatter() : name_(), is_setup_( false ) { diff --git a/src/atlas/runtime/trace/Timings.cc b/src/atlas/runtime/trace/Timings.cc index d3cb5c599..b6c191b68 100644 --- a/src/atlas/runtime/trace/Timings.cc +++ b/src/atlas/runtime/trace/Timings.cc @@ -340,7 +340,6 @@ std::string TimingsRegistry::filter_filepath( const std::string& filepath ) cons std::regex filepath_re( "(.*)?/atlas/src/(.*)" ); std::smatch matches; std::string filtered( "" ); - bool is_atlas = false; if ( std::regex_search( filepath, matches, filepath_re ) ) { // filtered = matches[2]; filtered = "[atlas] "; diff --git a/src/atlas/trans/local/TransLocal.cc b/src/atlas/trans/local/TransLocal.cc index 03b4a0802..9ed9536b1 100644 --- a/src/atlas/trans/local/TransLocal.cc +++ b/src/atlas/trans/local/TransLocal.cc @@ -1033,7 +1033,6 @@ void TransLocal::invtrans_unstructured_precomp( const int truncation, const int const int nlats = grid_.size(); const int size_fourier = nb_fields * 2; - double* legendre; double* scl_fourier; 
double* scl_fourier_tp; double* fouriertp; diff --git a/src/tests/functionspace/test_structuredcolumns.cc b/src/tests/functionspace/test_structuredcolumns.cc index c0e974af5..aa8eaeaa7 100644 --- a/src/tests/functionspace/test_structuredcolumns.cc +++ b/src/tests/functionspace/test_structuredcolumns.cc @@ -92,7 +92,6 @@ CASE( "test_functionspace_StructuredColumns_no_halo" ) { CASE( "test_functionspace_StructuredColumns_halo" ) { ATLAS_DEBUG_VAR( mpi::comm().size() ); - int root = 0; // grid::StructuredGrid grid( // grid::StructuredGrid::XSpace( {0.,360.} , {2,4,6,6,4,2} , false ), // grid::StructuredGrid::YSpace( grid::LinearSpacing( {80.,-80.}, 6 ) ), From 31f329b8c02bc56325fa645f3275e7ed620a71ff Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 20 Jun 2018 17:00:02 +0100 Subject: [PATCH 120/123] Remove warnings related to unreachable statements detected by PGI compiler --- src/atlas/array/native/NativeArray.cc | 1 - src/atlas/functionspace/Spectral.cc | 1 - src/atlas/functionspace/StructuredColumns.cc | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/atlas/array/native/NativeArray.cc b/src/atlas/array/native/NativeArray.cc index 3e5f3c346..ef87cad2f 100644 --- a/src/atlas/array/native/NativeArray.cc +++ b/src/atlas/array/native/NativeArray.cc @@ -69,7 +69,6 @@ Array* Array::create( DataType datatype, const ArrayShape& shape ) { throw eckit::BadParameter( err.str(), Here() ); } } - return 0; } template diff --git a/src/atlas/functionspace/Spectral.cc b/src/atlas/functionspace/Spectral.cc index 2a833f453..f5aab2087 100644 --- a/src/atlas/functionspace/Spectral.cc +++ b/src/atlas/functionspace/Spectral.cc @@ -287,7 +287,6 @@ void Spectral::scatter( const Field& global, Field& local ) const { std::string Spectral::checksum( const FieldSet& fieldset ) const { eckit::MD5 md5; NOTIMP; - return md5; } std::string Spectral::checksum( const Field& field ) const { FieldSet fieldset; diff --git a/src/atlas/functionspace/StructuredColumns.cc 
b/src/atlas/functionspace/StructuredColumns.cc index 2997b2e14..3a10f19ba 100644 --- a/src/atlas/functionspace/StructuredColumns.cc +++ b/src/atlas/functionspace/StructuredColumns.cc @@ -86,7 +86,7 @@ struct GridPoint { return false; } - bool operator==( const GridPoint& other ) const { return ( j == other.j && i == other.i ); } + //bool operator==( const GridPoint& other ) const { return ( j == other.j && i == other.i ); } }; struct GridPointSet { From 2abad35cb219d973c5f7c30f09486e6437962fac Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 20 Jun 2018 17:11:31 +0100 Subject: [PATCH 121/123] Reduce test times --- src/atlas/interpolation/element/Quad3D.h | 2 +- src/atlas/interpolation/element/Triag3D.h | 3 ++- src/tests/numerics/test_fvm_nabla.cc | 2 +- src/tests/util/test_footprint.cc | 6 +++++- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/atlas/interpolation/element/Quad3D.h b/src/atlas/interpolation/element/Quad3D.h index e6af6527a..f0d4a2b92 100644 --- a/src/atlas/interpolation/element/Quad3D.h +++ b/src/atlas/interpolation/element/Quad3D.h @@ -57,7 +57,7 @@ class Quad3D { if ( i == 1 ) return v10; if ( i == 2 ) return v11; if ( i == 3 ) return v01; - return Vector3D(); + throw eckit::OutOfRange(i,4,Here()); } private: // members diff --git a/src/atlas/interpolation/element/Triag3D.h b/src/atlas/interpolation/element/Triag3D.h index 1f7455e7c..f4da50450 100644 --- a/src/atlas/interpolation/element/Triag3D.h +++ b/src/atlas/interpolation/element/Triag3D.h @@ -12,6 +12,7 @@ #include +#include "eckit/exception/Exceptions.h" #include "atlas/interpolation/Vector3D.h" #include "atlas/interpolation/method/Intersect.h" #include "atlas/util/Point.h" @@ -59,7 +60,7 @@ class Triag3D { if ( i == 0 ) return v0; if ( i == 1 ) return v1; if ( i == 2 ) return v2; - return Vector3D(); + throw eckit::OutOfRange(i,3,Here()); } private: // members diff --git a/src/tests/numerics/test_fvm_nabla.cc b/src/tests/numerics/test_fvm_nabla.cc index 
5983ad56f..115ce4c08 100644 --- a/src/tests/numerics/test_fvm_nabla.cc +++ b/src/tests/numerics/test_fvm_nabla.cc @@ -106,7 +106,7 @@ void rotated_flow_magnitude( const fvm::Method& fvm, Field& field, const double& } static std::string griduid() { - return "Slat80"; + return "Slat20"; } //----------------------------------------------------------------------------- diff --git a/src/tests/util/test_footprint.cc b/src/tests/util/test_footprint.cc index 281474e98..19876a460 100644 --- a/src/tests/util/test_footprint.cc +++ b/src/tests/util/test_footprint.cc @@ -30,6 +30,10 @@ using namespace atlas::util; namespace atlas { namespace test { +static std::string griduid() { + return "O64"; +} + //----------------------------------------------------------------------------- CASE( "test_broadcast_to_self" ) { @@ -39,7 +43,7 @@ CASE( "test_broadcast_to_self" ) { Field field( "field", array::make_datatype(), array::make_shape( 10, 2 ) ); Log::info() << "field.footprint = " << eckit::Bytes( field.footprint() ) << std::endl; - Grid grid( "O640" ); + Grid grid( griduid() ); MeshGenerator meshgen( "structured" ); Mesh mesh = meshgen.generate( grid ); From 4ba33483d5b4d8b0338784d94c3999e3a47fdad3 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 20 Jun 2018 19:34:51 +0100 Subject: [PATCH 122/123] Print more configuration information --- src/atlas/library/Library.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/atlas/library/Library.cc b/src/atlas/library/Library.cc index 084311533..92e99b845 100644 --- a/src/atlas/library/Library.cc +++ b/src/atlas/library/Library.cc @@ -220,11 +220,18 @@ void Library::Information::print( std::ostream& out ) const { bool feature_fortran( ATLAS_HAVE_FORTRAN ); bool feature_OpenMP( ATLAS_HAVE_OMP ); bool feature_Trans( ATLAS_HAVE_TRANS ); + bool feature_FFTW( ATLAS_HAVE_FFTW ); + bool feature_Eigen( ATLAS_HAVE_EIGEN ); bool feature_Tesselation( ATLAS_HAVE_TESSELATION ); bool feature_BoundsChecking( 
ATLAS_ARRAYVIEW_BOUNDS_CHECKING ); + bool feature_Init_sNaN( ATLAS_INIT_SNAN ); bool feature_MPI( false ); #ifdef ECKIT_HAVE_MPI feature_MPI = true; +#endif + bool feature_MKL( false ); +#ifdef ECKIT_HAVE_MKL + feature_MKL = true; #endif std::string array_data_store = "Native"; #if ATLAS_HAVE_GRIDTOOLS_STORAGE @@ -238,7 +245,11 @@ void Library::Information::print( std::ostream& out ) const { << " MPI : " << str( feature_MPI ) << '\n' << " OpenMP : " << str( feature_OpenMP ) << '\n' << " BoundsChecking : " << str( feature_BoundsChecking ) << '\n' + << " Init_sNaN : " << str( feature_Init_sNaN ) << '\n' << " Trans : " << str( feature_Trans ) << '\n' + << " FFTW : " << str( feature_FFTW ) << '\n' + << " Eigen : " << str( feature_Eigen ) << '\n' + << " MKL : " << str( feature_MKL ) << '\n' << " Tesselation : " << str( feature_Tesselation ) << '\n' << " ArrayDataStore : " << array_data_store << '\n' << " gidx_t : " << ATLAS_BITS_GLOBAL << " bit integer" << '\n' From e8a347f8e605e7caef487bff05e1308a5baeec4b Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 20 Jun 2018 19:39:59 +0100 Subject: [PATCH 123/123] Disable PGI build from travis ci --- .travis.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.travis.yml b/.travis.yml index f02f88aa1..d32f179b3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -61,16 +61,6 @@ matrix: sources: ['ubuntu-toolchain-r-test'] packages: ['g++-7', 'gcc-7', 'gfortran-7', 'libfftw3-dev', 'lcov'] - - os: linux - compiler: gcc - env: - - CACHE_NAME=linux-pgi-openmpi - - CXX_COMPILER='pgc++' C_COMPILER='pgcc' Fortran_COMPILER='pgfortran' - - MPI='openmpi' - - PGI_VERSION="CommunityEdition" - - ECKIT_CMAKE_OPTIONS="-DRT_LIB=/usr/lib/x86_64-linux-gnu/librt.so -DCURSES_LIBRARY=/usr/lib/x86_64-linux-gnu/libcurses.so" - - ATLAS_CMAKE_OPTIONS="-DCMAKE_BUILD_TYPE=DEBUG -DENABLE_FORTRAN=OFF" # Fortran tests known to be broken with pgi/17.10 - - os: osx env: - CACHE_NAME=osx-clang-openmpi @@ -90,6 +80,16 
@@ matrix: # - MPI=mpich # osx_image: xcode9 + # - os: linux + # compiler: gcc + # env: + # - CACHE_NAME=linux-pgi-openmpi + # - CXX_COMPILER='pgc++' C_COMPILER='pgcc' Fortran_COMPILER='pgfortran' + # - MPI='openmpi' + # - PGI_VERSION="CommunityEdition" + # - ECKIT_CMAKE_OPTIONS="-DRT_LIB=/usr/lib/x86_64-linux-gnu/librt.so -DCURSES_LIBRARY=/usr/lib/x86_64-linux-gnu/libcurses.so" + # - ATLAS_CMAKE_OPTIONS="-DCMAKE_BUILD_TYPE=DEBUG -DENABLE_FORTRAN=OFF" # Fortran tests known to be broken with pgi/17.10 + before_install: #################################################################