diff --git a/lib/parms/Makefile b/lib/parms/Makefile
index d64da292e..4baf77a25 100755
--- a/lib/parms/Makefile
+++ b/lib/parms/Makefile
@@ -30,7 +30,7 @@ OBJ1 = ./src/parms_comm.o \
        ./src/parms_qsplit.o ./src/parms_solver.o \
        ./src/parms_table.o ./src/parms_timer.o \
        ./src/parms_vec.o ./src/parms_viewer.o \
-       ./src/fgmres.o ./src/bicgstab.o ./src/cg.o \
+       ./src/fgmres.o ./src/bicgstab.o ./src/cg.o ./src/pbicgstab.o ./src/pbicgstab_ras.o ./src/bicgstab_ras.o \
        ./src/gmres.o ./src/parms_complex.o
 #$(HYPRE_OBJ) \
diff --git a/lib/parms/README b/lib/parms/README
index 1cbc7dd84..2bff6a939 100755
--- a/lib/parms/README
+++ b/lib/parms/README
@@ -50,3 +50,24 @@
 data->issetup for reusing of the LU-Facorization
 changed convergence criterion in fgmres.c, gmres.c
 (ro <= tol) instead of (ro <= eps1)
+
+///////////////////////////////////////////////////////////////////////////////
+
+additional routines implemented by Natalja Rakowsky, natalja.rakowsky@awi.de
+
+Solver: PBICGS (pbicgstab.c)
+
+   Pipelined BiCGstab, overlapping global sums with computation. See:
+   Siegfried Cools, Wim Vanroose
+   The communication-hiding pipelined BiCGStab method for the parallel solution of large unsymmetric linear systems
+   Parallel Computing 65, pp. 1-20, July 2017
+
+Solver: PBICGS_RAS (pbicgstab_ras.c)
+
+   Pipelined BiCGstab with a hard-coded RAS preconditioner. This allows the halo
+   communication for RAS to overlap with some of the computation in BiCGstab.
+
+Solver: BICGS_RAS (bicgstab_ras.c)
+
+   Classical BiCGstab with the same hard-coded RAS preconditioner and with the
+   global sums fused into nonblocking reductions.
diff --git a/lib/parms/include/fparms.h b/lib/parms/include/fparms.h
index 64d10cc00..d49b2bea9 100755
--- a/lib/parms/include/fparms.h
+++ b/lib/parms/include/fparms.h
@@ -26,8 +26,8 @@
       integer :: PCILU0, PCILUK, PCILUT, PCARMS
       parameter(PCBJ=0, PCSCHUR=1, PCRAS=2, PCSCHURRAS=3)
       parameter(PCILU0=0, PCILUK=1, PCILUT=2, PCARMS=3)
-      integer :: SOLFGMRES, SOLGMRES, SOLBICGS, SOLCG
+      integer :: SOLFGMRES, SOLGMRES, SOLBICGS, SOLCG, SOLPBICGS, SOLPBICGS_RAS, SOLBICGS_RAS
       integer :: MAXITS, KSIZE, DTOL, NEIG
-      parameter(SOLFGMRES=0,SOLGMRES=1, SOLBICGS=2, SOLCG=3)
+      parameter(SOLFGMRES=0,SOLGMRES=1, SOLBICGS=2, SOLCG=3, SOLPBICGS=4, SOLPBICGS_RAS=5, SOLBICGS_RAS=6)
       parameter(MAXITS=0,KSIZE=1,DTOL=2,NEIG=3)
diff --git a/lib/parms/include/parms_sys.h b/lib/parms/include/parms_sys.h
index e76894e06..d49f25ef6 100755
--- a/lib/parms/include/parms_sys.h
+++ b/lib/parms/include/parms_sys.h
@@ -59,7 +59,7 @@ typedef enum VARSTYPE {INTERLACED, NONINTERLACED} VARSTYPE;
 typedef enum INSERTMODE {INSERT, ADD} INSERTMODE;
 typedef enum NNZSTRUCT {SAME_NONZERO_STRUCTURE, DIFFERENT_NONZERO_STRUCTURE} NNZSTRUCT;
 typedef enum COMMTYPE {P2P, DERIVED} COMMTYPE;
-typedef enum SOLVERTYPE {SOLFGMRES, SOLGMRES, SOLBICGS, SOLCG} SOLVERTYPE;
+typedef enum SOLVERTYPE {SOLFGMRES, SOLGMRES, SOLBICGS, SOLCG, SOLPBICGS, SOLPBICGS_RAS, SOLBICGS_RAS} SOLVERTYPE;
 typedef enum PARAMTYPE {MAXITS, KSIZE, DTOL, NEIG} PARAMTYPE;
 typedef enum MATTYPE {MAT_NULL=-1, MAT_VCSR=0, MAT_CSR=1} MATTYPE;
 typedef enum PCTYPE {PCBJ, PCSCHUR, PCRAS, PCSCHURRAS} PCTYPE;
diff --git a/lib/parms/src/bicgstab_ras.c b/lib/parms/src/bicgstab_ras.c
new file mode 100644
index 000000000..505494f4f
--- /dev/null
+++ b/lib/parms/src/bicgstab_ras.c
@@ -0,0 +1,360 @@
+/*--------------------------------------------------------------------
+  bicgstabras_create    : create the BICGS_RAS solver.
+  bicgstabras_free      : free the memory for the BICGS_RAS solver.
+  bicgstabras_view      : dump the BICGS_RAS solver.
+  parms_bicgstabras     : the BICGS_RAS solve function.
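+
+  The RAS preconditioner is inlined in this file so that its halo exchange
+  can be overlapped with other work.  A sketch of one application, where
+  x_ext/y_ext stand for any extended (nloc plus halo) work vector and the
+  other names are the ras_data fields defined below:
+
+     if (nsend) parms_CommDataBegin(pc_handler, x_ext, 0);     start halo exchange
+     if (nodv)  parms_CommDataEnd(pc_handler);                 wait for ghost data
+     PARMS_MEMCPY(&x_ext[nloc], pc_data->rbuf, n_ext - nloc);  append ghost values
+     parms_OperatorApply(pc_data->op, x_ext, y_ext);           local solve; no
+                                                               post-exchange, the
+                                                               "Restricted" in RAS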
+
+  $Id: fgmres.c,v 1.4 2006-12-15 07:02:07 zzli Exp $
+  ------------------------------------------------------------------*/
+
+#if defined(__ICC)
+#include <mathimf.h>
+#else
+#if defined(C99)
+#include <tgmath.h>
+#else
+#include <math.h>
+#endif
+#endif
+#include "parms_vec.h"
+#include "parms_mat.h"
+#include "parms_pc.h"
+#include "./include/parms_solver_impl.h"
+
+#include "./include/parms_comm_impl.h"
+#include "./include/parms_pc_impl.h"
+#include "./include/parms_opt_impl.h"
+
+
+typedef struct ras_data {
+  parms_Operator op;
+  parms_Comm     handler;
+  parms_Map      is;
+  BOOL           issetup;
+  FLOAT          *rbuf;
+  int            nloc;
+  int            n;
+  int            nodv;
+  int            nsend;
+} *ras_data;
+
+
+#define DBL_EPSILON 2.2204460492503131e-16 // double epsilon
+
+typedef struct bicgs_data {
+  int neigs;
+} *bicgstabras_data;
+
+#define TINY 1.0e-20
+int parms_bicgstabras(parms_Solver self, FLOAT *y, FLOAT *x)
+{
+
+  int i, its;
+  int maxits, nloc, n_ext, size, one = 1;
+  FLOAT alpha, t1, tmp[2];
+  FLOAT *r0, *v, *t;
+  FLOAT *r_ext, *st_ext, *p_ext, *pt_ext;
+  REAL tol, rho, rho_alt, omega, sigma, beta;
+  MPI_Request req;
+  int nodv, nsend;
+
+  parms_Mat A;
+  parms_PC  pc;
+  parms_Map is;
+  parms_Viewer viewer;
+  parms_Comm pc_handler;
+  ras_data pc_data;
+
+  int rank;
+  MPI_Comm comm;
+
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+  A  = self->A;
+  pc = self->pc;
+  is = A->is;
+
+  maxits = self->maxits;
+  tol  = self->tol*self->tol;
+  nloc = parms_MapGetLocalSize(is);
+  comm = is->comm;
+
+  // For inlined RAS preconditioner
+  pc_data    = (ras_data)pc->data;
+  n_ext      = pc_data->n;        // local size with halo for Schwarz
+  pc_handler = pc_data->handler;
+  nsend      = pc_data->nsend;
+  nodv       = pc_data->nodv;
+
+/*----- allocate memory for working local arrays -------*/
+  size = nloc;
+  /* PARMS_NEWARRAY(r0, size); */
+  PARMS_NEWARRAY(r0, size);
+  PARMS_NEWARRAY(t, size);
+  PARMS_NEWARRAY(v, size);
+
+  PARMS_NEWARRAY(r_ext, n_ext);
+  PARMS_NEWARRAY(p_ext, n_ext);
+  PARMS_NEWARRAY(pt_ext, n_ext);
+  PARMS_NEWARRAY(st_ext, n_ext);
+
+
+/* --- first permute x and y for pARMS matrix structure ------*/
+  if (is->isperm) {
+    for (i = 0; i < nloc; i++) {
+      t[is->perm[i]]  = x[i];
+      r0[is->perm[i]] = y[i];
+    }
+    memcpy(x, t, nloc*sizeof(FLOAT));
+    is->isvecperm = true;
+  }
+
+
+  /* compute residual vector r = y - Ax */
+  /* r0 = y (permuted) */
+  parms_MatVec(A, x, r_ext);
+  parms_VecAYPX(r_ext, r0, -1.0, is);
+
+  /* choose r0 (arbitrary), e.g., PARMS_MEMCPY(r0,r,nloc); or: */
+  /*    PARMS_MEMCPY(r0, y, nloc);                             */
+  /* As r0 and y are never changed, we can simply set          */
+  /*    r0=y;                                                  */
+  /* Do this already above, use r0 as permuted y! */
+
+  tmp[0] = 0.;
+  tmp[1] = 0.;
+  for (i = 0; i < nloc; i++)
+    {
+      tmp[0] += r_ext[i] * r_ext[i];   // omega
+      tmp[1] += r0[i] * r_ext[i];      // rho
+    }
+  if(is->isserial == false)
+    MPI_Iallreduce(MPI_IN_PLACE, tmp, 2, MPI_DOUBLE, MPI_SUM, comm, &req);
+
+
+  PARMS_MEMCPY(p_ext, r_ext, nloc);
+  alpha = 0.;
+  sigma = rho_alt = 1.0;
+
+  if(is->isserial == false) MPI_Wait(&req, MPI_STATUS_IGNORE);
+
+  omega = tmp[0];
+  rho   = tmp[1];
+
+
+  if (omega > tol){
+    for (its = 0; its < maxits; its++){
+
+      // parms_PCApply(pc, p, pt);
+      if (nsend) parms_CommDataBegin(pc_handler, p_ext, 0);
+      if (nodv) parms_CommDataEnd(pc_handler);
+      if (pc_data->n - pc_data->nloc)
+        PARMS_MEMCPY(&p_ext[nloc], pc_data->rbuf, pc_data->n - pc_data->nloc);
+
+      /* solve the extended linear system */
+      parms_OperatorApply(pc_data->op, p_ext, pt_ext);
+
+      /* No halo exchange of the solution, that's the "Restricted" in RAS */
+      /* === End Preconditioner === */
+
+      parms_MatVec(A, pt_ext, v);
+      parms_VecDOT(r0,v,&alpha,is);
+
+      alpha = rho/alpha;
+
+      for (i = 0; i < nloc; i++) r_ext[i] -= alpha * v[i];
+
+      // parms_PCApply(pc, r, st);
+      if (nsend) parms_CommDataBegin(pc_handler, r_ext, 0);
+      if (nodv) parms_CommDataEnd(pc_handler);
+      if (pc_data->n - pc_data->nloc)
+        PARMS_MEMCPY(&r_ext[nloc], pc_data->rbuf, pc_data->n - pc_data->nloc);
+
+      /* solve the extended linear system */
+      parms_OperatorApply(pc_data->op, r_ext, st_ext);
+
+      /* No halo exchange of the solution, that's the "Restricted" in RAS */
+      /* === End Preconditioner === */
+      parms_MatVec(A, st_ext, t);
+
+      tmp[0] = 0.;
+      tmp[1] = 0.;
+      for (i = 0; i < nloc; i++)
+        {
+          tmp[0] += t[i] * r_ext[i];
+          tmp[1] += t[i] * t[i];
+        }
+
+
+      if(is->isserial == false)
+        MPI_Iallreduce(MPI_IN_PLACE, tmp, 2, MPI_DOUBLE, MPI_SUM, comm, &req);
+
+      //for (i = 0; i < nloc; i++) x[i] += alpha * pt_ext[i];
+
+      if(is->isserial == false) MPI_Wait(&req, MPI_STATUS_IGNORE);
+
+      sigma = tmp[0]/tmp[1];
+
+
+      for (i = 0; i < nloc; i++) r_ext[i] -= sigma * t[i];
+
+      tmp[0] = 0.;
+      tmp[1] = 0.;
+      for (i = 0; i < nloc; i++)
+        {
+          tmp[0] += r_ext[i] * r_ext[i];   // omega
+          tmp[1] += r0[i] * r_ext[i];      // rho
+        }
+
+      if(is->isserial == false)
+        MPI_Iallreduce(MPI_IN_PLACE, tmp, 2, MPI_DOUBLE, MPI_SUM, comm, &req);
+
+      for (i = 0; i < nloc; i++) x[i] += alpha * pt_ext[i] + sigma * st_ext[i];
+
+
+      if(is->isserial == false) MPI_Wait(&req, MPI_STATUS_IGNORE);
+
+      omega = tmp[0];
+
+      if (omega < tol) break;
+
+      rho = tmp[1];
+
+      beta = (rho * alpha) / (rho_alt * sigma);
+      rho_alt = rho;
+
+      for (i = 0; i < nloc; i++)
+        {
+          p_ext[i] = r_ext[i] + beta*(p_ext[i] - sigma* v[i]);
+        }
+    }
+  }
+
+
+  its++;
+
+  if(its == maxits && omega > tol)
+    printf("ERROR: no convergence\n");
+
+  /* reset isvecperm and do inverse permutation*/
+  if (is->isperm) {
+    for (i = 0; i < nloc; i++) {
+      t[is->iperm[i]] = x[i];
+    }
+    memcpy(x, t, nloc*sizeof(FLOAT));
+    is->isvecperm = false;
+  }
+  free(r0);
+  free(r_ext);
+  free(p_ext);
+  free(st_ext);
+  free(t);
+  free(v);
+  free(pt_ext);
+
+
+  self->its = its;
+  return 0;
+}
+
+static int bicgstabras_getresidual(parms_Solver self, FLOAT *y, FLOAT *x, FLOAT *res)
+{
+
+  parms_Mat A;
+
+  A = self->A;
+
+  parms_MatMVPY(A, -1.0, x, 1.0, y, res);
+
+  return 0;
+}
+
+static int bicgstabras_getresidualnorm2(parms_Solver self, FLOAT *y, FLOAT *x, REAL *rnorm)
+{
+
+  int nloc;
+  FLOAT *res;
+  parms_Mat A;
+  parms_Map is;
+
+  A = self->A;
+  is = A->is;
+  nloc = parms_MapGetLocalSize(is);
+  PARMS_NEWARRAY(res, nloc);
+
+  parms_MatMVPY(A, -1.0, x, 1.0, y, res);
+
+  parms_VecGetNorm2(res, rnorm, is);
+
+  PARMS_FREE(res);
+
+  return 0;
+}
+
+
+static int bicgstabras_free(parms_Solver *self)
+{
+  /*dummy*/
+  return 0;
+}
+
+static int bicgstabras_view(parms_Solver self, parms_Viewer v)
+{
+  FILE *fp;
+  char *name, *iluname;
+  int dim;
+  parms_ViewerGetFP(v, &fp);
+
+  fprintf(fp,"\n=============================\n");
+  fprintf(fp,"     Solver Parameters       \n");
+  fprintf(fp,"=============================\n");
+
+  fprintf(fp, "Solver type = BiConjugate Gradient Stabilized Method (BICGS_RAS) \n");
+
+  fprintf(fp, "maxits = %d \n", self->maxits);
+  fprintf(fp, "Relative tolerance = %-8.2e \n", self->tol);
+
+  parms_PCGetName(self->pc, &name);
+  parms_PCILUGetName(self->pc, &iluname);
+  fprintf(fp, "Global Preconditioner: %s\n", name);
+  fprintf(fp, "Local Preconditioner: %s\n", iluname);
+
+  parms_ViewerGetFP(v, &fp);
+
+  return 0;
+}
+
+static int bicgstabras_setksize(parms_Solver self, int restart)
+{
+  /*dummy*/
+  return 0;
+}
+
+static int bicgstabras_setneig(parms_Solver self, int neigs)
+{
+  /*dummy*/
+  return 0;
+}
+
+static struct parms_Solver_ops parms_bicgstabras_sol = {
+  parms_bicgstabras,
+  bicgstabras_getresidual,
+  bicgstabras_getresidualnorm2,
+  bicgstabras_setksize,
+  bicgstabras_setneig,
+  bicgstabras_free,
+  bicgstabras_view
+};
+
+
+/** Create the BICGS_RAS solver.
+ *
+ *  \param self A parms_Solver object.
+ *  \return 0 on success.
+ */
+int bicgstabras_create(parms_Solver self)
+{
+  PARMS_MEMCPY(self->ops, &parms_bicgstabras_sol, 1);
+  return 0;
+}
diff --git a/lib/parms/src/include/parms_solver_impl.h b/lib/parms/src/include/parms_solver_impl.h
index 6e1d6666a..60d98f74f 100755
--- a/lib/parms/src/include/parms_solver_impl.h
+++ b/lib/parms/src/include/parms_solver_impl.h
@@ -42,4 +42,14 @@ extern int parms_gmres(parms_Solver self, FLOAT *y, FLOAT *x);
 extern int bicgstab_create(parms_Solver self);
 extern int parms_bicgstab(parms_Solver self, FLOAT *y, FLOAT *x);
+/* additional solver types (NR) */
+extern int cg_create(parms_Solver self);
+extern int parms_cg(parms_Solver self, FLOAT *y, FLOAT *x);
+extern int pbicgstab_create(parms_Solver self);
+extern int parms_pbicgstab(parms_Solver self, FLOAT *y, FLOAT *x);
+extern int pbicgstabras_create(parms_Solver self);
+extern int parms_pbicgstabras(parms_Solver self, FLOAT *y, FLOAT *x);
+extern int bicgstabras_create(parms_Solver self);
+extern int parms_bicgstabras(parms_Solver self, FLOAT *y, FLOAT *x);
+
 #endif
diff --git a/lib/parms/src/parms_solver.c b/lib/parms/src/parms_solver.c
index 5a72bf423..22120f69a 100755
--- a/lib/parms/src/parms_solver.c
+++ b/lib/parms/src/parms_solver.c
@@ -170,6 +170,12 @@ int parms_SolverSetType(parms_Solver self, SOLVERTYPE stype)
     bicgstab_create(self);
   else if(stype == SOLCG)
     cg_create(self);
+  else if(stype == SOLPBICGS)
+    pbicgstab_create(self);
+  else if(stype == SOLPBICGS_RAS)
+    pbicgstabras_create(self);
+  else if(stype == SOLBICGS_RAS)
+    bicgstabras_create(self);
   else{
     printf("ERROR: Invalid choice of solver - (Check SOLVERTYPE for parms_SolverSetType(...) \n");
     PARMS_ABORT(17);
diff --git a/lib/parms/src/pbicgstab.c b/lib/parms/src/pbicgstab.c
new file mode 100755
index 000000000..7f181c490
--- /dev/null
+++ b/lib/parms/src/pbicgstab.c
@@ -0,0 +1,326 @@
+/*--------------------------------------------------------------------
+  pbicgstab_create      : create the PBICGS solver.
+  pbicgstab_free        : free the memory for the PBICGS solver.
+  pbicgstab_view        : dump the PBICGS solver.
+  parms_pbicgstab       : the PBICGS solve function.
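+
+  The pipelining idea in short: post the global sums as one nonblocking
+  MPI_Iallreduce, then run the preconditioner apply and the matrix-vector
+  product that classical BiCGstab would only execute after the reduction,
+  and wait just before the result is needed.  A sketch of the pattern, as
+  used twice per iteration below (names as in this file):
+
+     MPI_Iallreduce(MPI_IN_PLACE, tmp, 2, MPI_DOUBLE, MPI_SUM, comm, &req1);
+     parms_PCApply(pc, z, zt);              zt = M^(-1) z   (overlapped)
+     parms_MatVec(A, zt, v);                v  = A zt       (overlapped)
+     MPI_Wait(&req1, MPI_STATUS_IGNORE);    now tmp may be used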
+
+  Pipelined BiCGstab, overlapping global sums with computation. See:
+  Siegfried Cools, Wim Vanroose
+  The communication-hiding pipelined BiCGStab method for the parallel solution of large unsymmetric linear systems
+  Parallel Computing 65, pp. 1-20, July 2017
+  ------------------------------------------------------------------*/
+
+#if defined(__ICC)
+#include <mathimf.h>
+#else
+#if defined(C99)
+#include <tgmath.h>
+#else
+#include <math.h>
+#endif
+#endif
+#include "parms_vec.h"
+#include "parms_mat.h"
+#include "parms_pc.h"
+#include "./include/parms_solver_impl.h"
+
+#define DBL_EPSILON 2.2204460492503131e-16 // double epsilon
+
+typedef struct pbicgs_data {
+  int neigs;
+} *pbicgstab_data;
+
+#define TINY 1.0e-20
+int parms_pbicgstab(parms_Solver self, double *rhs, double *xin)
+{
+
+  int i, its;
+  int maxits, nloc, size, one = 1;
+  double tmp[5];
+  double *b, *r0, *r, *rt, *w, *wt, *t, *pt, *s, *st, *z, *zt;
+  double *q, *qt, *v, *x, *y;
+  double tol, alpha, beta, omega, myres, rho, rho_alt;
+  MPI_Request req, req1, req2;
+
+  parms_Mat A;
+  parms_PC  pc;
+  parms_Map is;
+  parms_Viewer viewer;
+
+  int rank;
+  MPI_Comm comm;
+
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+  A  = self->A;
+  pc = self->pc;
+  is = A->is;
+
+  maxits = self->maxits;
+  tol  = self->tol*self->tol;
+  nloc = parms_MapGetLocalSize(is);
+  comm = is->comm;
+
+/*----- allocate memory for working local arrays -------*/
+/*----- initialize to zero if not initialized before the iteration starts ---*/
+  size = nloc;
+
+  PARMS_NEWARRAY(b, size);
+  PARMS_NEWARRAY(r0, size);
+  PARMS_NEWARRAY(r, size);   for (i=0; i<size; i++) r[i]  = 0.;
+  PARMS_NEWARRAY(rt, size);  for (i=0; i<size; i++) rt[i] = 0.;
+  PARMS_NEWARRAY(w, size);   for (i=0; i<size; i++) w[i]  = 0.;
+  PARMS_NEWARRAY(wt, size);  for (i=0; i<size; i++) wt[i] = 0.;
+  PARMS_NEWARRAY(t, size);   for (i=0; i<size; i++) t[i]  = 0.;
+  PARMS_NEWARRAY(pt, size);  for (i=0; i<size; i++) pt[i] = 0.;
+  PARMS_NEWARRAY(s, size);   for (i=0; i<size; i++) s[i]  = 0.;
+  PARMS_NEWARRAY(st, size);  for (i=0; i<size; i++) st[i] = 0.;
+  PARMS_NEWARRAY(z, size);   for (i=0; i<size; i++) z[i]  = 0.;
+  PARMS_NEWARRAY(zt, size);  for (i=0; i<size; i++) zt[i] = 0.;
+  PARMS_NEWARRAY(q, size);   for (i=0; i<size; i++) q[i]  = 0.;
+  PARMS_NEWARRAY(qt, size);  for (i=0; i<size; i++) qt[i] = 0.;
+  PARMS_NEWARRAY(v, size);   for (i=0; i<size; i++) v[i]  = 0.;
+  PARMS_NEWARRAY(x, size);   for (i=0; i<size; i++) x[i]  = 0.;
+  PARMS_NEWARRAY(y, size);   for (i=0; i<size; i++) y[i]  = 0.;
+
+  if (is->isperm) {
+    for (i = 0; i < nloc; i++) {
+      x[is->perm[i]] = xin[i];
+      b[is->perm[i]] = rhs[i];
+    }
+    is->isvecperm = true;
+  }
+
+
+  /* compute residual vector r0 = b - Ax */
+  /* b = y (permuted), t=Ax temporary vector */
+  parms_MatVec(A, x, t);
+  for (i=0; i<nloc; i++) {
+    r0[i] = b[i] - t[i];
+    r[i]  = r0[i];
+  }
+
+  parms_PCApply(pc, r, rt);       // rt = M r0
+  parms_MatVec(A, rt, w);         // w  = A rt
+
+  tmp[0] = 0.;
+  tmp[1] = 0.;
+  for (i=0; i<nloc; i++) {
+    tmp[0] += r0[i]*r[i];         // rho = (r0,r0)
+    tmp[1] += r0[i]*w[i];         //       (r0,w)
+  }
+  if(is->isserial == false)
+    MPI_Iallreduce(MPI_IN_PLACE, tmp, 2, MPI_DOUBLE, MPI_SUM, comm, &req);
+
+  parms_PCApply(pc, w, wt);       // wt = M w
+  parms_MatVec(A, wt, t);         // t = Awt
+
+  omega = 0.;
+  beta  = 0.;
+
+  if(is->isserial == false) MPI_Wait(&req, MPI_STATUS_IGNORE);
+
+  rho_alt = tmp[0];
+  alpha = tmp[0] / tmp[1];        // alpha = (r0,r0) / (r0,w)
+
+
+  for (its = 0; its < maxits; its++){
+
+    for (i=0; i<nloc; i++) {
+      pt[i] = rt[i] + beta*(pt[i] - omega*st[i]);
+      s[i]  = w[i]  + beta*(s[i]  - omega*z[i]);
+      st[i] = wt[i] + beta*(st[i] - omega*zt[i]);
+      z[i]  = t[i]  + beta*(z[i]  - omega*v[i]);
+      q[i]  = r[i]  - alpha*s[i];
+      qt[i] = rt[i] - alpha*st[i];
+      y[i]  = w[i]  - alpha*z[i];
+    }
+
+    tmp[0] = 0.;
+    tmp[1] = 0.;
+    for (i=0; i<nloc; i++) {
+      tmp[0] += q[i]*y[i];        // omega = (q,y)/(y,y)
+      tmp[1] += y[i]*y[i];
+    }
+    if(is->isserial == false)
+      MPI_Iallreduce(MPI_IN_PLACE, tmp, 2, MPI_DOUBLE, MPI_SUM, comm, &req1);
+
+    parms_PCApply(pc, z, zt);
+    parms_MatVec(A, zt, v);
+
+    if(is->isserial == false) MPI_Wait(&req1, MPI_STATUS_IGNORE);
+
+    omega = tmp[0]/tmp[1];
+
+    for (i=0; i<nloc; i++) {
+      x[i]  = x[i] + alpha*pt[i] + omega*qt[i];
+      r[i]  = q[i] - omega*y[i];
+      rt[i] = qt[i] - omega*(wt[i] - alpha*zt[i]);
+      w[i]  = y[i] - omega*(t[i] - alpha*v[i]);
+    }
+
+    tmp[0] = 0.; tmp[1] = 0.; tmp[2] = 0.; tmp[3] = 0.; tmp[4] = 0.;
+    for (i=0; i<nloc; i++) {
+      tmp[0] += r0[i]*r[i];       // rho = (r0,r)
+      tmp[1] += r0[i]*w[i];
+      tmp[2] += r0[i]*s[i];
+      tmp[3] += r0[i]*z[i];
+      tmp[4] += r[i]*r[i];        // (r,r) for the convergence check
+    }
+    if(is->isserial == false)
+      MPI_Iallreduce(MPI_IN_PLACE, tmp, 5, MPI_DOUBLE, MPI_SUM, comm, &req2);
+
+    parms_PCApply(pc, w, wt);
+    parms_MatVec(A, wt, t);
+
+    if(is->isserial == false) MPI_Wait(&req2, MPI_STATUS_IGNORE);
+
+    // Convergence check:
+    if ( tmp[4] < tol) break;
+
+    rho = tmp[0];
+    beta = alpha*rho / (omega*rho_alt);
+    rho_alt = rho;
+    alpha = rho / (tmp[1] + beta*(tmp[2] - omega*tmp[3]));
+
+    //pbicgstab_getresidualnorm2(self, b, x, &myres);
+
+    //if(rank == 0) printf("nparms check at %d, myres=%e, (r,r)=%e\n",its,myres, tmp[4]);
+
+    //if (myres < tol) break;
+  }
+
+
+  its++;
+  if(rank == 0){
+    if(its == maxits) {
+      printf("ERROR: no convergence\n");
+    } else {
+      printf("Parms iterations, residual %d, %e\n",its,sqrt(tmp[4]));
+    }
+  }
+  /* reset isvecperm and do inverse permutation*/
+  if (is->isperm) {
+    for (i = 0; i < nloc; i++) {
+      xin[is->iperm[i]] = x[i];
+    }
+    is->isvecperm = false;
+  }
+
+
+  free(b);
+  free(r0);
+  free(r);
+  free(rt);
+  free(w);
+  free(wt);
+  free(t);
+  free(pt);
+  free(s);
+  free(st);
+  free(z);
+  free(zt);
+  free(q);
+  free(qt);
+  free(y);
+  free(v);
+  free(x);
+
+  self->its = its;
+  return 0;
+}
+
+static int pbicgstab_getresidual(parms_Solver self, double *y, double *x, double *res)
+{
+
+  parms_Mat A;
+
+  A = self->A;
+
+  parms_MatMVPY(A, -1.0, x, 1.0, y, res);
+
+  return 0;
+}
+
+static int pbicgstab_getresidualnorm2(parms_Solver self, double *y, double *x, double *rnorm)
+{
+
+  int nloc;
+  double *res;
+  parms_Mat A;
+  parms_Map is;
+
+  A = self->A;
+  is = A->is;
+  nloc = parms_MapGetLocalSize(is);
+  PARMS_NEWARRAY(res, nloc);
+
+  parms_MatMVPY(A, -1.0, x, 1.0, y, res);
+
+  parms_VecGetNorm2(res, rnorm, is);
+
+  PARMS_FREE(res);
+
+  return 0;
+}
+
+
+static int pbicgstab_free(parms_Solver *self)
+{
+  /*dummy*/
+  return 0;
+}
+
+static int pbicgstab_view(parms_Solver self, parms_Viewer v)
+{
+  FILE *fp;
+  char *name, *iluname;
+  int dim;
+  parms_ViewerGetFP(v, &fp);
+
+  fprintf(fp,"\n=============================\n");
+  fprintf(fp,"     Solver Parameters       \n");
+  fprintf(fp,"=============================\n");
+
+  fprintf(fp, "Solver type = Pipelined BiConjugate Gradient Stabilized Method (PBICGS) \n");
+
+  fprintf(fp, "maxits = %d \n", self->maxits);
+  fprintf(fp, "Relative tolerance = %-8.2e \n", self->tol);
+
+  parms_PCGetName(self->pc, &name);
+  parms_PCILUGetName(self->pc, &iluname);
+  fprintf(fp, "Global Preconditioner: %s\n", name);
+  fprintf(fp, "Local Preconditioner: %s\n", iluname);
+
+  parms_ViewerGetFP(v, &fp);
+
+  return 0;
+}
+
+static int pbicgstab_setksize(parms_Solver self, int restart)
+{
+  /*dummy*/
+  return 0;
+}
+
+static int pbicgstab_setneig(parms_Solver self, int neigs)
+{
+  /*dummy*/
+  return 0;
+}
+
+static struct parms_Solver_ops parms_pbicgstab_sol = {
+  parms_pbicgstab,
+  pbicgstab_getresidual,
+  pbicgstab_getresidualnorm2,
+  pbicgstab_setksize,
+  pbicgstab_setneig,
+  pbicgstab_free,
+  pbicgstab_view
+};
+
+
+/** Create the PBICGS solver.
+ *
+ *  \param self A parms_Solver object.
+ *  \return 0 on success.
+ */
+int pbicgstab_create(parms_Solver self)
+{
+  PARMS_MEMCPY(self->ops, &parms_pbicgstab_sol, 1);
+  return 0;
+}
diff --git a/lib/parms/src/pbicgstab_ras.c b/lib/parms/src/pbicgstab_ras.c
new file mode 100644
index 000000000..b1044add6
--- /dev/null
+++ b/lib/parms/src/pbicgstab_ras.c
@@ -0,0 +1,381 @@
+/*--------------------------------------------------------------------
+  pbicgstabras_create   : create the PBICGS_RAS solver.
+  pbicgstabras_free     : free the memory for the PBICGS_RAS solver.
+  pbicgstabras_view     : dump the PBICGS_RAS solver.
+  parms_pbicgstabras    : the PBICGS_RAS solve function.
+
+  Pipelined BiCGstab, overlapping global sums with computation. See:
+  Siegfried Cools, Wim Vanroose
+  The communication-hiding pipelined BiCGStab method for the parallel solution of large unsymmetric linear systems
+  Parallel Computing 65, pp. 1-20, July 2017
+  ------------------------------------------------------------------*/
+
+#if defined(__ICC)
+#include <mathimf.h>
+#else
+#if defined(C99)
+#include <tgmath.h>
+#else
+#include <math.h>
+#endif
+#endif
+#include "parms_vec.h"
+#include "parms_mat.h"
+#include "parms_pc.h"
+#include "./include/parms_solver_impl.h"
+
+#include "./include/parms_comm_impl.h"
+#include "./include/parms_pc_impl.h"
+#include "./include/parms_opt_impl.h"
+
+typedef struct ras_data {
+  parms_Operator op;
+  parms_Comm     handler;
+  parms_Map      is;
+  BOOL           issetup;
+  FLOAT          *rbuf;
+  int            nloc;
+  int            n;
+  int            nodv;
+  int            nsend;
+} *ras_data;
+
+
+#define DBL_EPSILON 2.2204460492503131e-16 // double epsilon
+
+typedef struct pbicgs_data {
+  int neigs;
+} *pbicgstabras_data;
+
+#define TINY 1.0e-20
+int parms_pbicgstabras(parms_Solver self, double *rhs, double *xin)
+{
+
+  int i, its;
+  int maxits, nloc, n_ext, one = 1;
+  double tmp[5];
+  double *b, *r0, *r, *rt, *t, *pt, *s, *st;
+  double *q, *qt, *v, *x, *y;
+  double *z_ext, *zt_ext, *w_ext, *wt_ext;
+  double tol, alpha, beta, omega, myres, rho, rho_alt;
+  MPI_Request req, req1, req2;
+  int nodv, nsend;
+
+  parms_Mat A;
+  parms_PC  pc;
+  parms_Map is;
+  parms_Viewer viewer;
+  parms_Comm pc_handler;
+  ras_data pc_data;
+
+  int rank;
+  MPI_Comm comm;
+
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+  A  = self->A;
+  pc = self->pc;
+  is = A->is;
+
+  maxits = self->maxits;
+  tol  = self->tol*self->tol;
+  nloc = parms_MapGetLocalSize(is);
+  comm = is->comm;
+
+  // For inlined RAS preconditioner
+  pc_data    = (ras_data)pc->data;
+  n_ext      = pc_data->n;        // local size with halo for Schwarz
+  pc_handler = pc_data->handler;
+  nsend      = pc_data->nsend;
+  nodv       = pc_data->nodv;
+
+/*----- allocate memory for working local arrays -------*/
+/*----- initialize to zero if not initialized before the iteration starts ---*/
+
+  PARMS_NEWARRAY(b, nloc);
+  PARMS_NEWARRAY(r0, nloc);
+  PARMS_NEWARRAY(r, nloc);   for (i=0; i<nloc; i++) r[i]  = 0.;
+  PARMS_NEWARRAY(rt, nloc);  for (i=0; i<nloc; i++) rt[i] = 0.;
+  PARMS_NEWARRAY(t, nloc);   for (i=0; i<nloc; i++) t[i]  = 0.;
+  PARMS_NEWARRAY(pt, nloc);  for (i=0; i<nloc; i++) pt[i] = 0.;
+  PARMS_NEWARRAY(s, nloc);   for (i=0; i<nloc; i++) s[i]  = 0.;
+  PARMS_NEWARRAY(st, nloc);  for (i=0; i<nloc; i++) st[i] = 0.;
+  PARMS_NEWARRAY(q, nloc);   for (i=0; i<nloc; i++) q[i]  = 0.;
+  PARMS_NEWARRAY(qt, nloc);  for (i=0; i<nloc; i++) qt[i] = 0.;
+  PARMS_NEWARRAY(v, nloc);   for (i=0; i<nloc; i++) v[i]  = 0.;
+  PARMS_NEWARRAY(x, nloc);   for (i=0; i<nloc; i++) x[i]  = 0.;
+  PARMS_NEWARRAY(y, nloc);   for (i=0; i<nloc; i++) y[i]  = 0.;
+
+  PARMS_NEWARRAY(z_ext, n_ext);   for (i=0; i<n_ext; i++) z_ext[i]  = 0.;
+  PARMS_NEWARRAY(zt_ext, n_ext);  for (i=0; i<n_ext; i++) zt_ext[i] = 0.;
+  PARMS_NEWARRAY(w_ext, n_ext);   for (i=0; i<n_ext; i++) w_ext[i]  = 0.;
+  PARMS_NEWARRAY(wt_ext, n_ext);  for (i=0; i<n_ext; i++) wt_ext[i] = 0.;
+
+  if (is->isperm) {
+    for (i = 0; i < nloc; i++) {
+      x[is->perm[i]] = xin[i];
+      b[is->perm[i]] = rhs[i];
+    }
+    is->isvecperm = true;
+  }
+
+
+  /* compute residual vector r0 = b - Ax */
+  /* b = y (permuted), t=Ax temporary vector */
+  parms_MatVec(A, x, t);
+  for (i=0; i<nloc; i++) {
+    r0[i] = b[i] - t[i];
+    r[i]  = r0[i];
+  }
+
+  parms_PCApply(pc, r, rt);          // rt = M r0
+  parms_MatVec(A, rt, w_ext);        // w  = A rt
+
+  tmp[0] = 0.;
+  tmp[1] = 0.;
+  for (i=0; i<nloc; i++) {
+    tmp[0] += r0[i]*r[i];            // rho = (r0,r0)
+    tmp[1] += r0[i]*w_ext[i];        //       (r0,w)
+  }
+  if(is->isserial == false)
+    MPI_Iallreduce(MPI_IN_PLACE, tmp, 2, MPI_DOUBLE, MPI_SUM, comm, &req);
+
+  parms_PCApply(pc, w_ext, wt_ext);  // wt = M w
+  parms_MatVec(A, wt_ext, t);        // t = Awt
+
+  omega = 0.;
+  beta  = 0.;
+
+  if(is->isserial == false) MPI_Wait(&req, MPI_STATUS_IGNORE);
+
+  rho_alt = tmp[0];
+  alpha = tmp[0] / tmp[1];           // alpha = (r0,r0) / (r0,w)
+
+
+  for (its = 0; its < maxits; its++){
+
+    for (i=0; i<nloc; i++) {
+      pt[i]    = rt[i]     + beta*(pt[i]    - omega*st[i]);
+      s[i]     = w_ext[i]  + beta*(s[i]     - omega*z_ext[i]);
+      st[i]    = wt_ext[i] + beta*(st[i]    - omega*zt_ext[i]);
+      z_ext[i] = t[i]      + beta*(z_ext[i] - omega*v[i]);
+      q[i]     = r[i]      - alpha*s[i];
+      qt[i]    = rt[i]     - alpha*st[i];
+      y[i]     = w_ext[i]  - alpha*z_ext[i];
+    }
+
+    // halo exchange for z_ext (start of the inlined RAS preconditioner)
+    if (nsend) parms_CommDataBegin(pc_handler, z_ext, 0);
+    if (nodv) parms_CommDataEnd(pc_handler);
+    if (pc_data->n - pc_data->nloc)
+      PARMS_MEMCPY(&z_ext[nloc], pc_data->rbuf, pc_data->n - pc_data->nloc);
+
+    tmp[0] = 0.; tmp[1] = 0.;
+    for (i=0; i<nloc; i++) {
+      tmp[0] += q[i]*y[i];           // omega = (q,y)/(y,y)
+      tmp[1] += y[i]*y[i];
+    }
+    if(is->isserial == false)
+      MPI_Iallreduce(MPI_IN_PLACE, tmp, 2, MPI_DOUBLE, MPI_SUM, comm, &req1);
+
+    /* === Preconditioner RAS  parms_PCApply(pc, z, zt); === */
+
+    /* solve the extended linear system */
+    parms_OperatorApply(pc_data->op, z_ext, zt_ext);
+
+    /* No halo exchange of the solution, that's the "Restricted" in RAS */
+    /* === End Preconditioner === */
+
+    parms_MatVec(A, zt_ext, v);
+
+    if(is->isserial == false) MPI_Wait(&req1, MPI_STATUS_IGNORE);
+
+    omega = tmp[0]/tmp[1];
+
+    for (i=0; i<nloc; i++) {
+      x[i]     = x[i] + alpha*pt[i] + omega*qt[i];
+      r[i]     = q[i] - omega*y[i];
+      rt[i]    = qt[i] - omega*(wt_ext[i] - alpha*zt_ext[i]);
+      w_ext[i] = y[i] - omega*(t[i] - alpha*v[i]);
+    }
+
+    // halo exchange for w_ext (start of the inlined RAS preconditioner)
+    if (nsend) parms_CommDataBegin(pc_handler, w_ext, 0);
+    if (nodv) parms_CommDataEnd(pc_handler);
+    if (pc_data->n - pc_data->nloc)
+      PARMS_MEMCPY(&w_ext[nloc], pc_data->rbuf, pc_data->n - pc_data->nloc);
+
+    tmp[0] = 0.; tmp[1] = 0.; tmp[2] = 0.; tmp[3] = 0.; tmp[4] = 0.;
+    for (i=0; i<nloc; i++) {
+      tmp[0] += r0[i]*r[i];          // rho = (r0,r)
+      tmp[1] += r0[i]*w_ext[i];
+      tmp[2] += r0[i]*s[i];
+      tmp[3] += r0[i]*z_ext[i];
+      tmp[4] += r[i]*r[i];           // (r,r) for the convergence check
+    }
+    if(is->isserial == false)
+      MPI_Iallreduce(MPI_IN_PLACE, tmp, 5, MPI_DOUBLE, MPI_SUM, comm, &req2);
+
+    /* === Preconditioner RAS: parms_PCApply(pc, w, wt); === */
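+
+    /* Note: the halo exchange for w_ext already completed above, before the
+       5-term MPI_Iallreduce was posted; only the local extended solve and
+       the matrix-vector product remain here, so they overlap with the
+       global reduction still in flight. */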
+
+
+    parms_OperatorApply(pc_data->op, w_ext, wt_ext);
+
+    /* === End Preconditioner === */
+
+    parms_MatVec(A, wt_ext, t);
+
+    if(is->isserial == false) MPI_Wait(&req2, MPI_STATUS_IGNORE);
+
+    // Convergence check:
+    if ( tmp[4] < tol) break;
+
+    rho = tmp[0];
+    beta = alpha*rho / (omega*rho_alt);
+    rho_alt = rho;
+    alpha = rho / (tmp[1] + beta*(tmp[2] - omega*tmp[3]));
+
+    //pbicgstabras_getresidualnorm2(self, b, x, &myres);
+    //if(rank == 0) printf("nparms check at %d, myres=%e, (r,r)=%e\n",its,myres, tmp[4]);
+
+  }
+
+
+  its++;
+  if(rank == 0){
+    if(its == maxits) {
+      printf("ERROR: no convergence\n");
+    } else {
+      printf("Parms iterations, residual %d, %e\n",its,sqrt(tmp[4]));
+    }
+  }
+  /* reset isvecperm and do inverse permutation*/
+  if (is->isperm) {
+    for (i = 0; i < nloc; i++) {
+      xin[is->iperm[i]] = x[i];
+    }
+    is->isvecperm = false;
+  }
+
+
+  free(b);
+  free(r0);
+  free(r);
+  free(rt);
+  // free(wt);
+  free(t);
+  free(pt);
+  free(s);
+  free(st);
+  // free(zt);
+  free(q);
+  free(qt);
+  free(y);
+  free(v);
+  free(x);
+  free(z_ext);
+  free(zt_ext);
+  free(w_ext);
+  free(wt_ext);
+
+  self->its = its;
+  return 0;
+}
+
+static int pbicgstabras_getresidual(parms_Solver self, double *y, double *x, double *res)
+{
+
+  parms_Mat A;
+
+  A = self->A;
+
+  parms_MatMVPY(A, -1.0, x, 1.0, y, res);
+
+  return 0;
+}
+
+static int pbicgstabras_getresidualnorm2(parms_Solver self, double *y, double *x, double *rnorm)
+{
+
+  int nloc;
+  double *res;
+  parms_Mat A;
+  parms_Map is;
+
+  A = self->A;
+  is = A->is;
+  nloc = parms_MapGetLocalSize(is);
+  PARMS_NEWARRAY(res, nloc);
+
+  parms_MatMVPY(A, -1.0, x, 1.0, y, res);
+
+  parms_VecGetNorm2(res, rnorm, is);
+
+  PARMS_FREE(res);
+
+  return 0;
+}
+
+
+static int pbicgstabras_free(parms_Solver *self)
+{
+  /*dummy*/
+  return 0;
+}
+
+static int pbicgstabras_view(parms_Solver self, parms_Viewer v)
+{
+  FILE *fp;
+  char *name, *iluname;
+  int dim;
+  parms_ViewerGetFP(v, &fp);
+  fprintf(fp,"\n=============================\n");
+  fprintf(fp,"     Solver Parameters       \n");
+  fprintf(fp,"=============================\n");
+
+  fprintf(fp, "Solver type = Pipelined BiConjugate Gradient Stabilized Method (PBICGS_RAS) \n");
+  fprintf(fp, "              with hard coded global preconditioner RAS \n");
+
+  fprintf(fp, "maxits = %d \n", self->maxits);
+  fprintf(fp, "Relative tolerance = %-8.2e \n", self->tol);
+
+  parms_PCILUGetName(self->pc, &iluname);
+
+  fprintf(fp, "Local Preconditioner: %s\n", iluname);
+
+  parms_ViewerGetFP(v, &fp);
+
+  return 0;
+}
+
+static int pbicgstabras_setksize(parms_Solver self, int restart)
+{
+  /*dummy*/
+  return 0;
+}
+
+static int pbicgstabras_setneig(parms_Solver self, int neigs)
+{
+  /*dummy*/
+  return 0;
+}
+
+static struct parms_Solver_ops parms_pbicgstabras_sol = {
+  parms_pbicgstabras,
+  pbicgstabras_getresidual,
+  pbicgstabras_getresidualnorm2,
+  pbicgstabras_setksize,
+  pbicgstabras_setneig,
+  pbicgstabras_free,
+  pbicgstabras_view
+};
+
+
+/** Create the PBICGS_RAS solver.
+ *
+ *  \param self A parms_Solver object.
+ *  \return 0 on success.
+ */
+int pbicgstabras_create(parms_Solver self)
+{
+  PARMS_MEMCPY(self->ops, &parms_pbicgstabras_sol, 1);
+  return 0;
+}
diff --git a/src/Makefile b/src/Makefile
index e8d4cd841..d9791340a 100755
--- a/src/Makefile
+++ b/src/Makefile
@@ -33,11 +33,11 @@ MODULES = oce_modules.o \
           oce_ale_mixing_pp.o \
           oce_tracer_mod.o \
           fort_part.o \
-          gen_input.o \
           io_meandata.o \
           io_restart.o \
           io_blowup.o \
-          io_mesh_info.o
+          io_mesh_info.o \
+          gen_ic3d.o
 
 OBJECTS= fvom_main.o \
          gen_comm.o \
@@ -54,6 +54,8 @@ OBJECTS= fvom_main.o \
          oce_shortwave_pene.o \
          oce_ale.o \
          oce_ale_tracer.o \
+         oce_mo_conv.o \
+         oce_spp.o \
         ice_EVP.o \
         ice_maEVP.o \
         ice_setup_step.o \
diff --git a/src/fvom_init.F90 b/src/fvom_init.F90
index e6f05eab3..8818234dc 100755
--- a/src/fvom_init.F90
+++ b/src/fvom_init.F90
@@ -880,7 +880,7 @@ end subroutine partit
 
   call partit(ssh_stiff%dim, ssh_stiff%rowptr, ssh_stiff%colind, &
        nlevels_nod2D, np, part)
-!!$  call check_partitioning
+  call check_partitioning
 
   write(*,*) 'Partitioning is done.'
 
@@ -938,119 +938,103 @@ subroutine check_partitioning
    allocate(ne_part(max_adjacent_nodes), ne_part_num(max_adjacent_nodes), &
         ne_part_load(2,max_adjacent_nodes))
 
-   isolated_nodes_check: do n_iter = 1, 10
-      print *,' '
-      print *,'Check for isolated nodes, new iteration ========'
-      n_iso = 0
-      do n=1,nod2D
-         is = ssh_stiff%rowptr(n)
-         ie = ssh_stiff%rowptr(n+1) -1
-
-         node_neighb_part(1:ie-is) = part(ssh_stiff%colind(is:ie))
-         if (count(node_neighb_part(1:ie-is) == part(n)) <= 1) then
-
-            n_iso = n_iso+1
-            print *,'Isolated node',n, 'in partition', part(n)
-            print *,'Neighbouring nodes are in partitions', node_neighb_part(1:ie-is)
-
-            ! count the adjacent nodes of the other PEs
-
-            np=1
-            ne_part(1) = node_neighb_part(1)
-            ne_part_num(1) = 1
-            ne_part_load(1,1) = nod_per_partition(1,ne_part(1)) + 1
-            ne_part_load(2,1) = nod_per_partition(2,ne_part(1)) + nlevels_nod2D(n)
-
-            do i=1,ie-is
-               if (node_neighb_part(i)==part(n)) cycle
-               already_counted = .false.
-               do k=1,np
-                  if (node_neighb_part(i) == ne_part(k)) then
-                     ne_part_num(k) = ne_part_num(k) + 1
-                     already_counted = .true.
-                     exit
-                  endif
-               enddo
-               if (.not. already_counted) then
-                  np = np+1
-                  ne_part(np) = node_neighb_part(i)
-                  ne_part_num(np) = 1
-                  ne_part_load(1,np) = nod_per_partition(1,ne_part(np)) + 1
-                  ne_part_load(2,np) = nod_per_partition(2,ne_part(np)) + nlevels_nod2D(n)
+   print *,' '
+   print *,'Check for isolated nodes ========'
+   n_iso = 0
+   checkloop: do n=1,nod2D
+      is = ssh_stiff%rowptr(n)
+      ie = ssh_stiff%rowptr(n+1) -1
+
+      node_neighb_part(1:ie-is) = part(ssh_stiff%colind(is:ie))
+      if (count(node_neighb_part(1:ie-is) == part(n)) <= 1) then
+
+         n_iso = n_iso+1
+         print *,'Isolated node',n, 'in partition', part(n)
+         print *,'Neighbouring nodes are in partitions', node_neighb_part(1:ie-is)
+
+         ! count the adjacent nodes of the other PEs
+
+         np=1
+         ne_part(1) = node_neighb_part(1)
+         ne_part_num(1) = 1
+         ne_part_load(1,1) = nod_per_partition(1,ne_part(1)) + 1
+         ne_part_load(2,1) = nod_per_partition(2,ne_part(1)) + nlevels_nod2D(n)
+
+         do i=1,ie-is
+            if (node_neighb_part(i)==part(n)) cycle
+            already_counted = .false.
+            do k=1,np
+               if (node_neighb_part(i) == ne_part(k)) then
+                  ne_part_num(k) = ne_part_num(k) + 1
+                  already_counted = .true.
+                  exit
+               endif
+            enddo
+            if (.not. already_counted) then
+               np = np+1
+               ne_part(np) = node_neighb_part(i)
+               ne_part_num(np) = 1
+               ne_part_load(1,np) = nod_per_partition(1,ne_part(np)) + 1
+               ne_part_load(2,np) = nod_per_partition(2,ne_part(np)) + nlevels_nod2D(n)
+            endif
+         enddo
-            ! Now, check for two things: The load balance, and if
-            ! there is more than one node of that partition.
-            ! Otherwise, it would become isolated again.
-
-            ! Best choice would be the partition with most adjacent nodes (edgecut!)
-            ! Choose, if it does not decrease the load balance.
-            ! (There might be two partitions with the same number of adjacent
-            !  nodes. Don't care about this here)
+         ! Now, check for two things: The load balance, and if
-            kmax = maxloc(ne_part_num(1:np),1)
+         ! there is more than one node of that partition.
+         ! Otherwise, it would become isolated again.
-            if (ne_part_num(kmax) <= 1) then
-               print *,'Sorry, no chance to solve an isolated node problem'
-               exit isolated_nodes_check
-            endif
+         ! Best choice would be the partition with most adjacent nodes (edgecut!)
+         ! Choose, if it does not decrease the load balance.
+         ! (There might be two partitions with the same number of adjacent
+         !  nodes. Don't care about this here)
-            if (ne_part_load(1,kmax) <= max_nod_per_part(1) .and. &
-                ne_part_load(2,kmax) <= max_nod_per_part(2) ) then
-               k = kmax
-            else
+         kmax = maxloc(ne_part_num(1:np),1)
-               ! Don't make it too compicated. Reject partitions that have only one
-               ! adjacent node. Take the next not violating the load balance.
-               found_part = .false.
-               do k=1,np
-                  if (ne_part_num(k)==1 .or. k==kmax) cycle
+         if (ne_part_num(kmax) <= 1) then
+            ! No chance - this is probably a boundary
+            ! node that has only two neighbors.
+            cycle checkloop
+         endif
-                  if (ne_part_load(1,k) <= max_nod_per_part(1) .and. &
-                      ne_part_load(2,k) <= max_nod_per_part(2) ) then
+         if (ne_part_load(1,kmax) <= max_nod_per_part(1) .and. &
+             ne_part_load(2,kmax) <= max_nod_per_part(2) ) then
+            k = kmax
+         else
+            ! Don't make it too complicated. Reject partitions that have only one
+            ! adjacent node. Take the next not violating the load balance.
+            found_part = .false.
+            do k=1,np
+               if (ne_part_num(k)==1 .or. k==kmax) cycle
-                     found_part = .true.
-                     exit
-                  endif
-               enddo
+               if (ne_part_load(1,k) <= max_nod_per_part(1) .and. &
+                   ne_part_load(2,k) <= max_nod_per_part(2) ) then
-               if (.not. found_part) then
-                  ! Ok, don't think to much. Simply go for minimized edge cut.
-                  k = kmax
+                  found_part = .true.
+                  exit
+               endif
-               endif
+            enddo
-               ! Adjust the load balancing
+            if (.not. found_part) then
+               ! Ok, don't think too much. Simply go for minimized edge cut.
+               k = kmax
+            endif
+         endif
-               nod_per_partition(1,ne_part(k)) = nod_per_partition(1,ne_part(k)) + 1
-               nod_per_partition(2,ne_part(k)) = nod_per_partition(2,ne_part(k)) + nlevels_nod2D(n)
-               nod_per_partition(1,part(n)) = nod_per_partition(1,part(n)) - 1
-               nod_per_partition(2,part(n)) = nod_per_partition(2,part(n)) - nlevels_nod2D(n)
+         ! Adjust the load balancing
-               ! And, finally, move nod n to other partition
-               part(n) = ne_part(k)
-               print *,'Node',n,'is moved to part',part(n)
-            endif
-         enddo
+         nod_per_partition(1,ne_part(k)) = nod_per_partition(1,ne_part(k)) + 1
+         nod_per_partition(2,ne_part(k)) = nod_per_partition(2,ne_part(k)) + nlevels_nod2D(n)
+         nod_per_partition(1,part(n)) = nod_per_partition(1,part(n)) - 1
+         nod_per_partition(2,part(n)) = nod_per_partition(2,part(n)) - nlevels_nod2D(n)
-         if (n_iso==0) then
-            print *,'No isolated nodes found'
-            exit isolated_nodes_check
+         ! And, finally, move nod n to other partition
+         part(n) = ne_part(k)
+         print *,'Node',n,'is moved to part',part(n)
+      endif
-         endif
-         ! Check for isolated nodes again
-   end do isolated_nodes_check
+   enddo checkloop
 
    deallocate(ne_part, ne_part_num, ne_part_load)
 
-   if (n_iso > 0) then
-      print *,'++++++++++++++++++++++++++++++++++++++++++++'
-      print *,'+ WARNING: PARTITIONING LOOKS REALLY BAD   +'
-      print *,'+ It was not possible to remove all        +'
-      print *,'+ isolated nodes. Consider to rerun with   +'
-      print *,'+ new metis random seed (see Makefile.in)  +'
-      print *,'++++++++++++++++++++++++++++++++++++++++++++'
-   endif
-
    print *,'=== LOAD BALANCING ==='
    print *,'2D nodes: min, aver, max per part',min_nod_per_part(1), &
         average_nod_per_part(1),max_nod_per_part(1)
diff --git a/src/fvom_main.F90 b/src/fvom_main.F90
index eb1e9a2ff..3384f891a 100755
--- a/src/fvom_main.F90
+++ b/src/fvom_main.F90
@@ -168,16 +168,16 @@ program main
  if (mype==0) then
     write(*,*) '___MODEL RUNTIME [seconds]_____________________________'
     write(*,*) '    runtime ocean             : ',mrtime_oce/npes, ' sec'
-    write(*,*) '  > runtime oce. mix,pres...: ',mrtime_oce_mixpres/npes, ' sec'
-    write(*,*) '  > runtime oce. dyn. u,v,w : ',mrtime_oce_dyn/npes, ' sec'
-    write(*,*) '  > runtime oce. dyn. ssh   : ',mrtime_oce_dynssh/npes, ' sec'
+    write(*,*) '  > runtime oce. mix,pres...  : ',mrtime_oce_mixpres/npes, ' sec'
+    write(*,*) '  > runtime oce. dyn. u,v,w   : ',mrtime_oce_dyn/npes, ' sec'
+    write(*,*) '  > runtime oce. dyn. ssh     : ',mrtime_oce_dynssh/npes, ' sec'
     write(*,*) '  > runtime oce. solve ssh    : ',mrtime_oce_solvessh/npes, ' sec'
-    write(*,*) '  > runtime oce. GM/Redi    : ',mrtime_oce_GMRedi/npes, ' sec'
-    write(*,*) '  > runtime oce. solve tacer: ',mrtime_oce_solvetra/npes, ' sec'
+    write(*,*) '  > runtime oce. GM/Redi      : ',mrtime_oce_GMRedi/npes, ' sec'
+    write(*,*) '  > runtime oce. solve tracer : ',mrtime_oce_solvetra/npes, ' sec'
     write(*,*) '    runtime ice               : ',mrtime_ice/npes, ' sec'
     write(*,*) '    runtime total             : ',mrtime_tot/npes, ' sec'
  end if
 call clock_finish
 call par_ex
 end program main
- 
\ No newline at end of file
+ 
diff --git a/src/gen_bulk_formulae.F90 b/src/gen_bulk_formulae.F90
index 8ac9d1a32..3b99e17a2 100755
--- a/src/gen_bulk_formulae.F90
+++ b/src/gen_bulk_formulae.F90
@@ -50,7 +50,7 @@ subroutine ncar_ocean_fluxes_mode
    cd_n10 = (2.7/u10+0.142+0.0764*u10)*1.0e-3           ! L-Y eqn. 6a
    cd_n10_rt = sqrt(cd_n10)
    ce_n10 = 34.6 *cd_n10_rt*1.0e-3                      ! L-Y eqn. 6b
-   stab = 0.5 + sign(0.5,t-ts)
+   stab = 0.5 + sign(0.5_WP,t-ts)
    ch_n10 = (18.0*stab+32.7*(1.0-stab))*cd_n10_rt*1.e-3 ! L-Y eqn. 6c
 
    cd = cd_n10                                          ! first guess for exchange coeff's at z
@@ -63,7 +63,7 @@ subroutine ncar_ocean_fluxes_mode
       qstar = (ce/cd_rt)*(q-qs)                         ! L-Y eqn. 7c
       bstar = grav*(tstar/tv+qstar/(q+1.0/0.608))
       zeta = vonkarm*bstar*zz/(ustar*ustar)             ! L-Y eqn. 8a
-      zeta = sign( min(abs(zeta),10.0), zeta )          ! undocumented NCAR
+      zeta = sign( min(abs(zeta),10.0_WP), zeta )       ! undocumented NCAR
      x2 = sqrt(abs(1.-16.*zeta))                        ! L-Y eqn. 8b
      x2 = max(x2, 1.0)                                  ! undocumented NCAR
      x = sqrt(x2)
@@ -80,7 +80,7 @@ subroutine ncar_ocean_fluxes_mode
      cd_n10 = (2.7/u10+0.142+0.0764*u10)*1.e-3          ! L-Y eqn. 6a again
      cd_n10_rt = sqrt(cd_n10)
      ce_n10 = 34.6*cd_n10_rt*1.e-3                      ! L-Y eqn. 6b again
-     stab = 0.5 + sign(0.5,zeta)
+     stab = 0.5 + sign(0.5_WP,zeta)
      ch_n10 = (18.0*stab+32.7*(1.0-stab))*cd_n10_rt*1.e-3 ! L-Y eqn. 6c again
 
      !z0 = 10*exp(-vonkarm/cd_n10_rt)                   ! diagnostic
diff --git a/src/oce_ale.F90 b/src/oce_ale.F90
index 67128e762..484a215f1 100644
--- a/src/oce_ale.F90
+++ b/src/oce_ale.F90
@@ -1776,10 +1776,16 @@ end subroutine psolve
 
     ! new_values>0 requires reuse=1 in psolver_init!
     if (lfirst) then
-       ! Set SOLCG for CG solver (symmetric, positiv definit matrices only!!)
+       ! Set SOLCG for CG solver (symmetric, positive definite matrices only, no precond available!!)
        !     SOLBICGS for BiCGstab solver (arbitrary matrices)
-       !    call psolver_init(ident, SOLCG, PCRAS, PCILUK, lutype, &
-       call psolver_init(ident, SOLBICGS, PCRAS, PCILUK, lutype, &
+       !     SOLBICGS_RAS for BiCGstab solver (arbitrary matrices) with integrated RAS - the global
+       !        preconditioner setting is ignored! It saves 4 vector copies per iteration
+       !        compared to SOLBICGS + PCRAS.
+       !     SOLPBICGS for pipelined BiCGstab solver (arbitrary matrices)
+       !        Should scale better than SOLBICGS, but be careful, it is still experimental.
+       !     SOLPBICGS_RAS is SOLPBICGS with integrated RAS (global preconditioner setting is ignored!)
+       !        for even better scalability; in practice the difference is small.
+       call psolver_init(ident, SOLBICGS_RAS, PCRAS, PCILUK, lutype, &
            fillin, droptol, maxiter, restart, soltol, &
            part-1, ssh_stiff%rowptr(:)-ssh_stiff%rowptr(1), &
            ssh_stiff%colind-1, ssh_stiff%values, reuse, MPI_COMM_FESOM)
diff --git a/src/oce_ale_fct_3d_adv.F90 b/src/oce_ale_fct_3d_adv.F90
index 31b4dfafc..bd05fa5df 100644
--- a/src/oce_ale_fct_3d_adv.F90
+++ b/src/oce_ale_fct_3d_adv.F90
@@ -622,8 +622,7 @@ subroutine fct_ale(ttf, iter_yn)
     end do
 
     ! fct_minus and fct_plus must be known to neighbouring PE
-    call exchange_nod(fct_plus)
-    call exchange_nod(fct_minus)
+    call exchange_nod(fct_plus, fct_minus)
 
     !___________________________________________________________________________
     ! b3. Limiting
@@ -655,6 +654,8 @@ subroutine fct_ale(ttf, iter_yn)
        end do
        ! the bottom flux is always zero
     end do
+
+    call exchange_nod_end  ! fct_plus, fct_minus
 
     !Horizontal
     do edge=1, myDim_edge2D
diff --git a/src/oce_ale_pressure_bv.F90 b/src/oce_ale_pressure_bv.F90
index 70a9fa519..5b6da64d8 100644
--- a/src/oce_ale_pressure_bv.F90
+++ b/src/oce_ale_pressure_bv.F90
@@ -186,7 +186,7 @@ subroutine pressure_force
     real(kind=WP) :: s_z(4), s_dens(4), s_H, aux1, aux2, aux(2), s_dup, s_dlo
     real(kind=WP) :: a, b, c, d, dz, rho_n(3), rhograd_e(2), p_grad(2)
 
-    if (trim(which_ale)=='linfs' .and. (use_partial_cell==.false.) ) then
+    if (trim(which_ale)=='linfs' .and. (.not. use_partial_cell) ) then
        !___________________________________________________________________________
        ! loop over triangular elemments
        do elem=1, myDim_elem2D
diff --git a/src/oce_mesh.F90 b/src/oce_mesh.F90
index 13fca2a29..b435e513a 100755
--- a/src/oce_mesh.F90
+++ b/src/oce_mesh.F90
@@ -175,13 +175,13 @@ SUBROUTINE read_mesh
        read(fileID,*) ibuff(n,1), rbuff(n,1), rbuff(n,2), ibuff(n,2)
        !___________________________________________________________________
        ! check if input mesh is already rotated --> force_rotation flag == .False.
-       if (force_rotation==.True. .and. &
+       if (force_rotation .and. &
           (rbuff(n,1)>=xp-2.5 .and. rbuff(n,1)<=xp+2.5 .and. &
           rbuff(n,2)>=yp-2.5 .and. rbuff(n,2)<=yp+2.5)) then
           flag_checkisrot = 1
        !___________________________________________________________________
        ! check if input mesh is already unrotated --> force_rotation flag ==.True.
-       elseif (force_rotation==.False. .and. &
+       elseif ((.not. force_rotation) .and. &
           (rbuff(n,1)>=xp-2.5 .and. rbuff(n,1)<=xp+2.5 .and. &
           rbuff(n,2)>=yp-2.5 .and. rbuff(n,2)<=yp+2.5)) then
           flag_checkmustrot = 0
@@ -214,7 +214,7 @@ SUBROUTINE read_mesh
 
    !___________________________________________________________________________
    ! check if rotation is applied to an already rotated mesh
-   if ((mype==0) .and. (force_rotation==.True.) .and. (flag_checkisrot==1)) then
+   if ((mype==0) .and. (force_rotation) .and. (flag_checkisrot==1)) then
       write(*,*) '____________________________________________________________________'
      write(*,*) ' ERROR: Your input mesh seems to be rotated and you try to'
      write(*,*) '        rotate it again in FESOM (force_rotation=.true. ) !'
@@ -231,7 +231,7 @@ SUBROUTINE read_mesh
      call par_ex(0)
    !___________________________________________________________________________
    ! check if rotation needs to be applied to an unrotated mesh
-   elseif ((mype==0) .and. (force_rotation==.False.) .and. (flag_checkmustrot==1)) then
+   elseif ((mype==0) .and. (.not. force_rotation) .and. (flag_checkmustrot==1)) then
      write(*,*) '____________________________________________________________________'
      write(*,*) ' ERROR: Your input mesh seems to be unrotated this requires'
      write(*,*) '        that it is rotated in FESOM, but you set force_rotation=.False'
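
-------------------------------------------------------------------------------

A minimal usage sketch (not part of the patch itself): the new SOLVERTYPE
values are selected through the existing pARMS C interface, with
parms_SolverSetType() dispatching to the create functions wired up in
parms_solver.c above.  The setup of the distributed matrix A and of the
preconditioner pc is assumed to follow the standard pARMS examples.

    #include "parms.h"

    /* Solve A x = rhs with one of the solvers added by this patch. */
    int solve_with_pbicgs(parms_Mat A, parms_PC pc, FLOAT *rhs, FLOAT *sol)
    {
       parms_Solver solver;

       parms_SolverCreate(&solver, A, pc);
       /* SOLPBICGS, SOLPBICGS_RAS or SOLBICGS_RAS; for the *_RAS variants
          the global preconditioner setting is ignored - RAS is hard-coded. */
       parms_SolverSetType(solver, SOLPBICGS);
       parms_SolverSetParam(solver, MAXITS, "1000");
       parms_SolverApply(solver, rhs, sol);   /* runs parms_pbicgstab() */
       parms_SolverFree(&solver);
       return 0;
    }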