From 88d23aeb720dbaae5d9d887a60d9581d97e1293b Mon Sep 17 00:00:00 2001 From: dsidoren Date: Tue, 23 Jan 2024 17:53:23 +0200 Subject: [PATCH] fixed GPU bug when dynamics%use_wsplit=.TRUE. The problem was that adv_tra_vert_impl is implemented only on the CPU. Hence !$ACC UPDATE HOST(fct_LO) is required before the call and !$ACC UPDATE DEVICE(fct_LO) is required after it. adv_tra_vert_impl needs to be ported to OpenACC as soon as possible. --- src/fesom_module.F90 | 19 ++++++++++--------- src/gen_surface_forcing.F90 | 4 ++-- src/oce_adv_tra_driver.F90 | 3 +++ src/oce_adv_tra_ver.F90 | 1 + src/oce_ale_tracer.F90 | 11 +++++------ src/oce_tracer_mod.F90 | 2 +- 6 files changed, 22 insertions(+), 18 deletions(-) diff --git a/src/fesom_module.F90 b/src/fesom_module.F90 index ee434148f..ab0fa8056 100755 --- a/src/fesom_module.F90 +++ b/src/fesom_module.F90 @@ -306,11 +306,11 @@ subroutine fesom_init(fesom_total_nsteps) !$ACC ENTER DATA COPYIN (f%partit, f%partit%eDim_nod2D, f%partit%myDim_edge2D) !$ACC ENTER DATA COPYIN (f%partit%myDim_elem2D, f%partit%myDim_nod2D, f%partit%myList_edge2D) - !$ACC ENTER DATA COPYIN (f%mesh%helem, f%mesh%elem_cos, f%mesh%edge_cross_dxdy, f%mesh%elem2d_nodes, f%mesh%nl) + !$ACC ENTER DATA COPYIN (f%mesh%elem_cos, f%mesh%edge_cross_dxdy, f%mesh%elem2d_nodes, f%mesh%nl) !$ACC ENTER DATA COPYIN (f%mesh%nlevels_nod2D, f%mesh%nod_in_elem2D, f%mesh%nod_in_elem2D_num) - !$ACC ENTER DATA COPYIN (f%mesh%edge_dxdy, f%mesh%nlevels, f%mesh%hnode, f%mesh%hnode_new, f%mesh%ulevels_nod2D_max) - !$ACC ENTER DATA COPYIN (f%mesh%zbar_3d_n, f%mesh%z_3d_n, f%mesh%areasvol, f%mesh%nlevels_nod2D_min) - + !$ACC ENTER DATA COPYIN (f%mesh%edge_dxdy, f%mesh%nlevels, f%mesh%ulevels_nod2D_max) + !$ACC ENTER DATA COPYIN (f%mesh%areasvol, f%mesh%nlevels_nod2D_min) + !$ACC ENTER DATA CREATE (f%mesh%helem, f%mesh%hnode, f%mesh%hnode_new, f%mesh%zbar_3d_n, f%mesh%z_3d_n) !do n=f%from_nstep, f%from_nstep-1+current_nsteps !$ACC ENTER DATA COPYIN (f%ice) !$ACC ENTER DATA CREATE (f%ice%data, 
f%ice%work, f%ice%work%fct_massmatrix) @@ -320,7 +320,7 @@ subroutine fesom_init(fesom_total_nsteps) !$ACC ENTER DATA CREATE (f%ice%work%eps11, f%ice%work%eps12, f%ice%work%eps22) !$ACC ENTER DATA CREATE (f%ice%work%sigma11, f%ice%work%sigma12, f%ice%work%sigma22) !$ACC ENTER DATA CREATE (f%ice%work%ice_strength, f%ice%stress_atmice_x, f%ice%stress_atmice_y) - !$ACC ENTER DATA CREATE (f%ice%thermo%rhosno, f%ice%thermo%rhoice, f%ice%thermo%inv_rhowat) + !$ACC ENTER DATA COPYIN (f%ice%thermo%rhosno, f%ice%thermo%rhoice, f%ice%thermo%inv_rhowat) !$ACC ENTER DATA CREATE (f%ice%srfoce_ssh, f%ice%pstar, f%ice%c_pressure) !$ACC ENTER DATA CREATE (f%ice%work%inv_areamass, f%ice%work%inv_mass, f%ice%uice_rhs, f%ice%vice_rhs) !$ACC ENTER DATA CREATE (f%ice%uice, f%ice%vice, f%ice%srfoce_u, f%ice%srfoce_v, f%ice%uice_old, f%ice%vice_old) @@ -334,15 +334,16 @@ subroutine fesom_init(fesom_total_nsteps) #endif !$ACC ENTER DATA COPYIN (f%dynamics) !$ACC ENTER DATA CREATE (f%dynamics%w, f%dynamics%w_e, f%dynamics%uv) - !$ACC ENTER DATA COPYIN (f%tracers) + !$ACC ENTER DATA CREATE (f%tracers%work%del_ttf) !$ACC ENTER DATA CREATE (f%tracers%data, f%tracers%work) do tr_num=1, f%tracers%num_tracers - !$ACC ENTER DATA CREATE (f%tracers%data(tr_num)%values, f%tracers%data(tr_num)%valuesAB) + !$ACC ENTER DATA CREATE (f%tracers%data(tr_num)%values, f%tracers%data(tr_num)%valuesAB) + !$ACC ENTER DATA CREATE (f%tracers%data(tr_num)%tra_adv_ph, f%tracers%data(tr_num)%tra_adv_pv) end do !$ACC ENTER DATA CREATE (f%tracers%work%fct_ttf_min, f%tracers%work%fct_ttf_max, f%tracers%work%fct_plus, f%tracers%work%fct_minus) & !$ACC CREATE (f%tracers%work%adv_flux_hor, f%tracers%work%adv_flux_ver, f%tracers%work%fct_LO) & !$ACC CREATE (f%tracers%work%del_ttf_advvert, f%tracers%work%del_ttf_advhoriz, f%tracers%work%edge_up_dn_grad) & - !$ACC CREATE (f%tracers%work%del_ttf) + !$ACC CREATE (f%tracers%work%del_ttf) end subroutine @@ -374,7 +375,7 @@ subroutine fesom_runloop(current_nsteps) end if 
nstart=f%from_nstep ntotal=f%from_nstep-1+current_nsteps -write(0,*) 'f%from_nstep before the loop:', f%from_nstep + !write(0,*) 'f%from_nstep before the loop:', f%from_nstep do n=nstart, ntotal if (use_global_tides) then call foreph(f%partit, f%mesh) diff --git a/src/gen_surface_forcing.F90 b/src/gen_surface_forcing.F90 index 1c4012210..a6b71950c 100644 --- a/src/gen_surface_forcing.F90 +++ b/src/gen_surface_forcing.F90 @@ -1084,7 +1084,7 @@ SUBROUTINE sbc_ini(partit, mesh) call nc_sbc_ini(rdate, partit, mesh) !========================================================================== #endif - ! runoff + ! runoff if (runoff_data_source=='CORE1' .or. runoff_data_source=='CORE2' ) then ! runoff in CORE is constant in time ! Warning: For a global mesh, conservative scheme is to be updated!! @@ -1099,7 +1099,7 @@ SUBROUTINE sbc_ini(partit, mesh) else if (mype==0) write(*,*) 'using constant chlorophyll concentration: ', chl_const chl=chl_const - end if + end if end if if (mype==0) write(*,*) "DONE: Ocean forcing inizialization." diff --git a/src/oce_adv_tra_driver.F90 b/src/oce_adv_tra_driver.F90 index 43b612dae..52125f40d 100644 --- a/src/oce_adv_tra_driver.F90 +++ b/src/oce_adv_tra_driver.F90 @@ -235,7 +235,10 @@ subroutine do_oce_adv_tra(dt, vel, w, wi, we, tr_num, dynamics, tracers, partit, #endif if (dynamics%use_wsplit) then !wvel/=wvel_e ! update for implicit contribution (w_split option) +!when adv_tra_vert_impl is ported to ACC the UPDATEs below wont be needed! +!$ACC UPDATE HOST(fct_LO) call adv_tra_vert_impl(dt, wi, fct_LO, partit, mesh) +!$ACC UPDATE DEVICE(fct_LO) ! compute the low order upwind vertical flux (full vertical velocity) ! zero the input/output flux before computation ! 
--> compute here low order part of vertical anti diffusive fluxes, diff --git a/src/oce_adv_tra_ver.F90 b/src/oce_adv_tra_ver.F90 index 152cc4574..6429447f5 100644 --- a/src/oce_adv_tra_ver.F90 +++ b/src/oce_adv_tra_ver.F90 @@ -235,6 +235,7 @@ subroutine adv_tra_vert_impl(dt, w, ttf, partit, mesh) end do end do ! --> do n=1,myDim_nod2D !$OMP END DO +!$OMP BARRIER !$OMP END PARALLEL end subroutine adv_tra_vert_impl ! diff --git a/src/oce_ale_tracer.F90 b/src/oce_ale_tracer.F90 index 26fbb8453..febfdadeb 100644 --- a/src/oce_ale_tracer.F90 +++ b/src/oce_ale_tracer.F90 @@ -192,8 +192,8 @@ subroutine solve_tracers_ale(ice, dynamics, tracers, partit, mesh) !___________________________________________________________________________ ! loop over all tracers - !$ACC UPDATE DEVICE(dynamics%w, dynamics%w_e, dynamics%uv) async(1) - !$ACC UPDATE DEVICE(tracers%work%fct_ttf_min, tracers%work%fct_ttf_max, tracers%work%fct_plus, tracers%work%fct_minus) + !$ACC UPDATE DEVICE(dynamics%w, dynamics%w_e, dynamics%uv) !!! async(1) +!!! !$ACC UPDATE DEVICE(tracers%work%fct_ttf_min, tracers%work%fct_ttf_max, tracers%work%fct_plus, tracers%work%fct_minus) !$ACC UPDATE DEVICE (mesh%helem, mesh%hnode, mesh%hnode_new, mesh%zbar_3d_n, mesh%z_3d_n) do tr_num=1, tracers%num_tracers ! do tracer AB (Adams-Bashfort) interpolation only for advectiv part @@ -213,8 +213,7 @@ subroutine solve_tracers_ale(ice, dynamics, tracers, partit, mesh) call do_oce_adv_tra(dt, UV, Wvel, Wvel_i, Wvel_e, tr_num, dynamics, tracers, partit, mesh) - !$ACC UPDATE HOST(tracers%data(tr_num)%values, tracers%data(tr_num)%valuesAB) & - !$ACC HOST(tracers%work%del_ttf, tracers%work%del_ttf_advhoriz, tracers%work%del_ttf_advvert) + !$ACC UPDATE HOST(tracers%work%del_ttf, tracers%work%del_ttf_advhoriz, tracers%work%del_ttf_advvert) !___________________________________________________________________________ ! update array for total tracer flux del_ttf with the fluxes from horizontal ! 
and vertical advection @@ -246,8 +245,8 @@ subroutine solve_tracers_ale(ice, dynamics, tracers, partit, mesh) call exchange_nod(tracers%data(tr_num)%values(:,:), partit) !$OMP BARRIER end do - !$ACC UPDATE HOST (tracers%work%fct_ttf_min, tracers%work%fct_ttf_max, tracers%work%fct_plus, tracers%work%fct_minus) & - !$ACC HOST (tracers%work%edge_up_dn_grad) +!!! !$ACC UPDATE HOST (tracers%work%fct_ttf_min, tracers%work%fct_ttf_max, tracers%work%fct_plus, tracers%work%fct_minus) & +!!! !$ACC HOST (tracers%work%edge_up_dn_grad) !___________________________________________________________________________ ! 3D restoring for "passive" tracers diff --git a/src/oce_tracer_mod.F90 b/src/oce_tracer_mod.F90 index bc6182039..721f3eaf3 100755 --- a/src/oce_tracer_mod.F90 +++ b/src/oce_tracer_mod.F90 @@ -25,7 +25,7 @@ SUBROUTINE init_tracers_AB(tr_num, tracers, partit, mesh) type(t_tracer), intent(inout), target :: tracers integer :: n,nz -!$ACC parallel loop collapse(2) default(present) async(1) +!$ACC parallel loop collapse(2) default(present) !!!async(1) do n=1, partit%myDim_nod2D+partit%eDim_nod2D do nz=1, mesh%nl-1 ! del_ttf will contain all advection / diffusion contributions for this tracer. Set it to 0 at the beginning!