Support IntelLLVM compiler (#353)

* Add support for IntelLLVM compiler
* Update cmake/compiler_flags_IntelLLVM_Fortran.cmake
* Workaround in driver/fvGFS/atmosphere.F90 for ifx ICE is not needed anymore
* Initialize Atm%q_con to zero.
* Move initialization of q_con if USE_COND is not defined
* Use one set of OMP directives for both Intel and GNU compilers
NOAA-GFDL · Oct 1, 2024 · 24686a2 · 24686a2
1 parent ac3055e
commit 24686a2
Show file tree

Hide file tree

Showing 6 changed files with 59 additions and 80 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -19,7 +19,7 @@ if(NOT CMAKE_BUILD_TYPE MATCHES "^(Debug|Release|Repro|MinSizeRel|RelWithDebInfo
 endif()
 message(STATUS "Setting build type to '${CMAKE_BUILD_TYPE}'.")
 
-if(NOT CMAKE_Fortran_COMPILER_ID MATCHES "^(Intel|GNU)$")
+if(NOT CMAKE_Fortran_COMPILER_ID MATCHES "^(Intel|IntelLLVM|GNU)$")
   message(WARNING "Compiler not officially supported: ${CMAKE_Fortran_COMPILER_ID}")
 endif()
 

diff --git a/cmake/compiler_flags_IntelLLVM_Fortran.cmake b/cmake/compiler_flags_IntelLLVM_Fortran.cmake
@@ -0,0 +1,24 @@
+# Precision-based Fortran compiler flags for the IntelLLVM compiler ID
+set(R4_flags "-real-size 32") # Fortran flags for 32-bit precision
+set(R8_flags "-real-size 64") # Fortran flags for 64-bit precision
+set(R8_flags "${R8_flags} -no-prec-div")
+
+# Flags for every build type; ${${kind}_flags} selects R4_flags/R8_flags by name (presumably kind is R4 or R8 -- TODO confirm)
+set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -g -traceback -fpp -fno-alias -auto -safe-cray-ptr -ftz -assume byterecl -nowarn -align array64byte -qno-opt-dynamic-align ${${kind}_flags}")
+# Per-build-type flags: each set() below replaces, rather than appends to, any existing value.
+set(CMAKE_Fortran_FLAGS_REPRO "-O2 -debug minimal -fp-model consistent -qoverride-limits")
+
+set(CMAKE_Fortran_FLAGS_RELEASE "-O2 -debug minimal -fp-model strict -qoverride-limits")
+
+set(CMAKE_Fortran_FLAGS_DEBUG "-O0 -check -check noarg_temp_created -check nopointer -warn -warn noerrors -fp-stack-check -fstack-protector-all -fpe0 -debug -ftrapuv -init=snan,arrays")
+# Set the Fortran link flags to an empty string.
+set(CMAKE_Fortran_LINK_FLAGS "")
+# Optional instruction-set flags, skipped for Debug and Repro builds; AVX2 takes precedence over SIMDMULTIARCH.
+if(NOT CMAKE_BUILD_TYPE MATCHES "^(Debug|Repro)$")
+  if(AVX2)
+    set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -march=core-avx2")
+  elseif(SIMDMULTIARCH)
+    set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -axSSE4.2,CORE-AVX2")
+  endif()
+endif()
+
diff --git a/cmake/fv3_compiler_flags.cmake b/cmake/fv3_compiler_flags.cmake
@@ -4,8 +4,10 @@
 
 if(CMAKE_Fortran_COMPILER_ID MATCHES "GNU")
   include(compiler_flags_GNU_Fortran)
-elseif(CMAKE_Fortran_COMPILER_ID MATCHES "Intel")
+elseif(CMAKE_Fortran_COMPILER_ID MATCHES "^(Intel)$")
   include(compiler_flags_Intel_Fortran)
+elseif(CMAKE_Fortran_COMPILER_ID MATCHES "^(IntelLLVM)$")
+  include(compiler_flags_IntelLLVM_Fortran)
 else()
   message(WARNING "Fortran compiler with ID ${CMAKE_Fortran_COMPILER_ID} will be used with CMake default options")
 endif()
diff --git a/model/fv_arrays.F90 b/model/fv_arrays.F90
@@ -1552,8 +1552,10 @@ subroutine allocate_fv_atmos_type(Atm, isd_in, ied_in, jsd_in, jed_in, is_in, ie
 
 #ifdef USE_COND
       allocate ( Atm%q_con(isd:ied,jsd:jed,1:npz) )
+      ! q_con will be initialized to 0, in the following omp loop
 #else
       allocate ( Atm%q_con(isd:isd,jsd:jsd,1) )
+      Atm%q_con = 0.
 #endif
 
 ! Notes by SJL
@@ -1567,6 +1569,9 @@ subroutine allocate_fv_atmos_type(Atm, isd_in, ied_in, jsd_in, jed_in, is_in, ie
                 Atm%va(i,j,k) = real_big
                 Atm%pt(i,j,k) = real_big
               Atm%delp(i,j,k) = real_big
+#ifdef USE_COND
+             Atm%q_con(i,j,k) = 0.
+#endif
            enddo
         enddo
         do j=jsd, jed+1

diff --git a/model/fv_dynamics.F90 b/model/fv_dynamics.F90
@@ -303,11 +303,15 @@ subroutine fv_dynamics(npx, npy, npz, nq_tot,  ng, bdt, consv_te, fill,
       real :: time_total
       integer :: seconds, days
 
-      ccpp_associate: associate( cappa     => GFDL_interstitial%cappa,     &
-                                 dp1       => GFDL_interstitial%te0,       &
-                                 dtdt_m    => GFDL_interstitial%dtdt,      &
-                                 last_step => GFDL_interstitial%last_step, &
-                                 te_2d     => GFDL_interstitial%te0_2d     )
+      real, dimension(:,:,:), pointer :: cappa
+      real, dimension(:,:,:), pointer :: dp1
+      real, dimension(:,:,:), pointer :: dtdt_m
+      real, dimension(:,:), pointer :: te_2d
+
+      cappa => GFDL_interstitial%cappa
+      dp1 => GFDL_interstitial%te0
+      dtdt_m => GFDL_interstitial%dtdt
+      te_2d => GFDL_interstitial%te0_2d
 
       is  = bd%is
       ie  = bd%ie
@@ -428,11 +432,7 @@ subroutine fv_dynamics(npx, npy, npz, nq_tot,  ng, bdt, consv_te, fill,
       enddo
 
     if ( hydrostatic ) then
-#ifdef __GFORTRAN__
-!$OMP parallel do default(none) shared(is,ie,js,je,isd,ied,jsd,jed,npz,zvir,nwat,q,q_con,sphum,liq_wat, &
-#else
 !$OMP parallel do default(none) shared(is,ie,js,je,isd,ied,jsd,jed,npz,dp1,zvir,nwat,q,q_con,sphum,liq_wat, &
-#endif
 !$OMP      rainwat,ice_wat,snowwat,graupel,hailwat) private(cvm,i,j,k)
       do k=1,npz
          do j=js,je
@@ -446,20 +446,12 @@ subroutine fv_dynamics(npx, npy, npz, nq_tot,  ng, bdt, consv_te, fill,
          enddo
       enddo
     else
-#ifdef __GFORTRAN__
-!$OMP parallel do default(none) shared(is,ie,js,je,isd,ied,jsd,jed,npz,zvir,q,q_con,sphum,liq_wat, &
-#else
 !$OMP parallel do default(none) shared(is,ie,js,je,isd,ied,jsd,jed,npz,dp1,zvir,q,q_con,sphum,liq_wat, &
-#endif
 !$OMP                                  rainwat,ice_wat,snowwat,graupel,hailwat,pkz,flagstruct, &
 #ifdef MULTI_GASES
 !$OMP                                  kapad,                                          &
 #endif
-#ifdef __GFORTRAN__
-!$OMP                                  kappa,rdg,delp,pt,delz,nwat)                    &
-#else
 !$OMP                                  cappa,kappa,rdg,delp,pt,delz,nwat)              &
-#endif
 !$OMP                          private(cvm,i,j,k)
        do k=1,npz
          if ( flagstruct%moist_phys ) then
@@ -587,11 +579,7 @@ subroutine fv_dynamics(npx, npy, npz, nq_tot,  ng, bdt, consv_te, fill,
        pt_initialized = .true.
      endif
   else
-#ifdef __GFORTRAN__
-!$OMP parallel do default(none) shared(is,ie,js,je,npz,pt,pkz,q_con)
-#else
 !$OMP parallel do default(none) shared(is,ie,js,je,npz,pt,dp1,pkz,q_con)
-#endif
   do k=1,npz
      do j=js,je
         do i=is,ie
@@ -611,15 +599,11 @@ subroutine fv_dynamics(npx, npy, npz, nq_tot,  ng, bdt, consv_te, fill,
   endif
 #endif
 
-  last_step = .false.
+  GFDL_interstitial%last_step = .false.
   mdt = bdt / real(k_split)
 
   if ( idiag%id_mdt > 0 .and. (.not. do_adiabatic_init) ) then
-#ifdef __GFORTRAN__
-!$OMP parallel do default(none) shared(is,ie,js,je,npz)
-#else
 !$OMP parallel do default(none) shared(is,ie,js,je,npz,dtdt_m)
-#endif
        do k=1,npz
           do j=js,je
              do i=is,ie
@@ -649,11 +633,7 @@ subroutine fv_dynamics(npx, npy, npz, nq_tot,  ng, bdt, consv_te, fill,
       call start_group_halo_update(i_pack(8), u, v, domain, gridtype=DGRID_NE)
 #endif
                                            call timing_off('COMM_TOTAL')
-#ifdef __GFORTRAN__
-!$OMP parallel do default(none) shared(isd,ied,jsd,jed,npz,delp)
-#else
 !$OMP parallel do default(none) shared(isd,ied,jsd,jed,npz,dp1,delp)
-#endif
       do k=1,npz
          do j=jsd,jed
             do i=isd,ied
@@ -665,7 +645,7 @@ subroutine fv_dynamics(npx, npy, npz, nq_tot,  ng, bdt, consv_te, fill,
          call start_group_halo_update(i_pack(13), dp1, domain)
       endif
 
-      if ( n_map==k_split ) last_step = .true.
+      if ( n_map==k_split ) GFDL_interstitial%last_step = .true.
 
 #ifdef USE_COND
                                            call timing_on('COMM_TOTAL')
@@ -688,7 +668,7 @@ subroutine fv_dynamics(npx, npy, npz, nq_tot,  ng, bdt, consv_te, fill,
                     u, v, w, delz, pt, q, delp, pe, pk, phis, ws, omga, ptop, pfull, ua, va,           &
                     uc, vc, mfx, mfy, cx, cy, pkz, peln, q_con, ak, bk, ks, &
                     gridstruct, flagstruct, neststruct, idiag, bd, &
-                    domain, n_map==1, i_pack, last_step, diss_est,time_total)
+                    domain, n_map==1, i_pack, GFDL_interstitial%last_step, diss_est,time_total)
                                            call timing_off('DYN_CORE')
 
 
@@ -744,7 +724,7 @@ subroutine fv_dynamics(npx, npy, npz, nq_tot,  ng, bdt, consv_te, fill,
      endif
 #endif
 
-         if( last_step .and. idiag%id_divg>0 ) then
+         if( GFDL_interstitial%last_step .and. idiag%id_divg>0 ) then
              used = send_data(idiag%id_divg, dp1, fv_time)
              if(flagstruct%fv_debug) call prt_mxm('divg',  dp1, is, ie, js, je, 0, npz, 1.,gridstruct%area_64, domain)
          endif
@@ -768,7 +748,7 @@ subroutine fv_dynamics(npx, npy, npz, nq_tot,  ng, bdt, consv_te, fill,
                                                   call avec_timer_start(6)
 #endif
 
-         call Lagrangian_to_Eulerian(last_step, consv_te, ps, pe, delp,          &
+         call Lagrangian_to_Eulerian(GFDL_interstitial%last_step, consv_te, ps, pe, delp,          &
                      pkz, pk, mdt, bdt, npx, npy, npz, is,ie,js,je, isd,ied,jsd,jed,       &
                      nr, nwat, sphum, q_con, u,  v, w, delz, pt, q, phis,    &
                      zvir, cp_air, akap, cappa, flagstruct%kord_mt, flagstruct%kord_wz, &
@@ -782,10 +762,10 @@ subroutine fv_dynamics(npx, npy, npz, nq_tot,  ng, bdt, consv_te, fill,
                      flagstruct%moist_phys)
 
      if ( flagstruct%molecular_diffusion ) then
-! do thermosphere adjustment if it is turned on and at last_step.
-         if( md_tadj_layers .gt.0 .and. md_time .and. last_step ) then
+! do thermosphere adjustment if it is turned on and this is the last step (GFDL_interstitial%last_step).
+         if( md_tadj_layers .gt.0 .and. md_time .and. GFDL_interstitial%last_step ) then
              call thermosphere_adjustment(domain,gridstruct,npz,bd,ng,pt)
-        endif ! md_tadj_layers>0 and md_time and last_step
+        endif ! md_tadj_layers>0 and md_time and GFDL_interstitial%last_step
      endif
 
      if ( flagstruct%fv_debug ) then
@@ -804,12 +784,12 @@ subroutine fv_dynamics(npx, npy, npz, nq_tot,  ng, bdt, consv_te, fill,
 #endif
                                                   call timing_off('Remapping')
 #ifdef MOIST_CAPPA
-         if ( neststruct%nested .and. .not. last_step) then
+         if ( neststruct%nested .and. .not. GFDL_interstitial%last_step) then
             call nested_grid_BC_apply_intT(cappa, &
                  0, 0, npx, npy, npz, bd, real(n_map+1), real(k_split), &
                  neststruct%cappa_BC, bctype=neststruct%nestbctype  )
          endif
-         if ( flagstruct%regional .and. .not. last_step) then
+         if ( flagstruct%regional .and. .not. GFDL_interstitial%last_step) then
             reg_bc_update_time=current_time_in_seconds+(n_map+1)*mdt
             call regional_boundary_update(cappa, 'cappa', &
                                           isd, ied, jsd, jed, npz, &
@@ -822,7 +802,7 @@ subroutine fv_dynamics(npx, npy, npz, nq_tot,  ng, bdt, consv_te, fill,
 !--------------------------
 ! Filter omega for physics:
 !--------------------------
-          if( last_step )  then
+          if( GFDL_interstitial%last_step )  then
             if(flagstruct%nf_omega>0) then   
              call del2_cubed(omga, 0.18*gridstruct%da_min, gridstruct, domain, npx, npy, npz, flagstruct%nf_omega, bd)
             endif
@@ -840,11 +820,7 @@ subroutine fv_dynamics(npx, npy, npz, nq_tot,  ng, bdt, consv_te, fill,
 
   if ( idiag%id_mdt > 0 .and. (.not.do_adiabatic_init) ) then
 ! Output temperature tendency due to inline moist physics:
-#ifdef __GFORTRAN__
-!$OMP parallel do default(none) shared(is,ie,js,je,npz,bdt)
-#else
 !$OMP parallel do default(none) shared(is,ie,js,je,npz,dtdt_m,bdt)
-#endif
        do k=1,npz
           do j=js,je
              do i=is,ie
@@ -974,11 +950,7 @@ subroutine fv_dynamics(npx, npy, npz, nq_tot,  ng, bdt, consv_te, fill,
   endif
 
   if( (flagstruct%consv_am.or.idiag%id_amdt>0) .and. (.not.do_adiabatic_init)  ) then
-#ifdef __GFORTRAN__
-!$OMP parallel do default(none) shared(is,ie,js,je,teq,dt2,ps2,ps,idiag)
-#else
 !$OMP parallel do default(none) shared(is,ie,js,je,te_2d,teq,dt2,ps2,ps,idiag)
-#endif
       do j=js,je
          do i=is,ie
 ! Note: the mountain torque computation contains also numerical error
@@ -1059,8 +1031,6 @@ subroutine fv_dynamics(npx, npy, npz, nq_tot,  ng, bdt, consv_te, fill,
   ! Call CCPP timestep finalize
   call ccpp_physics_timestep_finalize(cdata, suite_name=trim(ccpp_suite), group_name="fast_physics", ierr=ierr)
 
-  end associate ccpp_associate
-
   end subroutine fv_dynamics
 
 #ifdef USE_RF_FAST

diff --git a/model/fv_mapz.F90 b/model/fv_mapz.F90
@@ -226,12 +226,9 @@ subroutine Lagrangian_to_Eulerian(last_step, consv, ps, pe, delp, pkz, pk,   &
   real rcp, rg, rrg, bkh, dtmp, k1k
   integer:: i,j,k
   integer:: kdelz
-  integer:: nt, liq_wat, ice_wat, rainwat, snowwat, cld_amt, graupel, hailwat, ccn_cm3, iq, n, kmp, kp, k_next
+  integer:: nt, liq_wat, ice_wat, rainwat, snowwat, cld_amt, graupel, hailwat, ccn_cm3, iq, n, kp, k_next
   integer :: ierr
 
-      ccpp_associate: associate( fast_mp_consv => GFDL_interstitial%fast_mp_consv, &
-                                 kmp           => GFDL_interstitial%kmp            )
-
        k1k = rdgas/cv_air   ! akap / (1.-akap) = rg/Cv=0.4
         rg = rdgas
        rcp = 1./ cp
@@ -247,7 +244,7 @@ subroutine Lagrangian_to_Eulerian(last_step, consv, ps, pe, delp, pkz, pk,   &
        ccn_cm3 = get_tracer_index (MODEL_ATMOS, 'ccn_cm3')
 
        if ( do_adiabatic_init .or. do_sat_adj ) then
-            fast_mp_consv = (.not.do_adiabatic_init) .and. consv>consv_min
+            GFDL_interstitial%fast_mp_consv = (.not.do_adiabatic_init) .and. consv>consv_min
        endif
 
 !$OMP parallel do default(none) shared(is,ie,js,je,km,pe,ptop,kord_tm,hydrostatic, &
@@ -664,37 +661,20 @@ subroutine Lagrangian_to_Eulerian(last_step, consv, ps, pe, delp, pkz, pk,   &
 
 1000  continue
 
-#ifdef __GFORTRAN__
-!$OMP parallel default(none) shared(is,ie,js,je,km,ptop,u,v,pe,ua,va,isd,ied,jsd,jed,kord_mt,     &
-!$OMP                               te_2d,te,delp,hydrostatic,hs,rg,pt,peln, adiabatic,        &
-!$OMP                               cp,delz,nwat,rainwat,liq_wat,ice_wat,snowwat,              &
-!$OMP                               graupel,hailwat,q_con,r_vir,sphum,w,pk,pkz,last_step,consv,        &
-!$OMP                               do_adiabatic_init,zsum1,zsum0,te0_2d,domain,               &
-!$OMP                               ng,gridstruct,E_Flux,pdt,dtmp,reproduce_sum,q,             &
-!$OMP                               mdt,cld_amt,cappa,dtdt,out_dt,rrg,akap,do_sat_adj,         &
-!$OMP                               kord_tm,pe4, npx,npy,ccn_cm3,u_dt,v_dt, c2l_ord,bd,dp0,ps, &
-!$OMP                                cdata,GFDL_interstitial)                           &
-!$OMP                        shared(ccpp_suite)                                                &
-#ifdef MULTI_GASES
-!$OMP                        shared(num_gas)                                                   &
-#endif
-!$OMP                       private(q2,pe0,pe1,pe2,pe3,qv,cvm,gz,gsize,phis,kdelz,dp2,t0, ierr)
-#else
-!$OMP parallel default(none) shared(is,ie,js,je,km,kmp,ptop,u,v,pe,ua,va,isd,ied,jsd,jed,kord_mt, &
+!$OMP parallel default(none) shared(is,ie,js,je,km,ptop,u,v,pe,ua,va,isd,ied,jsd,jed,kord_mt,  &
 !$OMP                               te_2d,te,delp,hydrostatic,hs,rg,pt,peln, adiabatic,        &
 !$OMP                               cp,delz,nwat,rainwat,liq_wat,ice_wat,snowwat,              &
-!$OMP                               graupel,hailwat,q_con,r_vir,sphum,w,pk,pkz,last_step,consv,        &
+!$OMP                               graupel,hailwat,q_con,r_vir,sphum,w,pk,pkz,last_step,consv,&
 !$OMP                               do_adiabatic_init,zsum1,zsum0,te0_2d,domain,               &
 !$OMP                               ng,gridstruct,E_Flux,pdt,dtmp,reproduce_sum,q,             &
 !$OMP                               mdt,cld_amt,cappa,dtdt,out_dt,rrg,akap,do_sat_adj,         &
-!$OMP                               fast_mp_consv,kord_tm, pe4,npx,npy, ccn_cm3,               &
-!$OMP                               u_dt,v_dt,c2l_ord,bd,dp0,ps,cdata,GFDL_interstitial)        &
+!$OMP                               kord_tm, pe4,npx,npy, ccn_cm3,                             &
+!$OMP                               u_dt,v_dt,c2l_ord,bd,dp0,ps,cdata,GFDL_interstitial)       &
 !$OMP                        shared(ccpp_suite)                                                &
 #ifdef MULTI_GASES
 !$OMP                        shared(num_gas)                                                   &
 #endif
 !$OMP                       private(q2,pe0,pe1,pe2,pe3,qv,cvm,gz,gsize,phis,kdelz,dp2,t0, ierr)
-#endif
 
 !$OMP do
   do k=2,km
@@ -924,8 +904,6 @@ subroutine Lagrangian_to_Eulerian(last_step, consv, ps, pe, delp, pkz, pk,   &
   endif
 !$OMP end parallel
 
-  end associate ccpp_associate
-
  end subroutine Lagrangian_to_Eulerian