Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add comprehensive checks for problem cells #631

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
7 changes: 7 additions & 0 deletions docs/documentation/case.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,18 @@ Definition of the parameters is described in the following subsections.
| ---: | :----: | :--- |
| `run_time_info` | Logical | Output run-time information |
| `rdma_mpi` | Logical | (GPUs) Enable RDMA for MPI communication. |
| `comp_debug` | Logical | Comprehensive variable checking |

- `run_time_info` generates a text file that includes run-time information including the CFL number(s) at each time-step.
- `rdma_mpi` optimizes data transfers between GPUs using Remote Direct Memory Access (RDMA).
The underlying MPI implementation and communication infrastructure must support this
feature, detecting GPU pointers and performing RDMA accordingly.
- `comp_debug` enables comprehensive error checking.
At each Runge-Kutta sub-step, all conservative variables are checked for NaNs.
The volume fractions are checked to ensure they are in the range [0, 1].
Densities are also checked for negative values.
If any of these checks finds a problem, the file `comp_debug.txt` is written to the case directory with information
about the problems that were found, and the simulation state is saved for visualization.

### 2. Computational Domain

Expand Down
1 change: 1 addition & 0 deletions examples/3D_performance_test/comp_debug.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-1
2 changes: 2 additions & 0 deletions src/post_process/m_global_parameters.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ module m_global_parameters
logical :: mixture_err !< Mixture error limiter
logical :: alt_soundspeed !< Alternate sound speed
logical :: hypoelasticity !< Turn hypoelasticity on
logical :: comp_debug !< Turn on comprehensive debug
logical, parameter :: chemistry = .${chemistry}$. !< Chemistry modeling
!> @}

Expand Down Expand Up @@ -317,6 +318,7 @@ contains
relax = .false.
relax_model = dflt_int
hypoelasticity = .false.
comp_debug = .false.

bc_x%beg = dflt_int; bc_x%end = dflt_int
bc_y%beg = dflt_int; bc_y%end = dflt_int
Expand Down
2 changes: 1 addition & 1 deletion src/post_process/m_mpi_proxy.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ contains
& 'heat_ratio_wrt', 'pi_inf_wrt', 'pres_inf_wrt', 'cons_vars_wrt', &
& 'prim_vars_wrt', 'c_wrt', 'qm_wrt','schlieren_wrt', 'bubbles', 'qbmm', &
& 'polytropic', 'polydisperse', 'file_per_process', 'relax', 'cf_wrt', &
& 'adv_n', 'ib', 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt' ]
& 'adv_n', 'ib', 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', 'comp_debug' ]
call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
#:endfor

Expand Down
52 changes: 51 additions & 1 deletion src/post_process/m_start_up.f90
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
polydisperse, poly_sigma, file_per_process, relax, &
relax_model, cf_wrt, sigma, adv_n, ib, &
cfl_adap_dt, cfl_const_dt, t_save, t_stop, n_start, &
cfl_target
cfl_target, comp_debug

! Inquiring the status of the post_process.inp file
file_loc = 'post_process.inp'
Expand Down Expand Up @@ -184,6 +184,56 @@

end subroutine s_perform_time_step

!>  Post-processes the time-step recorded in `comp_debug.txt`, if any.
!!
!!  The routine looks for `<case_dir>/comp_debug.txt`, reads the leading
!!  integer (format I9) of every record and keeps the last value that
!!  was read successfully. If the file is missing, holds no readable
!!  record, or the retained value is -1, the routine returns without
!!  doing anything else. Otherwise the data files of that time-step are
!!  read, the buffer regions are populated, the conservative variables
!!  are converted to primitive ones and s_save_data is called for that
!!  time-step.
!!
!!  NOTE(review): the file is presumably produced by the simulation's
!!  `comp_debug` checks -- confirm against the writer.
!!
!!  @param varname Forwarded to s_save_data
!!  @param pres Forwarded to s_save_data
!!  @param c Forwarded to s_save_data
!!  @param H Forwarded to s_save_data
subroutine s_perform_comprehensive_debug(varname, pres, c, H)

    character(LEN=name_len), intent(inout) :: varname
    real(kind(0d0)), intent(inout) :: pres
    real(kind(0d0)), intent(inout) :: c
    real(kind(0d0)), intent(inout) :: H

    ! Declared as parameters so that no local picks up an implicit SAVE
    character(LEN=*), parameter :: file_name = 'comp_debug.txt'
    integer, parameter :: debug_unit = 12

    character(LEN=path_len + name_len) :: file_path
    integer :: t_fail !< Last time-step successfully read from the file
    integer :: t_read !< Scratch target of each read attempt
    integer :: ios
    logical :: exists

    ! Locating the comprehensive-debug report in the case directory
    file_path = trim(case_dir)//'/'//trim(file_name)

    inquire (file=file_path, exist=exists)
    if (.not. exists) return

    ! -1 doubles as "nothing to post-process", so an empty or unreadable
    ! file is treated exactly like a report without problems
    t_fail = -1

    open (debug_unit, file=trim(file_path), status='old', action='read')

    ! Read the file record by record, keeping the last valid entry. The
    ! read goes through a scratch variable because the input item of a
    ! failed read is not guaranteed to keep its previous value.
    do
        read (debug_unit, '(I9)', iostat=ios) t_read
        if (ios /= 0) exit ! Exit loop on error or end-of-file
        t_fail = t_read
    end do

    ! Release the unit on every path before any further I/O takes place
    close (debug_unit)

    if (t_fail == -1) return

    if (proc_rank == 0) then
        print '(A, I6)', " Post Processing suspicious time-step: ", t_fail
    end if

    ! Populating the grid and conservative variables
    call s_read_data_files(t_fail)

    ! Populating the buffer regions of the grid variables and of the
    ! conservative variables
    if (buff_size > 0) then
        call s_populate_grid_variables_buffer_regions()
        call s_populate_conservative_variables_buffer_regions()
    end if

    ! Converting the conservative variables to the primitive ones
    call s_convert_conservative_to_primitive_variables(q_cons_vf, q_prim_vf)

    call s_save_data(t_fail, varname, pres, c, H)

end subroutine s_perform_comprehensive_debug

subroutine s_save_data(t_step, varname, pres, c, H)

integer, intent(inout) :: t_step
Expand Down
2 changes: 2 additions & 0 deletions src/post_process/p_main.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ program p_main

call s_initialize_modules()

if (comp_debug) call s_perform_comprehensive_debug(varname, pres, c, H)

if (cfl_dt) then
t_step = n_start
n_save = int(t_stop/t_save) + 1
Expand Down
4 changes: 4 additions & 0 deletions src/simulation/m_global_parameters.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ module m_global_parameters
logical :: hypoelasticity !< hypoelasticity modeling
logical, parameter :: chemistry = .${chemistry}$. !< Chemistry modeling
logical :: cu_tensor
logical :: comp_debug !< Variable checking at every RK step

logical :: bodyForces
logical :: bf_x, bf_y, bf_z !< body force toggle in three directions
Expand Down Expand Up @@ -637,6 +638,9 @@ contains
! Cuda aware MPI
cu_tensor = .false.

! Comprehensive debugging
comp_debug = .false.

bodyForces = .false.
bf_x = .false.; bf_y = .false.; bf_z = .false.
!< amplitude, frequency, and phase shift sinusoid in each direction
Expand Down
2 changes: 1 addition & 1 deletion src/simulation/m_mpi_proxy.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ contains
& 'polydisperse', 'qbmm', 'acoustic_source', 'probe_wrt', 'integral_wrt', &
& 'prim_vars_wrt', 'weno_avg', 'file_per_process', 'relax', &
& 'adv_n', 'adap_dt', 'ib', 'bodyForces', 'bf_x', 'bf_y', 'bf_z', &
& 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt' ]
& 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', 'comp_debug' ]
call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
#:endfor

Expand Down
85 changes: 84 additions & 1 deletion src/simulation/m_sim_helpers.f90
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,16 @@
use m_global_parameters

use m_variables_conversion

use m_mpi_proxy
! ==========================================================================

implicit none

private; public :: s_compute_enthalpy, &
s_compute_stability_from_dt, &
s_compute_dt_from_cfl
s_compute_dt_from_cfl, &
s_check_cells

contains

Expand Down Expand Up @@ -245,4 +248,84 @@

end subroutine s_compute_dt_from_cfl

!>  Scans the conservative variables over cells (0:m, 0:n, 0:p) for
!!  problems and logs each offending cell to `<case_dir>/comp_debug.txt`.
!!
!!  Three checks are performed, in this order:
!!    1. NaNs in any of the sys_size conservative variables,
!!    2. volume fractions (advxb:advxe) below 0 or above 1 + verysmall,
!!    3. negative partial densities (contxb:contxe) in cells whose
!!       matching volume fraction is non-zero.
!!
!!  The report file is (re)created with STATUS='replace' on unit 12 at
!!  every call and is NOT closed here.
!!  NOTE(review): presumably the caller finishes and closes the report;
!!  confirm before adding a close to this routine.
!!
!!  @param q_cons_vf Conservative variables; each field is updated on
!!                   the host before it is inspected
!!  @param t_step Time-step written as the first field of each record
!!  @param stage Runge-Kutta stage reported in each record
!!  @param errors Number of problems found by this call
subroutine s_check_cells(q_cons_vf, t_step, stage, errors)

    ! Made explicit so the routine does not depend on ieee_is_nan being
    ! re-exported by one of the modules used at module scope
    use, intrinsic :: ieee_arithmetic, only: ieee_is_nan

    type(scalar_field), dimension(sys_size) :: q_cons_vf
    integer, intent(in) :: t_step, stage
    integer, intent(out) :: errors

    ! Declared as parameters so that no local picks up an implicit SAVE
    character(LEN=*), parameter :: file_name = 'comp_debug.txt'
    character(LEN=*), parameter :: str_format = &
        "(I9, A, I3, A, I4, I4, I4, A, I2, A, I5, A, I5, I5, I5)"

    character(LEN=path_len + name_len) :: file_path
    integer :: i, j, k, l
    integer :: i_adv !< Volume-fraction index paired with density i

    ! Creating a fresh comprehensive-debug report in the case directory
    file_path = trim(case_dir)//'/'//trim(file_name)

    open (12, FILE=trim(file_path), &
          STATUS='replace')

    errors = 0

    ! Check all variables for NaNs
    do i = 1, sys_size
        !$acc update host(q_cons_vf(i)%sf)
        do l = 0, p
            do k = 0, n
                do j = 0, m
                    if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then
                        call s_log_cell_problem(" NaN(s) in conservative variables after RK stage ")
                    end if
                end do
            end do
        end do
    end do

    ! Check for invalid volume fractions
    do i = advxb, advxe
        do l = 0, p
            do k = 0, n
                do j = 0, m
                    if (q_cons_vf(i)%sf(j, k, l) < 0d0) then
                        call s_log_cell_problem(" Volume fraction < 0 after RK stage ")
                    elseif (q_cons_vf(i)%sf(j, k, l) > 1d0 + verysmall) then
                        call s_log_cell_problem(" Volume fraction > 1 after RK stage ")
                    end if
                end do
            end do
        end do
    end do

    ! Check for invalid densities
    do i = contxb, contxe
        ! Pair the i-th partial density with the volume fraction at the
        ! same offset; identical to advxb + i - 1 whenever contxb == 1
        i_adv = advxb + (i - contxb)
        do l = 0, p
            do k = 0, n
                do j = 0, m
                    ! Negative density in a cell whose volume fraction is
                    ! strictly negative or strictly positive
                    if (q_cons_vf(i)%sf(j, k, l) < 0d0 .and. &
                        (q_cons_vf(i_adv)%sf(j, k, l) < 0d0 .or. &
                         q_cons_vf(i_adv)%sf(j, k, l) > 0d0)) then
                        print *, q_cons_vf(i_adv)%sf(j, k, l), q_cons_vf(i)%sf(j, k, l)
                        call s_log_cell_problem(" Density is negative after RK stage ")
                    end if
                end do
            end do
        end do
    end do

contains

    !> Writes one record for the current cell (j, k, l) and equation i
    !! to the report and increments the error counter.
    !!  @param message Description inserted between time-step and stage
    subroutine s_log_cell_problem(message)

        character(LEN=*), intent(in) :: message

        write (12, str_format) t_step, message, &
            stage, " at (j,k,l) ", j, k, l, " equation", i, " proc", proc_rank, &
            " (m, n, p)", m, n, p
        errors = errors + 1

    end subroutine s_log_cell_problem

end subroutine s_check_cells

Check warning on line 329 in src/simulation/m_sim_helpers.f90

View check run for this annotation

Codecov / codecov/patch

src/simulation/m_sim_helpers.f90#L329

Added line #L329 was not covered by tests

end module m_sim_helpers
3 changes: 2 additions & 1 deletion src/simulation/m_start_up.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ contains
pi_fac, adv_n, adap_dt, bf_x, bf_y, bf_z, &
k_x, k_y, k_z, w_x, w_y, w_z, p_x, p_y, p_z, &
g_x, g_y, g_z, n_start, t_save, t_stop, &
cfl_adap_dt, cfl_const_dt, cfl_target
cfl_adap_dt, cfl_const_dt, cfl_target, &
comp_debug

! Checking that an input file has been provided by the user. If it
! has, then the input file is read in, otherwise, simulation exits.
Expand Down
Loading
Loading