Skip to content

Commit

Permalink
Merge branch 'AMReX-Codes:development' into development
Browse files Browse the repository at this point in the history
  • Loading branch information
ruohai0925 authored Dec 27, 2023
2 parents e300f8e + 75571e2 commit a6ed89c
Show file tree
Hide file tree
Showing 77 changed files with 696 additions and 329 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/bittree.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ jobs:
mpiexec -n 2 ./main2d.gnu.TEST.MPI.ex inputs_bittree amr.plot_int=1000
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-15 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down Expand Up @@ -104,7 +104,7 @@ jobs:
mpiexec -n 2 ./main3d.gnu.TEST.MPI.ex inputs_bittree max_step=10
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-15 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/clang.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ jobs:
make test_install
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-12 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down Expand Up @@ -117,7 +117,7 @@ jobs:
make -j 2
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-14 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down Expand Up @@ -159,7 +159,7 @@ jobs:
make install
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-14 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/dependencies/dependencies_hip.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ sudo apt-get install -y --no-install-recommends \
roctracer-dev \
rocprofiler-dev \
rocrand-dev \
rocprim-dev
rocprim-dev \
hiprand-dev

# activate
#
Expand Down
26 changes: 13 additions & 13 deletions .github/workflows/gcc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ jobs:
make test_install
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-12 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down Expand Up @@ -107,7 +107,7 @@ jobs:
cmake --build build -j 2
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-12 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down Expand Up @@ -155,7 +155,7 @@ jobs:
cmake --build build -j 2
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-12 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down Expand Up @@ -204,7 +204,7 @@ jobs:
cmake --build build -j 2
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-12 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down Expand Up @@ -263,7 +263,7 @@ jobs:
# Let's not use clang-tidy for this test because it wants to use C++20.
# ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
# make -j2 -f clang-tidy-ccache-misses.mak \
# make -j2 -k -f clang-tidy-ccache-misses.mak \
# CLANG_TIDY=clang-tidy-12 \
# CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down Expand Up @@ -320,7 +320,7 @@ jobs:
make -j 2
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-12 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down Expand Up @@ -384,7 +384,7 @@ jobs:
make -j 2
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-14 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down Expand Up @@ -426,7 +426,7 @@ jobs:
make install
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-12 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down Expand Up @@ -466,7 +466,7 @@ jobs:
make install
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-15 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down Expand Up @@ -506,7 +506,7 @@ jobs:
make install
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-12 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down Expand Up @@ -546,7 +546,7 @@ jobs:
make install
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-12 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down Expand Up @@ -585,7 +585,7 @@ jobs:
CCACHE=ccache
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-12 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down Expand Up @@ -630,7 +630,7 @@ jobs:
make -j 2
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-12 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/hypre.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ jobs:
mpiexec -n 2 ./main3d.gnu.MPI.ex inputs.hypre
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-14 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down Expand Up @@ -148,7 +148,7 @@ jobs:
mpiexec -n 2 ./main2d.gnu.MPI.ex inputs.2d
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-14 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down
12 changes: 8 additions & 4 deletions .github/workflows/intel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ jobs:
-DCMAKE_C_COMPILER=$(which icx) \
-DCMAKE_CXX_COMPILER=$(which icpx) \
-DCMAKE_Fortran_COMPILER=$(which ifx) \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DAMReX_PARALLEL_LINK_JOBS=2
cmake --build build --parallel 2
ccache -s
Expand Down Expand Up @@ -86,7 +87,8 @@ jobs:
-DAMReX_GPU_BACKEND=SYCL \
-DCMAKE_C_COMPILER=$(which icx) \
-DCMAKE_CXX_COMPILER=$(which icpx) \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DAMReX_PARALLEL_LINK_JOBS=2
cmake --build build --parallel 2
ccache -s
Expand Down Expand Up @@ -136,7 +138,8 @@ jobs:
-DAMReX_GPU_BACKEND=SYCL \
-DCMAKE_C_COMPILER=$(which icx) \
-DCMAKE_CXX_COMPILER=$(which clang++) \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DAMReX_PARALLEL_LINK_JOBS=2
cmake --build build --parallel 2
ccache -s
Expand Down Expand Up @@ -186,7 +189,8 @@ jobs:
-DAMReX_SYCL_SUB_GROUP_SIZE=64 \
-DCMAKE_C_COMPILER=$(which icx) \
-DCMAKE_CXX_COMPILER=$(which clang++) \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DAMReX_PARALLEL_LINK_JOBS=2
cmake --build build --parallel 2
ccache -s
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/petsc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
mpiexec -n 2 ./main2d.gnu.TEST.MPI.ex inputs.rt.2d.petsc
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-14 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/smoke.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
make test_install
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-15 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/sundials.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
cmake --build build -j 2
${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
make -j2 -f clang-tidy-ccache-misses.mak \
make -j2 -k -f clang-tidy-ccache-misses.mak \
CLANG_TIDY=clang-tidy-14 \
CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
Expand Down
41 changes: 41 additions & 0 deletions Docs/sphinx_documentation/source/AMReX_Profiling_Tools.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,47 @@ it is also recommended to wrap any ``BL_PROFILE_TINY_FLUSH();`` calls in
informative ``amrex::Print()`` lines to ensure accurate identification of each
set of timers.

Hot Spots and Load Balance
~~~~~~~~~~~~~~~~~~~~~~~~~~

The output of TinyProfiler can help us to identify hot spots. For example,
the following output shows the top three hot spots of a linear solver test
running on 4 MPI processes.

.. highlight:: console

::

--------------------------------------------------------------------------------------------
Name NCalls Excl. Min Excl. Avg Excl. Max Max %
--------------------------------------------------------------------------------------------
MLPoisson::Fsmooth() 560 0.4775 0.4793 0.4815 34.97%
MLPoisson::Fapply() 114 0.1103 0.113 0.1167 8.48%
FabArray::Xpay() 109 0.1 0.1013 0.1038 7.54%

In this test, there are 16 boxes evenly distributed among 4 MPI processes. The
output above shows that the load is well balanced: for each function, the
minimum, average and maximum exclusive times across processes are nearly
identical. However, if the load is not balanced, the results can be very
different and sometimes misleading. For example, if we put 2, 2, 6 and 6 boxes
on processes 0, 1, 2 and 3, respectively, the top three hot spots now include
two MPI communication functions, ``FillBoundary`` and ``ParallelCopy``.

.. highlight:: console

::

--------------------------------------------------------------------------------------------
Name NCalls Excl. Min Excl. Avg Excl. Max Max %
--------------------------------------------------------------------------------------------
FillBoundary_finish() 607 0.01568 0.3367 0.6574 41.97%
MLPoisson::Fsmooth() 560 0.2133 0.4047 0.5973 38.13%
FabArray::ParallelCopy_finish() 231 0.002977 0.09748 0.1895 12.10%

MPI communication appears slow here because the lightly loaded processes have
to wait for messages sent by the heavily loaded processes; this waiting shows
up as a large gap between the minimum and maximum exclusive times of the
communication functions. See also :ref:`sec:profopts` for a diagnostic option
that may provide more insight into the load imbalance.

.. _sec:full:profiling:

Full Profiling
Expand Down
36 changes: 19 additions & 17 deletions Docs/sphinx_documentation/source/GPU.rst
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ variables to configure the build
+------------------------------+-------------------------------------------------+-------------+-----------------+
| SYCL_SUB_GROUP_SIZE | Specify subgroup size | 32 | 64, 32, 16 |
+------------------------------+-------------------------------------------------+-------------+-----------------+
| SYCL_MAX_PARALLEL_LINK_JOBS | Number of parallel jobs in device link | 1 | 1, 2, 3, etc. |
| SYCL_PARALLEL_LINK_JOBS | Number of parallel jobs in device link | 1 | 1, 2, 3, etc. |
+------------------------------+-------------------------------------------------+-------------+-----------------+
.. raw:: latex

Expand Down Expand Up @@ -428,22 +428,24 @@ Below is an example configuration for SYCL:

.. table:: AMReX SYCL-specific build options

+------------------------------+-------------------------------------------------+-------------+-----------------+
| Variable Name | Description | Default | Possible values |
+==============================+=================================================+=============+=================+
| AMReX_SYCL_AOT | Enable SYCL ahead-of-time compilation | NO | YES, NO |
+------------------------------+-------------------------------------------------+-------------+-----------------+
| AMReX_SYCL_AOT_GRF_MODE | Specify AOT register file mode | Default | Default, Large, |
| | | | AutoLarge |
+------------------------------+-------------------------------------------------+-------------+-----------------+
| AMREX_INTEL_ARCH | Specify target if AOT is enabled | None | pvc, etc. |
+------------------------------+-------------------------------------------------+-------------+-----------------+
| AMReX_SYCL_SPLIT_KERNEL | Enable SYCL kernel splitting | YES | YES, NO |
+------------------------------+-------------------------------------------------+-------------+-----------------+
| AMReX_SYCL_ONEDPL | Enable SYCL's oneDPL algorithms | NO | YES, NO |
+------------------------------+-------------------------------------------------+-------------+-----------------+
| AMReX_SYCL_SUB_GROUP_SIZE | Specify subgroup size | 32 | 64, 32, 16 |
+------------------------------+-------------------------------------------------+-------------+-----------------+
+-------------------------------+----------------------------------------------+-------------+------------------+
| Variable Name | Description | Default | Possible values |
+===============================+==============================================+=============+==================+
| AMReX_SYCL_AOT | Enable SYCL ahead-of-time compilation | NO | YES, NO |
+-------------------------------+----------------------------------------------+-------------+------------------+
| AMReX_SYCL_AOT_GRF_MODE | Specify AOT register file mode | Default | Default, Large, |
| | | | AutoLarge |
+-------------------------------+----------------------------------------------+-------------+------------------+
| AMREX_INTEL_ARCH | Specify target if AOT is enabled | None | pvc, etc. |
+-------------------------------+----------------------------------------------+-------------+------------------+
| AMReX_SYCL_SPLIT_KERNEL | Enable SYCL kernel splitting | YES | YES, NO |
+-------------------------------+----------------------------------------------+-------------+------------------+
| AMReX_SYCL_ONEDPL | Enable SYCL's oneDPL algorithms | NO | YES, NO |
+-------------------------------+----------------------------------------------+-------------+------------------+
| AMReX_SYCL_SUB_GROUP_SIZE | Specify subgroup size | 32 | 64, 32, 16 |
+-------------------------------+----------------------------------------------+-------------+------------------+
| AMReX_PARALLEL_LINK_JOBS | Specify number of parallel link jobs | 1 | positive integer |
+-------------------------------+----------------------------------------------+-------------+------------------+
.. raw:: latex

\end{center}
Expand Down
9 changes: 5 additions & 4 deletions Docs/sphinx_documentation/source/Particle.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ tracked as the particle positions change. To do this, we provide the

::

ParticleContainer<3, 2, 4, 4> mypc;
using MyParticleContainer = ParticleContainer<3, 2, 4, 4>;
MyParticleContainer mypc;

Like the :cpp:`Particle` class itself, the :cpp:`ParticleContainer`
class is templated. The first two template parameters have the same meaning as
Expand Down Expand Up @@ -375,8 +376,8 @@ example, to iterate over all the AoS data:
::


using MyParIter = ConstParIter<2*BL_SPACEDIM>;
for (MyParIter pti(pc, lev); pti.isValid(); ++pti) {
using MyParConstIter = MyParticleContainer::ParConstIterType;
for (MyParConstIter pti(pc, lev); pti.isValid(); ++pti) {
const auto& particles = pti.GetArrayOfStructs();
for (const auto& p : particles) {
// do stuff with p...
Expand All @@ -392,7 +393,7 @@ skipped. You can also access the SoA data using the :cpp:`ParIter` as follows:
::


using MyParIter = ParIter<0, 0, 2, 2>;
using MyParIter = MyParticleContainer::ParIterType;
for (MyParIter pti(pc, lev); pti.isValid(); ++pti) {
auto& particle_attributes = pti.GetStructOfArrays();
RealVector& real_comp0 = particle_attributes.GetRealData(0);
Expand Down
1 change: 0 additions & 1 deletion Src/Amr/AMReX_Amr.H
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ class AmrInSituBridge;
* not belong on a single level, like establishing and updating the hierarchy
* of levels, global timestepping, and managing the different AmrLevels
*/

class Amr
: public AmrCore
{
Expand Down
Loading

0 comments on commit a6ed89c

Please sign in to comment.