Skip to content

Commit

Permalink
Merge branch 'develop' into feature/logging-updates
Browse files Browse the repository at this point in the history
  • Loading branch information
gardner48 authored Dec 10, 2024
2 parents 97daf52 + 175fe69 commit 2c744b9
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 28 deletions.
45 changes: 45 additions & 0 deletions .github/workflows/ubuntu-latest-oneapi.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# CI workflow: configure and build the project with the Intel oneAPI
# compilers (icx/icpx) and SYCL enabled, inside the oneAPI basekit container.
name: Build - Ubuntu/dpcpp

# Triggers: pull requests, merge-group checks, and manual dispatch.
on:
  pull_request:
  merge_group:
  workflow_dispatch:

# Runs are grouped by workflow name plus head ref (falling back to the ref
# name); a newer run in the same group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }}
  cancel-in-progress: true

env:
  # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
  BUILD_TYPE: Release

jobs:
  build:
    strategy:
      matrix:
        # Container image tags to build against: one pinned release plus the
        # moving 'latest' tag.
        ONEAPI_VERSION: [
          '2024.2.1-0-devel-ubuntu22.04',
          'latest' # 2025.0.0-0-devel-ubuntu24.04 at the time of commit
        ]

    runs-on: ubuntu-latest
    container: intel/oneapi-basekit:${{ matrix.ONEAPI_VERSION }}

    steps:
      - uses: actions/checkout@v4

      # Warnings are enabled and promoted to errors, so any new compiler
      # diagnostic fails this job.
      # NOTE(review): in container jobs the github.workspace context may not
      # match the in-container checkout path; configure and build use the same
      # -B path so the two steps stay consistent with each other -- confirm.
      - name: Configure CMake
        run: |
          cmake \
          -B ${{github.workspace}}/build \
          -D CMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} \
          -D CMAKE_C_COMPILER=$(which icx) \
          -D CMAKE_CXX_COMPILER=$(which icpx) \
          -D CMAKE_CXX_FLAGS="-fsycl" \
          -D SUNDIALS_BUILD_WITH_PROFILING=ON \
          -D ENABLE_ALL_WARNINGS=ON \
          -D ENABLE_WARNINGS_AS_ERRORS=ON \
          -D ENABLE_SYCL=ON

      - name: Build
        run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ inner stepper object, `ARKodeCreateMRIStepInnerStepper`.

### Bug Fixes

Fixed a build failure with the SYCL NVector when using Intel oneAPI 2025.0
compilers. See GitHub Issue [#596](https://github.com/LLNL/sundials/issues/596).

Fixed a bug where `CVodeSetProjFailEta` would ignore the `eta` parameter.

Fixed a bug in the SPTFQMR linear solver where recoverable preconditioner errors
Expand Down
3 changes: 3 additions & 0 deletions doc/shared/RecentChanges.rst
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ inner stepper object, :c:func:`ARKodeCreateMRIStepInnerStepper`.

**Bug Fixes**

Fixed a build failure with the SYCL NVector when using Intel oneAPI 2025.0
compilers. See GitHub Issue `#596 <https://github.com/LLNL/sundials/issues/596>`__.

Fixed a bug where :c:func:`CVodeSetProjFailEta` would ignore the ``eta``
parameter.

Expand Down
12 changes: 6 additions & 6 deletions examples/cvode/CXX_sycl/cvAdvDiff_kry_sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,9 +248,9 @@ static int f(sunrealtype t, N_Vector u, N_Vector udot, void* user_data)
h.parallel_for(sycl::range{MX, MY},
[=](sycl::id<2> idx)
{
sunindextype i = idx[0];
sunindextype j = idx[1];
sunindextype tid = i * MY + j;
size_t i = idx[0];
size_t j = idx[1];
size_t tid = i * MY + j;

sunrealtype uij = udata[tid];
sunrealtype udn = (j == 0) ? ZERO : udata[tid - 1];
Expand Down Expand Up @@ -293,9 +293,9 @@ static int jtv(N_Vector v, N_Vector Jv, sunrealtype t, N_Vector u, N_Vector fu,
h.parallel_for(sycl::range{MX, MY},
[=](sycl::id<2> idx)
{
sunindextype i = idx[0];
sunindextype j = idx[1];
sunindextype tid = i * MY + j;
size_t i = idx[0];
size_t j = idx[1];
size_t tid = i * MY + j;

// set the tid-th element of Jv
Jvdata[tid] = -TWO * (verdc + hordc) * vdata[tid];
Expand Down
2 changes: 1 addition & 1 deletion include/nvector/nvector_sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ static inline sunrealtype* N_VGetDeviceArrayPointer_Sycl(N_Vector x)
* NVECTOR API functions
* ----------------------------------------------------------------- */

static inline N_Vector_ID N_VGetVectorID_Sycl(N_Vector v)
static inline N_Vector_ID N_VGetVectorID_Sycl(N_Vector)
{
return SUNDIALS_NVEC_SYCL;
}
Expand Down
16 changes: 10 additions & 6 deletions include/sundials/sundials_sycl_policies.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,14 @@ class ThreadDirectExecPolicy : public ExecPolicy
: blockDim_(ex.blockDim_)
{}

virtual size_t gridSize(size_t numWorkUnits = 0, size_t blockDim = 0) const
virtual size_t gridSize(size_t numWorkUnits = 0, size_t /* blockDim */ = 0) const
{
/* ceil(n/m) = floor((n + m - 1) / m) */
return (numWorkUnits + blockSize() - 1) / blockSize();
}

virtual size_t blockSize(size_t numWorkUnits = 0, size_t gridDim = 0) const
virtual size_t blockSize(size_t /* numWorkUnits */ = 0,
size_t /* gridDim */ = 0) const
{
return blockDim_;
}
Expand Down Expand Up @@ -86,12 +87,14 @@ class GridStrideExecPolicy : public ExecPolicy
: blockDim_(ex.blockDim_), gridDim_(ex.gridDim_)
{}

virtual size_t gridSize(size_t numWorkUnits = 0, size_t blockDim = 0) const
virtual size_t gridSize(size_t /* numWorkUnits */ = 0,
size_t /* blockDim */ = 0) const
{
return gridDim_;
}

virtual size_t blockSize(size_t numWorkUnits = 0, size_t gridDim = 0) const
virtual size_t blockSize(size_t /* numWorkUnits */ = 0,
size_t /* gridDim */ = 0) const
{
return blockDim_;
}
Expand Down Expand Up @@ -124,7 +127,7 @@ class BlockReduceExecPolicy : public ExecPolicy
: blockDim_(ex.blockDim_), gridDim_(ex.gridDim_)
{}

virtual size_t gridSize(size_t numWorkUnits = 0, size_t blockDim = 0) const
virtual size_t gridSize(size_t numWorkUnits = 0, size_t /* blockDim */ = 0) const
{
if (gridDim_ == 0)
{
Expand All @@ -133,7 +136,8 @@ class BlockReduceExecPolicy : public ExecPolicy
return gridDim_;
}

virtual size_t blockSize(size_t numWorkUnits = 0, size_t gridDim = 0) const
virtual size_t blockSize(size_t /* numWorkUnits */ = 0,
size_t /* gridDim */ = 0) const
{
return blockDim_;
}
Expand Down
30 changes: 15 additions & 15 deletions src/nvector/sycl/nvector_sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1146,7 +1146,7 @@ sunrealtype N_VWrmsNorm_Sycl(N_Vector x, N_Vector w)
{
const sunindextype N = NVEC_SYCL_LENGTH(x);
const sunrealtype sum = N_VWSqrSumLocal_Sycl(x, w);
return std::sqrt(sum / N);
return ::sycl::sqrt(sum / N);
}

sunrealtype N_VWSqrSumMaskLocal_Sycl(N_Vector x, N_Vector w, N_Vector id)
Expand Down Expand Up @@ -1192,7 +1192,7 @@ sunrealtype N_VWrmsNormMask_Sycl(N_Vector x, N_Vector w, N_Vector id)
{
const sunindextype N = NVEC_SYCL_LENGTH(x);
const sunrealtype sum = N_VWSqrSumMaskLocal_Sycl(x, w, id);
return std::sqrt(sum / N);
return ::sycl::sqrt(sum / N);
}

sunrealtype N_VMin_Sycl(N_Vector x)
Expand Down Expand Up @@ -1233,7 +1233,7 @@ sunrealtype N_VMin_Sycl(N_Vector x)

sunrealtype N_VWL2Norm_Sycl(N_Vector x, N_Vector w)
{
return std::sqrt(N_VWSqrSumLocal_Sycl(x, w));
return ::sycl::sqrt(N_VWSqrSumLocal_Sycl(x, w));
}

sunrealtype N_VL1Norm_Sycl(N_Vector x)
Expand Down Expand Up @@ -2285,14 +2285,14 @@ static int FusedBuffer_CopyRealArray(N_Vector v, sunrealtype* rdata, int nval,
return SUN_ERR_GENERIC;
}

sunrealtype* h_buffer = (sunrealtype*)((char*)(vcp->fused_buffer_host->ptr) +
vcp->fused_buffer_offset);
sunrealtype* h_buffer = reinterpret_cast<sunrealtype*>(
(char*)(vcp->fused_buffer_host->ptr) + vcp->fused_buffer_offset);

for (int j = 0; j < nval; j++) { h_buffer[j] = rdata[j]; }

/* Set shortcut to the device buffer and update offset*/
*shortcut = (sunrealtype*)((char*)(vcp->fused_buffer_dev->ptr) +
vcp->fused_buffer_offset);
*shortcut = reinterpret_cast<sunrealtype*>(
(char*)(vcp->fused_buffer_dev->ptr) + vcp->fused_buffer_offset);

vcp->fused_buffer_offset += nval * sizeof(sunrealtype);

Expand All @@ -2314,14 +2314,14 @@ static int FusedBuffer_CopyPtrArray1D(N_Vector v, N_Vector* X, int nvec,
return SUN_ERR_GENERIC;
}

sunrealtype** h_buffer = (sunrealtype**)((char*)(vcp->fused_buffer_host->ptr) +
vcp->fused_buffer_offset);
sunrealtype** h_buffer = reinterpret_cast<sunrealtype**>(
(char*)(vcp->fused_buffer_host->ptr) + vcp->fused_buffer_offset);

for (int j = 0; j < nvec; j++) { h_buffer[j] = NVEC_SYCL_DDATAp(X[j]); }

/* Set shortcut to the device buffer and update offset*/
*shortcut = (sunrealtype**)((char*)(vcp->fused_buffer_dev->ptr) +
vcp->fused_buffer_offset);
*shortcut = reinterpret_cast<sunrealtype**>(
(char*)(vcp->fused_buffer_dev->ptr) + vcp->fused_buffer_offset);

vcp->fused_buffer_offset += nvec * sizeof(sunrealtype*);

Expand All @@ -2342,8 +2342,8 @@ static int FusedBuffer_CopyPtrArray2D(N_Vector v, N_Vector** X, int nvec,
return SUN_ERR_GENERIC;
}

sunrealtype** h_buffer = (sunrealtype**)((char*)(vcp->fused_buffer_host->ptr) +
vcp->fused_buffer_offset);
sunrealtype** h_buffer = reinterpret_cast<sunrealtype**>(
(char*)(vcp->fused_buffer_host->ptr) + vcp->fused_buffer_offset);

for (int j = 0; j < nvec; j++)
{
Expand All @@ -2354,8 +2354,8 @@ static int FusedBuffer_CopyPtrArray2D(N_Vector v, N_Vector** X, int nvec,
}

/* Set shortcut to the device buffer and update offset*/
*shortcut = (sunrealtype**)((char*)(vcp->fused_buffer_dev->ptr) +
vcp->fused_buffer_offset);
*shortcut = reinterpret_cast<sunrealtype**>(
(char*)(vcp->fused_buffer_dev->ptr) + vcp->fused_buffer_offset);

/* Update the offset */
vcp->fused_buffer_offset += nvec * nsum * sizeof(sunrealtype*);
Expand Down

0 comments on commit 2c744b9

Please sign in to comment.