
Handle write-race conditions in CUDA kernels: Add atomic operation #1104

Merged: 1 commit merged into vgvassilev:master from cuda-atomic on Oct 5, 2024

Conversation

@kchristin22 (Collaborator) commented on Sep 24, 2024:

When two or more threads read from the same address, the reverse-mode derivative translates this into two or more threads writing to the same position:

__global__ void kernel(int *a, int *b) {
  int index = threadIdx.x + blockIdx.x * blockDim.x;
  a[2 * index] = b[0];
  a[2 * index + 1] = b[0];
}

void kernel_grad(int *a, int *b, int *_d_a, int *_d_b) {
    unsigned int _t1 = blockIdx.x;
    unsigned int _t0 = blockDim.x;
    int _d_index = 0;
    int index0 = threadIdx.x + _t1 * _t0;
    int _t2 = a[2 * index0];
    a[2 * index0] = b[0];
    int _t3 = a[2 * index0 + 1];
    a[2 * index0 + 1] = b[0];
    {
        a[2 * index0 + 1] = _t3;
        int _r_d1 = _d_a[2 * index0 + 1];
        _d_a[2 * index0 + 1] = 0;
        _d_b[0] += _r_d1;
    }
    {
        a[2 * index0] = _t2;
        int _r_d0 = _d_a[2 * index0];
        _d_a[2 * index0] = 0;
        _d_b[0] += _r_d0;
    }
}

A simple solution to this problem is to make the final add-assignment atomic:

__global__ void kernel(int *a, int *b) {
  int index = threadIdx.x + blockIdx.x * blockDim.x;
  a[2 * index] = b[0];
  a[2 * index + 1] = b[0];
}

void kernel_grad(int *a, int *b, int *_d_a, int *_d_b) {
    unsigned int _t1 = blockIdx.x;
    unsigned int _t0 = blockDim.x;
    int _d_index = 0;
    int index0 = threadIdx.x + _t1 * _t0;
    int _t2 = a[2 * index0];
    a[2 * index0] = b[0];
    int _t3 = a[2 * index0 + 1];
    a[2 * index0 + 1] = b[0];
    {
        a[2 * index0 + 1] = _t3;
        int _r_d1 = _d_a[2 * index0 + 1];
        _d_a[2 * index0 + 1] = 0;
        atomicAdd(&_d_b[0], _r_d1);
    }
    {
        a[2 * index0] = _t2;
        int _r_d0 = _d_a[2 * index0];
        _d_a[2 * index0] = 0;
        atomicAdd(&_d_b[0], _r_d0);
    }
}
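The hazard the atomic fixes can be illustrated on the host with `std::thread`: many threads accumulate into one shared gradient slot, just like the concurrent `_d_b[0] += ...` writes in the reverse pass. This is an illustration only, not clad output; `std::atomic::fetch_add` plays the role of `atomicAdd`:

```cpp
#include <atomic>
#include <thread>
#include <vector>

// Host-side sketch of the write race: num_threads threads each add
// per_thread_adds contributions into a single shared accumulator,
// mimicking many threads updating _d_b[0] in the generated gradient.
int accumulate_gradient(int num_threads, int per_thread_adds) {
  std::atomic<int> d_b{0};  // shared gradient slot
  std::vector<std::thread> threads;
  for (int t = 0; t < num_threads; ++t)
    threads.emplace_back([&] {
      for (int i = 0; i < per_thread_adds; ++i)
        d_b.fetch_add(1);  // atomic add; a plain += here could lose updates
    });
  for (auto& th : threads)
    th.join();
  return d_b.load();
}
```

With the atomic add the result is always `num_threads * per_thread_adds`; with a plain non-atomic `+=` on a shared `int`, concurrent read-modify-write interleavings can silently drop contributions.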

@kchristin22 self-assigned this on Sep 24, 2024

codecov bot commented Sep 24, 2024

Codecov Report

All modified and coverable lines are covered by tests ✅

Project coverage is 94.22%. Comparing base (844d9a3) to head (4c7bea2).
Report is 1 commit behind head on master.

Additional details and impacted files


@@            Coverage Diff             @@
##           master    #1104      +/-   ##
==========================================
+ Coverage   94.20%   94.22%   +0.01%     
==========================================
  Files          48       48              
  Lines        8104     8132      +28     
==========================================
+ Hits         7634     7662      +28     
  Misses        470      470              
Files with missing lines                              Coverage Δ
include/clad/Differentiator/ReverseModeVisitor.h      97.22% <ø> (ø)
lib/Differentiator/ReverseModeVisitor.cpp             95.44% <100.00%> (+0.05%) ⬆️

@github-actions bot (Contributor) left a comment:

clang-tidy made some suggestions


FunctionDecl* atomicAddFunc = nullptr;
for (LookupResult::iterator it = lookupResult.begin();
     it != lookupResult.end(); it++) {
warning: use range-based for loop instead [modernize-loop-convert]

Suggested change:
-    it != lookupResult.end(); it++) {
+    for (auto decl : lookupResult) {

lib/Differentiator/ReverseModeVisitor.cpp:1497:

-           NamedDecl* decl = *it;
-           // FIXME: check for underlying types of the pointers
+           // FIXME: check for underlying types of the pointers


FunctionDecl* atomicAddFunc = nullptr;
for (LookupResult::iterator it = lookupResult.begin();
     it != lookupResult.end(); it++) {
warning: use range-based for loop instead [modernize-loop-convert]

Suggested change:
-    it != lookupResult.end(); it++) {
+    for (auto decl : lookupResult) {

lib/Differentiator/ReverseModeVisitor.cpp:2317:

-               NamedDecl* decl = *it;
-               // FIXME: check for underlying types of the pointers
+               // FIXME: check for underlying types of the pointers

@github-actions bot (Contributor) left a comment:

clang-tidy made some suggestions

m_Context.getTranslationUnitDecl());

FunctionDecl* atomicAddFunc = nullptr;
for (auto decl : lookupResult) {
warning: 'auto decl' can be declared as 'auto *decl' [llvm-qualified-auto]

Suggested change:
-for (auto decl : lookupResult) {
+for (auto *decl : lookupResult) {

m_Context.getTranslationUnitDecl());

FunctionDecl* atomicAddFunc = nullptr;
for (auto decl : lookupResult) {
warning: 'auto decl' can be declared as 'auto *decl' [llvm-qualified-auto]

Suggested change:
-for (auto decl : lookupResult) {
+for (auto *decl : lookupResult) {
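The two clang-tidy hints combine into a single idiom: a range-based loop spelled with an explicit pointer. A self-contained illustration (toy types, not clad code — `Decl` and `sumIds` are made up for the example):

```cpp
#include <vector>

// Toy stand-in for clang's NamedDecl / LookupResult, just to show the
// loop style the warnings ask for.
struct Decl { int id; };

int sumIds(const std::vector<Decl*>& lookupResult) {
  int sum = 0;
  // Range-based loop (modernize-loop-convert) with the pointer made
  // explicit (llvm-qualified-auto): `auto *decl`, not `auto decl`.
  for (auto *decl : lookupResult)
    sum += decl->id;
  return sum;
}
```

The `auto *` spelling deduces the same `Decl*` type as plain `auto`, but makes it visible to the reader that `decl` is a pointer, which is the LLVM style the check enforces.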

@vgvassilev force-pushed the cuda-atomic branch 2 times, most recently from 7a1d405 to 6d6f853 on September 24, 2024 15:12
@parth-07 (Collaborator) left a comment:

Overall looks good.

auto* add_assign = BuildOp(BO_AddAssign, result, dfdx());
// Add it to the body statements.
addToCurrentBlock(add_assign, direction::reverse);
if (m_DiffReq->hasAttr<clang::CUDAGlobalAttr>()) {
parth-07: We also need to go in this if path when differentiating device functions as well, right?

// Add it to the body statements.
addToCurrentBlock(add_assign, direction::reverse);
if (m_DiffReq->hasAttr<clang::CUDAGlobalAttr>()) {
DeclarationName atomicAddId = &m_Context.Idents.get("atomicAdd");
parth-07: Please create a separate function for finding the appropriate atomicAdd function.

m_Context.getTranslationUnitDecl());

FunctionDecl* atomicAddFunc = nullptr;
for (auto decl : lookupResult) {
parth-07: Also, can we use unresolved lookup in Sema::ActOnCallExpr, as we do in DerivativeBuilder::BuildCallToCustomDerivativeOrNumericalDiff, instead of explicitly finding the correct atomicAdd declaration?

Expr* atomicCall = BuildCallExprToFunction(atomicAddFunc, atomicArgs);

// Add it to the body statements.
addToCurrentBlock(atomicCall, direction::reverse);
parth-07: Please shift this entire if path into a separate function such as CudaAddAssign.

github-actions bot commented:
clang-tidy review says "All clean, LGTM! 👍"

@@ -104,6 +104,39 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
return CladTapeResult{*this, PushExpr, PopExpr, TapeRef};
}

clang::Expr* ReverseModeVisitor::BuildCallToCudaAtomicAdd(clang::Expr* LHS,
                                                          clang::Expr* RHS) {
parth-07: Why is the function not using RHS?

kchristin22 (Author): Great observation! It was left over from the copy-paste. I'll fix it.

auto* add_assign = BuildOp(BO_AddAssign, result, dfdx());
// Add it to the body statements.
addToCurrentBlock(add_assign, direction::reverse);
if (m_DiffReq->hasAttr<clang::CUDAGlobalAttr>() ||
parth-07: The same if-condition is repeated in two places. Can you please create a separate function for this, such as shouldUseCUDAAtomicOps()?

kchristin22 (Author): Should I make this a variable or a function of ReverseModeVisitor?

parth-07: A function sounds better to me.
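A minimal sketch of the helper being discussed. This is hypothetical, not the merged clad code: in clad it would be a method on ReverseModeVisitor querying the clang AST, roughly `m_DiffReq->hasAttr<clang::CUDAGlobalAttr>() || m_DiffReq->hasAttr<clang::CUDADeviceAttr>()`; here the attribute checks are modeled with plain bools so the sketch is self-contained:

```cpp
// Hypothetical model of the differentiation request; in clad the two
// flags correspond to clang's CUDAGlobalAttr and CUDADeviceAttr.
struct DiffRequest {
  bool hasCudaGlobalAttr = false;  // function is a __global__ kernel
  bool hasCudaDeviceAttr = false;  // function is a __device__ function
};

// Atomic ops are needed whenever the generated derivative itself runs on
// the GPU, i.e. for kernels and device functions alike.
bool shouldUseCUDAAtomicOps(const DiffRequest& req) {
  return req.hasCudaGlobalAttr || req.hasCudaDeviceAttr;
}
```

Centralizing the check in one function keeps the two call sites mentioned above from drifting apart when, say, device-function support is added.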


@parth-07 (Collaborator) left a comment:

Looks good.

@vgvassilev force-pushed the cuda-atomic branch 2 times, most recently from 20aef9d to cd74405 on October 4, 2024 06:21

@vgvassilev merged commit 4ac4f77 into vgvassilev:master on Oct 5, 2024
90 checks passed