-
Notifications
You must be signed in to change notification settings - Fork 80
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Support for type 3 in 1D, 2D, and 3D in the GPU library cufinufft * Removed the CPU fseries computation (only used for benchmark no longer needed). * Added complex arithmetic support for cuda_complex type * Added tests for type 3 in 1D, 2D, and 3D and cuda_complex arithmetic * integrated flipwind on type 1-2 on GPU * Minor fixes on the GPU code: - removed memory leaks in case of errors - renamed maxbatchsize to batchsize - renamed the fseries and nuft to match CPU code
- Loading branch information
1 parent
e77225d
commit 481b70e
Showing
38 changed files
with
1,764 additions
and
733 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
#ifndef FINUFFT_INCLUDE_CUFINUFFT_CONTRIB_HELPER_MATH_H | ||
#define FINUFFT_INCLUDE_CUFINUFFT_CONTRIB_HELPER_MATH_H | ||
|
||
#include <cuComplex.h> | ||
|
||
// This header provides operator overloads for the cuComplex types
// (cuDoubleComplex and cuFloatComplex).
// It mainly wraps the existing CUDA implementations from <cuComplex.h>,
// e.g. cuCadd, cuCsub, cuCmul, cuCdiv, cuCreal, cuCimag, cuCabs and cuConj
// (and their single-precision counterparts carrying an `f` suffix).
|
||
// cuDoubleComplex + cuDoubleComplex.
// Lets callers write `a + b`; the sum itself is produced by cuCadd.
__host__ __device__ __forceinline__ cuDoubleComplex operator+(
    const cuDoubleComplex &a, const cuDoubleComplex &b) noexcept {
  const cuDoubleComplex sum = cuCadd(a, b);
  return sum;
}
|
||
// cuDoubleComplex - cuDoubleComplex.
// Lets callers write `a - b`; the difference itself is produced by cuCsub.
__host__ __device__ __forceinline__ cuDoubleComplex operator-(
    const cuDoubleComplex &a, const cuDoubleComplex &b) noexcept {
  const cuDoubleComplex difference = cuCsub(a, b);
  return difference;
}
|
||
// cuDoubleComplex * cuDoubleComplex.
// Lets callers write `a * b`; the product itself is produced by cuCmul.
__host__ __device__ __forceinline__ cuDoubleComplex operator*(
    const cuDoubleComplex &a, const cuDoubleComplex &b) noexcept {
  const cuDoubleComplex product = cuCmul(a, b);
  return product;
}
|
||
// cuDoubleComplex / cuDoubleComplex.
// Lets callers write `a / b`; the quotient itself is produced by cuCdiv.
__host__ __device__ __forceinline__ cuDoubleComplex operator/(
    const cuDoubleComplex &a, const cuDoubleComplex &b) noexcept {
  const cuDoubleComplex quotient = cuCdiv(a, b);
  return quotient;
}
|
||
// cuDoubleComplex == cuDoubleComplex.
// True only when both the real parts and the imaginary parts compare equal
// with the built-in floating-point `==`.
__host__ __device__ __forceinline__ bool operator==(const cuDoubleComplex &a,
                                                    const cuDoubleComplex &b) noexcept {
  // A mismatch in the real part already decides the answer.
  if (!(cuCreal(a) == cuCreal(b))) {
    return false;
  }
  return cuCimag(a) == cuCimag(b);
}
|
||
// cuDoubleComplex != cuDoubleComplex.
// Defined as the logical negation of the cuDoubleComplex operator==.
__host__ __device__ __forceinline__ bool operator!=(const cuDoubleComplex &a,
                                                    const cuDoubleComplex &b) noexcept {
  const bool same = (a == b);
  return !same;
}
|
||
// cuDoubleComplex + double.
// The real scalar b is added to the real part of a; the imaginary part of a
// is passed through unchanged.
__host__ __device__ __forceinline__ cuDoubleComplex operator+(const cuDoubleComplex &a,
                                                              double b) noexcept {
  const double re = cuCreal(a) + b;
  const double im = cuCimag(a);
  return make_cuDoubleComplex(re, im);
}
|
||
// double + cuDoubleComplex.
// The real scalar a is added to the real part of b; the imaginary part of b
// is passed through unchanged.
__host__ __device__ __forceinline__ cuDoubleComplex operator+(
    double a, const cuDoubleComplex &b) noexcept {
  const double re = a + cuCreal(b);
  const double im = cuCimag(b);
  return make_cuDoubleComplex(re, im);
}
|
||
// cuDoubleComplex - double.
// The real scalar b is subtracted from the real part of a; the imaginary part
// of a is passed through unchanged.
__host__ __device__ __forceinline__ cuDoubleComplex operator-(const cuDoubleComplex &a,
                                                              double b) noexcept {
  const double re = cuCreal(a) - b;
  const double im = cuCimag(a);
  return make_cuDoubleComplex(re, im);
}
|
||
// double - cuDoubleComplex.
// The real part is a minus the real part of b; the imaginary part is the
// negated imaginary part of b.
__host__ __device__ __forceinline__ cuDoubleComplex operator-(
    double a, const cuDoubleComplex &b) noexcept {
  const double re = a - cuCreal(b);
  const double im = -cuCimag(b);
  return make_cuDoubleComplex(re, im);
}
|
||
// cuDoubleComplex * double.
// Scales both the real and the imaginary part of a by the real factor b.
__host__ __device__ __forceinline__ cuDoubleComplex operator*(const cuDoubleComplex &a,
                                                              double b) noexcept {
  const double re = cuCreal(a) * b;
  const double im = cuCimag(a) * b;
  return make_cuDoubleComplex(re, im);
}
|
||
// double * cuDoubleComplex.
// Scales both the real and the imaginary part of b by the real factor a.
__host__ __device__ __forceinline__ cuDoubleComplex operator*(
    double a, const cuDoubleComplex &b) noexcept {
  const double re = a * cuCreal(b);
  const double im = a * cuCimag(b);
  return make_cuDoubleComplex(re, im);
}
|
||
// cuDoubleComplex / double.
// Divides the real and the imaginary part of a separately by the real divisor b.
__host__ __device__ __forceinline__ cuDoubleComplex operator/(const cuDoubleComplex &a,
                                                              double b) noexcept {
  const double re = cuCreal(a) / b;
  const double im = cuCimag(a) / b;
  return make_cuDoubleComplex(re, im);
}
|
||
// double / cuDoubleComplex: returns the complex quotient a / b.
//
// The real numerator is promoted to the complex value (a, 0) and handed to
// cuCdiv, the same routine used by the cuDoubleComplex / cuDoubleComplex
// overload. Forming |b|^2 = re(b)^2 + im(b)^2 directly overflows to inf (or
// underflows to 0) when the components of b are very large (or very small),
// even though the quotient itself is representable; cuCdiv guards against
// that by scaling its operands before squaring.
__host__ __device__ __forceinline__ cuDoubleComplex operator/(
    double a, const cuDoubleComplex &b) noexcept {
  return cuCdiv(make_cuDoubleComplex(a, 0.0), b);
}
|
||
// cuFloatComplex + cuFloatComplex.
// Lets callers write `a + b`; the sum itself is produced by cuCaddf.
__host__ __device__ __forceinline__ cuFloatComplex operator+(
    const cuFloatComplex &a, const cuFloatComplex &b) noexcept {
  const cuFloatComplex sum = cuCaddf(a, b);
  return sum;
}
|
||
// cuFloatComplex - cuFloatComplex.
// Lets callers write `a - b`; the difference itself is produced by cuCsubf.
__host__ __device__ __forceinline__ cuFloatComplex operator-(
    const cuFloatComplex &a, const cuFloatComplex &b) noexcept {
  const cuFloatComplex difference = cuCsubf(a, b);
  return difference;
}
|
||
// cuFloatComplex * cuFloatComplex.
// Lets callers write `a * b`; the product itself is produced by cuCmulf.
__host__ __device__ __forceinline__ cuFloatComplex operator*(
    const cuFloatComplex &a, const cuFloatComplex &b) noexcept {
  const cuFloatComplex product = cuCmulf(a, b);
  return product;
}
|
||
// cuFloatComplex / cuFloatComplex.
// Lets callers write `a / b`; the quotient itself is produced by cuCdivf.
__host__ __device__ __forceinline__ cuFloatComplex operator/(
    const cuFloatComplex &a, const cuFloatComplex &b) noexcept {
  const cuFloatComplex quotient = cuCdivf(a, b);
  return quotient;
}
|
||
// cuFloatComplex == cuFloatComplex.
// True only when both the real parts and the imaginary parts compare equal
// with the built-in floating-point `==`.
__host__ __device__ __forceinline__ bool operator==(const cuFloatComplex &a,
                                                    const cuFloatComplex &b) noexcept {
  // A mismatch in the real part already decides the answer.
  if (!(cuCrealf(a) == cuCrealf(b))) {
    return false;
  }
  return cuCimagf(a) == cuCimagf(b);
}
|
||
// cuFloatComplex != cuFloatComplex.
// Defined as the logical negation of the cuFloatComplex operator==.
__host__ __device__ __forceinline__ bool operator!=(const cuFloatComplex &a,
                                                    const cuFloatComplex &b) noexcept {
  const bool same = (a == b);
  return !same;
}
|
||
// cuFloatComplex + float.
// The real scalar b is added to the real part of a; the imaginary part of a
// is passed through unchanged.
__host__ __device__ __forceinline__ cuFloatComplex operator+(const cuFloatComplex &a,
                                                             float b) noexcept {
  const float re = cuCrealf(a) + b;
  const float im = cuCimagf(a);
  return make_cuFloatComplex(re, im);
}
|
||
// float + cuFloatComplex.
// The real scalar a is added to the real part of b; the imaginary part of b
// is passed through unchanged.
__host__ __device__ __forceinline__ cuFloatComplex operator+(
    float a, const cuFloatComplex &b) noexcept {
  const float re = a + cuCrealf(b);
  const float im = cuCimagf(b);
  return make_cuFloatComplex(re, im);
}
|
||
// cuFloatComplex - float.
// The real scalar b is subtracted from the real part of a; the imaginary part
// of a is passed through unchanged.
__host__ __device__ __forceinline__ cuFloatComplex operator-(const cuFloatComplex &a,
                                                             float b) noexcept {
  const float re = cuCrealf(a) - b;
  const float im = cuCimagf(a);
  return make_cuFloatComplex(re, im);
}
|
||
// float - cuFloatComplex.
// The real part is a minus the real part of b; the imaginary part is the
// negated imaginary part of b.
__host__ __device__ __forceinline__ cuFloatComplex operator-(
    float a, const cuFloatComplex &b) noexcept {
  const float re = a - cuCrealf(b);
  const float im = -cuCimagf(b);
  return make_cuFloatComplex(re, im);
}
|
||
// cuFloatComplex * float.
// Scales both the real and the imaginary part of a by the real factor b.
__host__ __device__ __forceinline__ cuFloatComplex operator*(const cuFloatComplex &a,
                                                             float b) noexcept {
  const float re = cuCrealf(a) * b;
  const float im = cuCimagf(a) * b;
  return make_cuFloatComplex(re, im);
}
|
||
// float * cuFloatComplex.
// Scales both the real and the imaginary part of b by the real factor a.
__host__ __device__ __forceinline__ cuFloatComplex operator*(
    float a, const cuFloatComplex &b) noexcept {
  const float re = a * cuCrealf(b);
  const float im = a * cuCimagf(b);
  return make_cuFloatComplex(re, im);
}
|
||
// cuFloatComplex / float.
// Divides the real and the imaginary part of a separately by the real divisor b.
__host__ __device__ __forceinline__ cuFloatComplex operator/(const cuFloatComplex &a,
                                                             float b) noexcept {
  const float re = cuCrealf(a) / b;
  const float im = cuCimagf(a) / b;
  return make_cuFloatComplex(re, im);
}
|
||
// float / cuFloatComplex: returns the complex quotient a / b.
//
// The real numerator is promoted to the complex value (a, 0) and handed to
// cuCdivf, the same routine used by the cuFloatComplex / cuFloatComplex
// overload. Forming |b|^2 = re(b)^2 + im(b)^2 directly overflows to inf (or
// underflows to 0) in single precision when the components of b are very
// large (or very small), even though the quotient itself is representable;
// cuCdivf guards against that by scaling its operands before squaring.
__host__ __device__ __forceinline__ cuFloatComplex operator/(
    float a, const cuFloatComplex &b) noexcept {
  return cuCdivf(make_cuFloatComplex(a, 0.0f), b);
}
|
||
#endif // FINUFFT_INCLUDE_CUFINUFFT_CONTRIB_HELPER_MATH_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.