Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use double for all scaling values and floating-point constant values at the Device Op API #557

Merged
merged 4 commits into from
Jan 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions client_example/06_softmax/softmax4d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ int main(int argc, char* argv[])
ck::index_t num_elements =
std::accumulate(in_lengths.begin(), in_lengths.end(), 1, std::multiplies<ck::index_t>());

AccDataType alpha{2.0f};
AccDataType beta{2.0f};
double alpha{2.0};
double beta{2.0};

SimpleDeviceMem in(sizeof(InDataType) * num_elements);
SimpleDeviceMem out(sizeof(OutDataType) * num_elements);
Expand Down Expand Up @@ -82,8 +82,8 @@ int main(int argc, char* argv[])
auto argument_ptr = op_ptr->MakeArgumentPointer(in_lengths,
in_strides,
reduce_dims,
&alpha,
&beta,
alpha,
beta,
in.GetDeviceBuffer(),
out.GetDeviceBuffer(),
PassThrough{},
Expand Down Expand Up @@ -129,8 +129,8 @@ int main(int argc, char* argv[])
auto argument_ptr = op_ptr->MakeArgumentPointer(in_lengths,
in_strides,
reduce_dims,
&alpha,
&beta,
alpha,
beta,
in.GetDeviceBuffer(),
out.GetDeviceBuffer(),
PassThrough{},
Expand All @@ -147,4 +147,4 @@ int main(int argc, char* argv[])
}

return 0;
}
}
4 changes: 2 additions & 2 deletions client_example/15_reduce/reduce_nhwc_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ int main(int argc, char* argv[])
for(auto dim : reduce_dims)
reduce_length *= in_lengths[dim];

float alpha{1.0f};
float beta{0.0f};
double alpha{1.0};
double beta{0.0};

SimpleDeviceMem in(sizeof(InDataType) * num_in_elements);
SimpleDeviceMem out(sizeof(OutDataType) * num_out_elements);
Expand Down
8 changes: 4 additions & 4 deletions example/12_reduce/reduce_blockwise_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,8 @@ int reduce_blockwise_impl(bool do_verification,
arrOutLengths,
arrOutStrides,
reduceDims,
alpha,
beta,
static_cast<double>(alpha),
static_cast<double>(beta),
in.mData.data(),
nullptr,
out_ref.mData.data(),
Expand All @@ -295,8 +295,8 @@ int reduce_blockwise_impl(bool do_verification,
arrOutLengths,
arrOutStrides,
reduceDims,
alpha,
beta,
static_cast<double>(alpha),
static_cast<double>(beta),
in_dev.GetDeviceBuffer(),
nullptr,
out_dev.GetDeviceBuffer(),
Expand Down
12 changes: 6 additions & 6 deletions example/12_reduce/reduce_blockwise_two_call.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,8 @@ int main(int argc, char* argv[])
arrOutLengths,
arrOutStrides,
reduceDims,
alpha,
beta,
static_cast<double>(alpha),
static_cast<double>(beta),
in_1.mData.data(),
nullptr,
out_ref.mData.data(),
Expand All @@ -254,8 +254,8 @@ int main(int argc, char* argv[])
arrInLengths_2,
arrInStrides_2,
reduceDims_1,
1.0f,
0.0f,
1.0,
0.0,
in_1_dev.GetDeviceBuffer(),
nullptr,
in_2_dev.GetDeviceBuffer(),
Expand All @@ -278,8 +278,8 @@ int main(int argc, char* argv[])
arrOutLengths,
arrOutStrides,
reduceDims_2,
alpha,
beta,
static_cast<double>(alpha),
static_cast<double>(beta),
in_2_dev.GetDeviceBuffer(),
nullptr,
out_dev.GetDeviceBuffer(),
Expand Down
8 changes: 4 additions & 4 deletions example/12_reduce/reduce_multiblock_atomic_add_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,8 @@ int reduce_multiblock_atomic_add_impl(bool do_verification,
arrOutLengths,
arrOutStrides,
reduceDims,
alpha,
beta,
static_cast<double>(alpha),
static_cast<double>(beta),
in.mData.data(),
nullptr,
out_ref.mData.data(),
Expand All @@ -208,8 +208,8 @@ int reduce_multiblock_atomic_add_impl(bool do_verification,
arrOutLengths,
arrOutStrides,
reduceDims,
alpha,
beta,
static_cast<double>(alpha),
static_cast<double>(beta),
in_dev.GetDeviceBuffer(),
nullptr,
out_dev.GetDeviceBuffer(),
Expand Down
12 changes: 6 additions & 6 deletions example/23_softmax/softmax_blockwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ class SimpleAppArgs
int option_index = 0;

public:
std::vector<size_t> inLengths = {8, 128, 2048};
std::vector<AccDataType> scales = {2.0f, 2.0f};
std::vector<size_t> inLengths = {8, 128, 2048};
std::vector<double> scales = {2.0, 2.0};

bool do_verification = true;
int init_method = 2;
Expand Down Expand Up @@ -151,8 +151,8 @@ int main(int argc, char* argv[])
auto inStrides = in.mDesc.GetStrides();
auto outStrides = out.mDesc.GetStrides();

AccDataType alpha = args.scales[0];
AccDataType beta = args.scales[1];
double alpha = args.scales[0];
double beta = args.scales[1];

std::cout << "in: " << in.mDesc << std::endl;
std::cout << "out: " << out.mDesc << std::endl;
Expand Down Expand Up @@ -221,8 +221,8 @@ int main(int argc, char* argv[])
auto argument_ptr = device_instance.MakeArgumentPointer(i_inLengths,
i_inStrides,
reduceDims,
&alpha,
&beta,
alpha,
beta,
in_dev.GetDeviceBuffer(),
out_dev.GetDeviceBuffer(),
PassThrough{},
Expand Down
8 changes: 4 additions & 4 deletions example/33_multiple_reduce/dual_reduce_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,8 @@ int mean_meansquare_dual_reduce_test(size_t n,
size_t invariant_total_length = n;
size_t reduce_total_length = h * w * c;

const AccDataType alpha = ck::type_convert<AccDataType>(1.0f);
const AccDataType beta = ck::type_convert<AccDataType>(0.0f);
const double alpha = 1.0f;
const double beta = 0.0f;

std::size_t num_thread = 1;

Expand Down Expand Up @@ -267,8 +267,8 @@ int mean_meansquare_dual_reduce_test(size_t n,
i_outLengths,
{i_outStrides, i_outStrides},
reduceDims,
{&alpha, &alpha},
{&beta, &beta},
{alpha, alpha},
{beta, beta},
in_dev.GetDeviceBuffer(),
{mean_dev.GetDeviceBuffer(), meansquare_dev.GetDeviceBuffer()},
ck::make_tuple(InElementwiseOperation_Mean{}, InElementwiseOperation_Meansquare{}),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ struct DeviceElementwiseNormalization : public BaseOperator
const std::vector<index_t> betaStrides,
const std::vector<index_t> yStrides,
const std::vector<index_t> reduceDims,
AccDataType epsilon,
double epsilon,
const std::array<const void*, NumInput> in_dev_buffers,
const void* p_gamma,
const void* p_beta,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ struct DeviceMultipleReduce : public BaseOperator
const std::array<index_t, NumOutputDim> outLengths,
const std::array<std::array<index_t, NumOutputDim>, NumReduction> outStrides,
const std::array<int, NumReduceDim> reduceDims,
const std::array<const void*, NumReduction> alphas,
const std::array<const void*, NumReduction> betas,
const std::array<double, NumReduction> alphas,
const std::array<double, NumReduction> betas,
const void* in_dev,
const std::array<void*, NumReduction> out_dev_buffers,
const InElementwiseOperationTuple in_elementwise_op_tuple,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ struct DeviceNormalization : public BaseOperator
const std::vector<index_t> betaStrides,
const std::vector<index_t> yStrides,
const std::vector<index_t> reduceDims,
AccDataType epsilon,
double epsilon,
const void* p_x,
const void* p_gamma,
const void* p_beta,
Expand Down
4 changes: 2 additions & 2 deletions include/ck/tensor_operation/gpu/device/device_reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ struct DeviceReduce : public BaseOperator
const std::array<index_t, NumOutDim> outLengths,
const std::array<index_t, NumOutDim> outStrides,
const std::array<int, NumReduceDim> reduceDims,
float alpha,
float beta,
double alpha,
double beta,
const void* in_dev,
const void* in_index_dev,
void* out_dev,
Expand Down
10 changes: 4 additions & 6 deletions include/ck/tensor_operation/gpu/device/device_softmax.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,8 @@ struct DeviceSoftmax : public BaseOperator
// @param[in] inLengths Input tensor extent(s) from high to low dimension
// @param[in] inStrides Input tensor stride(s) from high to low dimension
// @param[in] reduceDims The dimension(s) the normalization operation is applied
// @param[in] alpha Typeless pointer in host memory storing the alpha scaling
// value as type AccDataType
// @param[in] beta Typeless pointer in host memory storing the beta scaling
// value as type AccDataType
// @param[in] alpha Alpha scaling value, passed by value as double
// @param[in] beta Beta scaling value, passed by value as double
// @param[in] in_dev Typeless const pointer in device memory storing the input
// tensor
// @param out_dev Typeless pointer in device memory storing the output tensor
Expand All @@ -43,8 +41,8 @@ struct DeviceSoftmax : public BaseOperator
MakeArgumentPointer(const std::vector<index_t> inLengths,
const std::vector<index_t> inStrides,
const std::vector<int> reduceDims,
const void* alpha,
const void* beta,
double alpha,
double beta,
const void* in_dev,
void* out_dev,
InElementwiseOp in_elementwise_op,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,18 +270,18 @@ struct DeviceElementwiseNormalizationImpl
const std::vector<index_t> reduceDims,
XElementwiseOperation x_elementwise_op,
YElementwiseOperation y_elementwise_op,
AccDataType epsilon,
double epsilon,
const std::array<const void*, NumInput> in_dev_buffers,
const GammaDataType* p_gamma,
const BetaDataType* p_beta,
YDataType* p_y)
: epsilon_(epsilon),
p_gamma_(p_gamma),
: p_gamma_(p_gamma),
p_beta_(p_beta),
p_y_(p_y),
x_elementwise_op_(x_elementwise_op),
y_elementwise_op_(y_elementwise_op)
{
epsilon_ = static_cast<AccDataType>(epsilon);

Lengths_ = shuffle_tensor_dimensions<Rank, NumReduceDim>(lengths, reduceDims);
for(int i = 0; i < NumInput; i++)
Expand Down Expand Up @@ -543,7 +543,7 @@ struct DeviceElementwiseNormalizationImpl
const std::vector<index_t> betaStrides,
const std::vector<index_t> yStrides,
const std::vector<index_t> reduceDims,
AccDataType epsilon,
double epsilon,
const std::array<const void*, NumInput> in_dev_buffers,
const void* p_gamma,
const void* p_beta,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,8 @@ struct DeviceMultipleReduceMultiBlock : public DeviceMultipleReduce<Rank,
const std::array<index_t, NumOutputDim>& outLengths,
const std::array<std::array<index_t, NumOutputDim>, NumReduction>& outStridesArray,
const std::array<int, NumReduceDim>& reduceDims,
const std::array<const void*, NumReduction>& alphas,
const std::array<const void*, NumReduction>& betas,
const std::array<double, NumReduction>& alphas,
const std::array<double, NumReduction>& betas,
const void* in_dev,
const std::array<void*, NumReduction>& out_dev_buffers,
const InElementwiseOperationTuple in_elementwise_op_tuple,
Expand All @@ -286,8 +286,8 @@ struct DeviceMultipleReduceMultiBlock : public DeviceMultipleReduce<Rank,

for(size_t i = 0; i < NumReduction; i++)
{
alpha_values_(i) = *static_cast<const AccDataType*>(alphas[i]);
beta_values_(i) = *static_cast<const AccDataType*>(betas[i]);
alpha_values_(i) = static_cast<AccDataType>(alphas[i]);
beta_values_(i) = static_cast<AccDataType>(betas[i]);
};

in_dev_ = static_cast<const InDataType*>(in_dev);
Expand Down Expand Up @@ -547,8 +547,8 @@ struct DeviceMultipleReduceMultiBlock : public DeviceMultipleReduce<Rank,
const std::array<index_t, NumOutputDim> outLengths,
const std::array<std::array<index_t, NumOutputDim>, NumReduction> outStridesArray,
const std::array<int, NumReduceDim> reduceDims,
const std::array<const void*, NumReduction> alphas,
const std::array<const void*, NumReduction> betas,
const std::array<double, NumReduction> alphas,
const std::array<double, NumReduction> betas,
const void* in_dev,
const std::array<void*, NumReduction> out_dev_buffers,
const InElementwiseOperationTuple in_elementwise_op_tuple,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,8 @@ struct DeviceMultipleReduceThreadWise : public DeviceMultipleReduce<Rank,
const std::array<index_t, NumOutputDim>& outLengths,
const std::array<std::array<index_t, NumOutputDim>, NumReduction>& outStridesArray,
const std::array<int, NumReduceDim>& reduceDims,
const std::array<const void*, NumReduction>& alphas,
const std::array<const void*, NumReduction>& betas,
const std::array<double, NumReduction>& alphas,
const std::array<double, NumReduction>& betas,
const void* in_dev,
const std::array<void*, NumReduction>& out_dev_buffers,
const InElementwiseOperationTuple in_elementwise_op_tuple,
Expand All @@ -211,8 +211,8 @@ struct DeviceMultipleReduceThreadWise : public DeviceMultipleReduce<Rank,

for(size_t i = 0; i < NumReduction; i++)
{
alpha_values_(i) = *static_cast<const AccDataType*>(alphas[i]);
beta_values_(i) = *static_cast<const AccDataType*>(betas[i]);
alpha_values_(i) = static_cast<AccDataType>(alphas[i]);
beta_values_(i) = static_cast<AccDataType>(betas[i]);
};

in_dev_ = static_cast<const InDataType*>(in_dev);
Expand Down Expand Up @@ -374,8 +374,8 @@ struct DeviceMultipleReduceThreadWise : public DeviceMultipleReduce<Rank,
const std::array<index_t, NumOutputDim> outLengths,
const std::array<std::array<index_t, NumOutputDim>, NumReduction> outStridesArray,
const std::array<int, NumReduceDim> reduceDims,
const std::array<const void*, NumReduction> alphas,
const std::array<const void*, NumReduction> betas,
const std::array<double, NumReduction> alphas,
const std::array<double, NumReduction> betas,
const void* in_dev,
const std::array<void*, NumReduction> out_dev_buffers,
const InElementwiseOperationTuple in_elementwise_op_tuple,
Expand Down
Loading