Revert "make_unique instead of new" for latency
This reverts commit 6aece95.
amarin16 committed Oct 2, 2024
1 parent 6aece95 commit 766c4b2
Showing 2 changed files with 28 additions and 21 deletions.
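Editorial note: the commit message says only "for latency", so the following is an inference from standard C++ semantics rather than from the commit itself. `std::make_unique<float[]>(n)` value-initializes the array, i.e. zero-fills all `n` elements, while `new float[n]` default-initializes and leaves them untouched. Every buffer in the diffs below is fully overwritten by `MlasConvertHalfToFloatBuffer` or the subsequent loop immediately after allocation, so the zero-fill is pure overhead on these per-job hot paths. A minimal sketch of the difference (the harness and `kN` are illustrative, not from the commit):

    #include <chrono>
    #include <cstddef>
    #include <cstdio>
    #include <memory>

    int main() {
      constexpr size_t kN = size_t{1} << 24;  // 16M floats; large enough to time

      auto t0 = std::chrono::steady_clock::now();
      auto a = std::make_unique<float[]>(kN);  // value-initializes: zero-fills all kN floats
      auto t1 = std::chrono::steady_clock::now();
      float* b = new float[kN];                // default-initializes: allocation only, no fill
      auto t2 = std::chrono::steady_clock::now();

      a[kN - 1] = 1.0f;  // touch both buffers so the allocations are not optimized away
      b[kN - 1] = 2.0f;
      std::printf("make_unique<float[]>: %lld us\n",
                  static_cast<long long>(
                      std::chrono::duration_cast<std::chrono::microseconds>(t1 - t0).count()));
      std::printf("new float[]:          %lld us (check %f)\n",
                  static_cast<long long>(
                      std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count()),
                  a[kN - 1] + b[kN - 1]);

      delete[] b;
      return 0;
    }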
32 changes: 18 additions & 14 deletions onnxruntime/contrib_ops/cpu/skip_layer_norm.cc
Original file line number Diff line number Diff line change
@@ -119,19 +119,17 @@ void ComputeJob(
   float mean_square(0.0f);
 
   const size_t num_elems = static_cast<size_t>(hidden_size);
-
-  std::unique_ptr<float[]> float_input = std::make_unique<float[]>(num_elems);
-  MlasConvertHalfToFloatBuffer(p_input, float_input.get(), num_elems);
-
-  std::unique_ptr<float[]> float_skip = std::make_unique<float[]>(num_elems);
-  MlasConvertHalfToFloatBuffer(p_skip, float_skip.get(), num_elems);
-  std::unique_ptr<float[]> float_bias = nullptr;
+  float* float_input = new float[num_elems];
+  MlasConvertHalfToFloatBuffer(p_input, float_input, num_elems);
+  float* float_skip = new float[num_elems];
+  MlasConvertHalfToFloatBuffer(p_skip, float_skip, num_elems);
+  float* float_bias = nullptr;
   if (bias_data != nullptr) {
-    float_bias = std::make_unique<float[]>(num_elems);
-    MlasConvertHalfToFloatBuffer(bias_data, float_bias.get(), num_elems);
+    float_bias = new float[num_elems];
+    MlasConvertHalfToFloatBuffer(bias_data, float_bias, num_elems);
   }
 
-  std::unique_ptr<float[]> float_output = std::make_unique<float[]>(num_elems);
+  float* float_output = new float[num_elems];
   for (size_t h = 0; h < num_elems; h++) {
     float val = float_input[h] + float_skip[h];
@@ -143,9 +141,12 @@ void ComputeJob(
     mean += val;
     mean_square += val * val;
   }
+  if (float_bias != nullptr) {
+    delete[] float_bias;
+  }
 
   if (nullptr != p_skip_input_bias_add_output) {
-    MlasConvertFloatToHalfBuffer(float_output.get(), p_skip_input_bias_add_output, num_elems);
+    MlasConvertFloatToHalfBuffer(float_output, p_skip_input_bias_add_output, num_elems);
   }
 
   mean = mean / hidden_size;
@@ -155,9 +156,9 @@
     mean_square = sqrt(mean_square / hidden_size - mean * mean + epsilon);
   }
 
-  float* float_gamma = float_input.get();  // overwrite float_input with gamma values, since they have the same size
+  float* float_gamma = float_input;  // overwrite float_input with gamma values, since they have the same size
   MlasConvertHalfToFloatBuffer(gamma_data, float_gamma, num_elems);
-  float* float_beta = float_skip.get();  // overwrite float_skip with beta values, since they have the same size
+  float* float_beta = float_skip;  // overwrite float_skip with beta values, since they have the same size
   MlasConvertHalfToFloatBuffer(beta_data, float_beta, num_elems);
   for (size_t h = 0; h < num_elems; h++) {
     if (simplified) {
@@ -168,8 +169,11 @@
       float_output[h] = (float_output[h] - mean) / mean_square * float_gamma[h] + float_beta[h];
     }
   }
+  delete[] float_gamma;  // also deletes float_input
+  delete[] float_beta;   // also deletes float_skip
 
-  MlasConvertFloatToHalfBuffer(float_output.get(), p_output, num_elems);
+  MlasConvertFloatToHalfBuffer(float_output, p_output, num_elems);
+  delete[] float_output;
 }
 
 }  // namespace
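An aside, not part of this commit: if the codebase can target C++20, `std::make_unique_for_overwrite` gives the same no-fill allocation as `new float[n]` while keeping RAII ownership. A sketch, reusing the names from the diff above:

    #include <memory>

    // Default-initializes (no zero-fill, C++20), but the buffer is still
    // freed automatically on every exit path -- no matching delete[] needed.
    std::unique_ptr<float[]> float_input =
        std::make_unique_for_overwrite<float[]>(num_elems);
    MlasConvertHalfToFloatBuffer(p_input, float_input.get(), num_elems);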
17 changes: 10 additions & 7 deletions onnxruntime/core/providers/cpu/nn/layer_norm_impl.cc
@@ -87,10 +87,10 @@ void ComputeJob(
   float mean_square(0.0f);
 
   const size_t num_elems = static_cast<size_t>(norm_size);
-  std::unique_ptr<float[]> float_input = std::make_unique<float[]>(num_elems);
-  MlasConvertHalfToFloatBuffer(p_input, float_input.get(), num_elems);
+  float* float_input = new float[num_elems];
+  MlasConvertHalfToFloatBuffer(p_input, float_input, num_elems);
 
-  std::unique_ptr<float[]> float_output = std::make_unique<float[]>(num_elems);
+  float* float_output = new float[num_elems];
   for (size_t h = 0; h < num_elems; h++) {
     float_output[h] = float_input[h];
     mean += float_input[h];
@@ -104,10 +104,10 @@
     mean_square = sqrt(mean_square / norm_size - mean * mean + epsilon);
   }
 
-  float* float_scale = float_input.get();  // overwrite float_input with scale values, since they have the same size
+  float* float_scale = float_input;  // overwrite float_input with scale values, since they have the same size
   MlasConvertHalfToFloatBuffer(scale_data, float_scale, num_elems);
-  std::unique_ptr<float[]> float_bias = std::make_unique<float[]>(num_elems);
-  MlasConvertHalfToFloatBuffer(bias_data, float_bias.get(), num_elems);
+  float* float_bias = new float[num_elems];
+  MlasConvertHalfToFloatBuffer(bias_data, float_bias, num_elems);
   for (size_t h = 0; h < num_elems; h++) {
     if (simplified) {
       float_output[h] = float_output[h] / mean_square * float_scale[h];
@@ -117,8 +117,11 @@
       float_output[h] = (float_output[h] - mean) / mean_square * float_scale[h] + float_bias[h];
     }
   }
+  delete[] float_scale;  // also deletes float_input
+  delete[] float_bias;
 
-  MlasConvertFloatToHalfBuffer(float_output.get(), p_output, num_elems);
+  MlasConvertFloatToHalfBuffer(float_output, p_output, num_elems);
+  delete[] float_output;
 
   if (mean_data != nullptr) {
     // ONNX spec doesn't support 'double' for 'U' so when 'T' == double, 'U' == float and we need to narrow
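One trade-off the raw pointers reintroduce in both files (an observation, not something the commit discusses): if anything throws between a `new float[...]` and its matching `delete[]`, the buffer leaks. A pre-C++20 way to keep the uninitialized allocation while restoring leak safety is to hand the raw buffer to a `unique_ptr` immediately. A sketch, again using the diff's names:

    #include <memory>

    // `new float[n]` still skips the zero-fill; the unique_ptr merely adopts
    // the pointer, so there is no extra initialization cost, but the delete[]
    // bookkeeping and the leak-on-exception hazard go away.
    std::unique_ptr<float[]> float_input(new float[num_elems]);
    MlasConvertHalfToFloatBuffer(p_input, float_input.get(), num_elems);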
