diff --git a/onnxruntime/contrib_ops/cpu/skip_layer_norm.cc b/onnxruntime/contrib_ops/cpu/skip_layer_norm.cc
index 18e3d2ab29df3..4f3b49c0a7250 100644
--- a/onnxruntime/contrib_ops/cpu/skip_layer_norm.cc
+++ b/onnxruntime/contrib_ops/cpu/skip_layer_norm.cc
@@ -119,19 +119,17 @@ void ComputeJob(
   float mean(0.0f);
   float mean_square(0.0f);
   const size_t num_elems = static_cast<size_t>(hidden_size);
-
-  std::unique_ptr<float[]> float_input = std::make_unique<float[]>(num_elems);
-  MlasConvertHalfToFloatBuffer(p_input, float_input.get(), num_elems);
-
-  std::unique_ptr<float[]> float_skip = std::make_unique<float[]>(num_elems);
-  MlasConvertHalfToFloatBuffer(p_skip, float_skip.get(), num_elems);
-  std::unique_ptr<float[]> float_bias = nullptr;
+  float* float_input = new float[num_elems];
+  MlasConvertHalfToFloatBuffer(p_input, float_input, num_elems);
+  float* float_skip = new float[num_elems];
+  MlasConvertHalfToFloatBuffer(p_skip, float_skip, num_elems);
+  float* float_bias = nullptr;
   if (bias_data != nullptr) {
-    float_bias = std::make_unique<float[]>(num_elems);
-    MlasConvertHalfToFloatBuffer(bias_data, float_bias.get(), num_elems);
+    float_bias = new float[num_elems];
+    MlasConvertHalfToFloatBuffer(bias_data, float_bias, num_elems);
   }
 
-  std::unique_ptr<float[]> float_output = std::make_unique<float[]>(num_elems);
+  float* float_output = new float[num_elems];
 
   for (size_t h = 0; h < num_elems; h++) {
     float val = float_input[h] + float_skip[h];
@@ -143,9 +141,12 @@ void ComputeJob(
     mean += val;
     mean_square += val * val;
   }
+  if (float_bias != nullptr) {
+    delete[] float_bias;
+  }
 
   if (nullptr != p_skip_input_bias_add_output) {
-    MlasConvertFloatToHalfBuffer(float_output.get(), p_skip_input_bias_add_output, num_elems);
+    MlasConvertFloatToHalfBuffer(float_output, p_skip_input_bias_add_output, num_elems);
   }
 
   mean = mean / hidden_size;
@@ -155,9 +156,9 @@ void ComputeJob(
     mean_square = sqrt(mean_square / hidden_size - mean * mean + epsilon);
   }
 
-  float* float_gamma = float_input.get();  // overwrite float_input with gamma values, since they have the same size
+  float* float_gamma = float_input;  // overwrite float_input with gamma values, since they have the same size
   MlasConvertHalfToFloatBuffer(gamma_data, float_gamma, num_elems);
-  float* float_beta = float_skip.get();  // overwrite float_skip with beta values, since they have the same size
+  float* float_beta = float_skip;  // overwrite float_skip with beta values, since they have the same size
   MlasConvertHalfToFloatBuffer(beta_data, float_beta, num_elems);
   for (size_t h = 0; h < num_elems; h++) {
     if (simplified) {
@@ -168,8 +169,11 @@ void ComputeJob(
       float_output[h] = (float_output[h] - mean) / mean_square * float_gamma[h] + float_beta[h];
     }
   }
+  delete[] float_gamma;  // also deletes float_input
+  delete[] float_beta;   // also deletes float_skip
 
-  MlasConvertFloatToHalfBuffer(float_output.get(), p_output, num_elems);
+  MlasConvertFloatToHalfBuffer(float_output, p_output, num_elems);
+  delete[] float_output;
 }
 
 }  // namespace
diff --git a/onnxruntime/core/providers/cpu/nn/layer_norm_impl.cc b/onnxruntime/core/providers/cpu/nn/layer_norm_impl.cc
index 3654059cbe21b..1c40071d60f7c 100644
--- a/onnxruntime/core/providers/cpu/nn/layer_norm_impl.cc
+++ b/onnxruntime/core/providers/cpu/nn/layer_norm_impl.cc
@@ -87,10 +87,10 @@ void ComputeJob(
   float mean_square(0.0f);
   const size_t num_elems = static_cast<size_t>(norm_size);
 
-  std::unique_ptr<float[]> float_input = std::make_unique<float[]>(num_elems);
-  MlasConvertHalfToFloatBuffer(p_input, float_input.get(), num_elems);
+  float* float_input = new float[num_elems];
+  MlasConvertHalfToFloatBuffer(p_input, float_input, num_elems);
 
-  std::unique_ptr<float[]> float_output = std::make_unique<float[]>(num_elems);
+  float* float_output = new float[num_elems];
   for (size_t h = 0; h < num_elems; h++) {
     float_output[h] = float_input[h];
     mean += float_input[h];
@@ -104,10 +104,10 @@ void ComputeJob(
     mean_square = sqrt(mean_square / norm_size - mean * mean + epsilon);
   }
 
-  float* float_scale = float_input.get();  // overwrite float_input with scale values, since they have the same size
+  float* float_scale = float_input;  // overwrite float_input with scale values, since they have the same size
   MlasConvertHalfToFloatBuffer(scale_data, float_scale, num_elems);
-  std::unique_ptr<float[]> float_bias = std::make_unique<float[]>(num_elems);
-  MlasConvertHalfToFloatBuffer(bias_data, float_bias.get(), num_elems);
+  float* float_bias = new float[num_elems];
+  MlasConvertHalfToFloatBuffer(bias_data, float_bias, num_elems);
   for (size_t h = 0; h < num_elems; h++) {
     if (simplified) {
       float_output[h] = float_output[h] / mean_square * float_scale[h];
@@ -117,8 +117,11 @@ void ComputeJob(
      float_output[h] = (float_output[h] - mean) / mean_square * float_scale[h] + float_bias[h];
     }
   }
+  delete[] float_scale;  // also deletes float_input
+  delete[] float_bias;
 
-  MlasConvertFloatToHalfBuffer(float_output.get(), p_output, num_elems);
+  MlasConvertFloatToHalfBuffer(float_output, p_output, num_elems);
+  delete[] float_output;
 
   if (mean_data != nullptr) {
     // ONNX spec doesn't support 'double' for 'U' so when 'T' == double, 'U' == float and we need to narrow
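
Note (not part of the patch): the two allocation styles swapped above differ in standard C++ semantics, independent of this PR's intent. std::make_unique<float[]>(n) value-initializes (zero-fills) the buffer and releases it automatically, while new float[n] leaves the elements uninitialized and requires a matching delete[] on every exit path. The following standalone sketch only demonstrates that language-level difference; the names and the tiny main() are illustrative and do not come from the ONNX Runtime sources.

#include <cstddef>
#include <memory>

int main() {
  const std::size_t n = 8;

  // make_unique for arrays performs value-initialization: every element is 0.0f.
  std::unique_ptr<float[]> zeroed = std::make_unique<float[]>(n);
  float first_zeroed = zeroed[0];  // well-defined read: 0.0f
  // The buffer is released automatically when 'zeroed' goes out of scope.

  // new[] leaves float elements uninitialized, so write before reading.
  float* raw = new float[n];
  for (std::size_t i = 0; i < n; ++i) {
    raw[i] = 0.0f;
  }
  float first_raw = raw[0];  // defined only because of the loop above
  delete[] raw;              // manual release is required, as in the patch

  return static_cast<int>(first_zeroed + first_raw);  // keep the reads observable
}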