diff --git a/onnxruntime/contrib_ops/cpu/skip_layer_norm.cc b/onnxruntime/contrib_ops/cpu/skip_layer_norm.cc index 66ca8c4dfd37f..3a1ff01d870b3 100644 --- a/onnxruntime/contrib_ops/cpu/skip_layer_norm.cc +++ b/onnxruntime/contrib_ops/cpu/skip_layer_norm.cc @@ -118,19 +118,24 @@ void ComputeJob( float mean(0.0f); float mean_square(0.0f); - std::vector float_input(hidden_size); - MlasConvertHalfToFloatBuffer(p_input, &float_input[0], hidden_size); - std::vector float_skip(hidden_size); - MlasConvertHalfToFloatBuffer(p_skip, &float_skip[0], hidden_size); - std::vector float_bias; + const size_t num_elems = static_cast(hidden_size); + float* float_output = new float[num_elems]; + float* float_input = new float[num_elems]; + float* float_skip = new float[num_elems]; + float* float_gamma = new float[num_elems]; + float* float_beta = new float[num_elems]; + float* float_bias = nullptr; if (bias_data != nullptr) { - float_bias.resize(hidden_size); - MlasConvertHalfToFloatBuffer(bias_data, &float_bias[0], hidden_size); + float_bias = new float[num_elems]; + MlasConvertHalfToFloatBuffer(bias_data, float_bias, num_elems); } + MlasConvertFloatToHalfBuffer(float_output, p_output, num_elems); + MlasConvertHalfToFloatBuffer(p_input, float_input, num_elems); + MlasConvertHalfToFloatBuffer(p_skip, float_skip, num_elems); + MlasConvertHalfToFloatBuffer(gamma_data, float_gamma, num_elems); + MlasConvertHalfToFloatBuffer(beta_data, float_beta, num_elems); - std::vector float_output(hidden_size); - - for (decltype(hidden_size) h = 0; h < hidden_size; h++) { + for (size_t h = 0; h < num_elems; h++) { float val = float_input[h] + float_skip[h]; if (nullptr != bias_data) { @@ -143,7 +148,7 @@ void ComputeJob( } if (nullptr != p_skip_input_bias_add_output) { - MlasConvertFloatToHalfBuffer(&float_output[0], p_skip_input_bias_add_output, hidden_size); + MlasConvertFloatToHalfBuffer(float_output, p_skip_input_bias_add_output, num_elems); } mean = mean / hidden_size; @@ -153,12 +158,7 @@ void ComputeJob( mean_square = sqrt(mean_square / hidden_size - mean * mean + epsilon); } - std::vector float_gamma(hidden_size); - MlasConvertHalfToFloatBuffer(gamma_data, &float_gamma[0], hidden_size); - std::vector float_beta(hidden_size); - MlasConvertHalfToFloatBuffer(beta_data, &float_beta[0], hidden_size); - - for (decltype(hidden_size) h = 0; h < hidden_size; h++) { + for (size_t h = 0; h < num_elems; h++) { if (simplified) { float_output[h] = float_output[h] / mean_square * float_gamma[h]; } else if (nullptr == beta_data) { @@ -168,7 +168,15 @@ void ComputeJob( } } - MlasConvertFloatToHalfBuffer(&float_output[0], p_output, hidden_size); + MlasConvertFloatToHalfBuffer(float_output, p_output, num_elems); + delete[] float_output; + delete[] float_input; + delete[] float_skip; + delete[] float_gamma; + delete[] float_beta; + if (float_bias != nullptr) { + delete[] float_bias; + } } } // namespace diff --git a/onnxruntime/core/providers/cpu/nn/layer_norm_impl.cc b/onnxruntime/core/providers/cpu/nn/layer_norm_impl.cc index 32427b2c39db1..8787fc2cb8085 100644 --- a/onnxruntime/core/providers/cpu/nn/layer_norm_impl.cc +++ b/onnxruntime/core/providers/cpu/nn/layer_norm_impl.cc @@ -86,11 +86,17 @@ void ComputeJob( float mean(0.0f); float mean_square(0.0f); - std::vector float_input(norm_size); - MlasConvertHalfToFloatBuffer(p_input, &float_input[0], norm_size); - - std::vector float_output(norm_size); - for (int64_t h = 0; h < norm_size; h++) { + const size_t num_elems = static_cast(norm_size); + float* float_input = new float[num_elems]; + float* float_scale = new float[num_elems]; + float* float_bias = new float[num_elems]; + float* float_output = new float[num_elems]; + MlasConvertHalfToFloatBuffer(p_input, float_input, num_elems); + MlasConvertHalfToFloatBuffer(scale_data, float_scale, num_elems); + MlasConvertHalfToFloatBuffer(bias_data, float_bias, num_elems); + MlasConvertFloatToHalfBuffer(float_output, p_output, num_elems); + + for (size_t h = 0; h < num_elems; h++) { float_output[h] = float_input[h]; mean += float_input[h]; mean_square += float_input[h] * float_input[h]; @@ -103,12 +109,7 @@ void ComputeJob( mean_square = sqrt(mean_square / norm_size - mean * mean + epsilon); } - std::vector float_scale(norm_size); - MlasConvertHalfToFloatBuffer(scale_data, &float_scale[0], norm_size); - std::vector float_bias(norm_size); - MlasConvertHalfToFloatBuffer(bias_data, &float_bias[0], norm_size); - - for (int64_t h = 0; h < norm_size; h++) { + for (size_t h = 0; h < num_elems; h++) { if (simplified) { float_output[h] = float_output[h] / mean_square * float_scale[h]; } else if (nullptr == bias_data) { @@ -118,8 +119,6 @@ void ComputeJob( } } - MlasConvertFloatToHalfBuffer(&float_output[0], p_output, static_cast(norm_size)); - if (mean_data != nullptr) { // ONNX spec doesn't support 'double' for 'U' so when 'T' == double, 'U' == float and we need to narrow mean_data[task_idx] = MLFloat16(mean); @@ -128,6 +127,11 @@ void ComputeJob( if (inv_std_dev_data != nullptr) { inv_std_dev_data[task_idx] = MLFloat16(1 / mean_square); } + + delete[] float_input; + delete[] float_output; + delete[] float_scale; + delete[] float_bias; } } // namespace