From 3078acf1a62ef4ef40a9f833edb7a3a9e3038f7a Mon Sep 17 00:00:00 2001 From: Aditya Goel Date: Tue, 17 Oct 2023 21:39:03 +0100 Subject: [PATCH 1/8] RegexFullMatch operator --- cmake/deps.txt | 6 +-- .../providers/cpu/cpu_execution_provider.cc | 5 +++ .../core/providers/cpu/nn/regex_full_match.cc | 41 +++++++++++++++++++ .../core/providers/cpu/nn/regex_full_match.h | 17 ++++++++ .../providers/cpu/nn/regex_full_match_test.cc | 24 +++++++++++ .../onnx_backend_test_series_filters.jsonc | 3 -- 6 files changed, 90 insertions(+), 6 deletions(-) create mode 100644 onnxruntime/core/providers/cpu/nn/regex_full_match.cc create mode 100644 onnxruntime/core/providers/cpu/nn/regex_full_match.h create mode 100644 onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc diff --git a/cmake/deps.txt b/cmake/deps.txt index ff07803013071..5266626880bd2 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -26,9 +26,9 @@ flatbuffers;https://github.com/google/flatbuffers/archive/refs/tags/v1.12.0.zip; fp16;https://github.com/Maratyszcza/FP16/archive/0a92994d729ff76a58f692d3028ca1b64b145d91.zip;b985f6985a05a1c03ff1bb71190f66d8f98a1494 fxdiv;https://github.com/Maratyszcza/FXdiv/archive/63058eff77e11aa15bf531df5dd34395ec3017c8.zip;a5658f4036402dbca7cebee32be57fb8149811e1 google_benchmark;https://github.com/google/benchmark/archive/refs/tags/v1.7.0.zip;e97c368b176e8614e3f1bf13dd9abcf6a7ad9908 -google_nsync;https://github.com/google/nsync/archive/refs/tags/1.26.0.zip;5e7c00ef6bf5b787386fc040067903ec774e2752 -googletest;https://github.com/google/googletest/archive/530d5c8c84abd2a46f38583ee817743c9b3a42b4.zip;5e3a61db2aa975cfd0f97ba92c818744e7fa7034 -googlexnnpack;https://github.com/google/XNNPACK/archive/0da379fc4808f9601faef392352018c741c0f297.zip;663883491e380b628e0a5b162b5f2658032fae73 +google_nsync;https://github.com/google/nsync/archive/refs/tags/1.23.0.zip;f3233450cf7156fc0bedd1b0e884eddec264897c +googletest;https://github.com/google/googletest/archive/519beb0e52c842729b4b53731d27c0e0c32ab4a2.zip;4b3c37972e4c1bef1185d46f702082f8772ee73f +googlexnnpack;https://github.com/google/XNNPACK/archive/003c580e696a774afdc984996ee909b7c8d8128c.zip;9f192e3f15e1e37ae9c78d53eeea47e45c5eb31c json;https://github.com/nlohmann/json/archive/refs/tags/v3.10.5.zip;f257f8dc27c5b8c085dc887b40cddd18ae1f725c microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf368104cd22a87b4dd0c80228919bb2df3e2a14 microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5 diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc index f60c7ddac5c05..ca31e7f03ca6f 100644 --- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc +++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc @@ -989,6 +989,7 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, Float8E5M2FNUZ, IsNaN); #endif class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, IsInf); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, RegexFullMatch); // !!PLEASE READ BELOW!! Following that, add new entries above this comment @@ -2418,6 +2419,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { // Opset 20 BuildKernelCreateInfo, +<<<<<<< HEAD BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -2447,6 +2449,9 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, #endif BuildKernelCreateInfo, +======= + BuildKernelCreateInfo, +>>>>>>> 4f27bde807 (RegexFullMatch operator) }; for (auto& function_table_entry : function_table) { diff --git a/onnxruntime/core/providers/cpu/nn/regex_full_match.cc b/onnxruntime/core/providers/cpu/nn/regex_full_match.cc new file mode 100644 index 0000000000000..3c51d30b24313 --- /dev/null +++ b/onnxruntime/core/providers/cpu/nn/regex_full_match.cc @@ -0,0 +1,41 @@ +#include "regex_full_match.h" +#include "core/common/common.h" + +namespace onnxruntime { +ONNX_CPU_OPERATOR_KERNEL( + RegexFullMatch, + 20, + KernelDefBuilder() + .TypeConstraint("T1", DataTypeImpl::GetTensorType()) + .TypeConstraint("T2", DataTypeImpl::GetTensorType()), + RegexFullMatch); + +RegexFullMatch::RegexFullMatch(const OpKernelInfo& info) : OpKernel(info) { + ORT_ENFORCE(info.GetAttr("pattern", &pattern_).IsOK()); + ORT_ENFORCE(RE2(pattern_).ok(), "Invalid pattern: ", pattern_); +} + +Status RegexFullMatch::Compute(OpKernelContext* context) const { + RE2 re(pattern_); + const auto* input_tensor = context->Input(0); + if (nullptr == input_tensor) { + return Status(common::ONNXRUNTIME, common::FAIL, "Input count mismatch"); + } + auto* output_tensor = context->Output(0, input_tensor->Shape()); + if (nullptr == output_tensor) { + return Status(common::ONNXRUNTIME, common::FAIL, "Output count mismatch"); + } + const auto input_data = input_tensor->template DataAsSpan(); + auto output_data = output_tensor->template MutableDataAsSpan(); + const auto N = input_tensor->Shape().Size(); + auto output_iter = output_data.begin(); + auto input_iter = input_data.begin(); + while (input_iter != output_data.end()) { + *output_iter = RE2::FullMatch(*input_iter, re); + input_iter++; + output_iter++; + } + return Status::OK(); +} + +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/nn/regex_full_match.h b/onnxruntime/core/providers/cpu/nn/regex_full_match.h new file mode 100644 index 0000000000000..49290532a304e --- /dev/null +++ b/onnxruntime/core/providers/cpu/nn/regex_full_match.h @@ -0,0 +1,17 @@ +#pragma once + +#include "core/framework/op_kernel.h" +#include "re2/re2.h" + +namespace onnxruntime { + +class RegexFullMatch final : public OpKernel { + public: + explicit RegexFullMatch(const OpKernelInfo& info); + Status Compute(OpKernelContext* context) const override; + + private: + std::string pattern_; +}; + +} // namespace onnxruntime diff --git a/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc b/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc new file mode 100644 index 0000000000000..8d429fd8dfc09 --- /dev/null +++ b/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc @@ -0,0 +1,24 @@ +#include "gtest/gtest.h" +#include "test/providers/provider_test_utils.h" + +namespace onnxruntime { +namespace test { + +static void RunTest(const std::initializer_list& dims, const std::initializer_list& input, const std::string& pattern, const std::initializer_list& output) { + OpTester test("RegexFullMatch", 20, kOnnxDomain); + test.AddAttribute("pattern", pattern); + test.AddInput("Input", dims, input); + test.AddOutput("Output", dims, output); + test.Run(); +} + +TEST(RegexFullMatch, WebsiteMatch) { + RunTest({3, 1}, {"www.google.com", "www.facebook.com", "www.bbc.co.uk"}, R"(www\.[\w.-]+\.\bcom\b)", {true, true, false}); +} + +TEST(RegexFullMatch, EmailMatch) { + RunTest({2, 2}, {"account@gmail.com", "account@hotmail.com", "not email", "account@yahoo.com"}, R"((\W|^)[\w.\-]{0,25}@(yahoo|gmail)\.com(\W|$))", {true, false, false, true}); +} + +} // namespace test +} // namespace onnxruntime diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc index 3a13e39702904..3db497fa92315 100644 --- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc +++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc @@ -248,9 +248,6 @@ "^test_image_decoder_decode_pnm_rgb", "^test_image_decoder_decode_tiff_rgb", "^test_image_decoder_decode_webp_rgb", - "^test_regex_full_match_basic", - "^test_regex_full_match_email_domain", - "^test_regex_full_match_empty", "^test_string_concat_broadcasting", "^test_string_concat", "^test_string_concat_empty_string", From 331ef011a5b385d0edfba228df08fb1f31a8c06f Mon Sep 17 00:00:00 2001 From: Aditya Goel Date: Thu, 2 Nov 2023 10:19:16 +0000 Subject: [PATCH 2/8] Restore deps.txt --- cmake/deps.txt | 4 ++-- .../providers/cpu/cpu_execution_provider.cc | 3 --- .../core/providers/cpu/nn/regex_full_match.cc | 24 +++++++------------ .../core/providers/cpu/nn/regex_full_match.h | 5 +++- .../providers/cpu/nn/regex_full_match_test.cc | 22 ++++++++++++++++- 5 files changed, 36 insertions(+), 22 deletions(-) diff --git a/cmake/deps.txt b/cmake/deps.txt index 5266626880bd2..a1b785df2c654 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -26,8 +26,8 @@ flatbuffers;https://github.com/google/flatbuffers/archive/refs/tags/v1.12.0.zip; fp16;https://github.com/Maratyszcza/FP16/archive/0a92994d729ff76a58f692d3028ca1b64b145d91.zip;b985f6985a05a1c03ff1bb71190f66d8f98a1494 fxdiv;https://github.com/Maratyszcza/FXdiv/archive/63058eff77e11aa15bf531df5dd34395ec3017c8.zip;a5658f4036402dbca7cebee32be57fb8149811e1 google_benchmark;https://github.com/google/benchmark/archive/refs/tags/v1.7.0.zip;e97c368b176e8614e3f1bf13dd9abcf6a7ad9908 -google_nsync;https://github.com/google/nsync/archive/refs/tags/1.23.0.zip;f3233450cf7156fc0bedd1b0e884eddec264897c -googletest;https://github.com/google/googletest/archive/519beb0e52c842729b4b53731d27c0e0c32ab4a2.zip;4b3c37972e4c1bef1185d46f702082f8772ee73f +google_nsync;https://github.com/google/nsync/archive/refs/tags/1.26.0.zip;5e7c00ef6bf5b787386fc040067903ec774e2752 +googletest;https://github.com/google/googletest/archive/refs/tags/v1.14.0.zip;0ac421f2ec11af38b0fff0f1992184032731a8bc googlexnnpack;https://github.com/google/XNNPACK/archive/003c580e696a774afdc984996ee909b7c8d8128c.zip;9f192e3f15e1e37ae9c78d53eeea47e45c5eb31c json;https://github.com/nlohmann/json/archive/refs/tags/v3.10.5.zip;f257f8dc27c5b8c085dc887b40cddd18ae1f725c microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf368104cd22a87b4dd0c80228919bb2df3e2a14 diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc index ca31e7f03ca6f..75f6f8d2eddd5 100644 --- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc +++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc @@ -2419,7 +2419,6 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { // Opset 20 BuildKernelCreateInfo, -<<<<<<< HEAD BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -2449,9 +2448,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, #endif BuildKernelCreateInfo, -======= BuildKernelCreateInfo, ->>>>>>> 4f27bde807 (RegexFullMatch operator) }; for (auto& function_table_entry : function_table) { diff --git a/onnxruntime/core/providers/cpu/nn/regex_full_match.cc b/onnxruntime/core/providers/cpu/nn/regex_full_match.cc index 3c51d30b24313..9cf3b3b92c36a 100644 --- a/onnxruntime/core/providers/cpu/nn/regex_full_match.cc +++ b/onnxruntime/core/providers/cpu/nn/regex_full_match.cc @@ -1,4 +1,7 @@ -#include "regex_full_match.h" +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/providers/cpu/nn/regex_full_match.h" #include "core/common/common.h" namespace onnxruntime { @@ -10,28 +13,19 @@ ONNX_CPU_OPERATOR_KERNEL( .TypeConstraint("T2", DataTypeImpl::GetTensorType()), RegexFullMatch); -RegexFullMatch::RegexFullMatch(const OpKernelInfo& info) : OpKernel(info) { - ORT_ENFORCE(info.GetAttr("pattern", &pattern_).IsOK()); - ORT_ENFORCE(RE2(pattern_).ok(), "Invalid pattern: ", pattern_); +RegexFullMatch::RegexFullMatch(const OpKernelInfo& info) : OpKernel(info), re_{info.GetAttr("pattern")} { + ORT_ENFORCE(re_.ok(), "Invalid regex pattern: ", re_.pattern()); } Status RegexFullMatch::Compute(OpKernelContext* context) const { - RE2 re(pattern_); const auto* input_tensor = context->Input(0); - if (nullptr == input_tensor) { - return Status(common::ONNXRUNTIME, common::FAIL, "Input count mismatch"); - } - auto* output_tensor = context->Output(0, input_tensor->Shape()); - if (nullptr == output_tensor) { - return Status(common::ONNXRUNTIME, common::FAIL, "Output count mismatch"); - } const auto input_data = input_tensor->template DataAsSpan(); + auto* output_tensor = context->Output(0, input_tensor->Shape()); auto output_data = output_tensor->template MutableDataAsSpan(); - const auto N = input_tensor->Shape().Size(); auto output_iter = output_data.begin(); auto input_iter = input_data.begin(); - while (input_iter != output_data.end()) { - *output_iter = RE2::FullMatch(*input_iter, re); + while (input_iter != input_data.end()) { + *output_iter = RE2::FullMatch(*input_iter, re_); input_iter++; output_iter++; } diff --git a/onnxruntime/core/providers/cpu/nn/regex_full_match.h b/onnxruntime/core/providers/cpu/nn/regex_full_match.h index 49290532a304e..0d3f1f4b4b824 100644 --- a/onnxruntime/core/providers/cpu/nn/regex_full_match.h +++ b/onnxruntime/core/providers/cpu/nn/regex_full_match.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #pragma once #include "core/framework/op_kernel.h" @@ -11,7 +14,7 @@ class RegexFullMatch final : public OpKernel { Status Compute(OpKernelContext* context) const override; private: - std::string pattern_; + RE2 re_; }; } // namespace onnxruntime diff --git a/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc b/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc index 8d429fd8dfc09..0fb8a9a2afd53 100644 --- a/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc +++ b/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc @@ -1,6 +1,5 @@ #include "gtest/gtest.h" #include "test/providers/provider_test_utils.h" - namespace onnxruntime { namespace test { @@ -20,5 +19,26 @@ TEST(RegexFullMatch, EmailMatch) { RunTest({2, 2}, {"account@gmail.com", "account@hotmail.com", "not email", "account@yahoo.com"}, R"((\W|^)[\w.\-]{0,25}@(yahoo|gmail)\.com(\W|$))", {true, false, false, true}); } +TEST(RegexFullMatch, MultibyteMatch) { + RunTest({1, 2}, {"ä", "a"}, "ä", {true, false}); + RunTest({1,}, {"une cédille like in Besançon"}, R"(.*cédille.*)", {true,}); + RunTest({1,}, {"une cédille like in Besançon"}, R"(.*cedille.*)", {false,}); + RunTest({1,}, {"Mit freundlichen Grüßen"}, R"(.*Grüßen$)", {true,}); + RunTest({1,}, {"Mit freundlichen Grüßen"}, R"(.*Grußen$)", {false,}); + RunTest({3,}, {"HПонедельник", "Понедельник", "недельник"}, R"(^Понед.*)", {false, true, false,}); + RunTest({3,}, {"thank you", "どうもありがとうございます", "こんにちは世界"}, R"(^こんにちは世界.*)", {false, false, true,}); + RunTest({3,}, {"नमस्ते, आपसे मिलकर अच्छा लगा", "नमस्ते", "स्वागत एवं नमस्ते"}, R"(.+नमस्ते$)", {false, false, true,}); + RunTest({3,}, {"你好,你好吗?", "你好呀", "你好呀!"}, R"(^你好.*\?$)", {true, false, false,}); +} + +TEST(RegexFullMatch, InvalidPattern) { + std::cout << "TRYING RUN\n"; + OpTester test("RegexFullMatch", 20, kOnnxDomain); + test.AddAttribute("pattern", R"([a-z)"); + test.AddInput("Input", {1,}, {"abcdef",}); + test.AddOutput("Output", {1,}, {false,}); + test.Run(BaseTester::ExpectResult::kExpectFailure, "Invalid regex pattern: [a-z"); +} + } // namespace test } // namespace onnxruntime From c553f0a2b036cb1d37ecbb061d541debd916f088 Mon Sep 17 00:00:00 2001 From: Aditya Goel Date: Sun, 7 Jan 2024 23:59:29 +0000 Subject: [PATCH 3/8] Fix test --- onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc b/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc index 0fb8a9a2afd53..315f9aa695a9a 100644 --- a/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc +++ b/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc @@ -32,7 +32,6 @@ TEST(RegexFullMatch, MultibyteMatch) { } TEST(RegexFullMatch, InvalidPattern) { - std::cout << "TRYING RUN\n"; OpTester test("RegexFullMatch", 20, kOnnxDomain); test.AddAttribute("pattern", R"([a-z)"); test.AddInput("Input", {1,}, {"abcdef",}); From 0410a90a1368fc37275e16a9750e7e05dd65e8fb Mon Sep 17 00:00:00 2001 From: Aditya Goel Date: Mon, 8 Jan 2024 00:32:15 +0000 Subject: [PATCH 4/8] Add invalid utf8 test --- cmake/deps.txt | 4 ++-- .../test/providers/cpu/nn/regex_full_match_test.cc | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/cmake/deps.txt b/cmake/deps.txt index a1b785df2c654..ff07803013071 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -27,8 +27,8 @@ fp16;https://github.com/Maratyszcza/FP16/archive/0a92994d729ff76a58f692d3028ca1b fxdiv;https://github.com/Maratyszcza/FXdiv/archive/63058eff77e11aa15bf531df5dd34395ec3017c8.zip;a5658f4036402dbca7cebee32be57fb8149811e1 google_benchmark;https://github.com/google/benchmark/archive/refs/tags/v1.7.0.zip;e97c368b176e8614e3f1bf13dd9abcf6a7ad9908 google_nsync;https://github.com/google/nsync/archive/refs/tags/1.26.0.zip;5e7c00ef6bf5b787386fc040067903ec774e2752 -googletest;https://github.com/google/googletest/archive/refs/tags/v1.14.0.zip;0ac421f2ec11af38b0fff0f1992184032731a8bc -googlexnnpack;https://github.com/google/XNNPACK/archive/003c580e696a774afdc984996ee909b7c8d8128c.zip;9f192e3f15e1e37ae9c78d53eeea47e45c5eb31c +googletest;https://github.com/google/googletest/archive/530d5c8c84abd2a46f38583ee817743c9b3a42b4.zip;5e3a61db2aa975cfd0f97ba92c818744e7fa7034 +googlexnnpack;https://github.com/google/XNNPACK/archive/0da379fc4808f9601faef392352018c741c0f297.zip;663883491e380b628e0a5b162b5f2658032fae73 json;https://github.com/nlohmann/json/archive/refs/tags/v3.10.5.zip;f257f8dc27c5b8c085dc887b40cddd18ae1f725c microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf368104cd22a87b4dd0c80228919bb2df3e2a14 microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5 diff --git a/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc b/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc index 315f9aa695a9a..9b7962085b7e2 100644 --- a/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc +++ b/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc @@ -39,5 +39,13 @@ TEST(RegexFullMatch, InvalidPattern) { test.Run(BaseTester::ExpectResult::kExpectFailure, "Invalid regex pattern: [a-z"); } +TEST(RegexFullMatch, NonUtf8Pattern) { + uint8_t invalid_bytes[] = { 0xC0, 0xC1, 0x41, 0x42, 0xC3, 0x80, 0xC2, 0x80, 0xC2, 0xC3, 0xC4, 0x00 }; + OpTester test("RegexFullMatch", 20, kOnnxDomain); + test.AddAttribute("pattern", std::string((char*) invalid_bytes, sizeof(invalid_bytes))); + test.AddInput("Input", {1,}, {"abcd",}); + test.AddOutput("Output", {1,}, {false,}); + test.Run(BaseTester::ExpectResult::kExpectFailure, "Invalid regex pattern"); +} } // namespace test } // namespace onnxruntime From 3bea8b1dc696a0b252c2e12cb4c4c4739a044588 Mon Sep 17 00:00:00 2001 From: Aditya Goel Date: Tue, 9 Jan 2024 00:00:28 +0000 Subject: [PATCH 5/8] Fix test --- onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc b/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc index 9b7962085b7e2..70982af88b22e 100644 --- a/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc +++ b/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc @@ -21,8 +21,8 @@ TEST(RegexFullMatch, EmailMatch) { TEST(RegexFullMatch, MultibyteMatch) { RunTest({1, 2}, {"ä", "a"}, "ä", {true, false}); - RunTest({1,}, {"une cédille like in Besançon"}, R"(.*cédille.*)", {true,}); - RunTest({1,}, {"une cédille like in Besançon"}, R"(.*cedille.*)", {false,}); + RunTest({1,}, {"une cédille like in Besançon"}, R"(.*Besançon.*)", {true,}); + RunTest({1,}, {"une cédille like in Besançon"}, R"(.*Besancon.*)", {false,}); RunTest({1,}, {"Mit freundlichen Grüßen"}, R"(.*Grüßen$)", {true,}); RunTest({1,}, {"Mit freundlichen Grüßen"}, R"(.*Grußen$)", {false,}); RunTest({3,}, {"HПонедельник", "Понедельник", "недельник"}, R"(^Понед.*)", {false, true, false,}); From 92e291b9b4996d94bcaccf899b141a02506f2798 Mon Sep 17 00:00:00 2001 From: Aditya Goel Date: Tue, 9 Jan 2024 17:31:30 +0000 Subject: [PATCH 6/8] Lint test file --- .../providers/cpu/nn/regex_full_match_test.cc | 96 ++++++++++++++++--- 1 file changed, 82 insertions(+), 14 deletions(-) diff --git a/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc b/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc index 70982af88b22e..4aa5a0d44b678 100644 --- a/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc +++ b/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc @@ -21,30 +21,98 @@ TEST(RegexFullMatch, EmailMatch) { TEST(RegexFullMatch, MultibyteMatch) { RunTest({1, 2}, {"ä", "a"}, "ä", {true, false}); - RunTest({1,}, {"une cédille like in Besançon"}, R"(.*Besançon.*)", {true,}); - RunTest({1,}, {"une cédille like in Besançon"}, R"(.*Besancon.*)", {false,}); - RunTest({1,}, {"Mit freundlichen Grüßen"}, R"(.*Grüßen$)", {true,}); - RunTest({1,}, {"Mit freundlichen Grüßen"}, R"(.*Grußen$)", {false,}); - RunTest({3,}, {"HПонедельник", "Понедельник", "недельник"}, R"(^Понед.*)", {false, true, false,}); - RunTest({3,}, {"thank you", "どうもありがとうございます", "こんにちは世界"}, R"(^こんにちは世界.*)", {false, false, true,}); - RunTest({3,}, {"नमस्ते, आपसे मिलकर अच्छा लगा", "नमस्ते", "स्वागत एवं नमस्ते"}, R"(.+नमस्ते$)", {false, false, true,}); - RunTest({3,}, {"你好,你好吗?", "你好呀", "你好呀!"}, R"(^你好.*\?$)", {true, false, false,}); + RunTest({ + 1, + }, + {"une cédille like in Besançon"}, R"(.*Besançon.*)", { + true, + }); + RunTest({ + 1, + }, + {"une cédille like in Besançon"}, R"(.*Besancon.*)", { + false, + }); + RunTest({ + 1, + }, + {"Mit freundlichen Grüßen"}, R"(.*Grüßen$)", { + true, + }); + RunTest({ + 1, + }, + {"Mit freundlichen Grüßen"}, R"(.*Grußen$)", { + false, + }); + RunTest({ + 3, + }, + {"HПонедельник", "Понедельник", "недельник"}, R"(^Понед.*)", { + false, + true, + false, + }); + RunTest({ + 3, + }, + {"thank you", "どうもありがとうございます", "こんにちは世界"}, R"(^こんにちは世界.*)", { + false, + false, + true, + }); + RunTest({ + 3, + }, + {"नमस्ते, आपसे मिलकर अच्छा लगा", "नमस्ते", "स्वागत एवं नमस्ते"}, R"(.+नमस्ते$)", { + false, + false, + true, + }); + RunTest({ + 3, + }, + {"你好,你好吗?", "你好呀", "你好呀!"}, R"(^你好.*\?$)", { + true, + false, + false, + }); } TEST(RegexFullMatch, InvalidPattern) { OpTester test("RegexFullMatch", 20, kOnnxDomain); test.AddAttribute("pattern", R"([a-z)"); - test.AddInput("Input", {1,}, {"abcdef",}); - test.AddOutput("Output", {1,}, {false,}); + test.AddInput("Input", { + 1, + }, + { + "abcdef", + }); + test.AddOutput("Output", { + 1, + }, + { + false, + }); test.Run(BaseTester::ExpectResult::kExpectFailure, "Invalid regex pattern: [a-z"); } TEST(RegexFullMatch, NonUtf8Pattern) { - uint8_t invalid_bytes[] = { 0xC0, 0xC1, 0x41, 0x42, 0xC3, 0x80, 0xC2, 0x80, 0xC2, 0xC3, 0xC4, 0x00 }; + uint8_t invalid_bytes[] = {0xC0, 0xC1, 0x41, 0x42, 0xC3, 0x80, 0xC2, 0x80, 0xC2, 0xC3, 0xC4, 0x00}; OpTester test("RegexFullMatch", 20, kOnnxDomain); - test.AddAttribute("pattern", std::string((char*) invalid_bytes, sizeof(invalid_bytes))); - test.AddInput("Input", {1,}, {"abcd",}); - test.AddOutput("Output", {1,}, {false,}); + test.AddAttribute("pattern", std::string((char*)invalid_bytes, sizeof(invalid_bytes))); + test.AddInput("Input", { + 1, + }, + { + "abcd", + }); + test.AddOutput("Output", { + 1, + }, + { + false, + }); test.Run(BaseTester::ExpectResult::kExpectFailure, "Invalid regex pattern"); } } // namespace test From 7760bad9b80755b6d4ea7329ad37677c3dba336c Mon Sep 17 00:00:00 2001 From: Aditya Goel Date: Wed, 10 Jan 2024 11:32:40 +0000 Subject: [PATCH 7/8] Move to text directory --- onnxruntime/core/providers/cpu/{nn => text}/regex_full_match.cc | 2 +- onnxruntime/core/providers/cpu/{nn => text}/regex_full_match.h | 0 .../core/providers/cpu/{nn => text}/string_normalizer.cc | 0 onnxruntime/core/providers/cpu/{nn => text}/string_normalizer.h | 0 .../test/providers/cpu/{nn => text}/regex_full_match_test.cc | 0 .../test/providers/cpu/{nn => text}/string_normalizer_test.cc | 0 6 files changed, 1 insertion(+), 1 deletion(-) rename onnxruntime/core/providers/cpu/{nn => text}/regex_full_match.cc (95%) rename onnxruntime/core/providers/cpu/{nn => text}/regex_full_match.h (100%) rename onnxruntime/core/providers/cpu/{nn => text}/string_normalizer.cc (100%) rename onnxruntime/core/providers/cpu/{nn => text}/string_normalizer.h (100%) rename onnxruntime/test/providers/cpu/{nn => text}/regex_full_match_test.cc (100%) rename onnxruntime/test/providers/cpu/{nn => text}/string_normalizer_test.cc (100%) diff --git a/onnxruntime/core/providers/cpu/nn/regex_full_match.cc b/onnxruntime/core/providers/cpu/text/regex_full_match.cc similarity index 95% rename from onnxruntime/core/providers/cpu/nn/regex_full_match.cc rename to onnxruntime/core/providers/cpu/text/regex_full_match.cc index 9cf3b3b92c36a..cc4a5a9ae4e61 100644 --- a/onnxruntime/core/providers/cpu/nn/regex_full_match.cc +++ b/onnxruntime/core/providers/cpu/text/regex_full_match.cc @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/providers/cpu/nn/regex_full_match.h" +#include "regex_full_match.h" #include "core/common/common.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/cpu/nn/regex_full_match.h b/onnxruntime/core/providers/cpu/text/regex_full_match.h similarity index 100% rename from onnxruntime/core/providers/cpu/nn/regex_full_match.h rename to onnxruntime/core/providers/cpu/text/regex_full_match.h diff --git a/onnxruntime/core/providers/cpu/nn/string_normalizer.cc b/onnxruntime/core/providers/cpu/text/string_normalizer.cc similarity index 100% rename from onnxruntime/core/providers/cpu/nn/string_normalizer.cc rename to onnxruntime/core/providers/cpu/text/string_normalizer.cc diff --git a/onnxruntime/core/providers/cpu/nn/string_normalizer.h b/onnxruntime/core/providers/cpu/text/string_normalizer.h similarity index 100% rename from onnxruntime/core/providers/cpu/nn/string_normalizer.h rename to onnxruntime/core/providers/cpu/text/string_normalizer.h diff --git a/onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc b/onnxruntime/test/providers/cpu/text/regex_full_match_test.cc similarity index 100% rename from onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc rename to onnxruntime/test/providers/cpu/text/regex_full_match_test.cc diff --git a/onnxruntime/test/providers/cpu/nn/string_normalizer_test.cc b/onnxruntime/test/providers/cpu/text/string_normalizer_test.cc similarity index 100% rename from onnxruntime/test/providers/cpu/nn/string_normalizer_test.cc rename to onnxruntime/test/providers/cpu/text/string_normalizer_test.cc From c46e80b39bb1b3f6445733fa7af7ff9d72463728 Mon Sep 17 00:00:00 2001 From: Aditya Goel Date: Wed, 10 Jan 2024 23:07:52 +0000 Subject: [PATCH 8/8] Update docs --- docs/OperatorKernels.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md index f985cf10ded60..b026369e12c80 100644 --- a/docs/OperatorKernels.md +++ b/docs/OperatorKernels.md @@ -305,6 +305,7 @@ Do not modify directly.* |||[13, 17]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| |||[11, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| |||[1, 10]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| +|RegexFullMatch|*in* X:**T1**
*out* Y:**T2**|20+|**T1** = tensor(string)
**T2** = tensor(bool)| |Relu|*in* X:**T**
*out* Y:**T**|14+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int8)| |||13|**T** = tensor(double), tensor(float)| |||[6, 12]|**T** = tensor(double), tensor(float)|