Skip to content

Commit

Permalink
RegexFullMatch operator
Browse files Browse the repository at this point in the history
  • Loading branch information
adityagoel4512 committed Oct 17, 2023
1 parent 2efab54 commit 4f27bde
Show file tree
Hide file tree
Showing 8 changed files with 85 additions and 8 deletions.
2 changes: 1 addition & 1 deletion cgmanifests/generated/cgmanifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@
"component": {
"type": "git",
"git": {
"commitHash": "fdefbe85ed9c362b95b9b401cd19db068a76141f",
"commitHash": "0c296085f9f65f0f8ef7aec7b9eed55faf37dc40",
"repositoryUrl": "https://github.com/onnx/onnx.git"
},
"comments": "onnx"
Expand Down
6 changes: 3 additions & 3 deletions cmake/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ flatbuffers;https://github.com/google/flatbuffers/archive/refs/tags/v1.12.0.zip;
fp16;https://github.com/Maratyszcza/FP16/archive/0a92994d729ff76a58f692d3028ca1b64b145d91.zip;b985f6985a05a1c03ff1bb71190f66d8f98a1494
fxdiv;https://github.com/Maratyszcza/FXdiv/archive/63058eff77e11aa15bf531df5dd34395ec3017c8.zip;a5658f4036402dbca7cebee32be57fb8149811e1
google_benchmark;https://github.com/google/benchmark/archive/refs/tags/v1.7.0.zip;e97c368b176e8614e3f1bf13dd9abcf6a7ad9908
google_nsync;https://github.com/google/nsync/archive/refs/tags/1.26.0.zip;5e7c00ef6bf5b787386fc040067903ec774e2752
googletest;https://github.com/google/googletest/archive/refs/tags/v1.14.0.zip;0ac421f2ec11af38b0fff0f1992184032731a8bc
google_nsync;https://github.com/google/nsync/archive/refs/tags/1.23.0.zip;f3233450cf7156fc0bedd1b0e884eddec264897c
googletest;https://github.com/google/googletest/archive/519beb0e52c842729b4b53731d27c0e0c32ab4a2.zip;4b3c37972e4c1bef1185d46f702082f8772ee73f
googlexnnpack;https://github.com/google/XNNPACK/archive/003c580e696a774afdc984996ee909b7c8d8128c.zip;9f192e3f15e1e37ae9c78d53eeea47e45c5eb31c
json;https://github.com/nlohmann/json/archive/refs/tags/v3.10.5.zip;f257f8dc27c5b8c085dc887b40cddd18ae1f725c
microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf368104cd22a87b4dd0c80228919bb2df3e2a14
microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5
mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.1.1.zip;d5ee7d34223d0567892db5179849939c8769dc41
mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.82.0.zip;9bc9e01dffb64d9e0773b2e44d2f22c51aace063
onnx;https://github.com/onnx/onnx/archive/14303de049144035dfd94ace5f7a3b44773b1aad.zip;250eab9690392b248d75b56e605fb49eca373442
onnx;https://github.com/onnx/onnx/archive/0c296085f9f65f0f8ef7aec7b9eed55faf37dc40.zip;01ca9e955a03a9183e3d278e96f975f1a762cef1
#use the commit of supporting all the plugins and TRT 8.6-GA (https://github.com/onnx/onnx-tensorrt/commit/0462dc31ae78f48744b6141ae376df1f96d3f459)
onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/0462dc31ae78f48744b6141ae376df1f96d3f459.zip;5ff086361956cceb81ed17453a1fd8db2aa4328d
protobuf;https://github.com/protocolbuffers/protobuf/archive/refs/tags/v21.12.zip;7cf2733949036c7d52fda017badcab093fe73bfa
Expand Down
2 changes: 1 addition & 1 deletion cmake/external/onnx
Submodule onnx updated 956 files
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/cpu/cpu_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -960,6 +960,7 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, Sh

// Opset 20
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, ConstantOfShape);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, RegexFullMatch);

// !!PLEASE READ BELOW!! Following that, add new entries above this comment

Expand Down Expand Up @@ -2389,6 +2390,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {

// Opset 20
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, ConstantOfShape)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, RegexFullMatch)>,
};

for (auto& function_table_entry : function_table) {
Expand Down
37 changes: 37 additions & 0 deletions onnxruntime/core/providers/cpu/nn/regex_full_match.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#include "regex_full_match.h"
#include "core/common/common.h"

namespace onnxruntime {
// Kernel registration for the RegexFullMatch operator at opset 20.
// Type constraint "T1" is bound to std::string tensors and "T2" to bool tensors
// (presumably the op's input and output types respectively; confirm against the ONNX schema).
// The trailing RegexFullMatch argument is presumably the kernel class that implements the op.
ONNX_CPU_OPERATOR_KERNEL(
RegexFullMatch,
20,
KernelDefBuilder()
.TypeConstraint("T1", DataTypeImpl::GetTensorType<std::string>())
.TypeConstraint("T2", DataTypeImpl::GetTensorType<bool>()),
RegexFullMatch);

// Constructs the kernel from its node attributes.
//
// Reads the required string attribute "pattern" into pattern_ and validates it eagerly by
// compiling it with RE2, so that a malformed regular expression is rejected when the kernel is
// created rather than when it is first run.
//
// Fails via ORT_ENFORCE when the attribute cannot be read or when RE2 rejects the pattern. Both
// failure messages carry the underlying reason (the attribute status message, or RE2's own
// compile error) so the user can tell what is wrong with the model.
RegexFullMatch::RegexFullMatch(const OpKernelInfo& info) : OpKernel(info) {
  const auto attr_status = info.GetAttr<std::string>("pattern", &pattern_);
  ORT_ENFORCE(attr_status.IsOK(), "Failed to read attribute 'pattern': ", attr_status.ErrorMessage());

  // Compiled here only for validation; the compiled object is discarded afterwards.
  const RE2 compiled(pattern_);
  ORT_ENFORCE(compiled.ok(), "Invalid pattern: ", pattern_, " (", compiled.error(), ")");
}

// Evaluates the regex against every string of input 0 and writes one bool per element to
// output 0, which is allocated with the same shape as the input.
//
// Each output element is the result of RE2::FullMatch for the corresponding input string.
// Returns a FAIL status if the input or the output tensor is unavailable, Status::OK() otherwise.
Status RegexFullMatch::Compute(OpKernelContext* context) const {
  // NOTE(review): the pattern is compiled again on every call; holding a compiled RE2 as a
  // member would avoid the repeated work, but that requires a change to the class declaration.
  RE2 matcher(pattern_);

  const auto* strings_in = context->Input<Tensor>(0);
  if (strings_in == nullptr) {
    return Status(common::ONNXRUNTIME, common::FAIL, "Input count mismatch");
  }

  auto* flags_out = context->Output(0, strings_in->Shape());
  if (flags_out == nullptr) {
    return Status(common::ONNXRUNTIME, common::FAIL, "Output count mismatch");
  }

  const auto candidates = strings_in->template DataAsSpan<std::string>();
  auto verdicts = flags_out->template MutableDataAsSpan<bool>();

  // Element-wise: one match result per input string, in the same flat order.
  const auto element_count = strings_in->Shape().Size();
  int64_t idx = 0;
  while (idx < element_count) {
    verdicts[idx] = RE2::FullMatch(candidates[idx], matcher);
    ++idx;
  }

  return Status::OK();
}

} // namespace onnxruntime
17 changes: 17 additions & 0 deletions onnxruntime/core/providers/cpu/nn/regex_full_match.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once

#include "core/framework/op_kernel.h"
#include "re2/re2.h"

namespace onnxruntime {

// Kernel implementing the RegexFullMatch operator: for each string in the input tensor it
// reports whether the string fully matches a fixed regular expression (see regex_full_match.cc).
class RegexFullMatch final : public OpKernel {
public:
// Builds the kernel from `info`; the implementation reads the "pattern" attribute and
// rejects patterns that RE2 cannot compile.
explicit RegexFullMatch(const OpKernelInfo& info);
// Runs the operator once: reads input 0 (strings) and produces output 0 (bools) of the same shape.
Status Compute(OpKernelContext* context) const override;

private:
// Source text of the regular expression, taken from the "pattern" attribute at construction.
std::string pattern_;
};

} // namespace onnxruntime
24 changes: 24 additions & 0 deletions onnxruntime/test/providers/cpu/nn/regex_full_match_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#include "gtest/gtest.h"
#include "test/providers/provider_test_utils.h"

namespace onnxruntime {
namespace test {

static void RunTest(const std::initializer_list<int64_t>& dims, const std::initializer_list<std::string>& input, const std::string& pattern, const std::initializer_list<bool>& output) {
OpTester test("RegexFullMatch", 20, kOnnxDomain);
test.AddAttribute("pattern", pattern);
test.AddInput<std::string>("Input", dims, input);
test.AddOutput<bool>("Output", dims, output);
test.Run();
}

// Host names must start with "www." and end in ".com" to match; the ".co.uk" host must not.
TEST(RegexFullMatch, WebsiteMatch) {
  const std::string website_pattern = R"(www\.[\w.-]+\.\bcom\b)";
  RunTest({3, 1},
          {"www.google.com", "www.facebook.com", "www.bbc.co.uk"},
          website_pattern,
          {true, true, false});
}

// Only addresses at the gmail.com or yahoo.com domains match the pattern; an address at another
// domain and a string that is not an address at all must both be rejected.
// The inputs are concrete addresses: identical placeholder strings could never produce the
// differing expected results below.
TEST(RegexFullMatch, EmailMatch) {
  RunTest({2, 2},
          {"account@gmail.com", "account@hotmail.com", "not email", "account@yahoo.com"},
          R"((\W|^)[\w.\-]{0,25}@(yahoo|gmail)\.com(\W|$))",
          {true, false, false, true});
}

} // namespace test
} // namespace onnxruntime
Original file line number Diff line number Diff line change
Expand Up @@ -266,9 +266,6 @@
"^test_image_decoder_decode_pnm_rgb",
"^test_image_decoder_decode_tiff_rgb",
"^test_image_decoder_decode_webp_rgb",
"^test_regex_full_match_basic",
"^test_regex_full_match_email_domain",
"^test_regex_full_match_empty",
"^test_string_concat_broadcasting",
"^test_string_concat",
"^test_string_concat_empty_string",
Expand Down

0 comments on commit 4f27bde

Please sign in to comment.