diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md index 5b91e0f88ab9c..f4b1d570b0bbe 100644 --- a/docs/OperatorKernels.md +++ b/docs/OperatorKernels.md @@ -420,7 +420,7 @@ Do not modify directly.* |DictVectorizer|*in* X:**T1**
*out* Y:**T2**|1+|**T1** = map(int64,tensor(double)), map(int64,tensor(float)), map(int64,tensor(string)), map(string,tensor(double)), map(string,tensor(float)), map(string,tensor(int64))
**T2** = tensor(double), tensor(float), tensor(int64), tensor(string)| |FeatureVectorizer|*in* X:**T1**
*out* Y:**tensor(float)**|1+|**T1** = tensor(double), tensor(float), tensor(int32), tensor(int64)| |Imputer|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(int64)| -|LabelEncoder|*in* X:**T1**
*out* Y:**T2**|4+|**T1** = tensor(float), tensor(int64), tensor(string)
**T2** = tensor(float), tensor(int16), tensor(int64), tensor(string)| +|LabelEncoder|*in* X:**T1**
*out* Y:**T2**|4+|**T1** = tensor(double), tensor(float), tensor(int64), tensor(string)
**T2** = tensor(double), tensor(float), tensor(int16), tensor(int64), tensor(string)| |||[2, 3]|**T1** = tensor(float), tensor(int64), tensor(string)
**T2** = tensor(float), tensor(int64), tensor(string)| |||1|**T1** = tensor(int64), tensor(string)
**T2** = tensor(int64), tensor(string)| |LinearClassifier|*in* X:**T1**
*out* Y:**T2**
*out* Z:**tensor(float)**|1+|**T1** = tensor(double), tensor(float), tensor(int32), tensor(int64)
**T2** = tensor(int64), tensor(string)| diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc index ba1510e262d27..658b0f28c6acd 100644 --- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc +++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc @@ -2532,6 +2532,11 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMLDomain, 4, class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMLDomain, 4, string_string, LabelEncoder); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMLDomain, 4, float_float, LabelEncoder); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMLDomain, 4, string_int16, LabelEncoder); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMLDomain, 4, double_string, LabelEncoder); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMLDomain, 4, string_double, LabelEncoder); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMLDomain, 4, int64_double, LabelEncoder); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMLDomain, 4, double_int64, LabelEncoder); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMLDomain, 4, double_double, LabelEncoder); template <> KernelCreateInfo BuildKernelCreateInfo() { @@ -2656,6 +2661,16 @@ Status RegisterOnnxMLOperatorKernels(KernelRegistry& kernel_registry) { LabelEncoder)>, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, }; for (auto& function_table_entry : function_table) { diff --git a/onnxruntime/core/providers/cpu/ml/label_encoder.cc b/onnxruntime/core/providers/cpu/ml/label_encoder.cc index 3c8049511897c..bc7688e6c3363 100644 --- a/onnxruntime/core/providers/cpu/ml/label_encoder.cc +++ b/onnxruntime/core/providers/cpu/ml/label_encoder.cc @@ -328,5 +328,70 @@ void LabelEncoder_4::InitializeAttrFields(const OpKer default_value_ = static_cast(GetDefault(kernel_info, "", static_cast(-1))); }; +ONNX_CPU_OPERATOR_TYPED_ML_KERNEL(LabelEncoder, 4, double_double, + KernelDefBuilder() + .TypeConstraint("T1", + std::vector{DataTypeImpl::GetTensorType()}) + .TypeConstraint("T2", + std::vector{DataTypeImpl::GetTensorType()}), + LabelEncoder_4) + +template <> +void LabelEncoder_4::InitializeAttrFields(const OpKernelInfo& kernel_info) { + default_value_ = GetDefault(kernel_info, "default_float", -0.); +}; + +ONNX_CPU_OPERATOR_TYPED_ML_KERNEL( + LabelEncoder, 4, double_string, + KernelDefBuilder() + .TypeConstraint("T1", std::vector{DataTypeImpl::GetTensorType()}) + .TypeConstraint("T2", std::vector{DataTypeImpl::GetTensorType()}), + LabelEncoder_4) + +template <> +void LabelEncoder_4::InitializeAttrFields(const OpKernelInfo& kernel_info) { + value_field_name_ = "values_strings"; + default_value_ = GetDefault(kernel_info, "default_string", std::string("_Unused")); +}; + +ONNX_CPU_OPERATOR_TYPED_ML_KERNEL( + LabelEncoder, 4, string_double, + KernelDefBuilder() + .TypeConstraint("T1", std::vector{DataTypeImpl::GetTensorType()}) + .TypeConstraint("T2", std::vector{DataTypeImpl::GetTensorType()}), + LabelEncoder_4) + +template <> +void LabelEncoder_4::InitializeAttrFields(const OpKernelInfo& kernel_info) { + key_field_name_ = "keys_strings"; + default_value_ = GetDefault(kernel_info, "default_float", -0.); +}; + +ONNX_CPU_OPERATOR_TYPED_ML_KERNEL( + LabelEncoder, 4, double_int64, + KernelDefBuilder() + .TypeConstraint("T1", std::vector{DataTypeImpl::GetTensorType()}) + .TypeConstraint("T2", std::vector{DataTypeImpl::GetTensorType()}), + LabelEncoder_4) + +template <> +void LabelEncoder_4::InitializeAttrFields(const OpKernelInfo& kernel_info) { + value_field_name_ = "values_int64s"; + default_value_ = GetDefault(kernel_info, "default_int64", static_cast(-1)); +}; + +ONNX_CPU_OPERATOR_TYPED_ML_KERNEL( + LabelEncoder, 4, int64_double, + KernelDefBuilder() + .TypeConstraint("T1", std::vector{DataTypeImpl::GetTensorType()}) + .TypeConstraint("T2", std::vector{DataTypeImpl::GetTensorType()}), + LabelEncoder_4) + +template <> +void LabelEncoder_4::InitializeAttrFields(const OpKernelInfo& kernel_info) { + key_field_name_ = "keys_int64s"; + default_value_ = GetDefault(kernel_info, "default_float", -0.); +}; + } // namespace ml } // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/ml/label_encoder.h b/onnxruntime/core/providers/cpu/ml/label_encoder.h index 6a73c55a134de..da2cfdc9fd548 100644 --- a/onnxruntime/core/providers/cpu/ml/label_encoder.h +++ b/onnxruntime/core/providers/cpu/ml/label_encoder.h @@ -105,8 +105,7 @@ template std::vector GetAttribute(const OpKernelInfo& info, const std::string& name, const std::string& tensor_name) { if constexpr (std::is_same_v || std::is_same_v || std::is_same_v) { std::vector attrs; - auto result = info.GetAttrs(name, attrs); - if (result.IsOK()) { + if (info.GetAttrs(name, attrs).IsOK()) { return attrs; } } diff --git a/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc b/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc index ae03fc976bd99..bc98fbf0d5903 100644 --- a/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc +++ b/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc @@ -372,6 +372,46 @@ TEST(LabelEncoder, StringToFloatOpset4) { test.Run(); } +TEST(LabelEncoder, StringToDoubleOpset4) { + std::vector dims{1, 5}; + + std::vector input{"Hello", "world", "Random", "onnxruntime", "!"}; + std::vector output{0.1, 1.1231e30, -0.0, 2.718, 5.0}; + std::vector key_data{"Hello", "world", "onnxruntime", "!"}; + std::vector value_data{0.1, 1.1231e30, 2.718, 5.0}; + + OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); + + ONNX_NAMESPACE::TensorProto keys_proto; + keys_proto.set_name("keys_tensor"); + keys_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_STRING); + keys_proto.add_dims(key_data.size()); + for (const auto& key : key_data) { + keys_proto.add_string_data(key); + } + test.AddAttribute("keys_tensor", keys_proto); + + ONNX_NAMESPACE::TensorProto values_proto; + values_proto.set_name("values_tensor"); + values_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_DOUBLE); + values_proto.add_dims(value_data.size()); + for (const auto& value : value_data) { + values_proto.add_double_data(value); + } + test.AddAttribute("values_tensor", values_proto); + + ONNX_NAMESPACE::TensorProto default_proto; + default_proto.set_name("default_tensor"); + default_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_DOUBLE); + default_proto.add_dims(1); + default_proto.add_double_data(-0.0); + test.AddAttribute("default_tensor", default_proto); + test.AddInput("X", dims, input); + test.AddOutput("Y", dims, output); + + test.Run(); +} + TEST(LabelEncoder, TensorBasedAttributesOpset4) { std::vector dims{1, 5}; @@ -437,5 +477,39 @@ TEST(LabelEncoder, NaNsMappedTogetherOpset4) { test.Run(); } + +TEST(LabelEncoder, DoubleNaNsMappedTogetherOpset4) { + std::vector dims{1, 6}; + std::vector input{3.14, std::nan("1"), 2.718, std::nan("2"), 5.0, -1}; + std::vector output{"a", "ONNX", "b", "ONNX", "c", "onnxruntime"}; + std::vector key_data{3.14, 2.718, 5.0, std::nan("3")}; + std::vector value_data{"a", "b", "c", "ONNX"}; + + OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); + + ONNX_NAMESPACE::TensorProto keys_proto; + keys_proto.set_name("keys_tensor"); + keys_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_DOUBLE); + keys_proto.add_dims(key_data.size()); + for (const auto key : key_data) { + keys_proto.add_double_data(key); + } + test.AddAttribute("keys_tensor", keys_proto); + + test.AddAttribute("values_strings", value_data); + + ONNX_NAMESPACE::TensorProto default_proto; + default_proto.set_name("default_tensor"); + default_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_STRING); + default_proto.add_dims(1); + default_proto.add_string_data("onnxruntime"); + test.AddAttribute("default_tensor", default_proto); + + test.AddInput("X", dims, input); + test.AddOutput("Y", dims, output); + + test.Run(); +} + } // namespace test } // namespace onnxruntime