Skip to content

Commit

Permalink
do not mutate the ebm model during conversion
Browse files Browse the repository at this point in the history
Mutating the model's `bins_` in place prevented the ebm model from being used correctly after the conversion.
Fixes #16
  • Loading branch information
MainRo committed Jul 25, 2024
1 parent 54530c6 commit 7b7b3cc
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 19 deletions.
35 changes: 17 additions & 18 deletions ebm2onnx/convert.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from collections import namedtuple
from copy import deepcopy
from .utils import get_latest_opset_version
from ebm2onnx import graph
from ebm2onnx import ebm
Expand All @@ -10,7 +10,7 @@
from interpret.glassbox import ExplainableBoostingClassifier, ExplainableBoostingRegressor


onnx_type_for={
onnx_type_for = {
'bool': onnx.TensorProto.BOOL,
'float': onnx.TensorProto.FLOAT,
'double': onnx.TensorProto.DOUBLE,
Expand All @@ -23,6 +23,7 @@
'True': '1',
}


def infer_features_dtype(dtype, feature_name):
feature_dtype = onnx.TensorProto.DOUBLE
if dtype is not None:
Expand Down Expand Up @@ -94,7 +95,6 @@ def to_onnx(model, dtype, name="ebm",
target_opset = target_opset or get_latest_opset_version()
root = graph.create_graph()

class_index=0
inputs = [None for _ in model.feature_names_in_]
parts = []

Expand All @@ -103,14 +103,16 @@ def to_onnx(model, dtype, name="ebm",
for _ in range(interaction_count):
feature_types.append('interaction')

model_bins = deepcopy(model.bins_)

# first compute the score of each feature
for feature_index in range(len(model.term_names_)):
feature_name=model.term_names_[feature_index]
feature_type=feature_types[feature_index]
feature_group=model.term_features_[feature_index]
feature_name = model.term_names_[feature_index]
feature_type = feature_types[feature_index]
feature_group = model.term_features_[feature_index]

if feature_type == 'continuous':
bins = [-np.inf, -np.inf] + list(model.bins_[feature_group[0]][0])
bins = [-np.inf, -np.inf] + list(model_bins[feature_group[0]][0])
additive_terms = model.term_scores_[feature_index]

feature_dtype = infer_features_dtype(dtype, feature_name)
Expand All @@ -122,7 +124,7 @@ def to_onnx(model, dtype, name="ebm",
parts.append(part)

elif feature_type in ['nominal', 'ordinal']:
col_mapping = model.bins_[feature_group[0]][0]
col_mapping = model_bins[feature_group[0]][0]
additive_terms = model.term_scores_[feature_index]

feature_dtype = infer_features_dtype(dtype, feature_name)
Expand All @@ -133,8 +135,7 @@ def to_onnx(model, dtype, name="ebm",
bool_remap[k]: v
for k, v in col_mapping.items()
}
# replace inplace to re-use it in interactions
model.bins_[feature_group[0]][0] = col_mapping
model_bins[feature_group[0]][0] = col_mapping
if feature_dtype != onnx.TensorProto.STRING:
part = ops.cast(onnx.TensorProto.STRING)(part)
part = ops.flatten()(part)
Expand All @@ -156,13 +157,13 @@ def to_onnx(model, dtype, name="ebm",
# There may be one binning per interaction way or not.
# the rule is to use bins_ index if there is one binning available for the way count.
# otherwise, use the last binning for the feature
bin_index = -1 if way_count > len(model.bins_[i_feature_index]) else way_count - 1
bins = [-np.inf, -np.inf] + list(model.bins_[i_feature_index][bin_index])
bin_index = -1 if way_count > len(model_bins[i_feature_index]) else way_count - 1
bins = [-np.inf, -np.inf] + list(model_bins[i_feature_index][bin_index])
input = graph.strip_to_transients(inputs[i_feature_index])
i_parts.append(ebm.get_bin_index_on_continuous_value(bins)(input))

elif i_feature_type in ['nominal', 'ordinal']:
col_mapping = model.bins_[i_feature_index][0]
col_mapping = model_bins[i_feature_index][0]
input = graph.strip_to_transients(inputs[i_feature_index])
i_parts.append(ebm.get_bin_index_on_categorical_value(col_mapping)(input))

Expand All @@ -181,13 +182,13 @@ def to_onnx(model, dtype, name="ebm",
g = graph.merge(*parts)
if type(model) is ExplainableBoostingClassifier:
class_type = onnx.TensorProto.STRING if model.classes_.dtype.type is np.str_ else onnx.TensorProto.INT64
classes=model.classes_
classes = model.classes_
if class_type == onnx.TensorProto.STRING:
classes=[ c.encode("utf-8") for c in classes]
classes = [c.encode("utf-8") for c in classes]

g, scores_output_name = ebm.compute_class_score(model.intercept_, explain_name)(g)
g_scores = graph.strip_to_transients(g)
if len(model.classes_) == 2: # binary classification
if len(model.classes_) == 2: # binary classification
g = ebm.predict_class(
classes=classes, class_type=class_type,
binary=True, prediction_name=prediction_name
Expand Down Expand Up @@ -221,7 +222,5 @@ def to_onnx(model, dtype, name="ebm",
else:
raise NotImplementedError("{} models are not supported".format(type(model)))



model = graph.compile(g, target_opset, name=name)
return model
9 changes: 8 additions & 1 deletion tests/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def test_predict_regression_without_interactions(interactions, explain):

@pytest.mark.parametrize("explain", [False, True])
@pytest.mark.parametrize("interactions", [0, 2, [(0, 1, 2)], [(0, 1, 2, 3)]])
@pytest.mark.parametrize("old_th", [65, 0])
@pytest.mark.parametrize("old_th", [65, 35, 0])
def test_predict_binary_classification_with_categorical(interactions, explain, old_th):
model_ebm, x_test, y_test = train_titanic_binary_classification(
interactions=interactions,
Expand Down Expand Up @@ -199,6 +199,13 @@ def test_predict_binary_classification_with_categorical(interactions, explain, o

if explain is True:
assert len(pred_onnx) == 2
local_explain = model_ebm.explain_local(x_test, y_test)
for i in range(len(x_test)):
assert np.allclose(
local_explain.data(i)['scores'],
pred_onnx[1][i][:, 0]
)

assert np.allclose(pred_ebm, pred_onnx[0])


Expand Down

0 comments on commit 7b7b3cc

Please sign in to comment.