
Commit

using scikit-learn model instead of scikit-learn pipeline
maggiemhanna committed Jun 3, 2022
1 parent 1217954 commit b938f9c
Showing 2 changed files with 26 additions and 17 deletions.
9 changes: 4 additions & 5 deletions classical/aml-cli-v2/data-science/src/evaluate.py
@@ -205,18 +205,17 @@ def main():


# -------------------- Explainability ------------------- #
-tree_explainer = TabularExplainer(model.steps[-1][1],
+tabular_explainer = TabularExplainer(model,
initialization_examples=X_train,
-                  features=X_train.columns,
-                  transformations=model.steps[0][1])
+                  features=X_train.columns)

# save explainer
-#joblib.dump(tree_explainer, os.path.join(tree_explainer, "explainer"))
+#joblib.dump(tabular_explainer, os.path.join(tabular_explainer, "explainer"))

# find global explanations for feature importance
# you can use the training data or the test data here,
# but test data would allow you to use Explanation Exploration
-global_explanation = tree_explainer.explain_global(X_test)
+global_explanation = tabular_explainer.explain_global(X_test)

# sorted feature importance values and feature names
sorted_global_importance_values = global_explanation.get_ranked_global_values()
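In short, the explainer in evaluate.py now wraps the estimator directly rather than the pipeline's final step, so the `transformations=` argument is dropped. Below is a minimal, self-contained sketch of that usage pattern; the synthetic data, variable names, and the `interpret_community` import path are illustrative assumptions, not part of this commit.

```python
# Sketch: explain a plain scikit-learn model directly (no pipeline step to unwrap).
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from interpret_community import TabularExplainer  # assumed import path (interpret-community package)

# Illustrative stand-ins for X_train / X_test and the trained model.
X, y = make_regression(n_samples=200, n_features=4, random_state=0)
X = pd.DataFrame(X, columns=[f"f{i}" for i in range(4)])
model = RandomForestRegressor(random_state=0).fit(X, y)

# The model itself is passed, so no `transformations=` argument is needed.
tabular_explainer = TabularExplainer(model,
                                     initialization_examples=X,
                                     features=list(X.columns))

# Global feature importance, ranked by importance value.
global_explanation = tabular_explainer.explain_global(X)
print(global_explanation.get_ranked_global_names())
print(global_explanation.get_ranked_global_values())
```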
34 changes: 22 additions & 12 deletions classical/aml-cli-v2/data-science/src/train.py
@@ -131,15 +131,25 @@ def main():

# append regressor to preprocessing pipeline.
# now we have a full prediction pipeline.
-pipeline = Pipeline(steps=[('preprocessor', preprocessor),
-                           ('regressor', RandomForestRegressor(
-                               n_estimators = args.regressor__n_estimators,
-                               bootstrap = args.regressor__bootstrap,
-                               max_depth = args.regressor__max_depth,
-                               max_features = args.regressor__max_features,
-                               min_samples_leaf = args.regressor__min_samples_leaf,
-                               min_samples_split = args.regressor__min_samples_split,
-                               random_state=0))])

+#model = Pipeline(steps=[('preprocessor', preprocessor),
+#                  ('regressor', RandomForestRegressor(
+#                      n_estimators = args.regressor__n_estimators,
+#                      bootstrap = args.regressor__bootstrap,
+#                      max_depth = args.regressor__max_depth,
+#                      max_features = args.regressor__max_features,
+#                      min_samples_leaf = args.regressor__min_samples_leaf,
+#                      min_samples_split = args.regressor__min_samples_split,
+#                      random_state=0))])


+model = RandomForestRegressor(n_estimators = args.regressor__n_estimators,
+                              bootstrap = args.regressor__bootstrap,
+                              max_depth = args.regressor__max_depth,
+                              max_features = args.regressor__max_features,
+                              min_samples_leaf = args.regressor__min_samples_leaf,
+                              min_samples_split = args.regressor__min_samples_split,
+                              random_state=0)

mlflow.log_param("model", "RandomForestRegressor")
mlflow.log_param("n_estimators", args.regressor__n_estimators)
@@ -149,10 +159,10 @@ def main():
mlflow.log_param("min_samples_leaf", args.regressor__min_samples_leaf)
mlflow.log_param("min_samples_split", args.regressor__min_samples_split)

-pipeline.fit(X_train, y_train)
+model.fit(X_train, y_train)

# Predict using the Regression Model
-yhat_train = pipeline.predict(X_train)
+yhat_train = model.predict(X_train)

# Evaluate Regression performance with the train set
r2 = r2_score(y_train, yhat_train)
@@ -174,7 +184,7 @@ def main():
mlflow.log_artifact("regression_results.png")

# Save the model
-pickle.dump(pipeline, open((Path(args.model_output) / "model.pkl"), "wb"))
+pickle.dump(model, open((Path(args.model_output) / "model.pkl"), "wb"))

if __name__ == "__main__":
main()
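For reference, a minimal sketch of what the reworked train.py path amounts to: the regressor is fit and pickled directly, which means `X_train` has to arrive already numeric/preprocessed, since no preprocessor is bundled into the saved artifact. The synthetic data, placeholder hyperparameter values, and output path below are illustrative assumptions, not part of this commit.

```python
# Sketch: plain RandomForestRegressor instead of Pipeline(preprocessor, regressor).
import pickle
from pathlib import Path

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

rng = np.random.default_rng(0)
X_train = rng.normal(size=(200, 5))        # stand-in for already-preprocessed features
y_train = X_train @ rng.normal(size=5) + rng.normal(scale=0.1, size=200)

# Placeholder values standing in for the script's args.regressor__* CLI arguments.
model = RandomForestRegressor(n_estimators=100,
                              bootstrap=True,
                              max_depth=None,
                              max_features=1.0,
                              min_samples_leaf=1,
                              min_samples_split=2,
                              random_state=0)
model.fit(X_train, y_train)
print("train R2:", r2_score(y_train, model.predict(X_train)))

# Saved the same way the script does: a pickled estimator rather than a pickled pipeline.
model_output = Path(".")                   # hypothetical stand-in for args.model_output
pickle.dump(model, open(model_output / "model.pkl", "wb"))
```

One consequence of this design choice is that whatever produces `X_train` (and, later, scoring inputs) must apply the same preprocessing separately, since it is no longer captured inside the serialized model.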
