Added serving with mlflow
TopCoder2K committed Nov 30, 2023
1 parent a5c7bcf commit aff1f38
Showing 6 changed files with 140 additions and 53 deletions.
32 changes: 31 additions & 1 deletion README.md
@@ -80,7 +80,8 @@ poetry run python3 commands.py train --config_name cb_config
 ```
 
 _N.B. Do not forget to set `logging.mlflow.tracking_uri` before the launch. The logs are
-saved in the default directory: `mlruns`._
+saved in the default directory: `mlruns`. If you are using the standard MLflow server,
+then run it before the training with `poetry run mlflow ui`._
 
 ### Evaluation
 
@@ -90,3 +91,32 @@ If you want to infer a previously trained model, make sure you've placed the che
 ```
 poetry run python3 commands.py infer --config_name [config_name_without_extension]
 ```
+
+### Deployment
+
+In order to deploy a trained model, run:
+
+```
+poetry run mlflow models serve -p 5001 -m checkpoints/mlflow_[model_type]_ckpt/ --env-manager=local
+```
+
+where `[model_type]` is `cb` or `rf`.
+
+After this, it is possible to send requests to the model. I've created a script that
+generates a correct example JSON from the first row of the training set:
+
+```
+poetry run python3 create_example_request.py create_example_request
+```
+
+Send a request to the deployed model using the generated JSON:
+
+```
+curl http://127.0.0.1:5001/invocations -H 'Content-Type: application/json' -d @example_request.json
+```
+
+The model should reply with something like this:
+
+```
+{"predictions": [20.8]}
+```
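
For readers who prefer Python over curl, the same request can be sent with the `requests` library (an extra dependency, not part of this commit); a minimal sketch, assuming the server from the step above is listening on `127.0.0.1:5001`:

```
import json

import requests

# Load the payload produced by create_example_request.py
with open("example_request.json") as f:
    payload = json.load(f)

# POST it to the scoring endpoint of the served model
resp = requests.post("http://127.0.0.1:5001/invocations", json=payload, timeout=10)
print(resp.json())  # expected to be something like {"predictions": [20.8]}
```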
30 changes: 30 additions & 0 deletions create_example_request.py
@@ -0,0 +1,30 @@
+import json
+
+import fire
+import pandas as pd
+
+
+def create_example_request() -> None:
+    example_df = pd.DataFrame(
+        [
+            {
+                "season": "spring",
+                "month": 1,
+                "hour": 0,
+                "holiday": False,
+                "weekday": 6,
+                "workingday": False,
+                "weather": "clear",
+                "temp": 9.84,
+                "feel_temp": 14.395,
+                "humidity": 0.81,
+                "windspeed": 0.0,
+            }
+        ]
+    )  # This is the first row of the training split
+    with open("example_request.json", "w") as f:
+        json.dump({"dataframe_split": example_df.to_dict(orient="split")}, f)
+
+
+if __name__ == "__main__":
+    fire.Fire()
19 changes: 19 additions & 0 deletions example_request.json
@@ -0,0 +1,19 @@
+{
+    "dataframe_split": {
+        "index": [0],
+        "columns": [
+            "season",
+            "month",
+            "hour",
+            "holiday",
+            "weekday",
+            "workingday",
+            "weather",
+            "temp",
+            "feel_temp",
+            "humidity",
+            "windspeed"
+        ],
+        "data": [["spring", 1, 0, false, 6, false, "clear", 9.84, 14.395, 0.81, 0.0]]
+    }
+}
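
On the receiving side, MLflow rebuilds a `pandas.DataFrame` from the `dataframe_split` payload before passing it to the model. Conceptually (a simplified sketch, not the actual MLflow implementation), the round trip looks like this:

```
import json

import pandas as pd

with open("example_request.json") as f:
    payload = json.load(f)

# The "split" orientation keeps index, columns and data separate,
# so the frame can be rebuilt with the original column order intact
df = pd.DataFrame(**payload["dataframe_split"])
print(df)
```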
35 changes: 16 additions & 19 deletions mlopscourse/models/catboost.py
@@ -71,24 +71,21 @@ def __call__(self, X_sample: pd.DataFrame) -> np.ndarray:
         return self.model.predict(sample_data)
 
     def log_fis_and_metrics(self, exp_id: str, col_names: List[str]) -> None:
-        with mlflow.start_run(
-            experiment_id=exp_id, run_name=f"training-{self.cfg.model.name}"
-        ):
-            # Log the model's hyperparameters and the code version
-            mlflow.log_params(self.cfg.model.hyperparams)
-            mlflow.log_param("commit_id", self.cfg.logging.commit_id)
-            # Log feature importances
-            mlflow.log_metrics(
-                {
-                    f"fi_of_{col_name}": self.model.feature_importances_[i]
-                    for i, col_name in enumerate(col_names)
-                }
-            )
-            r2_scores = self.model.evals_result_["learn"]["R2"]
-            rmse_scores = self.model.evals_result_["learn"]["RMSE"]
-            assert len(r2_scores) == len(rmse_scores), "Something wrong with metrics!"
-            for i in range(len(r2_scores)):
-                mlflow.log_metrics(
-                    {"R2_metric": r2_scores[i], "RMSE_loss": rmse_scores[i]},
-                    step=i * self.cfg.model.hyperparams.metric_period,
-                )
+        # Log the model's hyperparameters and the code version
+        mlflow.log_params(self.cfg.model.hyperparams)
+        mlflow.log_param("commit_id", self.cfg.logging.commit_id)
+        # Log feature importances
+        mlflow.log_metrics(
+            {
+                f"fi_of_{col_name}": self.model.feature_importances_[i]
+                for i, col_name in enumerate(col_names)
+            }
+        )
+        r2_scores = self.model.evals_result_["learn"]["R2"]
+        rmse_scores = self.model.evals_result_["learn"]["RMSE"]
+        assert len(r2_scores) == len(rmse_scores), "Something wrong with metrics!"
+        for i in range(len(r2_scores)):
+            mlflow.log_metrics(
+                {"R2_metric": r2_scores[i], "RMSE_loss": rmse_scores[i]},
+                step=i * self.cfg.model.hyperparams.metric_period,
+            )
51 changes: 24 additions & 27 deletions mlopscourse/models/random_forest.py
@@ -59,33 +59,30 @@ def __call__(self, X_sample: pd.DataFrame) -> np.ndarray:
     def log_fis_and_metrics(
         self, exp_id: str, X_train: pd.DataFrame, y_train: pd.Series
     ) -> None:
-        with mlflow.start_run(
-            experiment_id=exp_id, run_name=f"training-{self.cfg.model.name}"
-        ):
-            # Log the model's hyperparameters and the code version
-            mlflow.log_params(self.cfg.model.hyperparams)
-            mlflow.log_param("commit_id", self.cfg.logging.commit_id)
-            # Log feature importances
-            mlflow.log_metrics(
-                {
-                    f"fi_of_{col_name}": self.model.named_steps[
-                        "randomforestregressor"
-                    ].feature_importances_[i]
-                    for i, col_name in enumerate(X_train.columns)
-                }
-            )
-            # Log R2 and RMSE metrics
-            for i in tqdm(range(0, self.cfg.model.hyperparams.n_estimators)):
-                model_i = make_pipeline(
-                    self.preprocessor, RandomForestRegressor(**self.cfg.model.hyperparams)
-                )
-                model_i.named_steps["randomforestregressor"].n_estimators = i + 1
-                model_i.fit(X_train, y_train)
-                y_pred = model_i.predict(X_train)
-                mlflow.log_metrics(
-                    {
-                        "R2_metric": r2_score(y_train, y_pred),
-                        "RMSE_metric": mean_squared_error(y_train, y_pred, squared=False),
-                    },
-                    step=i,
-                )
+        # Log the model's hyperparameters and the code version
+        mlflow.log_params(self.cfg.model.hyperparams)
+        mlflow.log_param("commit_id", self.cfg.logging.commit_id)
+        # Log feature importances
+        mlflow.log_metrics(
+            {
+                f"fi_of_{col_name}": self.model.named_steps[
+                    "randomforestregressor"
+                ].feature_importances_[i]
+                for i, col_name in enumerate(X_train.columns)
+            }
+        )
+        # Log R2 and RMSE metrics
+        for i in tqdm(range(0, self.cfg.model.hyperparams.n_estimators)):
+            model_i = make_pipeline(
+                self.preprocessor, RandomForestRegressor(**self.cfg.model.hyperparams)
+            )
+            model_i.named_steps["randomforestregressor"].n_estimators = i + 1
+            model_i.fit(X_train, y_train)
+            y_pred = model_i.predict(X_train)
+            mlflow.log_metrics(
+                {
+                    "R2_metric": r2_score(y_train, y_pred),
+                    "RMSE_metric": mean_squared_error(y_train, y_pred, squared=False),
+                },
+                step=i,
+            )
26 changes: 20 additions & 6 deletions mlopscourse/train.py
@@ -50,12 +50,26 @@ def train(self) -> None:
         mlflow_cfg = self.cfg.logging.mlflow
         mlflow.set_tracking_uri(mlflow_cfg.tracking_uri)
         exp_id = mlflow.set_experiment(mlflow_cfg.exp_name).experiment_id
-        # Unfortunately, logging is model dependent, at least because
-        # RandomForestRegressor doesn't provide the target metric progress.
-        if self.cfg.model.name == "cb":
-            model.log_fis_and_metrics(exp_id, X_train.columns)
-        else:
-            model.log_fis_and_metrics(exp_id, X_train, y_train)
+        with mlflow.start_run(
+            experiment_id=exp_id, run_name=f"training-{self.cfg.model.name}"
+        ):
+            signature = mlflow.models.infer_signature(X_train, y_train)
+            # Unfortunately, logging is model dependent, at least because
+            # RandomForestRegressor doesn't provide the target metric progress.
+            if self.cfg.model.name == "cb":
+                mlflow.catboost.save_model(
+                    model.model,
+                    f"checkpoints/mlflow_{self.cfg.model.name}_ckpt/",
+                    signature=signature,
+                )
+                model.log_fis_and_metrics(exp_id, X_train.columns)
+            else:
+                mlflow.sklearn.save_model(
+                    model.model,
+                    f"checkpoints/mlflow_{self.cfg.model.name}_ckpt/",
+                    signature=signature,
+                )
+                model.log_fis_and_metrics(exp_id, X_train, y_train)
 
 
 if __name__ == "__main__":
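
Because `train.py` now saves the checkpoint in MLflow format with an inferred signature, the model can also be loaded back in-process, without going through the REST server. A minimal sketch, assuming a CatBoost checkpoint has already been produced by a training run at `checkpoints/mlflow_cb_ckpt/`:

```
import mlflow.pyfunc
import pandas as pd

# Load the saved checkpoint as a generic pyfunc model
model = mlflow.pyfunc.load_model("checkpoints/mlflow_cb_ckpt/")

# The same example row that create_example_request.py serializes
example_df = pd.DataFrame(
    [
        {
            "season": "spring", "month": 1, "hour": 0, "holiday": False,
            "weekday": 6, "workingday": False, "weather": "clear",
            "temp": 9.84, "feel_temp": 14.395, "humidity": 0.81,
            "windspeed": 0.0,
        }
    ]
)
print(model.predict(example_df))  # expected to be close to [20.8]
```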
