From 55eef8ecd53399cd1903ccc7e683775b1f23ff4e Mon Sep 17 00:00:00 2001
From: Adrian Gonzalez-Martin
Date: Wed, 24 May 2023 11:57:27 +0100
Subject: [PATCH] Add support for `.ubj` models and improve XGBoost docs (#1168)

---
 runtimes/xgboost/README.md                   | 70 ++++++++++++++++++++
 runtimes/xgboost/mlserver_xgboost/xgboost.py |  2 +-
 runtimes/xgboost/tests/conftest.py           |  8 ++-
 runtimes/xgboost/tests/test_xgboost.py       |  9 +--
 4 files changed, 81 insertions(+), 8 deletions(-)

diff --git a/runtimes/xgboost/README.md b/runtimes/xgboost/README.md
index a00630fbd..cff6b104b 100644
--- a/runtimes/xgboost/README.md
+++ b/runtimes/xgboost/README.md
@@ -13,6 +13,37 @@ pip install mlserver mlserver-xgboost
 For further information on how to use MLServer with XGBoost, you can check out
 this [worked out example](../../docs/examples/xgboost/README.md).
 
+## XGBoost Artifact Type
+
+The XGBoost inference runtime will expect that your model is serialised via one
+of the following methods:
+
+| Extension | Docs                                                                                                                 | Example                            |
+| --------- | -------------------------------------------------------------------------------------------------------------------- | ---------------------------------- |
+| `*.json`  | [JSON Format](https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html#introduction-to-model-io)          | `booster.save_model("model.json")` |
+| `*.ubj`   | [Binary JSON Format](https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html#introduction-to-model-io)   | `booster.save_model("model.ubj")`  |
+| `*.bst`   | [(Old) Binary Format](https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html#introduction-to-model-io)  | `booster.save_model("model.bst")`  |
+
+````{note}
+By default, the runtime will look for a file called `model.[json | ubj | bst]`.
+However, this can be modified through the `parameters.uri` field of your
+{class}`ModelSettings <mlserver.settings.ModelSettings>` config (see the
+section on [Model Settings](../../docs/reference/model-settings.md) for more
+details).
+
+```{code-block} json
+---
+emphasize-lines: 3-5
+---
+{
+  "name": "foo",
+  "parameters": {
+    "uri": "./my-own-model-filename.json"
+  }
+}
+```
+````
+
 ## Content Types
 
 If no [content type](../../docs/user-guide/content-type) is present on the
@@ -21,3 +52,42 @@ request or metadata, the XGBoost runtime will try to decode the payload as a
 To avoid this, either send a different content type explicitly, or define the
 correct one as part of your
 [model's metadata](../../docs/reference/model-settings).
+
+## Model Outputs
+
+The XGBoost inference runtime exposes a number of outputs depending on the
+model type.
+These outputs correspond to the `predict` and `predict_proba` methods of the
+XGBoost model.
+
+| Output          | Returned By Default | Availability                                                           |
+| --------------- | ------------------- | ---------------------------------------------------------------------- |
+| `predict`       | ✅                  | Available on all XGBoost models.                                        |
+| `predict_proba` | ❌                  | Only available on non-regressor models (i.e. `XGBClassifier` models).   |
+
+By default, the runtime will only return the output of `predict`.
+However, you are able to control which outputs you want back through the
+`outputs` field of your {class}`InferenceRequest
+<mlserver.types.InferenceRequest>` payload.
+
+For example, to only return the model's `predict_proba` output, you could
+define a payload such as:
+
+```{code-block} json
+---
+emphasize-lines: 10-12
+---
+{
+  "inputs": [
+    {
+      "name": "my-input",
+      "datatype": "INT32",
+      "shape": [2, 2],
+      "data": [1, 2, 3, 4]
+    }
+  ],
+  "outputs": [
+    { "name": "predict_proba" }
+  ]
+}
+```
diff --git a/runtimes/xgboost/mlserver_xgboost/xgboost.py b/runtimes/xgboost/mlserver_xgboost/xgboost.py
index ba4f0f44d..9e97fe132 100644
--- a/runtimes/xgboost/mlserver_xgboost/xgboost.py
+++ b/runtimes/xgboost/mlserver_xgboost/xgboost.py
@@ -18,7 +18,7 @@
 PREDICT_PROBA_OUTPUT = "predict_proba"
 VALID_OUTPUTS = [PREDICT_OUTPUT, PREDICT_PROBA_OUTPUT]
 
-WELLKNOWN_MODEL_FILENAMES = ["model.bst", "model.json"]
+WELLKNOWN_MODEL_FILENAMES = ["model.bst", "model.json", "model.ubj"]
 
 
 def _load_sklearn_interface(model_uri: str) -> XGBModel:
diff --git a/runtimes/xgboost/tests/conftest.py b/runtimes/xgboost/tests/conftest.py
index e7525332c..13272290c 100644
--- a/runtimes/xgboost/tests/conftest.py
+++ b/runtimes/xgboost/tests/conftest.py
@@ -9,6 +9,7 @@
 from mlserver.utils import install_uvloop_event_loop
 
 from mlserver_xgboost import XGBoostModel
+from mlserver_xgboost.xgboost import WELLKNOWN_MODEL_FILENAMES
 
 TESTS_PATH = os.path.dirname(__file__)
 TESTDATA_PATH = os.path.join(TESTS_PATH, "testdata")
@@ -23,15 +24,16 @@ def event_loop():
     loop.close()
 
 
-@pytest.fixture
-def model_uri(tmp_path) -> str:
+@pytest.fixture(params=WELLKNOWN_MODEL_FILENAMES)
+def model_uri(request, tmp_path) -> str:
     n = 4
     d = 3
 
     dtrain = xgb.DMatrix(data=np.random.rand(n, d), label=np.random.rand(n))
     bst = xgb.train(params={}, dtrain=dtrain)
 
-    model_uri = os.path.join(tmp_path, "xgboost-model.json")
+    _, ext = os.path.splitext(request.param)
+    model_uri = os.path.join(tmp_path, f"xgboost-model{ext}")
     bst.save_model(model_uri)
 
     return model_uri
diff --git a/runtimes/xgboost/tests/test_xgboost.py b/runtimes/xgboost/tests/test_xgboost.py
index 39847d08d..748cb74a7 100644
--- a/runtimes/xgboost/tests/test_xgboost.py
+++ b/runtimes/xgboost/tests/test_xgboost.py
@@ -11,7 +11,6 @@
 from mlserver_xgboost import XGBoostModel
 from mlserver_xgboost.xgboost import (
-    WELLKNOWN_MODEL_FILENAMES,
     PREDICT_OUTPUT,
     PREDICT_PROBA_OUTPUT,
 )
 
@@ -27,13 +26,15 @@ def test_load_classifier(classifier: XGBoostModel):
     assert type(classifier._model) == xgb.XGBClassifier
 
 
-@pytest.mark.parametrize("fname", WELLKNOWN_MODEL_FILENAMES)
-async def test_load_folder(fname, model_uri: str, model_settings: ModelSettings):
+async def test_load_folder(model_uri: str, model_settings: ModelSettings):
+    # Rename `xgboost-model.[ext]` to `model.[ext]`
+    _, ext = os.path.splitext(model_uri)
+    fname = f"model{ext}"
     model_folder = os.path.dirname(model_uri)
     model_path = os.path.join(model_folder, fname)
     os.rename(model_uri, model_path)
 
-    model_settings.parameters.uri = model_path  # type: ignore
+    model_settings.parameters.uri = model_folder  # type: ignore
 
     model = XGBoostModel(model_settings)
     model.ready = await model.load()
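
For anyone trying this change out locally, below is a minimal usage sketch (not part of the patch): it trains a toy `XGBClassifier`, saves it in the Binary JSON format under the default `model.ubj` filename that the runtime now discovers, and writes a basic `model-settings.json`. It assumes `mlserver`, `mlserver-xgboost` with this change, and XGBoost >= 1.6 (which introduced UBJSON) are installed; the folder name `my-xgboost-model` and the settings values are illustrative.

```python
# Usage sketch (not part of the patch): save an XGBoost model as `.ubj` and
# prepare a folder that MLServer's XGBoost runtime can serve. Paths and
# settings values are illustrative assumptions.
import json
import os

import numpy as np
from xgboost import XGBClassifier

model_dir = "my-xgboost-model"  # hypothetical model folder
os.makedirs(model_dir, exist_ok=True)

# Fit a small classifier on random data.
X = np.random.rand(16, 3)
y = np.random.randint(2, size=16)
clf = XGBClassifier(n_estimators=5)
clf.fit(X, y)

# `model.ubj` is one of the well-known filenames the runtime now looks for,
# so no explicit `parameters.uri` is needed.
clf.save_model(os.path.join(model_dir, "model.ubj"))

# Minimal model-settings.json pointing MLServer at the XGBoost runtime.
model_settings = {
    "name": "my-xgboost-model",
    "implementation": "mlserver_xgboost.XGBoostModel",
}
with open(os.path.join(model_dir, "model-settings.json"), "w") as settings_file:
    json.dump(model_settings, settings_file, indent=2)
```

Running `mlserver start .` from the directory containing `my-xgboost-model` should then pick the model up. UBJSON stores the same model schema as the `.json` format but in a more compact binary encoding, which is why XGBoost recommends it over the legacy `.bst` format.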