Almost finished the third hw3 (perf analysis is light)

TopCoder2K · Dec 23, 2023 · 6bbaccd · 6bbaccd
1 parent faf0f1d
commit 6bbaccd
Show file tree

Hide file tree

Showing 11 changed files with 721 additions and 4 deletions.
diff --git a/.dvc/config b/.dvc/config
@@ -1,5 +1,6 @@
 [core]
     analytics = false
     remote = storage
+    autostage = true
 ['remote "storage"']
     url = gdrive://1fCTKCtocuLIhDQ5OaL8lQKtI8fPcBVFZ
diff --git a/README.md b/README.md
@@ -61,7 +61,7 @@ The command should download two .csv files from my
 [GDrive](https://drive.google.com/drive/folders/1fCTKCtocuLIhDQ5OaL8lQKtI8fPcBVFZ?usp=sharing)
 and place them inside the `mlopscourse/data/` directory.
 
-## Running experiments
+## Running Training and Evaluation
 
 ### Training
 
@@ -92,7 +92,7 @@ If you want to infer a previously trained model, make sure you've placed the che
 poetry run python3 commands.py infer --config_name [config_name_without_extension]
 ```
 
-### Deployment with MLflow
+## Deployment with MLflow
 
 **Warning! This feature works stably only with the CatBoost model.** Predictions of the
 onnx version of the Random Forest differ from the original one (see
@@ -127,5 +127,73 @@ curl http://127.0.0.1:5001/invocations -H 'Content-Type: application/json' -d @e
 The model should reply with something like this:
 
 ```
-{"predictions": [20.8]}
+{"predictions": [31.22848957148021]}
 ```
+
+## Deployment with Triton
+
+Since there are problems with the onnx version of the Random Forest model, this part is
+done only for the CatBoost model.
+
+### System configuration
+
+```
+OS:   Ubuntu 20.04.6 LTS
+CPU:  12th Gen Intel(R) Core(TM) i7-12700H
+vCPU: 10
+RAM:  15.29GiB
+```
+
+### Run deployment and test it
+
+Run the following to deploy the model:
+
+```
+docker build -t triton_with_catboost:latest mlopscourse/triton/
+docker run -it --rm --cpus 12 -v ./mlopscourse/triton/model_repository:/models -v ./mlopscourse/triton/assets:/assets -p 8000:8000 -p 8001:8001 -p 8002:8002 triton_with_catboost:latest
+(From now on, you are inside the container)
+cd mlops-course
+tritonserver --model-repository /models
+```
+
+Test the model:
+
+```
+poetry run python3 mlopscourse/triton/client.py
+```
+
+The client compares the predicted output against a hardcoded value. On success, it should print:
+
+```
+Predicted: 31.22848957148021
+The test is passed!
+```
+
+### Optimization
+
+Without any optimizations:
+
+```
+Inferences/Second vs. Client Average Batch Latency
+Concurrency: 1, throughput: 674.924 infer/sec, latency 1480 usec
+Concurrency: 2, throughput: 861.473 infer/sec, latency 2320 usec
+Concurrency: 3, throughput: 861.696 infer/sec, latency 3480 usec
+Concurrency: 4, throughput: 841.59 infer/sec, latency 4751 usec
+Concurrency: 5, throughput: 839.948 infer/sec, latency 5951 usec
+```
+
+With dynamic batching (`{ max_queue_delay_microseconds: 500 }`):
+
+```
+Inferences/Second vs. Client Average Batch Latency
+Concurrency: 1, throughput: 291.835 infer/sec, latency 3424 usec
+Concurrency: 2, throughput: 588.008 infer/sec, latency 3400 usec
+Concurrency: 3, throughput: 860.309 infer/sec, latency 3485 usec
+Concurrency: 4, throughput: 1118.63 infer/sec, latency 3574 usec
+Concurrency: 5, throughput: 1365.42 infer/sec, latency 3661 usec
+```
+
+and half the CPU usage!
+
+With `{ max_queue_delay_microseconds: 2000 }` and `{ max_queue_delay_microseconds: 1000 }`
+I got worse results.
diff --git a/mlopscourse/triton/Dockerfile b/mlopscourse/triton/Dockerfile
@@ -0,0 +1,8 @@
+FROM nvcr.io/nvidia/tritonserver:23.12-py3
+
+COPY requirements.txt .
+RUN pip3 install -r requirements.txt --ignore-installed
+RUN git clone https://github.com/TopCoder2K/mlops-course.git
+
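+# NOTE: the exec-form ENTRYPOINT below is kept for reference only. Exec form does not run a
+# shell, so `cd` and `&&` would not be interpreted; start the server manually from bash instead.
+# ENTRYPOINT ["cd", "mlops-course", "&&", "tritonserver", "--model-repository", "/models", "--log-info", "1"]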
+ENTRYPOINT ["bash"]
diff --git a/mlopscourse/triton/assets/.gitignore b/mlopscourse/triton/assets/.gitignore
@@ -0,0 +1 @@
+/catboost.p
diff --git a/mlopscourse/triton/assets/catboost.p.dvc b/mlopscourse/triton/assets/catboost.p.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: 02c2243ee7ebf7a4c7f03203a2a76102
+  size: 2903484
+  hash: md5
+  path: catboost.p
diff --git a/mlopscourse/triton/client.py b/mlopscourse/triton/client.py
@@ -0,0 +1,65 @@
+import numpy as np
+from tritonclient.http import InferenceServerClient, InferInput, InferRequestedOutput
+from tritonclient.utils import np_to_triton_dtype
+
+
+def test_catboost_with_triton():
+    example = {
+        "season": "spring".encode("utf-8"),
+        "month": 1,
+        "hour": 0,
+        "holiday": 0,
+        "weekday": 6,
+        "workingday": 0,
+        "weather": "clear".encode("utf-8"),
+        "temp": 9.84,
+        "feel_temp": 14.395,
+        "humidity": 0.81,
+        "windspeed": 0.0,
+    }  # This is the first row of the training split
+    input_example = list()
+    for k, v in example.items():
+        if k in ["temp", "feel_temp", "humidity", "windspeed"]:
+            v = np.array(
+                [
+                    v,
+                ],
+                dtype=np.float32,
+            ).reshape(-1, 1)
+        elif k in ["month", "hour", "holiday", "weekday", "workingday"]:
+            v = np.array(
+                [
+                    v,
+                ],
+                dtype=np.int32,
+            ).reshape(-1, 1)
+        else:
+            v = np.array(
+                [
+                    v,
+                ]
+            ).reshape(-1, 1)
+        input_example.append(
+            InferInput(
+                name=k, shape=[1, 1], datatype=np_to_triton_dtype(v.dtype)
+            ).set_data_from_numpy(v)
+        )
+
+    client = InferenceServerClient(url="localhost:8000")
+    result = client.infer(
+        "catboost",
+        input_example,
+        outputs=[
+            InferRequestedOutput("prediction"),
+        ],
+    )
+    expected_pred = 31.22848957148021  # Is taken from the mlflow inference result
+    assert (
+        expected_pred == result.as_numpy("prediction")[0]
+    ), "Something is wrong with the inference :(("
+    print("Predicted:", result.as_numpy("prediction")[0])
+    print("The test is passed!")
+
+
+# Run the check when this file is executed directly as a script.
+if __name__ == "__main__":
+    test_catboost_with_triton()
diff --git a/mlopscourse/triton/model_repository/catboost/1/model.py b/mlopscourse/triton/model_repository/catboost/1/model.py
@@ -0,0 +1,72 @@
+import pickle
+from typing import Any, List
+
+import c_python_backend_utils as c_utils
+import numpy as np
+import pandas as pd
+import triton_python_backend_utils as pb_utils
+
+
+class TritonPythonModel:
+    def initialize(self, args):
+        with open(f"/assets/{args['model_name']}.p", "rb") as f:
+            self.model = pickle.load(f)
+
+    @staticmethod
+    def get_from_request_by_name(request: c_utils.InferenceRequest, name: str) -> Any:
+        return pb_utils.get_input_tensor_by_name(request, name).as_numpy().tolist()[0]
+
+    def execute(
+        self, requests: List[c_utils.InferenceRequest]
+    ) -> List[c_utils.InferenceResponse]:
+        reqs = list()
+        for request in requests:
+            reqs.append(
+                {
+                    "season": TritonPythonModel.get_from_request_by_name(
+                        request, "season"
+                    )[0].decode(),
+                    "weather": TritonPythonModel.get_from_request_by_name(
+                        request, "weather"
+                    )[0].decode(),
+                    "month": TritonPythonModel.get_from_request_by_name(request, "month")[
+                        0
+                    ],
+                    "hour": TritonPythonModel.get_from_request_by_name(request, "hour")[
+                        0
+                    ],
+                    "holiday": TritonPythonModel.get_from_request_by_name(
+                        request, "holiday"
+                    )[0],
+                    "weekday": TritonPythonModel.get_from_request_by_name(
+                        request, "weekday"
+                    )[0],
+                    "workingday": TritonPythonModel.get_from_request_by_name(
+                        request, "workingday"
+                    )[0],
+                    "temp": TritonPythonModel.get_from_request_by_name(request, "temp")[
+                        0
+                    ],
+                    "feel_temp": TritonPythonModel.get_from_request_by_name(
+                        request, "feel_temp"
+                    )[0],
+                    "humidity": TritonPythonModel.get_from_request_by_name(
+                        request, "humidity"
+                    )[0],
+                    "windspeed": TritonPythonModel.get_from_request_by_name(
+                        request, "windspeed"
+                    )[0],
+                }
+            )
+        preds = self.model(pd.DataFrame(reqs))
+
+        responses = list()
+        for pred in preds:
+            responses.append(
+                c_utils.InferenceResponse(
+                    output_tensors=[
+                        c_utils.Tensor("prediction", np.array(pred).reshape(1))
+                    ]
+                )
+            )
+        return responses
diff --git a/mlopscourse/triton/model_repository/catboost/config.pbtxt b/mlopscourse/triton/model_repository/catboost/config.pbtxt
@@ -0,0 +1,78 @@
+name: "catboost"
+backend: "python"
+max_batch_size: 1024
+
+input [
+    {
+        name: "season"
+        data_type: TYPE_STRING
+        dims: [ 1 ]
+    },
+    {
+        name: "weather"
+        data_type: TYPE_STRING
+        dims: [ 1 ]
+    },
+    {
+        name: "month"
+        data_type: TYPE_INT32
+        dims: [ 1 ]
+    },
+    {
+        name: "hour"
+        data_type: TYPE_INT32
+        dims: [ 1 ]
+    },
+    {
+        name: "holiday"
+        data_type: TYPE_INT32
+        dims: [ 1 ]
+    },
+    {
+        name: "weekday"
+        data_type: TYPE_INT32
+        dims: [ 1 ]
+    },
+    {
+        name: "workingday"
+        data_type: TYPE_INT32
+        dims: [ 1 ]
+    },
+    {
+        name: "temp"
+        data_type: TYPE_FP32
+        dims: [ 1 ]
+    },
+    {
+        name: "feel_temp"
+        data_type: TYPE_FP32
+        dims: [ 1 ]
+    },
+    {
+        name: "humidity"
+        data_type: TYPE_FP32
+        dims: [ 1 ]
+    },
+    {
+        name: "windspeed"
+        data_type: TYPE_FP32
+        dims: [ 1 ]
+    }
+]
+
+output [
+    {
+        name: "prediction"
+        data_type: TYPE_FP32
+        dims: [ 1 ]
+    }
+]
+
+instance_group [
+    {
+        count: 1
+        kind: KIND_CPU
+    }
+]
+
+dynamic_batching: { max_queue_delay_microseconds: 500 }
diff --git a/mlopscourse/triton/requirements.txt b/mlopscourse/triton/requirements.txt
@@ -0,0 +1,3 @@
+catboost==1.2.2
+mlflow==2.8.1
+omegaconf==2.3.0