Skip to content

Commit

Permalink
Adding basic handler cache tests
Browse files Browse the repository at this point in the history
Signed-off-by: Sachin Varghese <[email protected]>
  • Loading branch information
SachinVarghese committed Oct 20, 2023
1 parent b30c41f commit ffc5ae1
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 5 deletions.
9 changes: 9 additions & 0 deletions mlserver/cache/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,12 @@ async def lookup(self, key: str) -> str:
**This method should be overriden to implement your custom cache logic.**
"""
raise NotImplementedError("lookup() method not implemented")

async def size(self) -> int:
    """
    Return the number of entries currently held by the cache.

    **This method should be overridden to implement your custom cache logic.**
    """
    raise NotImplementedError("size() method not implemented")
5 changes: 4 additions & 1 deletion mlserver/cache/local/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def __init__(self, size=100):

async def insert(self, key: str, value: str):
    """
    Store ``value`` under ``key``, evicting the oldest entry on overflow.

    Entries are kept in insertion order, so when the cache grows past
    ``self.size_limit`` the first-inserted entry is discarded (FIFO).
    """
    self.cache[key] = value
    # size() is a coroutine function and must be awaited; without the await
    # the comparison below is `coroutine > int`, which raises TypeError.
    if await self.size() > self.size_limit:
        # The cache removes the first entry if it overflows (i.e. in FIFO order)
        self.cache.popitem(last=False)
    return None
Expand All @@ -19,3 +19,6 @@ async def lookup(self, key: str) -> str:
return self.cache[key]
else:
return ""

async def size(self) -> int:
    """Return the number of entries currently stored in the local cache."""
    entry_count = len(self.cache)
    return entry_count
5 changes: 4 additions & 1 deletion mlserver/handlers/dataplane.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class DataPlane:
def __init__(self, settings: Settings, model_registry: MultiModelRegistry):
self._settings = settings
self._model_registry = model_registry

self._response_cache = None
if settings.cache_enabled:
self._response_cache = self._create_response_cache()
self._inference_middleware = InferenceMiddlewares(
Expand Down Expand Up @@ -132,3 +132,6 @@ async def infer(

def _create_response_cache(self) -> ResponseCache:
    """Build the dataplane's response cache, sized from the server settings."""
    cache_size = self._settings.cache_size
    return LocalCache(size=cache_size)

def _get_response_cache(self) -> Optional[ResponseCache]:
    """Return the response cache instance, or ``None`` when caching is disabled."""
    return self._response_cache
17 changes: 16 additions & 1 deletion tests/handlers/test_dataplane.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from mlserver.errors import ModelNotReady
from mlserver.settings import ModelSettings, ModelParameters
from mlserver.types import MetadataTensor
from mlserver.types import MetadataTensor, InferenceResponse

from ..fixtures import SumModel

Expand Down Expand Up @@ -114,3 +114,18 @@ async def test_infer_generates_uuid(data_plane, sum_model, inference_request):

assert prediction.id is not None
assert prediction.id == str(uuid.UUID(prediction.id))


async def test_infer_response_cache(data_plane, sum_model, inference_request):
    """A cached inference response matches the live prediction for the same payload."""
    prediction = await data_plane.infer(
        payload=inference_request, name=sum_model.name, version=sum_model.version
    )
    response_cache = data_plane._get_response_cache()

    assert response_cache is not None
    # size() and lookup() are coroutine functions (async def on the cache
    # interface) and must be awaited — comparing the bare coroutine object
    # with `== 1` would always be False and the assert would never pass.
    assert await response_cache.size() == 1
    cached_payload = await response_cache.lookup(inference_request.json())
    cached_response = InferenceResponse.parse_raw(cached_payload)
    assert len(cached_response.outputs) == len(prediction.outputs)
4 changes: 3 additions & 1 deletion tests/testdata/model-settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,7 @@

"parameters": {
"version": "v1.2.3"
}
},

"cache_enabled": true
}
3 changes: 2 additions & 1 deletion tests/testdata/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@
"parallel_workers": 2,
"cors_settings": {
"allow_origins": ["*"]
}
},
"cache_enabled": true
}

0 comments on commit ffc5ae1

Please sign in to comment.