From b30c41f690076eb92d785e4268165ff7a36834aa Mon Sep 17 00:00:00 2001
From: Sachin Varghese
Date: Thu, 19 Oct 2023 10:03:41 -0400
Subject: [PATCH] minor code refactor

Signed-off-by: Sachin Varghese
---
 mlserver/handlers/dataplane.py | 22 ++++++++++------------
 mlserver/settings.py           |  6 +++---
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/mlserver/handlers/dataplane.py b/mlserver/handlers/dataplane.py
index bf085845c..250f2874a 100644
--- a/mlserver/handlers/dataplane.py
+++ b/mlserver/handlers/dataplane.py
@@ -30,7 +30,9 @@ class DataPlane:
     def __init__(self, settings: Settings, model_registry: MultiModelRegistry):
         self._settings = settings
         self._model_registry = model_registry
-        self._response_cache = self._create_response_cache()
+        self._response_cache: Optional[ResponseCache] = None
+        if settings.cache_enabled:
+            self._response_cache = self._create_response_cache()
         self._inference_middleware = InferenceMiddlewares(
             CloudEventsMiddleware(settings)
         )
@@ -91,8 +93,10 @@ async def infer(
             model=name, version=version
         ).count_exceptions()
 
-        with infer_duration, infer_errors:
+        if self._response_cache is not None:
             cache_key = payload.json()
+
+        with infer_duration, infer_errors:
             if payload.id is None:
                 payload.id = generate_uuid()
 
@@ -105,9 +109,8 @@ async def infer(
             # TODO: Make await optional for sync methods
             with model_context(model.settings):
                 if (
-                    self._settings.cache_enabled
+                    self._response_cache is not None
                     and model.settings.cache_enabled is not False
-                    and self._response_cache is not None
                 ):
                     cache_value = await self._response_cache.lookup(cache_key)
                     if cache_value != "":
@@ -115,7 +118,7 @@ async def infer(
                     else:
                         prediction = await model.predict(payload)
                         # ignore cache insertion error if any
-                        self._response_cache.insert(cache_key, prediction.json())
+                        await self._response_cache.insert(cache_key, prediction.json())
             else:
                 prediction = await model.predict(payload)
 
@@ -128,10 +131,5 @@ async def infer(
 
         return prediction
 
-    def _create_response_cache(self) -> Optional[ResponseCache]:
-        if self._settings.cache_enabled:
-            if self._settings.cache_size is None:
-                # Default cache size if caching is enabled
-                self._settings.cache_size = 100
-            return LocalCache(size=self._settings.cache_size)
-        return None
+    def _create_response_cache(self) -> ResponseCache:
+        return LocalCache(size=self._settings.cache_size)
diff --git a/mlserver/settings.py b/mlserver/settings.py
index 8fe1d42e2..e610efd68 100644
--- a/mlserver/settings.py
+++ b/mlserver/settings.py
@@ -241,10 +241,10 @@ class Config:
     _custom_metrics_server_settings: Optional[dict] = None
     _custom_grpc_server_settings: Optional[dict] = None
 
-    cache_enabled: Optional[bool] = None
+    cache_enabled: bool = False
     """Enable caching for the model predictions."""
 
-    cache_size: Optional[int] = None
+    cache_size: int = 100
     """Cache size to be used if caching is enabled."""
 
 
@@ -400,7 +400,7 @@ def version(self) -> Optional[str]:
     parameters: Optional[ModelParameters] = None
     """Extra parameters for each instance of this model."""
-    cache_enabled: Optional[bool] = None
+    cache_enabled: bool = False
     """Enable caching for a specific model. This parameter can be used to
     disable cache for a specific model, if the server level caching is enabled.
     If the server level caching is disabled, this parameter value will have no
     effect."""
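
A quick sketch of how the new settings defaults behave. This is illustrative
only: it assumes nothing beyond the field names shown in this patch, that
mlserver.settings.Settings is a pydantic settings class, and that no
MLSERVER_* environment overrides are set.

    from mlserver.settings import Settings

    # Caching is now opt-in: cache_enabled defaults to False, so
    # DataPlane.__init__ never calls _create_response_cache().
    settings = Settings()
    assert settings.cache_enabled is False

    # cache_size now defaults to 100 up front, replacing the lazy
    # "if cache_size is None: cache_size = 100" branch that the old
    # _create_response_cache() carried.
    assert settings.cache_size == 100

    # Enabling caching is a single flag; the size can be tuned with it.
    settings = Settings(cache_enabled=True, cache_size=500)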
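
After the refactor, infer() treats self._response_cache as the single source
of truth, and it relies on two behaviors of the cache object: lookup() returns
an empty string on a miss (hence the cache_value != "" check), and insert() is
awaitable. Below is a hypothetical stand-in that satisfies that contract;
TinyResponseCache is illustrative only, not MLServer's LocalCache.

    import asyncio
    from collections import OrderedDict

    class TinyResponseCache:
        """Illustrative LRU response cache: a miss returns an empty
        string, insert()/lookup() are coroutines, and the store is
        capped at `size` entries."""

        def __init__(self, size: int = 100):
            self._size = size
            self._store: "OrderedDict[str, str]" = OrderedDict()

        async def insert(self, key: str, value: str) -> None:
            self._store[key] = value
            self._store.move_to_end(key)  # mark as most recently used
            if len(self._store) > self._size:
                self._store.popitem(last=False)  # evict the oldest entry

        async def lookup(self, key: str) -> str:
            if key in self._store:
                self._store.move_to_end(key)  # refresh recency on a hit
                return self._store[key]
            # Empty string signals a miss, matching infer()'s check.
            return ""

    async def demo() -> None:
        cache = TinyResponseCache(size=2)
        await cache.insert("req-1", '{"outputs": []}')
        assert await cache.lookup("req-1") == '{"outputs": []}'
        assert await cache.lookup("req-2") == ""  # miss

    asyncio.run(demo())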