Add the file storage to the python api of llm kv cache. (#1848)

Fixes #1845

Signed-off-by: Ye Cao <[email protected]>
v6d-io · Mar 26, 2024 · 4cfbaa7 · 4cfbaa7
1 parent 6359ec9
commit 4cfbaa7
Show file tree

Hide file tree

Showing 4 changed files with 203 additions and 61 deletions.
diff --git a/python/vineyard/llm/__init__.py b/python/vineyard/llm/__init__.py
@@ -18,14 +18,15 @@
 
 from typing import List
 from typing import Tuple
+from typing import Union
 
 import numpy as np
 
 import torch
 from torch import dtype
 
-import vineyard
-
+from .config import FileCacheConfig
+from .config import VineyardCacheConfig
 from .llm_C import KVTensor
 from .llm_C import _generate
 
@@ -35,24 +36,19 @@ class KV_Cache: # pylint: disable=too-many-instance-attributes
 
  def __init__(
  self,
- socket: str,
+ cache_config: Union[VineyardCacheConfig, FileCacheConfig],
  tensor_bytes: int = 10,
  cache_capacity: int = 10,
  layer: int = 1,
  torch_size: torch.Size = None,
  dtype: dtype = None,
- block_size: int = 5,
- sync_interval: int = 3,
- llm_cache_sync_lock: str = "llmCacheSyncLock",
- llm_cache_object_name: str = "llm_cache_object",
- llm_ref_cnt_object_name: str = "llm_refcnt_object",
  **kwargs
  ):
  """Create a llm kv cache manager based on vineyard blob.
 
  Args:
- socket (str):
- The vineyard socket path.
+ cache_config (Union[VineyardCacheConfig, FileCacheConfig]):
+ The config of the kv cache, including vineyard cache and file cache.
  tensor_bytes (int, optional):
  The size of the kv cache tensor.
  Defaults to 10.
@@ -67,19 +63,13 @@ def __init__(
  dtype (dtype, optional):
  The dtype of the tensor. Defaults to None.
  e.g., torch.float32, torch.float64.
- block_size (int, optional):
- The block size of the kv cache. Defaults to 5.
- sync_interval (int, optional):
- The sync interval of the kv cache. Defaults to 3.
- llm_cache_sync_lock (str, optional):
- The name of the kv cache sync lock. Defaults to "llmCacheSyncLock".
- llm_cache_object_name (str, optional):
- The name of the kv cache object. Defaults to "llm_cache_object".
- llm_ref_cnt_object_name (str, optional):
- The name of the kv cache ref cnt object.
- Defaults to "llm_refcnt_object".
  """
- self.client = vineyard.connect(socket)
+ if not isinstance(cache_config, VineyardCacheConfig) and not isinstance(
+ cache_config, FileCacheConfig
+ ):
+ raise ValueError(
+ "The cache_config should be VineyardCacheConfig or FileCacheConfig."
+ )
  self.tensor_bytes = tensor_bytes
  self.cache_capacity = cache_capacity
  self.layer = layer
@@ -88,21 +78,12 @@ def __init__(
  self.tensor_dtype = dtype
  # the dtype of the numpy array of the tensor
  self.numpy_dtype = None
- self.block_size = block_size
- self.sync_interval = sync_interval
- self.llm_cache_sync_lock = llm_cache_sync_lock
- self.llm_cache_object_name = llm_cache_object_name
- self.llm_ref_cnt_object_name = llm_ref_cnt_object_name
+
  self.kv_cache_manager = _generate(
- ipc_client=self.client.ipc_client,
  tensor_bytes=tensor_bytes,
  cache_capacity=cache_capacity,
  layer=layer,
- block_size=block_size,
- sync_interval=sync_interval,
- llm_cache_sync_lock=llm_cache_sync_lock,
- llm_cache_object_name=llm_cache_object_name,
- llm_ref_cnt_object_name=llm_ref_cnt_object_name,
+ **cache_config.__dict__,
  **kwargs
  )
 

diff --git a/python/vineyard/llm/config.py b/python/vineyard/llm/config.py
@@ -0,0 +1,89 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2020-2023 Alibaba Group Holding Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import vineyard
+
+from .llm_C import FilesystemType
+
+
+class VineyardCacheConfig:
+ """Settings for an llm kv cache stored in vineyard; connects to vineyardd on creation."""
+
+ def __init__(
+ self,
+ socket: str,
+ block_size: int = 5,
+ sync_interval: int = 3,
+ llm_cache_sync_lock: str = "llmCacheSyncLock",
+ llm_cache_object_name: str = "llm_cache_object",
+ llm_ref_cnt_object_name: str = "llm_refcnt_object",
+ ):
+ """Connect to vineyardd and record the vineyard cache settings.
+
+ Args:
+ socket (str):
+ The ipc socket of the vineyardd instance (connected immediately).
+ block_size (int, optional):
+ The block size of the kv cache. Defaults to 5.
+ sync_interval (int, optional):
+ The sync interval of the kv cache. Defaults to 3.
+ llm_cache_sync_lock (str, optional):
+ The name of the kv cache sync lock. Defaults to "llmCacheSyncLock".
+ llm_cache_object_name (str, optional):
+ The name of the kv cache object. Defaults to "llm_cache_object".
+ llm_ref_cnt_object_name (str, optional):
+ The name of the kv cache ref cnt object.
+ Defaults to "llm_refcnt_object".
+ """
+ self.ipc_client = vineyard.connect(socket).ipc_client  # only the ipc client of the connection is kept
+ self.block_size = block_size
+ self.sync_interval = sync_interval
+ self.llm_cache_sync_lock = llm_cache_sync_lock
+ self.llm_cache_object_name = llm_cache_object_name
+ self.llm_ref_cnt_object_name = llm_ref_cnt_object_name
+
+
+class FileCacheConfig:
+ """Settings for an llm kv cache whose kv state is kept in files on a filesystem."""
+
+ def __init__(
+ self,
+ batch_size: int = 16,
+ split_number: int = 2,
+ root: str = "/tmp/vineyard/llm_cache",
+ filesystem_type: FilesystemType = FilesystemType.LOCAL,
+ ):
+ """Create a file cache config.
+
+ Args:
+ batch_size (int, optional):
+ Divide the token list into batches, each batch
+ contains batch_size tokens. Defaults to 16.
+ split_number (int, optional):
+ Split the hash value into a path of nested directories, e.g.,
+ split_number=2, hash value=123456 gives 12/34/56. Defaults to 2.
+ root (str, optional):
+ The root directory of the kv state files.
+ Defaults to "/tmp/vineyard/llm_cache".
+ filesystem_type (FilesystemType, optional):
+ The type of the filesystem. Defaults to FilesystemType.LOCAL.
+ """
+ self.batch_size = batch_size
+ self.split_number = split_number
+ self.root = root
+ self.filesystem_type = filesystem_type
diff --git a/python/vineyard/llm/kv_state_cache.cc b/python/vineyard/llm/kv_state_cache.cc
@@ -34,6 +34,10 @@ LLMKV create_llmkv_from_buffer(py::buffer buffer, size_t size) {
 PYBIND11_MODULE(llm_C, m) {
  m.doc() = "vineyard llm kv cache manager module";
 
+ pybind11::enum_<FilesystemType>(m, "FilesystemType")
+ .value("LOCAL", FilesystemType::LOCAL)
+ .export_values();
+
  py::class_<LLMKV>(m, "KVTensor")
  .def(py::init(&create_llmkv_from_buffer), py::arg("buffer"),
  py::arg("size"))
@@ -81,30 +85,50 @@ PYBIND11_MODULE(llm_C, m) {
  .def("close", [](KVStateCacheManager* self) { self->Close(); });
 
  m.def(
- "_generate",
- [](py::object ipc_client, int tensor_bytes, int cache_capacity, int layer,
- int block_size, int sync_interval, std::string llm_cache_sync_lock,
- std::string llm_cache_object_name,
- std::string llm_ref_cnt_object_name) -> py::object {
- std::shared_ptr<KVStateCacheManager> manager;
- VineyardCacheConfig config(tensor_bytes, cache_capacity, layer,
- block_size, sync_interval,
- llm_cache_sync_lock, llm_cache_object_name,
- llm_ref_cnt_object_name);
- Client& client = ipc_client.cast<Client&>();
- vineyard::Status status =
- vineyard::KVStateCacheManager::Make(client, manager, config);
- if (!status.ok()) {
- throw std::runtime_error(status.ToString());
- }
- return py::cast(manager);
- },
- py::arg("ipc_client"), py::arg("tensor_bytes") = 10,
- py::arg("cache_capacity") = 10, py::arg("layer") = 1,
- py::arg("block_size") = 5, py::arg("sync_interval") = 3,
- py::arg("llm_cache_sync_lock") = "llmCacheSyncLock",
- py::arg("llm_cache_object_name") = "llm_cache_object",
- py::arg("llm_ref_cnt_object_name") = "llm_refcnt_object");
+ "_generate",
+ [](py::object ipc_client, int tensor_bytes, int cache_capacity,
+ int layer, int block_size, int sync_interval,
+ std::string llm_cache_sync_lock, std::string llm_cache_object_name,
+ std::string llm_ref_cnt_object_name) -> py::object {
+ std::shared_ptr<KVStateCacheManager> manager;
+ VineyardCacheConfig config(tensor_bytes, cache_capacity, layer,
+ block_size, sync_interval,
+ llm_cache_sync_lock, llm_cache_object_name,
+ llm_ref_cnt_object_name);
+ Client& client = ipc_client.cast<Client&>();
+ vineyard::Status status =
+ vineyard::KVStateCacheManager::Make(client, manager, config);
+ if (!status.ok()) {
+ throw std::runtime_error(status.ToString());
+ }
+ return py::cast(manager);
+ },
+ py::arg("ipc_client"), py::arg("tensor_bytes") = 10,
+ py::arg("cache_capacity") = 10, py::arg("layer") = 1,
+ py::arg("block_size") = 5, py::arg("sync_interval") = 3,
+ py::arg("llm_cache_sync_lock") = "llmCacheSyncLock",
+ py::arg("llm_cache_object_name") = "llm_cache_object",
+ py::arg("llm_ref_cnt_object_name") = "llm_refcnt_object")
+ .def(
+ // Overload for the file-backed cache: takes no ipc client and builds
+ // the manager from a FileCacheConfig.
+ "_generate",
+ [](int tensor_bytes, int cache_capacity, int layer, int batch_size,
+ int split_number, std::string root,
+ FilesystemType filesystemType) -> py::object {
+ std::shared_ptr<KVStateCacheManager> manager;
+ FileCacheConfig config(tensor_bytes, cache_capacity, layer,
+ batch_size, split_number, root,
+ filesystemType);
+ vineyard::Status status =
+ vineyard::KVStateCacheManager::Make(manager, config);
+ if (!status.ok()) {
+ throw std::runtime_error(status.ToString());
+ }
+ return py::cast(manager);
+ },
+ py::arg("tensor_bytes") = 10, py::arg("cache_capacity") = 10,
+ py::arg("layer") = 1, py::arg("batch_size") = 16,
+ py::arg("split_number") = 2, py::arg("root") = "/tmp/vineyard/llm_cache",
+ py::arg("filesystem_type") = FilesystemType::LOCAL);
 }
 
 } // namespace vineyard
diff --git a/python/vineyard/llm/tests/test_llm.py b/python/vineyard/llm/tests/test_llm.py
@@ -19,18 +19,26 @@
 import torch
 
 from vineyard.llm import KV_Cache
+from vineyard.llm.config import FileCacheConfig
+from vineyard.llm.config import VineyardCacheConfig
 
 
-def test_kv_cache_update_and_query(vineyard_ipc_sockets):
- cache = KV_Cache(
+def test_kv_cache_update_and_query_on_blob(vineyard_ipc_sockets):
+ vineyard_cache_config = VineyardCacheConfig(
  socket=vineyard_ipc_sockets[0],
+ block_size=5,
+ sync_interval=3,
+ llm_cache_sync_lock="llmCacheSyncLock",
+ llm_cache_object_name="llm_cache_object",
+ llm_ref_cnt_object_name="llm_refcnt_object",
+ )
+ cache = KV_Cache(
+ cache_config=vineyard_cache_config,
  tensor_bytes=16, # should be the same as the nbytes of the tensor
  cache_capacity=10,
  layer=1,
  torch_size=torch.Size([2, 2]),
  dtype=torch.float32,
- block_size=5,
- sync_interval=3,
  )
 
  kv_cache_list = [
@@ -52,3 +60,43 @@ def test_kv_cache_update_and_query(vineyard_ipc_sockets):
  assert torch.equal(k_tensor, queried_k_tensor) and torch.equal(
  v_tensor, queried_v_tensor
  )
+
+
+def test_kv_cache_update_and_query_on_fs():
+ file_cache_config = FileCacheConfig(
+ batch_size=2,
+ split_number=2,
+ root="/tmp/vineyard/llm_cache",
+ )
+ cache = KV_Cache(
+ cache_config=file_cache_config,
+ tensor_bytes=10000, # nbytes of one 50x50 float32 tensor (2500 elements * 4 bytes)
+ cache_capacity=10,
+ layer=2,
+ torch_size=torch.Size([50, 50]),
+ dtype=torch.float32,
+ )
+
+ kv_cache_list = [
+ (torch.rand(50, 50), torch.rand(50, 50)),
+ (torch.rand(50, 50), torch.rand(50, 50)),
+ (torch.rand(50, 50), torch.rand(50, 50)),
+ (torch.rand(50, 50), torch.rand(50, 50)),
+ (torch.rand(50, 50), torch.rand(50, 50)),
+ (torch.rand(50, 50), torch.rand(50, 50)),
+ (torch.rand(50, 50), torch.rand(50, 50)),
+ (torch.rand(50, 50), torch.rand(50, 50)),
+ ]
+
+ tokens = [1, 2, 3, 4]
+ # store 4 tokens with 8 (k, v) pairs -- presumably layer * len(tokens); TODO confirm
+ cache.update(tokens, kv_cache_list)
+
+ queried_kv_cache_list = cache.query(tokens)
+
+ for (k_tensor, v_tensor), (queried_k_tensor, queried_v_tensor) in zip(
+ kv_cache_list, queried_kv_cache_list
+ ):
+ assert torch.equal(k_tensor, queried_k_tensor) and torch.equal(
+ v_tensor, queried_v_tensor
+ )