diff --git a/python/vineyard/llm/__init__.py b/python/vineyard/llm/__init__.py
index 0907d0a93e..4b842b64a3 100644
--- a/python/vineyard/llm/__init__.py
+++ b/python/vineyard/llm/__init__.py
@@ -18,14 +18,15 @@
 
 from typing import List
 from typing import Tuple
+from typing import Union
 
 import numpy as np
 
 import torch
 from torch import dtype
 
-import vineyard
-
+from .config import FileCacheConfig
+from .config import VineyardCacheConfig
 from .llm_C import KVTensor
 from .llm_C import _generate
 
@@ -35,24 +36,19 @@ class KV_Cache:  # pylint: disable=too-many-instance-attributes
 
     def __init__(
         self,
-        socket: str,
+        cache_config: Union[VineyardCacheConfig, FileCacheConfig],
         tensor_bytes: int = 10,
         cache_capacity: int = 10,
         layer: int = 1,
         torch_size: torch.Size = None,
         dtype: dtype = None,
-        block_size: int = 5,
-        sync_interval: int = 3,
-        llm_cache_sync_lock: str = "llmCacheSyncLock",
-        llm_cache_object_name: str = "llm_cache_object",
-        llm_ref_cnt_object_name: str = "llm_refcnt_object",
         **kwargs
     ):
         """Create a llm kv cache manager based on vineyard blob.
 
         Args:
-            socket (str):
-                The vineyard socket path.
+            cache_config (Union[VineyardCacheConfig, FileCacheConfig]):
+                The backend config of the kv cache: vineyard cache or file cache.
             tensor_bytes (int, optional):
                 The size of the kv cache tensor.
                 Defaults to 10.
@@ -67,19 +63,13 @@ def __init__(
             dtype (dtype, optional):
                 The dtype of the tensor. Defaults to None.
                 e.g., torch.float32, torch.float64.
-            block_size (int, optional):
-                The block size of the kv cache. Defaults to 5.
-            sync_interval (int, optional):
-                The sync interval of the kv cache. Defaults to 3.
-            llm_cache_sync_lock (str, optional):
-                The name of the kv cache sync lock. Defaults to "llmCacheSyncLock".
-            llm_cache_object_name (str, optional):
-                The name of the kv cache object. Defaults to "llm_cache_object".
-            llm_ref_cnt_object_name (str, optional):
-                The name of the kv cache ref cnt object.
-                Defaults to "llm_refcnt_object".
         """
-        self.client = vineyard.connect(socket)
+        if not isinstance(cache_config, VineyardCacheConfig) and not isinstance(
+            cache_config, FileCacheConfig
+        ):
+            raise ValueError(
+                "The cache_config should be VineyardCacheConfig or FileCacheConfig."
+            )
         self.tensor_bytes = tensor_bytes
         self.cache_capacity = cache_capacity
         self.layer = layer
@@ -88,21 +78,12 @@ def __init__(
         self.tensor_dtype = dtype
         # the dtype of the numpy array of the tensor
         self.numpy_dtype = None
-        self.block_size = block_size
-        self.sync_interval = sync_interval
-        self.llm_cache_sync_lock = llm_cache_sync_lock
-        self.llm_cache_object_name = llm_cache_object_name
-        self.llm_ref_cnt_object_name = llm_ref_cnt_object_name
+
         self.kv_cache_manager = _generate(
-            ipc_client=self.client.ipc_client,
             tensor_bytes=tensor_bytes,
             cache_capacity=cache_capacity,
             layer=layer,
-            block_size=block_size,
-            sync_interval=sync_interval,
-            llm_cache_sync_lock=llm_cache_sync_lock,
-            llm_cache_object_name=llm_cache_object_name,
-            llm_ref_cnt_object_name=llm_ref_cnt_object_name,
+            **cache_config.__dict__,
             **kwargs
         )
 
diff --git a/python/vineyard/llm/config.py b/python/vineyard/llm/config.py
new file mode 100644
index 0000000000..f8b4cbfc4b
--- /dev/null
+++ b/python/vineyard/llm/config.py
@@ -0,0 +1,89 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2020-2023 Alibaba Group Holding Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import vineyard
+
+from .llm_C import FilesystemType
+
+
+class VineyardCacheConfig:
+    """Settings for the vineyard-backed llm kv cache (connects on creation)."""
+
+    def __init__(
+        self,
+        socket: str,
+        block_size: int = 5,
+        sync_interval: int = 3,
+        llm_cache_sync_lock: str = "llmCacheSyncLock",
+        llm_cache_object_name: str = "llm_cache_object",
+        llm_ref_cnt_object_name: str = "llm_refcnt_object",
+    ):
+        """Connect to vineyardd and record the vineyard cache settings.
+
+        Args:
+            socket (str):
+                The ipc socket of the vineyardd instance to connect to.
+            block_size (int, optional):
+                The block size of the kv cache. Defaults to 5.
+            sync_interval (int, optional):
+                The sync interval of the kv cache. Defaults to 3.
+            llm_cache_sync_lock (str, optional):
+                The name of the kv cache sync lock. Defaults to "llmCacheSyncLock".
+            llm_cache_object_name (str, optional):
+                The name of the kv cache object. Defaults to "llm_cache_object".
+            llm_ref_cnt_object_name (str, optional):
+                The name of the ref count object. Defaults to "llm_refcnt_object".
+        """
+        # KV_Cache passes __dict__ to _generate: keep names equal to its kwargs.
+        self.ipc_client = vineyard.connect(socket).ipc_client
+        self.block_size = block_size
+        self.sync_interval = sync_interval
+        self.llm_cache_sync_lock = llm_cache_sync_lock
+        self.llm_cache_object_name = llm_cache_object_name
+        self.llm_ref_cnt_object_name = llm_ref_cnt_object_name
+
+
+class FileCacheConfig:
+    """Settings for the llm kv cache that is stored on a filesystem."""
+
+    def __init__(
+        self,
+        batch_size: int = 16,
+        split_number: int = 2,
+        root: str = "/tmp/vineyard/llm_cache",
+        filesystem_type: FilesystemType = FilesystemType.LOCAL,
+    ):
+        """Create a file cache config.
+
+        Args:
+            batch_size (int, optional):
+                Number of tokens in each batch of the token list. Defaults to 16.
+            split_number (int, optional):
+                Split the hash value into a multi-level directory path. Defaults to 2.
+                e.g., split_number=2, hash value=123456, the file path is 12/34/56.
+            root (str, optional):
+                The root directory of the kv state files.
+                Defaults to "/tmp/vineyard/llm_cache".
+            filesystem_type (FilesystemType, optional):
+                The type of the filesystem. Defaults to FilesystemType.LOCAL.
+        """
+        # KV_Cache passes __dict__ to _generate: keep names equal to its kwargs.
+        self.batch_size = batch_size
+        self.split_number = split_number
+        self.root = root
+        self.filesystem_type = filesystem_type
diff --git a/python/vineyard/llm/kv_state_cache.cc b/python/vineyard/llm/kv_state_cache.cc
index d4e3a860e0..8d0dec7904 100644
--- a/python/vineyard/llm/kv_state_cache.cc
+++ b/python/vineyard/llm/kv_state_cache.cc
@@ -34,6 +34,10 @@ LLMKV create_llmkv_from_buffer(py::buffer buffer, size_t size) {
 PYBIND11_MODULE(llm_C, m) {
   m.doc() = "vineyard llm kv cache manager module";
 
+  pybind11::enum_<FilesystemType>(m, "FilesystemType")
+      .value("LOCAL", FilesystemType::LOCAL)
+      .export_values();
+
   py::class_<LLMKV>(m, "KVTensor")
       .def(py::init(&create_llmkv_from_buffer), py::arg("buffer"),
            py::arg("size"))
@@ -81,30 +85,50 @@ PYBIND11_MODULE(llm_C, m) {
       .def("close", [](KVStateCacheManager* self) { self->Close(); });
 
   m.def(
-      "_generate",
-      [](py::object ipc_client, int tensor_bytes, int cache_capacity, int layer,
-         int block_size, int sync_interval, std::string llm_cache_sync_lock,
-         std::string llm_cache_object_name,
-         std::string llm_ref_cnt_object_name) -> py::object {
-        std::shared_ptr<KVStateCacheManager> manager;
-        VineyardCacheConfig config(tensor_bytes, cache_capacity, layer,
-                                   block_size, sync_interval,
-                                   llm_cache_sync_lock, llm_cache_object_name,
-                                   llm_ref_cnt_object_name);
-        Client& client = ipc_client.cast<Client&>();
-        vineyard::Status status =
-            vineyard::KVStateCacheManager::Make(client, manager, config);
-        if (!status.ok()) {
-          throw std::runtime_error(status.ToString());
-        }
-        return py::cast(manager);
-      },
-      py::arg("ipc_client"), py::arg("tensor_bytes") = 10,
-      py::arg("cache_capacity") = 10, py::arg("layer") = 1,
-      py::arg("block_size") = 5, py::arg("sync_interval") = 3,
-      py::arg("llm_cache_sync_lock") = "llmCacheSyncLock",
-      py::arg("llm_cache_object_name") = "llm_cache_object",
-      py::arg("llm_ref_cnt_object_name") = "llm_refcnt_object");
+       "_generate",
+       [](py::object ipc_client, int tensor_bytes, int cache_capacity,
+          int layer, int block_size, int sync_interval,
+          std::string llm_cache_sync_lock, std::string llm_cache_object_name,
+          std::string llm_ref_cnt_object_name) -> py::object {
+         std::shared_ptr<KVStateCacheManager> manager;
+         VineyardCacheConfig config(tensor_bytes, cache_capacity, layer,
+                                    block_size, sync_interval,
+                                    llm_cache_sync_lock, llm_cache_object_name,
+                                    llm_ref_cnt_object_name);
+         Client& client = ipc_client.cast<Client&>();
+         vineyard::Status status =
+             vineyard::KVStateCacheManager::Make(client, manager, config);
+         if (!status.ok()) {
+           throw std::runtime_error(status.ToString());
+         }
+         return py::cast(manager);
+       },
+       py::arg("ipc_client"), py::arg("tensor_bytes") = 10,
+       py::arg("cache_capacity") = 10, py::arg("layer") = 1,
+       py::arg("block_size") = 5, py::arg("sync_interval") = 3,
+       py::arg("llm_cache_sync_lock") = "llmCacheSyncLock",
+       py::arg("llm_cache_object_name") = "llm_cache_object",
+       py::arg("llm_ref_cnt_object_name") = "llm_refcnt_object")
+      .def(
+          "_generate",
+          [](int tensor_bytes, int cache_capacity, int layer, int batch_size,
+             int split_number, std::string root,
+             FilesystemType filesystemType) -> py::object {
+            std::shared_ptr<KVStateCacheManager> manager;
+            FileCacheConfig config(tensor_bytes, cache_capacity, layer,
+                                   batch_size, split_number, root,
+                                   filesystemType);
+            vineyard::Status status =
+                vineyard::KVStateCacheManager::Make(manager, config);
+            if (!status.ok()) {
+              throw std::runtime_error(status.ToString());
+            }
+            return py::cast(manager);
+          },
+          py::arg("tensor_bytes") = 10, py::arg("cache_capacity") = 10,
+          py::arg("layer") = 1, py::arg("batch_size") = 5,
+          py::arg("split_number") = 3, py::arg("root") = "root",
+          py::arg("filesystem_type") = FilesystemType::LOCAL);
 }
 
 }  // namespace vineyard
diff --git a/python/vineyard/llm/tests/test_llm.py b/python/vineyard/llm/tests/test_llm.py
index 0d536975a0..019e0008f6 100644
--- a/python/vineyard/llm/tests/test_llm.py
+++ b/python/vineyard/llm/tests/test_llm.py
@@ -19,18 +19,26 @@
 import torch
 
 from vineyard.llm import KV_Cache
+from vineyard.llm.config import FileCacheConfig
+from vineyard.llm.config import VineyardCacheConfig
 
 
-def test_kv_cache_update_and_query(vineyard_ipc_sockets):
-    cache = KV_Cache(
+def test_kv_cache_update_and_query_on_blob(vineyard_ipc_sockets):
+    vineyard_cache_config = VineyardCacheConfig(
         socket=vineyard_ipc_sockets[0],
+        block_size=5,
+        sync_interval=3,
+        llm_cache_sync_lock="llmCacheSyncLock",
+        llm_cache_object_name="llm_cache_object",
+        llm_ref_cnt_object_name="llm_refcnt_object",
+    )
+    cache = KV_Cache(
+        cache_config=vineyard_cache_config,
         tensor_bytes=16,  # should be the same as the nbytes of the tensor
         cache_capacity=10,
         layer=1,
         torch_size=torch.Size([2, 2]),
         dtype=torch.float32,
-        block_size=5,
-        sync_interval=3,
     )
 
     kv_cache_list = [
@@ -52,3 +60,43 @@ def test_kv_cache_update_and_query(vineyard_ipc_sockets):
         assert torch.equal(k_tensor, queried_k_tensor) and torch.equal(
             v_tensor, queried_v_tensor
         )
+
+
+def test_kv_cache_update_and_query_on_fs():
+    file_cache_config = FileCacheConfig(
+        batch_size=2,
+        split_number=2,
+        root="/tmp/vineyard/llm_cache",
+    )
+    cache = KV_Cache(
+        cache_config=file_cache_config,
+        tensor_bytes=10000,  # must equal the tensor nbytes: 50 * 50 * 4 (float32)
+        cache_capacity=10,
+        layer=2,
+        torch_size=torch.Size([50, 50]),
+        dtype=torch.float32,
+    )
+
+    kv_cache_list = [  # NOTE(review): 4 tokens x 2 layers = 8 pairs? confirm
+        (torch.rand(50, 50), torch.rand(50, 50)),
+        (torch.rand(50, 50), torch.rand(50, 50)),
+        (torch.rand(50, 50), torch.rand(50, 50)),
+        (torch.rand(50, 50), torch.rand(50, 50)),
+        (torch.rand(50, 50), torch.rand(50, 50)),
+        (torch.rand(50, 50), torch.rand(50, 50)),
+        (torch.rand(50, 50), torch.rand(50, 50)),
+        (torch.rand(50, 50), torch.rand(50, 50)),
+    ]
+
+    tokens = [1, 2, 3, 4]
+    # insert the token list together with its kv cache list
+    cache.update(tokens, kv_cache_list)
+
+    queried_kv_cache_list = cache.query(tokens)
+
+    for (k_tensor, v_tensor), (queried_k_tensor, queried_v_tensor) in zip(
+        kv_cache_list, queried_kv_cache_list
+    ):
+        assert torch.equal(k_tensor, queried_k_tensor) and torch.equal(
+            v_tensor, queried_v_tensor
+        )