From 2bdf4cd546636e0700e63c9d89a335547f38d5f0 Mon Sep 17 00:00:00 2001 From: "Xida Ren (Cedar)" Date: Fri, 17 Jan 2025 10:08:02 -0500 Subject: [PATCH] Remove some unused variables in page_pool (#837) Added them when I first implemented page_pool for some concurrency-related tracking, but we should get rid of them until we actually need to use them. --- .../shortfin_apps/llm/components/kvcache/page_pool.py | 10 ---------- .../llm/components/kvcache/trie_attention_cache.py | 2 -- .../components/kvcache/base_attention_cache_test.py | 2 +- 3 files changed, 1 insertion(+), 13 deletions(-) diff --git a/shortfin/python/shortfin_apps/llm/components/kvcache/page_pool.py b/shortfin/python/shortfin_apps/llm/components/kvcache/page_pool.py index 0acb7dc95..3e8f91097 100644 --- a/shortfin/python/shortfin_apps/llm/components/kvcache/page_pool.py +++ b/shortfin/python/shortfin_apps/llm/components/kvcache/page_pool.py @@ -22,10 +22,6 @@ class PageInfo: index: int pool: PagePool - token_offset: int # Offset within the page - token_count: int # Number of tokens stored in this page - writing: bool = False - read_ref_count: int = 0 # Number of threads that still need to read this page. When this reaches 0, page is eligible for release @dataclass @@ -80,8 +76,6 @@ def __init__(self, *, devices: Sequence[sf.ScopedDevice], config: PagePoolConfig PageInfo( index=i, pool=self, - token_offset=0, - token_count=0, ) for i in range(self.config.alloc_page_count) ] @@ -127,7 +121,6 @@ def copy_page(self, src_page: PageInfo) -> PageInfo: Args: src_page: Source page to copy from - token_count: Optional number of tokens to copy. If None, copies all tokens. 
Returns: New PageInfo containing the copied data @@ -145,9 +138,6 @@ def copy_page(self, src_page: PageInfo) -> PageInfo: # Copy the data dst_view.copy_from(src_view) - # Setup destination page metadata - dst_page.token_offset = 0 # Always start at beginning of new page - return dst_page def __repr__(self): diff --git a/shortfin/python/shortfin_apps/llm/components/kvcache/trie_attention_cache.py b/shortfin/python/shortfin_apps/llm/components/kvcache/trie_attention_cache.py index 3993e2444..8967a8435 100644 --- a/shortfin/python/shortfin_apps/llm/components/kvcache/trie_attention_cache.py +++ b/shortfin/python/shortfin_apps/llm/components/kvcache/trie_attention_cache.py @@ -297,8 +297,6 @@ def __init__(self, page_pool: PagePool, tokens_per_page: int): dummy_page = PageInfo( index=0, # Root uses reserved index 0 pool=self.page_pool, - token_offset=0, - token_count=0, ) self.root = TrieNode(tokens=tuple(), page=dummy_page) self.leaves: Set[TrieNode] = set() diff --git a/shortfin/tests/apps/llm/components/kvcache/base_attention_cache_test.py b/shortfin/tests/apps/llm/components/kvcache/base_attention_cache_test.py index 8f2c4c060..5ba9f432c 100644 --- a/shortfin/tests/apps/llm/components/kvcache/base_attention_cache_test.py +++ b/shortfin/tests/apps/llm/components/kvcache/base_attention_cache_test.py @@ -23,7 +23,7 @@ class MockPagePool(PagePool): def __init__(self, total_pages: int): self._queue = queue.Queue() for i in range(total_pages): - page = PageInfo(index=i, pool=self, token_offset=0, token_count=0) + page = PageInfo(index=i, pool=self) self._queue.put(page) def acquire_free_pages(self, count: int) -> List[PageInfo]: