From 40133cde17cd90edca312c6d43ea7fbf262f46ee Mon Sep 17 00:00:00 2001 From: Yulu Jia Date: Mon, 16 Dec 2024 16:17:20 -0800 Subject: [PATCH] add bulk_init_chunk_size in torchrec (#2638) Summary: fbgemm ssd TBE takes a parameter `bulk_init_chunk_size`, which controls the number of rows to initialize during TBE bulk initialization. Add `bulk_init_chunk_size` to torchrec so it is accessible from torchrec. Reviewed By: duduyi2013 Differential Revision: D67300985 --- torchrec/distributed/types.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/torchrec/distributed/types.py b/torchrec/distributed/types.py index 44461752a..0ed73f3bc 100644 --- a/torchrec/distributed/types.py +++ b/torchrec/distributed/types.py @@ -633,6 +633,7 @@ class KeyValueParams: gather_ssd_cache_stats: bool: whether enable ssd stats collection, std reporter and ods reporter report_interval: int: report interval in train iteration if gather_ssd_cache_stats is enabled ods_prefix: str: ods prefix for ods reporting + bulk_init_chunk_size: int: number of rows to insert into rocksdb in each chunk # Parameter Server (PS) Attributes ps_hosts (Optional[Tuple[Tuple[str, int]]]): List of PS host ip addresses @@ -652,6 +653,7 @@ class KeyValueParams: l2_cache_size: Optional[int] = None # size in GB max_l1_cache_size: Optional[int] = None # size in MB enable_async_update: Optional[bool] = None + bulk_init_chunk_size: Optional[int] = None # number of rows # Parameter Server (PS) Attributes ps_hosts: Optional[Tuple[Tuple[str, int], ...]] = None @@ -676,6 +678,7 @@ def __hash__(self) -> int: self.l2_cache_size, self.max_l1_cache_size, self.enable_async_update, + self.bulk_init_chunk_size, ) )