Minimize d2h syncs in calculating length_per_key from `stride_per_key` (#1485)

Summary: Pull Request resolved: #1485. For large numbers of features, we call `.item()` once per feature, causing a large number of device-to-host (d2h) syncs. This diff combines the list of tensors into a single tensor and calls `.tolist()` once. Reviewed By: bigning. Differential Revision: D51046476. fbshipit-source-id: 26fd38767d1d48dade24057cd2136b15ea29c16c
pytorch · Nov 13, 2023 · e2cc13a · e2cc13a
1 parent 9db35bb
commit e2cc13a
Showing 1 changed file with 3 additions and 3 deletions.
diff --git a/torchrec/sparse/jagged_tensor.py b/torchrec/sparse/jagged_tensor.py
@@ -631,9 +631,9 @@ def _maybe_compute_stride_kjt_scripted(
 def _length_per_key_from_stride_per_key(
     lengths: torch.Tensor, stride_per_key: List[int]
 ) -> List[int]:
-    return [
-        int(torch.sum(chunk).item()) for chunk in torch.split(lengths, stride_per_key)
-    ]
+    return torch.cat(
+        [torch.sum(chunk).view(1) for chunk in torch.split(lengths, stride_per_key)]
+    ).tolist()
 
 
 def _maybe_compute_length_per_key(