From e814d6b41a47c9f9b030e6c6febddfcb58932ecd Mon Sep 17 00:00:00 2001 From: Jack Kleeman Date: Mon, 23 Dec 2024 14:34:40 +0000 Subject: [PATCH] Reduce in memory queue limit by 16x (#2455) This was previously across all partitions, but since 1.1 it's per partition. And it is 350M per partition. Those entries are not initially used, but as you scale to 1m invocations per partition, all the memory pages in the queue's ring buffer are dirtied and contribute to RSS. This leads to 9G of usage on a 24 partition node. This PR reduces the limit by 16x to 21M per partition, or 562M on a 24 partition node, which it will reach after 1.5 million invocations. A more manageable figure, even if it still appears as a 'leak' until that amount is reached. --- crates/types/src/config/worker.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/types/src/config/worker.rs b/crates/types/src/config/worker.rs index b8de434b7..4a0537725 100644 --- a/crates/types/src/config/worker.rs +++ b/crates/types/src/config/worker.rs @@ -163,7 +163,7 @@ pub struct InvokerOptions { /// /// Defines the threshold after which queues invocations will spill to disk at /// the path defined in `tmp-dir`. In other words, this is the number of invocations - /// that can be kept in memory before spilling to disk. + /// that can be kept in memory before spilling to disk. This is a per-partition limit. in_memory_queue_length_limit: NonZeroUsize, /// # Limit number of concurrent invocations from this node @@ -207,7 +207,7 @@ impl Default for InvokerOptions { None, Some(Duration::from_secs(10)), ), - in_memory_queue_length_limit: NonZeroUsize::new(1_056_784).unwrap(), + in_memory_queue_length_limit: NonZeroUsize::new(66_049).unwrap(), inactivity_timeout: Duration::from_secs(60).into(), abort_timeout: Duration::from_secs(60).into(), message_size_warning: NonZeroUsize::new(10_000_000).unwrap(), // 10MB