From 1ac1f709fcd7426fe5eea8446cf5d986351a4b10 Mon Sep 17 00:00:00 2001 From: Jack Kleeman Date: Mon, 23 Dec 2024 14:34:40 +0000 Subject: [PATCH] Reduce in memory queue limit by 16x (#2455) This was previously across all partitions, but since 1.1 it's per partition. And it is 350M per partition. Those entries are not initially used, but as you scale to 1m invocations per partition, all the memory pages in the queue's ring buffer are dirtied and contribute to RSS. This leads to 9G of usage on a 24 partition node. This PR reduces the limit by 16x to 21M per partition, or 562M on a 24 partition node, which it will reach after 1.5 million invocations. A more manageable figure, even if it still appears as a 'leak' until that amount is reached. --- crates/types/src/config/worker.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/types/src/config/worker.rs b/crates/types/src/config/worker.rs index 99c75f6d3..18c2240b2 100644 --- a/crates/types/src/config/worker.rs +++ b/crates/types/src/config/worker.rs @@ -179,7 +179,7 @@ pub struct InvokerOptions { /// /// Defines the threshold after which queues invocations will spill to disk at /// the path defined in `tmp-dir`. In other words, this is the number of invocations - /// that can be kept in memory before spilling to disk. + /// that can be kept in memory before spilling to disk. This is a per-partition limit. in_memory_queue_length_limit: NonZeroUsize, /// # Limit number of concurrent invocations from this node @@ -223,7 +223,7 @@ impl Default for InvokerOptions { None, Some(Duration::from_secs(10)), ), - in_memory_queue_length_limit: NonZeroUsize::new(1_056_784).unwrap(), + in_memory_queue_length_limit: NonZeroUsize::new(66_049).unwrap(), inactivity_timeout: Duration::from_secs(60).into(), abort_timeout: Duration::from_secs(60).into(), message_size_warning: NonZeroUsize::new(10_000_000).unwrap(), // 10MB