diff --git a/inference/incr_decoding/incr_decoding.cc b/inference/incr_decoding/incr_decoding.cc
index 5a18daab47..ccbb85c595 100644
--- a/inference/incr_decoding/incr_decoding.cc
+++ b/inference/incr_decoding/incr_decoding.cc
@@ -51,7 +51,7 @@ void parse_input_args(char **argv,
                       int &max_tokens_per_prefilling_batch,
                       int &max_sequence_length,
                       int &max_output_length,
-                      int &max_kv_cache_size,
+                      size_t &max_kv_cache_size,
                       int &sampling_seed,
                       bool &streaming_cache,
                       bool &slo_attainment_early_termination,
@@ -209,7 +209,7 @@ void FlexFlow::top_level_task(Task const *task,
   int max_tokens_per_prefilling_batch = -1;
   int max_sequence_length = 256;
   int max_output_length = 512;
-  int max_kv_cache_size = -1; // if -1, then use the default value
+  size_t max_kv_cache_size = 0; // if -1, then use the default value
   RequestManager::DecodingMode decoding_mode =
       RequestManager::INCREMENTAL_DECODING;
   int sampling_seed = 0;