From 4d3d137937d92d0caadf6dd4e65d62a1c2555055 Mon Sep 17 00:00:00 2001 From: Yingfeng Date: Wed, 13 Nov 2024 15:02:57 +0800 Subject: [PATCH] Fix rag analyzer (#2231) ### What problem does this PR solve? Fix memory issue of rag tokenizer ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- src/common/analyzer/rag_analyzer.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/common/analyzer/rag_analyzer.cpp b/src/common/analyzer/rag_analyzer.cpp index 86f8134618..b936af6f7d 100644 --- a/src/common/analyzer/rag_analyzer.cpp +++ b/src/common/analyzer/rag_analyzer.cpp @@ -432,8 +432,8 @@ class NLTKWordTokenizer { return text; } - size_t outlength = text.length() * 1.5; - UniquePtr buffer = MakeUnique(outlength); + size_t outlength = text.length() * 2 < 1024 ? 1024 : text.length() * 2; + auto buffer = MakeUnique(outlength); pcre2_substitute(re, pcre2_subject, text.length(), @@ -445,7 +445,6 @@ class NLTKWordTokenizer { PCRE2_ZERO_TERMINATED, buffer.get(), &outlength); - pcre2_match_data_free(match_data); pcre2_code_free(re);