diff --git a/models/uri_drain/template_miner.py b/models/uri_drain/template_miner.py
index 6f96362..f1a066c 100644
--- a/models/uri_drain/template_miner.py
+++ b/models/uri_drain/template_miner.py
@@ -114,6 +114,12 @@ def load_state(self):
 
         loaded_drain: Drain = jsonpickle.loads(state, keys=True)
 
+        # restore the word-correctness flag on every loaded cluster's template tokens
+        if len(loaded_drain.id_to_cluster) > 0:
+            for _, cluster in loaded_drain.id_to_cluster.items():
+                if isinstance(cluster, LogCluster):
+                    cluster.token_words_check()
+
         # json-pickle encoded keys as string by default, so we have to convert those back to int
         # this is only relevant for backwards compatibility when loading a snapshot of drain <= v0.9.1
         # which did not use json-pickle's keys=true
@@ -137,8 +143,8 @@ def save_state(self, snapshot_reason):
             state = base64.b64encode(zlib.compress(state))
 
         logger.info(f"Saving state of {len(self.drain.clusters)} clusters "
-                    f"with {self.drain.get_total_cluster_size()} messages, {len(state)} bytes, "
-                    f"reason: {snapshot_reason}")
+                    f"with {self.drain.get_total_cluster_size()} messages to service <{self.persistence_handler.get_service()}>, "
+                    f"{len(state)} bytes, reason: {snapshot_reason}")
         self.persistence_handler.save_state(state)
 
     def get_snapshot_reason(self, change_type, cluster_id):
diff --git a/models/uri_drain/uri_drain.py b/models/uri_drain/uri_drain.py
index 36380a2..2caac02 100644
--- a/models/uri_drain/uri_drain.py
+++ b/models/uri_drain/uri_drain.py
@@ -4,11 +4,11 @@
 # Again, it's further modified to suit URI clustering needs,
 # changes are kept minimal to avoid divergence from Drain3 upstream.
 # TODO Note:: Every change to upstream Drain3 algorithm MUST be commented starting with "Modified::"
-
 from typing import List, Dict, Sequence
 
 from cachetools import LRUCache, Cache
 
+from models.uri_drain.word_splitter import check_all_word_correct
 from models.utils.simple_profiler import Profiler, NullProfiler
 
 import logger
@@ -18,7 +18,7 @@ class LogCluster: # TODO Modified:: Changed to URICluster
     __slots__ = ["log_template_tokens", "cluster_id", "size", "latest_urls"]
 
     def __init__(self, log_template_tokens: list, cluster_id: int, combine_min_url_count: int):
-        self.log_template_tokens = tuple(log_template_tokens)
+        self.log_template_tokens = tuple(parse_token_list(log_template_tokens))
         self.cluster_id = cluster_id
         self.size = 1
         self.latest_urls = LRUCache(combine_min_url_count+1)
@@ -57,6 +57,27 @@ def __str__(self):
         # return f"ID={str(self.cluster_id).ljust(5)} : size={str(self.size).ljust(10)}: {self.get_template()}"
         return f"size={str(self.size).ljust(10)}: {self.get_template()}"
 
+    def token_words_check(self):
+        self.log_template_tokens = parse_token_list(self.log_template_tokens)
+
+
+class Token(str):
+    __slots__ = ["token", "word_correct"]
+
+    def __new__(cls, token: str, word_correct: bool = False):
+        return super().__new__(cls, token)
+
+    def __init__(self, token: str, word_correct: bool):
+        self.token = token
+        self.word_correct = word_correct
+
+
+def parse_token_list(tokens: List[str]) -> List[Token]:
+    result = []
+    for token in tokens:
+        result.append(Token(token, check_all_word_correct(token)))
+    return result
+
 
 class SingleURILogCluster:
     __slots__ = ["uri", "cluster_id", "size"]
@@ -198,13 +219,16 @@ def fast_match(self, cluster_ids: Sequence, tokens: list, sim_th: float, include
         max_param_count = -1
         max_cluster = None
 
+        # pre-parse tokens to avoid repeated parsing
+        parsed_token = parse_token_list(tokens)
+
         for cluster_id in cluster_ids:
             # Try to retrieve cluster from cache with bypassing eviction
            # algorithm as we are only testing candidates for a match.
             cluster = self.id_to_cluster.get(cluster_id)
             if cluster is None:
                 continue
-            cur_sim, param_count = self.get_seq_distance(cluster.log_template_tokens, tokens, include_params)
+            cur_sim, param_count = self.get_seq_distance(cluster.log_template_tokens, parsed_token, include_params)
             # self.logger.debug(f'SIMILARITY = {cur_sim} for c{cluster_id}, {cluster.log_template_tokens} param={param_count}')
             if cur_sim > max_sim or (cur_sim == max_sim and param_count > max_param_count):  # todo: this is known caveat
@@ -495,6 +519,9 @@ def get_seq_distance(self, seq1, seq2, include_params: bool):
             if (index == 0 or index == 1) and '.' in token1 and token1 != token2:
                 # self.logger.debug('this is domain mismatch!')
                 return 0.0, 0
+            # if either differing token is made up of real words, the sequences cannot be combined
+            if token1 != token2 and (token1.word_correct or token2.word_correct):
+                return -1, -1
             # if token1 in self.possible_params or token1 == self.param_str:
             if token1 == self.param_str:
                 param_count += 1
@@ -518,14 +545,6 @@ def create_template(self, seq1, seq2):
         ret_val = list(seq2)
         seq_length = len(seq1)
 
-        # SPECIAL ASSUMPTION THAT MIGHT BE FALSE::
-        # /api/getconnection
-        # /api/dropconnection
-        if seq_length == 2:
-            if (seq1[0] == seq2[0] and seq1[1] != seq2[1]  # can be simplified
-                    and not self.has_numbers(seq1[1]) and not self.has_numbers(seq2[1])):
-                print(f'first token match but second token mismatch, seq1 = {seq1}, seq2 = {seq2}')
-                return 'rejected'
 
         # TODO, radical assumption if there's absolutely 0 digit in seq1 and seq2, then don't consider them similar?
         # To implement this, we increase the false negative rate, but decrease false positive rate
@@ -626,7 +645,7 @@ def create_template(self, seq1, seq2):
                 ret_val[i] = self.param_str
                 # self.logger.debug(f'After change: {ret_val}')
 
-        return ret_val
+        return parse_token_list(ret_val)
 
     def match(self, content: str, full_search_strategy="never"):
         """
diff --git a/models/uri_drain/word_splitter.py b/models/uri_drain/word_splitter.py
new file mode 100644
index 0000000..c61af11
--- /dev/null
+++ b/models/uri_drain/word_splitter.py
@@ -0,0 +1,52 @@
+# Copyright 2023 SkyAPM org
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+
+from cachetools import LRUCache
+from textblob import TextBlob
+
+last_word_correct_lru = LRUCache(1000)
+
+
+def split_for_url(text):
+    # split text by camel case
+    pattern = r"(?<=[a-z])(?=[A-Z])"
+    return re.split(pattern, text)
+
+
+def check_all_word_correct(text):
+    # if the text contains digits, it is not a word; skip the word check
+    if any(char.isdigit() for char in text):
+        return False
+    for word in split_for_url(text):
+        # if a word is too long, it is unlikely to be a real word; skip the spell check to keep analysis time down
+        if len(word) > 20:
+            return False
+        word = word.lower()
+        cached_result = last_word_correct_lru.get(word)
+        if cached_result is not None:
+            if cached_result:
+                continue
+            else:
+                return False
+        # if the word needs no spelling correction, it is a real word rather than a param
+        # TextBlob also splits the word by the regex `\w+`, so special characters (such as "_", ".") are no concern
+        corrected_word = TextBlob(word).correct()
+        correct = word == corrected_word
+        last_word_correct_lru[word] = correct
+        if not correct:
+            return False
+
+    return True
diff --git a/poetry.lock b/poetry.lock
index c058f6b..c54169b 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1144,13 +1144,13 @@ files = [
 
 [[package]]
 name = "jsonpickle"
-version = "3.2.2"
+version = "3.3.0"
 description = "Python library for serializing arbitrary object graphs into JSON"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "jsonpickle-3.2.2-py3-none-any.whl", hash = "sha256:87cd82d237fd72c5a34970e7222dddc0accc13fddf49af84111887ed9a9445aa"},
-    {file = "jsonpickle-3.2.2.tar.gz", hash = "sha256:d425fd2b8afe9f5d7d57205153403fbf897782204437882a477e8eed60930f8c"},
+    {file = "jsonpickle-3.3.0-py3-none-any.whl", hash = "sha256:287c12143f35571ab00e224fa323aa4b090d5a7f086f5f494d7ee9c7eb1a380a"},
+    {file = "jsonpickle-3.3.0.tar.gz", hash = "sha256:ab467e601e5b1a1cd76f1819d014795165da071744ef30bf3786e9bc549de25a"},
 ]
 
 [package.extras]
@@ -1505,13 +1505,13 @@ files = [
 
 [[package]]
 name = "narwhals"
-version = "1.6.0"
+version = "1.6.1"
 description = "Extremely lightweight compatibility layer between dataframe libraries"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "narwhals-1.6.0-py3-none-any.whl", hash = "sha256:4ec5b248998ae552491bc1f497448e94f0f539f5664428a31ddf662c5d46c244"},
-    {file = "narwhals-1.6.0.tar.gz", hash = "sha256:0b0d12f994ac7832c70af29241c32a4f7afddc1cf669f40f6318533d52204595"},
+    {file = "narwhals-1.6.1-py3-none-any.whl", hash = "sha256:5dd0dd3691dbc5b44567d6dcb7506a099523ef70cd024d0e6e34af6284eed02b"},
+    {file = "narwhals-1.6.1.tar.gz", hash = "sha256:c618e451a77ade63beccd55ddbe64434f14f939cd1672d3d1156c20c1e1642ff"},
 ]
 
 [package.extras]
@@ -2511,13 +2511,13 @@ doc = ["Sphinx", "sphinx-rtd-theme"]
 
 [[package]]
 name = "setuptools"
-version = "74.0.0"
+version = "74.1.0"
 description = "Easily download, build, install, upgrade, and uninstall Python packages"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "setuptools-74.0.0-py3-none-any.whl", hash = "sha256:0274581a0037b638b9fc1c6883cc71c0210865aaa76073f7882376b641b84e8f"},
-    {file = "setuptools-74.0.0.tar.gz", hash = "sha256:a85e96b8be2b906f3e3e789adec6a9323abf79758ecfa3065bd740d81158b11e"},
+    {file = "setuptools-74.1.0-py3-none-any.whl", hash = "sha256:cee604bd76cc092355a4e43ec17aee5369095974f41f088676724dc6bc2c9ef8"},
+    {file = "setuptools-74.1.0.tar.gz", hash = "sha256:bea195a800f510ba3a2bc65645c88b7e016fe36709fefc58a880c4ae8a0138d7"},
 ]
 
 [package.extras]
@@ -2564,13 +2564,13 @@ files = [
 
 [[package]]
 name = "starlette"
-version = "0.38.3" +version = "0.38.4" description = "The little ASGI library that shines." optional = false python-versions = ">=3.8" files = [ - {file = "starlette-0.38.3-py3-none-any.whl", hash = "sha256:0e4af343a4e59324b96fbe0f3c6ad8c3a908d73f12f5c80a797803a6c3ad4687"}, - {file = "starlette-0.38.3.tar.gz", hash = "sha256:f674450f0f46a790be1f3a128f386080600b58fa358f8e320d93dbef6d7f676c"}, + {file = "starlette-0.38.4-py3-none-any.whl", hash = "sha256:526f53a77f0e43b85f583438aee1a940fd84f8fd610353e8b0c1a77ad8a87e76"}, + {file = "starlette-0.38.4.tar.gz", hash = "sha256:53a7439060304a208fea17ed407e998f46da5e5d9b1addfea3040094512a6379"}, ] [package.dependencies] @@ -2580,6 +2580,25 @@ typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\"" [package.extras] full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7)", "pyyaml"] +[[package]] +name = "textblob" +version = "0.18.0" +description = "Simple, Pythonic text processing. Sentiment analysis, part-of-speech tagging, noun phrase parsing, and more." +optional = false +python-versions = ">=3.8" +files = [ + {file = "textblob-0.18.0-py3-none-any.whl", hash = "sha256:eb29995ab2a9acc2e0fde10dde6b069b01193c75f3dfc2550d0d1ffdd97802bf"}, + {file = "textblob-0.18.0.tar.gz", hash = "sha256:eb507b62bf2283a71f56bed3e0fc4eec7d388ef76b03699cf994166572a8daf3"}, +] + +[package.dependencies] +nltk = ">=3.8" + +[package.extras] +dev = ["pre-commit (>=3.5,<4.0)", "textblob[tests]", "tox"] +docs = ["PyYAML (==6.0.1)", "sphinx (==7.2.6)", "sphinx-issues (==4.0.0)"] +tests = ["numpy", "pytest"] + [[package]] name = "tomli" version = "2.0.1" @@ -2889,4 +2908,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.12" -content-hash = "e43f94f67c34ccdc1232bc2e63ef4b4fde01d8b2c9cd51e85dfa4b80a347ce99" +content-hash = "213599d8152e698f5954913ab45f8c084513920aba9672da65a980053d93a4e2" diff --git a/pyproject.toml b/pyproject.toml index 0f4d0a9..b7aef81 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,6 +63,7 @@ inflect = "^6.0.4" pytest = "^7.3.2" apache-skywalking = "^1.0.1" flask = "^2.3.2" +textblob = "0.18.0" diff --git a/servers/simple/uri_drain.ini b/servers/simple/uri_drain.ini index f844707..a6ab199 100644 --- a/servers/simple/uri_drain.ini +++ b/servers/simple/uri_drain.ini @@ -35,7 +35,7 @@ max_children = ${DRAIN_MAX_CHILDREN:100} max_clusters = ${DRAIN_MAX_CLUSTERS:1024} extra_delimiters = ${DRAIN_EXTRA_DELIMITERS:["/"]} analysis_min_url_count = ${DRAIN_ANALYSIS_MIN_URL_COUNT:20} -combine_min_url_count = ${DRAIN_COMBINE_MIN_URL_COUNT:8} +combine_min_url_count = ${DRAIN_COMBINE_MIN_URL_COUNT:3} [PROFILING] enabled = ${PROFILING_ENABLED:False} diff --git a/servers/simple/worker.py b/servers/simple/worker.py index 10c0c7a..714c27a 100644 --- a/servers/simple/worker.py +++ b/servers/simple/worker.py @@ -50,7 +50,8 @@ def run_worker(uri_main_queue, shared_results_object, config, existing_miners): uris, service = uri_package[0], uri_package[1] # print(uri_main_queue.get(timeout=1)) start_time = time.time() - for uri in uris: + sorted_uris = sorted(uris) + for uri in sorted_uris: drain_instances[service].add_log_message(uri) logger.info(f'Processed {len(uris)} uris of service {service} in {time.time() - start_time} seconds') patterns = drain_instances[service].drain.cluster_patterns diff --git a/test/e2e/expected/endpoint_hard.yaml b/test/e2e/expected/endpoint_hard.yaml index 5615201..7ca2efe 100644 --- a/test/e2e/expected/endpoint_hard.yaml +++ 
b/test/e2e/expected/endpoint_hard.yaml @@ -19,8 +19,13 @@ patterns: - /api/v1/services/{var} - /api/v1/users/{var}/posts/{var}/comments - /api/v1/wallets/{var} + - /api/v2/admin/users/{var} - /api/v2/courses/{var}/modules/{var}/lessons - /api/v2/customers/{var} - /api/v3/products/{var}/reviews/{var}/comments - /api/v4/orders/{var}/items/{var}/tracking + - /customer/{var} + - /customer/{var}/order/{var} + - ABC/{var} + - www.google.com/api/v1/users/{var} version: '1' \ No newline at end of file diff --git a/test/e2e/expected/endpoint_hard_3k.yaml b/test/e2e/expected/endpoint_hard_3k.yaml index 5615201..fbdef4d 100644 --- a/test/e2e/expected/endpoint_hard_3k.yaml +++ b/test/e2e/expected/endpoint_hard_3k.yaml @@ -19,8 +19,12 @@ patterns: - /api/v1/services/{var} - /api/v1/users/{var}/posts/{var}/comments - /api/v1/wallets/{var} + - /api/v2/admin/users/{var} - /api/v2/courses/{var}/modules/{var}/lessons - /api/v2/customers/{var} - /api/v3/products/{var}/reviews/{var}/comments - /api/v4/orders/{var}/items/{var}/tracking + - /customer/{var} + - /customer/{var}/order/{var} + - www.google.com/api/v1/users/{var} version: '1' \ No newline at end of file diff --git a/test/e2e/expected/endpoint_trivial.yaml b/test/e2e/expected/endpoint_trivial.yaml index 885557c..ad239bf 100644 --- a/test/e2e/expected/endpoint_trivial.yaml +++ b/test/e2e/expected/endpoint_trivial.yaml @@ -14,8 +14,13 @@ patterns: - /api/v1/accounts/{var} + - /api/v1/invoices/{var} - /api/v1/orders/{var} - /api/v1/posts/{var} - /api/v1/products/{var} - /api/v1/users/{var} + - /api/v2/data/users/{var} + - /api/v999/orders/{var} + - /user/{var}/post/{var} + - /user/{var}/profile/{var}/compare/{var}/profile/{var} version: '1' \ No newline at end of file diff --git a/test/e2e/expected/endpoint_trivial_3k.yaml b/test/e2e/expected/endpoint_trivial_3k.yaml index 7367b04..a4c0da4 100644 --- a/test/e2e/expected/endpoint_trivial_3k.yaml +++ b/test/e2e/expected/endpoint_trivial_3k.yaml @@ -19,4 +19,9 @@ patterns: - /api/v1/posts/{var} - /api/v1/products/{var} - /api/v1/users/{var} + - /api/v2/data/users/{var} + - /api/v999/orders/{var} + - /user/{var} + - /user/{var}/post/{var} + - /user/{var}/profile/{var}/compare/{var}/profile/{var} version: '1' \ No newline at end of file
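Reviewer note: a minimal sketch of what the new word check in word_splitter.py is meant to do, assuming textblob (and its NLTK corpora) is installed locally; the sample tokens below are illustrative only and are not taken from the test data.

```python
from models.uri_drain.word_splitter import check_all_word_correct

# Segments that split (on camel case) into correctly spelled English words are kept as
# literal URI parts, so e.g. /api/createConnection and /api/dropConnection should stay in
# separate clusters instead of collapsing into /api/{var}.
print(check_all_word_correct("createConnection"))  # expected True: "create" and "connection" are real words

# Any digit short-circuits the check, so ID-like segments remain candidates for {var}.
print(check_all_word_correct("user1234"))  # False: the digit check returns early
```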