Skip to content

Commit

Permalink
Support minimal combine pattern count setting (#17)
Browse files Browse the repository at this point in the history
  • Loading branch information
mrproliu authored Aug 7, 2024
1 parent 9e92bc8 commit 9daff10
Show file tree
Hide file tree
Showing 11 changed files with 47 additions and 60 deletions.
1 change: 1 addition & 0 deletions demo/uri_drain.ini
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ depth = 4
max_children = 100
max_clusters = 1024
extra_delimiters = ["/"]
combine_min_url_count = ${DRAIN_COMBINE_MIN_URL_COUNT:8}

[PROFILING]
enabled = True
Expand Down
1 change: 1 addition & 0 deletions models/Configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ Drain is the core algorithm of URI Drain.
| max_clusters | int | DRAIN_MAX_CLUSTERS | 1024 | Max number of tracked clusters (unlimited by default). When this number is reached, the model starts replacing old clusters with new ones according to the LRU policy. |
| extra_delimiters | string | DRAIN_EXTRA_DELIMITERS | \["/"\] | The extra delimiters to split the sequence. |
| analysis_min_url_count | int | DRAIN_ANALYSIS_MIN_URL_COUNT | 20 | The minimum number of unique URLs (per service) required to trigger the analysis. |
| combine_min_url_count | int | DRAIN_COMBINE_MIN_URL_COUNT | 8 | The minimum number of unique URLs (candidates of each service) required to mask them as a variable URL (in case some similar URLs are not RESTful, such as `/test/one` and `/test/two`). |

### Profiling

Expand Down
1 change: 1 addition & 0 deletions models/uri_drain/template_miner.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def __init__(self,
max_children=self.config.drain_max_children,
max_clusters=self.config.drain_max_clusters,
extra_delimiters=self.config.drain_extra_delimiters,
combine_min_url_count=self.config.drain_combine_min_url_count,
profiler=self.profiler,
param_str=param_str,
# param_extra=param_extra, # MODIFIED:: for URI Drain < It is now a dict since contains multiple types
Expand Down
3 changes: 3 additions & 0 deletions models/uri_drain/template_miner_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def __init__(self):
self.drain_max_children = 100
self.drain_max_clusters = None
self.drain_analysis_min_url_count = 20
self.drain_combine_min_url_count = 8
self.masking_instructions = []
self.mask_prefix = "<"
self.mask_suffix = ">"
Expand Down Expand Up @@ -82,6 +83,8 @@ def load(self, config_filename: str):
self.parameter_extraction_cache_capacity)
self.drain_analysis_min_url_count = self.read_config_value(parser, section_drain, 'analysis_min_url_count', int,
self.drain_analysis_min_url_count)
self.drain_combine_min_url_count = self.read_config_value(parser, section_drain, 'combine_min_url_count', int,
self.drain_combine_min_url_count)

masking_instructions = []
masking_list = json.loads(masking_instructions_str)
Expand Down
45 changes: 39 additions & 6 deletions models/uri_drain/uri_drain.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,13 @@


class LogCluster: # TODO Modified:: Changed to URICluster
__slots__ = ["log_template_tokens", "cluster_id", "size"]
__slots__ = ["log_template_tokens", "cluster_id", "size", "latest_urls"]

def __init__(self, log_template_tokens: list, cluster_id: int):
def __init__(self, log_template_tokens: list, cluster_id: int, combine_min_url_count: int):
self.log_template_tokens = tuple(log_template_tokens)
self.cluster_id = cluster_id
self.size = 1
self.latest_urls = LRUCache(combine_min_url_count+1)

def get_template(self):
# Modified:: Changed to join by slash instead of space for
Expand All @@ -47,6 +48,27 @@ def get_template(self):
template = '/'.join(self.log_template_tokens)
return f'/{template}'

def adding_url(self, url: str):
if self.latest_urls.__contains__(url):
return
self.latest_urls[url] = True

def __str__(self):
# return f"ID={str(self.cluster_id).ljust(5)} : size={str(self.size).ljust(10)}: {self.get_template()}"
return f"size={str(self.size).ljust(10)}: {self.get_template()}"


class SingleURILogCluster:
__slots__ = ["uri", "cluster_id", "size"]

def __init__(self, uri: str):
self.uri = uri
self.cluster_id = -1
self.size = 1

def get_template(self):
return self.uri

def __str__(self):
# return f"ID={str(self.cluster_id).ljust(5)} : size={str(self.size).ljust(10)}: {self.get_template()}"
return f"size={str(self.size).ljust(10)}: {self.get_template()}"
Expand Down Expand Up @@ -83,6 +105,7 @@ def __init__(self,
sim_th=0.4,
max_children=100,
max_clusters=None,
combine_min_url_count=8,
extra_delimiters=(),
profiler: Profiler = NullProfiler(),
param_str="{var}", # Modified:: required param_str
Expand Down Expand Up @@ -116,6 +139,7 @@ def __init__(self,
self.max_node_depth = depth - 2 # max depth of a prefix tree node, starting from zero
self.sim_th = sim_th
self.max_children = max_children
self.combine_min_url_count = combine_min_url_count
self.root_node = Node()
self.profiler = profiler
self.extra_delimiters = extra_delimiters
Expand All @@ -133,7 +157,14 @@ def __init__(self,

@property
def clusters(self):
return self.id_to_cluster.values()
result = []
for cluster in self.id_to_cluster.values():
if cluster.latest_urls and cluster.latest_urls.__len__() >= self.combine_min_url_count:
result.append(cluster)
continue
for url, _ in cluster.latest_urls.items():
result.append(SingleURILogCluster(url))
return result

@property
def cluster_patterns(self):
Expand Down Expand Up @@ -245,7 +276,7 @@ def add_log_message(self, content: str):
self.profiler.start_section("create_cluster")
self.clusters_counter += 1
cluster_id = self.clusters_counter
match_cluster = LogCluster(content_tokens, cluster_id)
match_cluster = LogCluster(content_tokens, cluster_id, self.combine_min_url_count)
self.id_to_cluster[cluster_id] = match_cluster
self.add_seq_to_prefix_tree(self.root_node, match_cluster)
update_type = "cluster_created"
Expand All @@ -261,7 +292,7 @@ def add_log_message(self, content: str):
update_type = "rejected (create new)"
self.clusters_counter += 1
cluster_id = self.clusters_counter
match_cluster = LogCluster(content_tokens, cluster_id)
match_cluster = LogCluster(content_tokens, cluster_id, self.combine_min_url_count)
self.id_to_cluster[cluster_id] = match_cluster
self.add_seq_to_prefix_tree(self.root_node, match_cluster)
match_cluster.size -= 1
Expand All @@ -278,6 +309,7 @@ def add_log_message(self, content: str):
if self.profiler:
self.profiler.end_section()

match_cluster.adding_url(content)
return match_cluster, update_type

def get_total_cluster_size(self):
Expand Down Expand Up @@ -315,12 +347,13 @@ def __init__(self,
sim_th=0.4,
max_children=100,
max_clusters=None,
combine_min_url_count=8,
extra_delimiters=(),
profiler: Profiler = NullProfiler(),
param_str="<*>",
# param_extra=None, # Modified:: Added param_extra
parametrize_numeric_tokens=True):
super().__init__(depth, sim_th, max_children, max_clusters, extra_delimiters, profiler, param_str,
super().__init__(depth, sim_th, max_children, max_clusters, combine_min_url_count, extra_delimiters, profiler, param_str,
# param_extra,
parametrize_numeric_tokens)

Expand Down
1 change: 1 addition & 0 deletions servers/simple/uri_drain.ini
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ max_children = ${DRAIN_MAX_CHILDREN:100}
max_clusters = ${DRAIN_MAX_CLUSTERS:1024}
extra_delimiters = ${DRAIN_EXTRA_DELIMITERS:["/"]}
analysis_min_url_count = ${DRAIN_ANALYSIS_MIN_URL_COUNT:20}
combine_min_url_count = ${DRAIN_COMBINE_MIN_URL_COUNT:8}

[PROFILING]
enabled = ${PROFILING_ENABLED:False}
Expand Down
4 changes: 1 addition & 3 deletions test/e2e/expected/endpoint_counterexamples.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.

patterns:
- "/api/v1/usernames/{var}"
- "/api/v1/users/{var}"
patterns: []
version: "1"
18 changes: 0 additions & 18 deletions test/e2e/expected/endpoint_hard.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,32 +13,14 @@
# limitations under the License.

patterns:
- /api-this-is-a-special-case/v99999999999999999/orders/delete/{var}
- /api-this-is-a-special-case/v99999999999999999/orders/reorder/{var}
- /api-this-is-a-special-case/v99999999999999999/orders/update/{var}
- /api/v1/bills/{var}
- /api/v1/companies/{var}
- /api/v1/companies/{var}/employees/{var}/reviews/{var}
- /api/v1/companies/{var}/tasks/{var}/assignees/{var}
- /api/v1/projects/{var}
- /api/v1/services/{var}
- /api/v1/users/{var}/posts/{var}/comments
- /api/v1/users/{var}/posts/{var}/comments/{var}
- /api/v1/wallets/{var}
- /api/v2/admin/users/{var}
- /api/v2/courses/{var}/modules/{var}/lessons
- /api/v2/customers/{var}
- /api/v3/products/{var}/reviews/{var}/comments
- /api/v3/providers/{var}
- /api/v4/orders/{var}/items/{var}/tracking
- /customer/{var}
- /customer/{var}/order/{var}
- /customer/{var}/profile/{var}/compare/{var}/profile/{var}
- ABC/{var}
- HikariCP/Connection/{var}
- google.com/api/v1/users/{var}
- http://www.google.com/api/v1/users/{var}
- https://www.google.com/api/v1/users/{var}
- top1.abc.example.com.net.cn/api/v1/users/{var}
- www.google.com/api/v1/users/{var}
version: '1'
16 changes: 0 additions & 16 deletions test/e2e/expected/endpoint_hard_3k.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,30 +13,14 @@
# limitations under the License.

patterns:
- /api-this-is-a-special-case/v99999999999999999/orders/delete/{var}
- /api-this-is-a-special-case/v99999999999999999/orders/reorder/{var}
- /api-this-is-a-special-case/v99999999999999999/orders/update/{var}
- /api/v1/bills/{var}
- /api/v1/companies/{var}
- /api/v1/companies/{var}/employees/{var}/reviews/{var}
- /api/v1/companies/{var}/tasks/{var}/assignees/{var}
- /api/v1/projects/{var}
- /api/v1/services/{var}
- /api/v1/users/{var}/posts/{var}/comments
- /api/v1/users/{var}/posts/{var}/comments/{var}
- /api/v1/wallets/{var}
- /api/v2/admin/users/{var}
- /api/v2/courses/{var}/modules/{var}/lessons
- /api/v2/customers/{var}
- /api/v3/products/{var}/reviews/{var}/comments
- /api/v3/providers/{var}
- /api/v4/orders/{var}/items/{var}/tracking
- /customer/{var}
- /customer/{var}/order/{var}
- /customer/{var}/profile/{var}/compare/{var}/profile/{var}
- google.com/api/v1/users/{var}
- http://www.google.com/api/v1/users/{var}
- https://www.google.com/api/v1/users/{var}
- top1.abc.example.com.net.cn/api/v1/users/{var}
- www.google.com/api/v1/users/{var}
version: '1'
10 changes: 0 additions & 10 deletions test/e2e/expected/endpoint_trivial.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,8 @@

patterns:
- /api/v1/accounts/{var}
- /api/v1/invoices/{var}
- /api/v1/orders/{var}
- /api/v1/posts/{var}
- /api/v1/products/{var}
- /api/v1/users/{var}
- /api/v2/data/users/{var}
- /api/v999/orders/{var}
- /product/{var}
- /user/{var}
- /user/{var}/post/{var}
- /user/{var}/profile/{var}/compare/{var}/profile/{var}
- GET:/api/v1/users/{var}
- http://www.google.com/api/v1/users/{var}
- https://www.google.com/api/v1/users/{var}
version: '1'
7 changes: 0 additions & 7 deletions test/e2e/expected/endpoint_trivial_3k.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,4 @@ patterns:
- /api/v1/posts/{var}
- /api/v1/products/{var}
- /api/v1/users/{var}
- /api/v2/data/users/{var}
- /api/v999/orders/{var}
- /product/{var}
- /user/{var}
- /user/{var}/post/{var}
- /user/{var}/profile/{var}/compare/{var}/profile/{var}
- GET:/api/v1/users/{var}
version: '1'

0 comments on commit 9daff10

Please sign in to comment.