From 8378a679a5076eddaeb4445368fbbfce310208bb Mon Sep 17 00:00:00 2001 From: Vito Piserchia Date: Sat, 19 Feb 2022 14:55:25 +0100 Subject: [PATCH 01/22] allow dotted group names in patterns; those will be translated in nested fields in the resulting match --- pygrok/pygrok.py | 54 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/pygrok/pygrok.py b/pygrok/pygrok.py index da8951b..b3da0df 100644 --- a/pygrok/pygrok.py +++ b/pygrok/pygrok.py @@ -1,8 +1,33 @@ try: + from regex._regex_core import error import regex as re -except ImportError as e: + + def parse_name(source, allow_numeric=False, allow_group_0=False): + "Parses a name." + name = source.get_while(set(")>"), include=False) + + if not name: + raise error("missing group name", source.string, source.pos) + + if name.isdigit(): + min_group = 0 if allow_group_0 else 1 + if not allow_numeric or int(name) < min_group: + raise error("bad character in group name", source.string, + source.pos) + else: + if not name.replace(".","").isidentifier(): + raise error("character in group name", source.string, + source.pos) + + return name + + # this allows dots in the group names + re._regex_core.parse_name = parse_name + +except ImportError: # If you import re, grok_match can't handle regular expression containing atomic group(?>) import re + import codecs import os import pkg_resources @@ -58,9 +83,9 @@ def match(self, text): matches[key] = int(match) if self.type_mapper[key] == "float": matches[key] = float(match) - except (TypeError, KeyError) as e: + except (TypeError, KeyError): pass - return matches + return unflatten(matches) def set_search_pattern(self, pattern=None): if type(pattern) is not str: @@ -73,16 +98,16 @@ def _load_search_pattern(self): py_regex_pattern = self.pattern while True: # Finding all types specified in the groks - m = re.findall(r"%{(\w+):(\w+):(\w+)}", py_regex_pattern) + m = re.findall(r"%{(\w+):\[?([\w\.?]+)\]?:(\w+)}", 
py_regex_pattern) for n in m: self.type_mapper[n[1]] = n[2] # replace %{pattern_name:custom_name} (or %{pattern_name:custom_name:type} # with regex and regex group name py_regex_pattern = re.sub( - r"%{(\w+):(\w+)(?::\w+)?}", + r"%{(\w+):(\[?[\w\]\[\.]+\]?)(?::\w+)?}", lambda m: "(?P<" - + m.group(2) + + m.group(2).replace("][", ".").replace("[", "").replace("]", "") + ">" + self.predefined_patterns[m.group(1)].regex_str + ")", @@ -108,7 +133,7 @@ def _load_search_pattern(self): py_regex_pattern, ) - if re.search("%{\w+(:\w+)?}", py_regex_pattern) is None: + if re.search("%{\w+(:\[?[\w\.\]\[]+\]?)?}", py_regex_pattern) is None: break self.regex_obj = re.compile(py_regex_pattern) @@ -146,6 +171,20 @@ def _load_patterns_from_file(file): return patterns +def unflatten(dictionary, nullable=False): + resultDict = dict() + for key, value in dictionary.items(): + if nullable or value is not None: + parts = key.split(".") + d = resultDict + for part in parts[:-1]: + if part not in d: + d[part] = dict() + d = d[part] + d[parts[-1]] = value + return resultDict + + class Pattern(object): """ """ @@ -160,3 +199,4 @@ def __str__(self): self.regex_str, self.sub_patterns, ) + From 67ee8f58dbe59b5fa66b3bd47228378e52ccbcaa Mon Sep 17 00:00:00 2001 From: Vito Piserchia Date: Sat, 19 Feb 2022 14:57:00 +0100 Subject: [PATCH 02/22] Added latest ECS ready grok patterns --- pygrok/patterns/aws | 34 ++++++------ pygrok/patterns/bacula | 56 +++++++++---------- pygrok/patterns/bind | 4 +- pygrok/patterns/bro | 34 ++++++------ pygrok/patterns/exim | 16 +++--- pygrok/patterns/firewalls | 100 +++++++++++++++++----------------- pygrok/patterns/grok-patterns | 8 +-- pygrok/patterns/haproxy | 22 ++++---- pygrok/patterns/httpd | 12 ++-- pygrok/patterns/java | 8 +-- pygrok/patterns/junos | 10 ++-- pygrok/patterns/linux-syslog | 10 ++-- pygrok/patterns/mcollective | 2 +- pygrok/patterns/mongodb | 6 +- pygrok/patterns/nagios | 56 +++++++++---------- pygrok/patterns/postgresql | 2 +- 
pygrok/patterns/rails | 12 ++-- pygrok/patterns/redis | 4 +- pygrok/patterns/ruby | 2 +- pygrok/patterns/squid | 6 +- pygrok/patterns/zeek | 40 +++++++------- 21 files changed, 222 insertions(+), 222 deletions(-) diff --git a/pygrok/patterns/aws b/pygrok/patterns/aws index ad1ed1c..35d1467 100644 --- a/pygrok/patterns/aws +++ b/pygrok/patterns/aws @@ -1,28 +1,28 @@ -S3_REQUEST_LINE (?:%{WORD:[http][request][method]} %{NOTSPACE:[url][original]}(?: HTTP/%{NUMBER:[http][version]})?) +S3_REQUEST_LINE (?:%{WORD:http.request.method} %{NOTSPACE:url.original}(?: HTTP/%{NUMBER:http.version})?) -S3_ACCESS_LOG %{WORD:[aws][s3access][bucket_owner]} %{NOTSPACE:[aws][s3access][bucket]} \[%{HTTPDATE:timestamp}\] (?:-|%{IP:[client][ip]}) (?:-|%{NOTSPACE:[client][user][id]}) %{NOTSPACE:[aws][s3access][request_id]} %{NOTSPACE:[aws][s3access][operation]} (?:-|%{NOTSPACE:[aws][s3access][key]}) (?:-|"%{S3_REQUEST_LINE:[aws][s3access][request_uri]}") (?:-|%{INT:[http][response][status_code]:int}) (?:-|%{NOTSPACE:[aws][s3access][error_code]}) (?:-|%{INT:[aws][s3access][bytes_sent]:int}) (?:-|%{INT:[aws][s3access][object_size]:int}) (?:-|%{INT:[aws][s3access][total_time]:int}) (?:-|%{INT:[aws][s3access][turn_around_time]:int}) "(?:-|%{DATA:[http][request][referrer]})" "(?:-|%{DATA:[user_agent][original]})" (?:-|%{NOTSPACE:[aws][s3access][version_id]})(?: (?:-|%{NOTSPACE:[aws][s3access][host_id]}) (?:-|%{NOTSPACE:[aws][s3access][signature_version]}) (?:-|%{NOTSPACE:[tls][cipher]}) (?:-|%{NOTSPACE:[aws][s3access][authentication_type]}) (?:-|%{NOTSPACE:[aws][s3access][host_header]}) (?:-|%{NOTSPACE:[aws][s3access][tls_version]}))? 
-# :long - %{INT:[aws][s3access][bytes_sent]:int} -# :long - %{INT:[aws][s3access][object_size]:int} +S3_ACCESS_LOG %{WORD:aws.s3access.bucket_owner} %{NOTSPACE:aws.s3access.bucket} \[%{HTTPDATE:timestamp}\] (?:-|%{IP:client.ip}) (?:-|%{NOTSPACE:client.user.id}) %{NOTSPACE:aws.s3access.request_id} %{NOTSPACE:aws.s3access.operation} (?:-|%{NOTSPACE:aws.s3access.key}) (?:-|"%{S3_REQUEST_LINE:aws.s3access.request_uri}") (?:-|%{INT:http.response.status_code:int}) (?:-|%{NOTSPACE:aws.s3access.error_code}) (?:-|%{INT:aws.s3access.bytes_sent:long}) (?:-|%{INT:aws.s3access.object_size:long}) (?:-|%{INT:aws.s3access.total_time:int}) (?:-|%{INT:aws.s3access.turn_around_time:int}) "(?:-|%{DATA:http.request.referrer})" "(?:-|%{DATA:user_agent.original})" (?:-|%{NOTSPACE:aws.s3access.version_id})(?: (?:-|%{NOTSPACE:aws.s3access.host_id}) (?:-|%{NOTSPACE:aws.s3access.signature_version}) (?:-|%{NOTSPACE:tls.cipher}) (?:-|%{NOTSPACE:aws.s3access.authentication_type}) (?:-|%{NOTSPACE:aws.s3access.host_header}) (?:-|%{NOTSPACE:aws.s3access.tls_version}))? +# :long - %{INT:aws.s3access.bytes_sent:int} +# :long - %{INT:aws.s3access.object_size:int} -ELB_URIHOST %{IPORHOST:[url][domain]}(?::%{POSINT:[url][port]:int})? -ELB_URIPATHQUERY %{URIPATH:[url][path]}(?:\?%{URIQUERY:[url][query]})? +ELB_URIHOST %{IPORHOST:url.domain}(?::%{POSINT:url.port:int})? +ELB_URIPATHQUERY %{URIPATH:url.path}(?:\?%{URIQUERY:url.query})? # deprecated - old name: ELB_URIPATHPARAM %{ELB_URIPATHQUERY} -ELB_URI %{URIPROTO:[url][scheme]}://(?:%{USER:[url][username]}(?::[^@]*)?@)?(?:%{ELB_URIHOST})?(?:%{ELB_URIPATHQUERY})? +ELB_URI %{URIPROTO:url.scheme}://(?:%{USER:url.username}(?::[^@]*)?@)?(?:%{ELB_URIHOST})?(?:%{ELB_URIPATHQUERY})? -ELB_REQUEST_LINE (?:%{WORD:[http][request][method]} %{ELB_URI:[url][original]}(?: HTTP/%{NUMBER:[http][version]})?) +ELB_REQUEST_LINE (?:%{WORD:http.request.method} %{ELB_URI:url.original}(?: HTTP/%{NUMBER:http.version})?) 
# pattern supports 'regular' HTTP ELB format -ELB_V1_HTTP_LOG %{TIMESTAMP_ISO8601:timestamp} %{NOTSPACE:[aws][elb][name]} %{IP:[source][ip]}:%{INT:[source][port]:int} (?:-|(?:%{IP:[aws][elb][backend][ip]}:%{INT:[aws][elb][backend][port]:int})) (?:-1|%{NUMBER:[aws][elb][request_processing_time][sec]:float}) (?:-1|%{NUMBER:[aws][elb][backend_processing_time][sec]:float}) (?:-1|%{NUMBER:[aws][elb][response_processing_time][sec]:float}) %{INT:[http][response][status_code]:int} (?:-|%{INT:[aws][elb][backend][http][response][status_code]:int}) %{INT:[http][request][body][bytes]:int} %{INT:[http][response][body][bytes]:int} "%{ELB_REQUEST_LINE}"(?: "(?:-|%{DATA:[user_agent][original]})" (?:-|%{NOTSPACE:[tls][cipher]}) (?:-|%{NOTSPACE:[aws][elb][ssl_protocol]}))? -# :long - %{INT:[http][request][body][bytes]:int} -# :long - %{INT:[http][response][body][bytes]:int} +ELB_V1_HTTP_LOG %{TIMESTAMP_ISO8601:timestamp} %{NOTSPACE:aws.elb.name} %{IP:source.ip}:%{INT:source.port:int} (?:-|(?:%{IP:aws.elb.backend.ip}:%{INT:aws.elb.backend.port:int})) (?:-1|%{NUMBER:aws.elb.request_processing_time.sec:float}) (?:-1|%{NUMBER:aws.elb.backend_processing_time.sec:float}) (?:-1|%{NUMBER:aws.elb.response_processing_time.sec:float}) %{INT:http.response.status_code:int} (?:-|%{INT:aws.elb.backend.http.response.status_code:int}) %{INT:http.request.body.bytes:long} %{INT:http.response.body.bytes:long} "%{ELB_REQUEST_LINE}"(?: "(?:-|%{DATA:user_agent.original})" (?:-|%{NOTSPACE:tls.cipher}) (?:-|%{NOTSPACE:aws.elb.ssl_protocol}))? 
+# :long - %{INT:http.request.body.bytes:int} +# :long - %{INT:http.response.body.bytes:int} ELB_ACCESS_LOG %{ELB_V1_HTTP_LOG} # pattern used to match a shorted format, that's why we have the optional part (starting with *http.version*) at the end -CLOUDFRONT_ACCESS_LOG (?%{YEAR}-%{MONTHNUM}-%{MONTHDAY}\t%{TIME})\t%{WORD:[aws][cloudfront][x_edge_location]}\t(?:-|%{INT:[destination][bytes]:int})\t%{IPORHOST:[source][ip]}\t%{WORD:[http][request][method]}\t%{HOSTNAME:[url][domain]}\t%{NOTSPACE:[url][path]}\t(?:(?:000)|%{INT:[http][response][status_code]:int})\t(?:-|%{DATA:[http][request][referrer]})\t%{DATA:[user_agent][original]}\t(?:-|%{DATA:[url][query]})\t(?:-|%{DATA:[aws][cloudfront][http][request][cookie]})\t%{WORD:[aws][cloudfront][x_edge_result_type]}\t%{NOTSPACE:[aws][cloudfront][x_edge_request_id]}\t%{HOSTNAME:[aws][cloudfront][http][request][host]}\t%{URIPROTO:[network][protocol]}\t(?:-|%{INT:[source][bytes]:int})\t%{NUMBER:[aws][cloudfront][time_taken]:float}\t(?:-|%{IP:[network][forwarded_ip]})\t(?:-|%{DATA:[aws][cloudfront][ssl_protocol]})\t(?:-|%{NOTSPACE:[tls][cipher]})\t%{WORD:[aws][cloudfront][x_edge_response_result_type]}(?:\t(?:-|HTTP/%{NUMBER:[http][version]})\t(?:-|%{DATA:[aws][cloudfront][fle_status]})\t(?:-|%{DATA:[aws][cloudfront][fle_encrypted_fields]})\t%{INT:[source][port]:int}\t%{NUMBER:[aws][cloudfront][time_to_first_byte]:float}\t(?:-|%{DATA:[aws][cloudfront][x_edge_detailed_result_type]})\t(?:-|%{NOTSPACE:[http][request][mime_type]})\t(?:-|%{INT:[aws][cloudfront][http][request][size]:int})\t(?:-|%{INT:[aws][cloudfront][http][request][range][start]:int})\t(?:-|%{INT:[aws][cloudfront][http][request][range][end]:int}))? 
-# :long - %{INT:[destination][bytes]:int} -# :long - %{INT:[source][bytes]:int} -# :long - %{INT:[aws][cloudfront][http][request][size]:int} -# :long - %{INT:[aws][cloudfront][http][request][range][start]:int} -# :long - %{INT:[aws][cloudfront][http][request][range][end]:int} +CLOUDFRONT_ACCESS_LOG (?%{YEAR}-%{MONTHNUM}-%{MONTHDAY}\t%{TIME})\t%{WORD:aws.cloudfront.x_edge_location}\t(?:-|%{INT:destination.bytes:long})\t%{IPORHOST:source.ip}\t%{WORD:http.request.method}\t%{HOSTNAME:url.domain}\t%{NOTSPACE:url.path}\t(?:(?:000)|%{INT:http.response.status_code:int})\t(?:-|%{DATA:http.request.referrer})\t%{DATA:user_agent.original}\t(?:-|%{DATA:url.query})\t(?:-|%{DATA:aws.cloudfront.http.request.cookie})\t%{WORD:aws.cloudfront.x_edge_result_type}\t%{NOTSPACE:aws.cloudfront.x_edge_request_id}\t%{HOSTNAME:aws.cloudfront.http.request.host}\t%{URIPROTO:network.protocol}\t(?:-|%{INT:source.bytes:long})\t%{NUMBER:aws.cloudfront.time_taken:float}\t(?:-|%{IP:network.forwarded_ip})\t(?:-|%{DATA:aws.cloudfront.ssl_protocol})\t(?:-|%{NOTSPACE:tls.cipher})\t%{WORD:aws.cloudfront.x_edge_response_result_type}(?:\t(?:-|HTTP/%{NUMBER:http.version})\t(?:-|%{DATA:aws.cloudfront.fle_status})\t(?:-|%{DATA:aws.cloudfront.fle_encrypted_fields})\t%{INT:source.port:int}\t%{NUMBER:aws.cloudfront.time_to_first_byte:float}\t(?:-|%{DATA:aws.cloudfront.x_edge_detailed_result_type})\t(?:-|%{NOTSPACE:http.request.mime_type})\t(?:-|%{INT:aws.cloudfront.http.request.size:long})\t(?:-|%{INT:aws.cloudfront.http.request.range.start:long})\t(?:-|%{INT:aws.cloudfront.http.request.range.end:long}))? 
+# :long - %{INT:destination.bytes:int} +# :long - %{INT:source.bytes:int} +# :long - %{INT:aws.cloudfront.http.request.size:int} +# :long - %{INT:aws.cloudfront.http.request.range.start:int} +# :long - %{INT:aws.cloudfront.http.request.range.end:int} diff --git a/pygrok/patterns/bacula b/pygrok/patterns/bacula index 4ab42b3..169defd 100644 --- a/pygrok/patterns/bacula +++ b/pygrok/patterns/bacula @@ -7,47 +7,47 @@ BACULA_CAPACITY %{INT}{1,3}(,%{INT}{3})* BACULA_VERSION %{USER} BACULA_JOB %{USER} -BACULA_LOG_MAX_CAPACITY User defined maximum volume capacity %{BACULA_CAPACITY:[bacula][volume][max_capacity]} exceeded on device \"%{BACULA_DEVICE:[bacula][volume][device]}\" \(%{BACULA_DEVICEPATH:[bacula][volume][path]}\).? -BACULA_LOG_END_VOLUME End of medium on Volume \"%{BACULA_VOLUME:[bacula][volume][name]}\" Bytes=%{BACULA_CAPACITY:[bacula][volume][bytes]} Blocks=%{BACULA_CAPACITY:[bacula][volume][blocks]} at %{BACULA_TIMESTAMP:[bacula][timestamp]}. -BACULA_LOG_NEW_VOLUME Created new Volume \"%{BACULA_VOLUME:[bacula][volume][name]}\" in catalog. -BACULA_LOG_NEW_LABEL Labeled new Volume \"%{BACULA_VOLUME:[bacula][volume][name]}\" on (?:file )?device \"%{BACULA_DEVICE:[bacula][volume][device]}\" \(%{BACULA_DEVICEPATH:[bacula][volume][path]}\). -BACULA_LOG_WROTE_LABEL Wrote label to prelabeled Volume \"%{BACULA_VOLUME:[bacula][volume][name]}\" on device \"%{BACULA_DEVICE:[bacula][volume][device]}\" \(%{BACULA_DEVICEPATH:[bacula][volume][path]}\) -BACULA_LOG_NEW_MOUNT New volume \"%{BACULA_VOLUME:[bacula][volume][name]}\" mounted on device \"%{BACULA_DEVICE:[bacula][volume][device]}\" \(%{BACULA_DEVICEPATH:[bacula][volume][path]}\) at %{BACULA_TIMESTAMP:[bacula][timestamp]}. 
-BACULA_LOG_NOOPEN \s*Cannot open %{DATA}: ERR=%{GREEDYDATA:[error][message]} -BACULA_LOG_NOOPENDIR \s*Could not open directory \"?%{DATA:[file][path]}\"?: ERR=%{GREEDYDATA:[error][message]} -BACULA_LOG_NOSTAT \s*Could not stat %{DATA:[file][path]}: ERR=%{GREEDYDATA:[error][message]} -BACULA_LOG_NOJOBS There are no more Jobs associated with Volume \"%{BACULA_VOLUME:[bacula][volume][name]}\". Marking it purged. -BACULA_LOG_ALL_RECORDS_PRUNED .*?All records pruned from Volume \"%{BACULA_VOLUME:[bacula][volume][name]}\"; marking it \"Purged\" +BACULA_LOG_MAX_CAPACITY User defined maximum volume capacity %{BACULA_CAPACITY:bacula.volume.max_capacity} exceeded on device \"%{BACULA_DEVICE:bacula.volume.device}\" \(%{BACULA_DEVICEPATH:bacula.volume.path}\).? +BACULA_LOG_END_VOLUME End of medium on Volume \"%{BACULA_VOLUME:bacula.volume.name}\" Bytes=%{BACULA_CAPACITY:bacula.volume.bytes} Blocks=%{BACULA_CAPACITY:bacula.volume.blocks} at %{BACULA_TIMESTAMP:bacula.timestamp}. +BACULA_LOG_NEW_VOLUME Created new Volume \"%{BACULA_VOLUME:bacula.volume.name}\" in catalog. +BACULA_LOG_NEW_LABEL Labeled new Volume \"%{BACULA_VOLUME:bacula.volume.name}\" on (?:file )?device \"%{BACULA_DEVICE:bacula.volume.device}\" \(%{BACULA_DEVICEPATH:bacula.volume.path}\). +BACULA_LOG_WROTE_LABEL Wrote label to prelabeled Volume \"%{BACULA_VOLUME:bacula.volume.name}\" on device \"%{BACULA_DEVICE:bacula.volume.device}\" \(%{BACULA_DEVICEPATH:bacula.volume.path}\) +BACULA_LOG_NEW_MOUNT New volume \"%{BACULA_VOLUME:bacula.volume.name}\" mounted on device \"%{BACULA_DEVICE:bacula.volume.device}\" \(%{BACULA_DEVICEPATH:bacula.volume.path}\) at %{BACULA_TIMESTAMP:bacula.timestamp}. 
+BACULA_LOG_NOOPEN \s*Cannot open %{DATA}: ERR=%{GREEDYDATA:error.message} +BACULA_LOG_NOOPENDIR \s*Could not open directory \"?%{DATA:file.path}\"?: ERR=%{GREEDYDATA:error.message} +BACULA_LOG_NOSTAT \s*Could not stat %{DATA:file.path}: ERR=%{GREEDYDATA:error.message} +BACULA_LOG_NOJOBS There are no more Jobs associated with Volume \"%{BACULA_VOLUME:bacula.volume.name}\". Marking it purged. +BACULA_LOG_ALL_RECORDS_PRUNED .*?All records pruned from Volume \"%{BACULA_VOLUME:bacula.volume.name}\"; marking it \"Purged\" BACULA_LOG_BEGIN_PRUNE_JOBS Begin pruning Jobs older than %{INT} month %{INT} days . BACULA_LOG_BEGIN_PRUNE_FILES Begin pruning Files. -BACULA_LOG_PRUNED_JOBS Pruned %{INT} Jobs* for client %{BACULA_HOST:[bacula][client][name]} from catalog. -BACULA_LOG_PRUNED_FILES Pruned Files from %{INT} Jobs* for client %{BACULA_HOST:[bacula][client][name]} from catalog. +BACULA_LOG_PRUNED_JOBS Pruned %{INT} Jobs* for client %{BACULA_HOST:bacula.client.name} from catalog. +BACULA_LOG_PRUNED_FILES Pruned Files from %{INT} Jobs* for client %{BACULA_HOST:bacula.client.name} from catalog. BACULA_LOG_ENDPRUNE End auto prune. -BACULA_LOG_STARTJOB Start Backup JobId %{INT}, Job=%{BACULA_JOB:[bacula][job][name]} -BACULA_LOG_STARTRESTORE Start Restore Job %{BACULA_JOB:[bacula][job][name]} -BACULA_LOG_USEDEVICE Using Device \"%{BACULA_DEVICE:[bacula][volume][device]}\" +BACULA_LOG_STARTJOB Start Backup JobId %{INT}, Job=%{BACULA_JOB:bacula.job.name} +BACULA_LOG_STARTRESTORE Start Restore Job %{BACULA_JOB:bacula.job.name} +BACULA_LOG_USEDEVICE Using Device \"%{BACULA_DEVICE:bacula.volume.device}\" BACULA_LOG_DIFF_FS \s*%{UNIXPATH} is a different filesystem. Will not descend from %{UNIXPATH} into it. -BACULA_LOG_JOBEND Job write elapsed time = %{DATA:[bacula][job][elapsed_time]}, Transfer rate = %{NUMBER} (K|M|G)? Bytes/second +BACULA_LOG_JOBEND Job write elapsed time = %{DATA:bacula.job.elapsed_time}, Transfer rate = %{NUMBER} (K|M|G)? 
Bytes/second BACULA_LOG_NOPRUNE_JOBS No Jobs found to prune. BACULA_LOG_NOPRUNE_FILES No Files found to prune. -BACULA_LOG_VOLUME_PREVWRITTEN Volume \"?%{BACULA_VOLUME:[bacula][volume][name]}\"? previously written, moving to end of data. -BACULA_LOG_READYAPPEND Ready to append to end of Volume \"%{BACULA_VOLUME:[bacula][volume][name]}\" size=%{INT:[bacula][volume][size]:int} -# :long - %{INT:[bacula][volume][size]:int} -BACULA_LOG_CANCELLING Cancelling duplicate JobId=%{INT:[bacula][job][other_id]}. -BACULA_LOG_MARKCANCEL JobId %{INT:[bacula][job][id]}, Job %{BACULA_JOB:[bacula][job][name]} marked to be canceled. -BACULA_LOG_CLIENT_RBJ shell command: run ClientRunBeforeJob \"%{GREEDYDATA:[bacula][job][client_run_before_command]}\" +BACULA_LOG_VOLUME_PREVWRITTEN Volume \"?%{BACULA_VOLUME:bacula.volume.name}\"? previously written, moving to end of data. +BACULA_LOG_READYAPPEND Ready to append to end of Volume \"%{BACULA_VOLUME:bacula.volume.name}\" size=%{INT:bacula.volume.size:long} +# :long - %{INT:bacula.volume.size:int} +BACULA_LOG_CANCELLING Cancelling duplicate JobId=%{INT:bacula.job.other_id}. +BACULA_LOG_MARKCANCEL JobId %{INT:bacula.job.id}, Job %{BACULA_JOB:bacula.job.name} marked to be canceled. +BACULA_LOG_CLIENT_RBJ shell command: run ClientRunBeforeJob \"%{GREEDYDATA:bacula.job.client_run_before_command}\" BACULA_LOG_VSS (Generate )?VSS (Writer)? BACULA_LOG_MAXSTART Fatal [eE]rror: Job canceled because max start delay time exceeded. -BACULA_LOG_DUPLICATE Fatal [eE]rror: JobId %{INT:[bacula][job][other_id]} already running. Duplicate job not allowed. +BACULA_LOG_DUPLICATE Fatal [eE]rror: JobId %{INT:bacula.job.other_id} already running. Duplicate job not allowed. BACULA_LOG_NOJOBSTAT Fatal [eE]rror: No Job status returned from FD. -BACULA_LOG_FATAL_CONN Fatal [eE]rror: bsock.c:133 Unable to connect to (Client: %{BACULA_HOST:[bacula][client][name]}|Storage daemon) on %{IPORHOST:[client][address]}:%{POSINT:[client][port]:int}. 
ERR=%{GREEDYDATA:[error][message]} -BACULA_LOG_NO_CONNECT Warning: bsock.c:127 Could not connect to (Client: %{BACULA_HOST:[bacula][client][name]}|Storage daemon) on %{IPORHOST:[client][address]}:%{POSINT:[client][port]:int}. ERR=%{GREEDYDATA:[error][message]} -BACULA_LOG_NO_AUTH Fatal error: Unable to authenticate with File daemon at \"?%{IPORHOST:[client][address]}(?::%{POSINT:[client][port]:int})?\"?. Possible causes: +BACULA_LOG_FATAL_CONN Fatal [eE]rror: bsock.c:133 Unable to connect to (Client: %{BACULA_HOST:bacula.client.name}|Storage daemon) on %{IPORHOST:client.address}:%{POSINT:client.port:int}. ERR=%{GREEDYDATA:error.message} +BACULA_LOG_NO_CONNECT Warning: bsock.c:127 Could not connect to (Client: %{BACULA_HOST:bacula.client.name}|Storage daemon) on %{IPORHOST:client.address}:%{POSINT:client.port:int}. ERR=%{GREEDYDATA:error.message} +BACULA_LOG_NO_AUTH Fatal error: Unable to authenticate with File daemon at \"?%{IPORHOST:client.address}(?::%{POSINT:client.port:int})?\"?. Possible causes: BACULA_LOG_NOSUIT No prior or suitable Full backup found in catalog. Doing FULL backup. BACULA_LOG_NOPRIOR No prior Full backup Job record found. BACULA_LOG_JOB (Error: )?Bacula %{BACULA_HOST} %{BACULA_VERSION} \(%{BACULA_VERSION}\): -BACULA_LOG %{BACULA_TIMESTAMP:timestamp} %{BACULA_HOST:[host][hostname]}(?: JobId %{INT:[bacula][job][id]})?:? 
(%{BACULA_LOG_MAX_CAPACITY}|%{BACULA_LOG_END_VOLUME}|%{BACULA_LOG_NEW_VOLUME}|%{BACULA_LOG_NEW_LABEL}|%{BACULA_LOG_WROTE_LABEL}|%{BACULA_LOG_NEW_MOUNT}|%{BACULA_LOG_NOOPEN}|%{BACULA_LOG_NOOPENDIR}|%{BACULA_LOG_NOSTAT}|%{BACULA_LOG_NOJOBS}|%{BACULA_LOG_ALL_RECORDS_PRUNED}|%{BACULA_LOG_BEGIN_PRUNE_JOBS}|%{BACULA_LOG_BEGIN_PRUNE_FILES}|%{BACULA_LOG_PRUNED_JOBS}|%{BACULA_LOG_PRUNED_FILES}|%{BACULA_LOG_ENDPRUNE}|%{BACULA_LOG_STARTJOB}|%{BACULA_LOG_STARTRESTORE}|%{BACULA_LOG_USEDEVICE}|%{BACULA_LOG_DIFF_FS}|%{BACULA_LOG_JOBEND}|%{BACULA_LOG_NOPRUNE_JOBS}|%{BACULA_LOG_NOPRUNE_FILES}|%{BACULA_LOG_VOLUME_PREVWRITTEN}|%{BACULA_LOG_READYAPPEND}|%{BACULA_LOG_CANCELLING}|%{BACULA_LOG_MARKCANCEL}|%{BACULA_LOG_CLIENT_RBJ}|%{BACULA_LOG_VSS}|%{BACULA_LOG_MAXSTART}|%{BACULA_LOG_DUPLICATE}|%{BACULA_LOG_NOJOBSTAT}|%{BACULA_LOG_FATAL_CONN}|%{BACULA_LOG_NO_CONNECT}|%{BACULA_LOG_NO_AUTH}|%{BACULA_LOG_NOSUIT}|%{BACULA_LOG_JOB}|%{BACULA_LOG_NOPRIOR}) +BACULA_LOG %{BACULA_TIMESTAMP:timestamp} %{BACULA_HOST:host.hostname}(?: JobId %{INT:bacula.job.id})?:? 
(%{BACULA_LOG_MAX_CAPACITY}|%{BACULA_LOG_END_VOLUME}|%{BACULA_LOG_NEW_VOLUME}|%{BACULA_LOG_NEW_LABEL}|%{BACULA_LOG_WROTE_LABEL}|%{BACULA_LOG_NEW_MOUNT}|%{BACULA_LOG_NOOPEN}|%{BACULA_LOG_NOOPENDIR}|%{BACULA_LOG_NOSTAT}|%{BACULA_LOG_NOJOBS}|%{BACULA_LOG_ALL_RECORDS_PRUNED}|%{BACULA_LOG_BEGIN_PRUNE_JOBS}|%{BACULA_LOG_BEGIN_PRUNE_FILES}|%{BACULA_LOG_PRUNED_JOBS}|%{BACULA_LOG_PRUNED_FILES}|%{BACULA_LOG_ENDPRUNE}|%{BACULA_LOG_STARTJOB}|%{BACULA_LOG_STARTRESTORE}|%{BACULA_LOG_USEDEVICE}|%{BACULA_LOG_DIFF_FS}|%{BACULA_LOG_JOBEND}|%{BACULA_LOG_NOPRUNE_JOBS}|%{BACULA_LOG_NOPRUNE_FILES}|%{BACULA_LOG_VOLUME_PREVWRITTEN}|%{BACULA_LOG_READYAPPEND}|%{BACULA_LOG_CANCELLING}|%{BACULA_LOG_MARKCANCEL}|%{BACULA_LOG_CLIENT_RBJ}|%{BACULA_LOG_VSS}|%{BACULA_LOG_MAXSTART}|%{BACULA_LOG_DUPLICATE}|%{BACULA_LOG_NOJOBSTAT}|%{BACULA_LOG_FATAL_CONN}|%{BACULA_LOG_NO_CONNECT}|%{BACULA_LOG_NO_AUTH}|%{BACULA_LOG_NOSUIT}|%{BACULA_LOG_JOB}|%{BACULA_LOG_NOPRIOR}) # old (deprecated) name : BACULA_LOGLINE %{BACULA_LOG} diff --git a/pygrok/patterns/bind b/pygrok/patterns/bind index 9d07e0d..ec212de 100644 --- a/pygrok/patterns/bind +++ b/pygrok/patterns/bind @@ -5,9 +5,9 @@ BIND9_CATEGORY (?:queries) # dns.question.class is static - only 'IN' is supported by Bind9 # bind.log.question.name is expected to be a 'duplicate' (same as the dns.question.name capture) -BIND9_QUERYLOGBASE client(:? @0x(?:[0-9A-Fa-f]+))? %{IP:[client][ip]}#%{POSINT:[client][port]:int} \(%{GREEDYDATA:[bind][log][question][name]}\): query: %{GREEDYDATA:[dns][question][name]} (?<[dns][question][class]>IN) %{BIND9_DNSTYPE:[dns][question][type]}(:? %{DATA:[bind][log][question][flags]})? \(%{IP:[server][ip]}\) +BIND9_QUERYLOGBASE client(:? @0x(?:[0-9A-Fa-f]+))? %{IP:client.ip}#%{POSINT:client.port:int} \(%{GREEDYDATA:bind.log.question.name}\): query: %{GREEDYDATA:dns.question.name} (?IN) %{BIND9_DNSTYPE:dns.question.type}(:? %{DATA:bind.log.question.flags})? 
\(%{IP:server.ip}\) # for query-logging category and severity are always fixed as "queries: info: " -BIND9_QUERYLOG %{BIND9_TIMESTAMP:timestamp} %{BIND9_CATEGORY:[bing][log][category]}: %{LOGLEVEL:[log][level]}: %{BIND9_QUERYLOGBASE} +BIND9_QUERYLOG %{BIND9_TIMESTAMP:timestamp} %{BIND9_CATEGORY:bing.log.category}: %{LOGLEVEL:log.level}: %{BIND9_QUERYLOGBASE} BIND9 %{BIND9_QUERYLOG} diff --git a/pygrok/patterns/bro b/pygrok/patterns/bro index 7f7ea00..dc38d5a 100644 --- a/pygrok/patterns/bro +++ b/pygrok/patterns/bro @@ -5,26 +5,26 @@ BRO_BOOL [TF] BRO_DATA [^\t]+ # http.log - old format (before the Zeek rename) : -BRO_HTTP %{NUMBER:timestamp}\t%{NOTSPACE:[zeek][session_id]}\t%{IP:[source][ip]}\t%{INT:[source][port]:int}\t%{IP:[destination][ip]}\t%{INT:[destination][port]:int}\t%{INT:[zeek][http][trans_depth]:int}\t(?:-|%{WORD:[http][request][method]})\t(?:-|%{BRO_DATA:[url][domain]})\t(?:-|%{BRO_DATA:[url][original]})\t(?:-|%{BRO_DATA:[http][request][referrer]})\t(?:-|%{BRO_DATA:[user_agent][original]})\t(?:-|%{NUMBER:[http][request][body][bytes]:int})\t(?:-|%{NUMBER:[http][response][body][bytes]:int})\t(?:-|%{POSINT:[http][response][status_code]:int})\t(?:-|%{DATA:[zeek][http][status_msg]})\t(?:-|%{POSINT:[zeek][http][info_code]:int})\t(?:-|%{DATA:[zeek][http][info_msg]})\t(?:-|%{BRO_DATA:[zeek][http][filename]})\t(?:\(empty\)|%{BRO_DATA:[zeek][http][tags]})\t(?:-|%{BRO_DATA:[url][username]})\t(?:-|%{BRO_DATA:[url][password]})\t(?:-|%{BRO_DATA:[zeek][http][proxied]})\t(?:-|%{BRO_DATA:[zeek][http][orig_fuids]})\t(?:-|%{BRO_DATA:[http][request][mime_type]})\t(?:-|%{BRO_DATA:[zeek][http][resp_fuids]})\t(?:-|%{BRO_DATA:[http][response][mime_type]}) -# :long - %{NUMBER:[http][request][body][bytes]:int} -# :long - %{NUMBER:[http][response][body][bytes]:int} +BRO_HTTP 
%{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{INT:source.port:int}\t%{IP:destination.ip}\t%{INT:destination.port:int}\t%{INT:zeek.http.trans_depth:int}\t(?:-|%{WORD:http.request.method})\t(?:-|%{BRO_DATA:url.domain})\t(?:-|%{BRO_DATA:url.original})\t(?:-|%{BRO_DATA:http.request.referrer})\t(?:-|%{BRO_DATA:user_agent.original})\t(?:-|%{NUMBER:http.request.body.bytes:long})\t(?:-|%{NUMBER:http.response.body.bytes:long})\t(?:-|%{POSINT:http.response.status_code:int})\t(?:-|%{DATA:zeek.http.status_msg})\t(?:-|%{POSINT:zeek.http.info_code:int})\t(?:-|%{DATA:zeek.http.info_msg})\t(?:-|%{BRO_DATA:zeek.http.filename})\t(?:\(empty\)|%{BRO_DATA:zeek.http.tags})\t(?:-|%{BRO_DATA:url.username})\t(?:-|%{BRO_DATA:url.password})\t(?:-|%{BRO_DATA:zeek.http.proxied})\t(?:-|%{BRO_DATA:zeek.http.orig_fuids})\t(?:-|%{BRO_DATA:http.request.mime_type})\t(?:-|%{BRO_DATA:zeek.http.resp_fuids})\t(?:-|%{BRO_DATA:http.response.mime_type}) +# :long - %{NUMBER:http.request.body.bytes:int} +# :long - %{NUMBER:http.response.body.bytes:int} # dns.log - old format -BRO_DNS %{NUMBER:timestamp}\t%{NOTSPACE:[zeek][session_id]}\t%{IP:[source][ip]}\t%{INT:[source][port]:int}\t%{IP:[destination][ip]}\t%{INT:[destination][port]:int}\t%{WORD:[network][transport]}\t(?:-|%{INT:[dns][id]:int})\t(?:-|%{BRO_DATA:[dns][question][name]})\t(?:-|%{INT:[zeek][dns][qclass]:int})\t(?:-|%{BRO_DATA:[zeek][dns][qclass_name]})\t(?:-|%{INT:[zeek][dns][qtype]:int})\t(?:-|%{BRO_DATA:[dns][question][type]})\t(?:-|%{INT:[zeek][dns][rcode]:int})\t(?:-|%{BRO_DATA:[dns][response_code]})\t(?:-|%{BRO_BOOL:[zeek][dns][AA]})\t(?:-|%{BRO_BOOL:[zeek][dns][TC]})\t(?:-|%{BRO_BOOL:[zeek][dns][RD]})\t(?:-|%{BRO_BOOL:[zeek][dns][RA]})\t(?:-|%{NONNEGINT:[zeek][dns][Z]:int})\t(?:-|%{BRO_DATA:[zeek][dns][answers]})\t(?:-|%{DATA:[zeek][dns][TTLs]})\t(?:-|%{BRO_BOOL:[zeek][dns][rejected]}) +BRO_DNS 
%{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{INT:source.port:int}\t%{IP:destination.ip}\t%{INT:destination.port:int}\t%{WORD:network.transport}\t(?:-|%{INT:dns.id:int})\t(?:-|%{BRO_DATA:dns.question.name})\t(?:-|%{INT:zeek.dns.qclass:int})\t(?:-|%{BRO_DATA:zeek.dns.qclass_name})\t(?:-|%{INT:zeek.dns.qtype:int})\t(?:-|%{BRO_DATA:dns.question.type})\t(?:-|%{INT:zeek.dns.rcode:int})\t(?:-|%{BRO_DATA:dns.response_code})\t(?:-|%{BRO_BOOL:zeek.dns.AA})\t(?:-|%{BRO_BOOL:zeek.dns.TC})\t(?:-|%{BRO_BOOL:zeek.dns.RD})\t(?:-|%{BRO_BOOL:zeek.dns.RA})\t(?:-|%{NONNEGINT:zeek.dns.Z:int})\t(?:-|%{BRO_DATA:zeek.dns.answers})\t(?:-|%{DATA:zeek.dns.TTLs})\t(?:-|%{BRO_BOOL:zeek.dns.rejected}) # conn.log - old bro, also supports 'newer' format (optional *zeek.connection.local_resp* flag) compared to non-ecs mode -BRO_CONN %{NUMBER:timestamp}\t%{NOTSPACE:[zeek][session_id]}\t%{IP:[source][ip]}\t%{INT:[source][port]:int}\t%{IP:[destination][ip]}\t%{INT:[destination][port]:int}\t%{WORD:[network][transport]}\t(?:-|%{BRO_DATA:[network][protocol]})\t(?:-|%{NUMBER:[zeek][connection][duration]:float})\t(?:-|%{INT:[zeek][connection][orig_bytes]:int})\t(?:-|%{INT:[zeek][connection][resp_bytes]:int})\t(?:-|%{BRO_DATA:[zeek][connection][state]})\t(?:-|%{BRO_BOOL:[zeek][connection][local_orig]})\t(?:(?:-|%{BRO_BOOL:[zeek][connection][local_resp]})\t)?(?:-|%{INT:[zeek][connection][missed_bytes]:int})\t(?:-|%{BRO_DATA:[zeek][connection][history]})\t(?:-|%{INT:[source][packets]:int})\t(?:-|%{INT:[source][bytes]:int})\t(?:-|%{INT:[destination][packets]:int})\t(?:-|%{INT:[destination][bytes]:int})\t(?:\(empty\)|%{BRO_DATA:[zeek][connection][tunnel_parents]}) -# :long - %{INT:[zeek][connection][orig_bytes]:int} -# :long - %{INT:[zeek][connection][resp_bytes]:int} -# :long - %{INT:[zeek][connection][missed_bytes]:int} -# :long - %{INT:[source][packets]:int} -# :long - %{INT:[source][bytes]:int} -# :long - %{INT:[destination][packets]:int} -# :long - %{INT:[destination][bytes]:int} 
+BRO_CONN %{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{INT:source.port:int}\t%{IP:destination.ip}\t%{INT:destination.port:int}\t%{WORD:network.transport}\t(?:-|%{BRO_DATA:network.protocol})\t(?:-|%{NUMBER:zeek.connection.duration:float})\t(?:-|%{INT:zeek.connection.orig_bytes:long})\t(?:-|%{INT:zeek.connection.resp_bytes:long})\t(?:-|%{BRO_DATA:zeek.connection.state})\t(?:-|%{BRO_BOOL:zeek.connection.local_orig})\t(?:(?:-|%{BRO_BOOL:zeek.connection.local_resp})\t)?(?:-|%{INT:zeek.connection.missed_bytes:long})\t(?:-|%{BRO_DATA:zeek.connection.history})\t(?:-|%{INT:source.packets:long})\t(?:-|%{INT:source.bytes:long})\t(?:-|%{INT:destination.packets:long})\t(?:-|%{INT:destination.bytes:long})\t(?:\(empty\)|%{BRO_DATA:zeek.connection.tunnel_parents}) +# :long - %{INT:zeek.connection.orig_bytes:int} +# :long - %{INT:zeek.connection.resp_bytes:int} +# :long - %{INT:zeek.connection.missed_bytes:int} +# :long - %{INT:source.packets:int} +# :long - %{INT:source.bytes:int} +# :long - %{INT:destination.packets:int} +# :long - %{INT:destination.bytes:int} # files.log - old format -BRO_FILES 
%{NUMBER:timestamp}\t%{NOTSPACE:[zeek][files][fuid]}\t(?:-|%{IP:[server][ip]})\t(?:-|%{IP:[client][ip]})\t(?:-|%{BRO_DATA:[zeek][files][session_ids]})\t(?:-|%{BRO_DATA:[zeek][files][source]})\t(?:-|%{INT:[zeek][files][depth]:int})\t(?:-|%{BRO_DATA:[zeek][files][analyzers]})\t(?:-|%{BRO_DATA:[file][mime_type]})\t(?:-|%{BRO_DATA:[file][name]})\t(?:-|%{NUMBER:[zeek][files][duration]:float})\t(?:-|%{BRO_DATA:[zeek][files][local_orig]})\t(?:-|%{BRO_BOOL:[zeek][files][is_orig]})\t(?:-|%{INT:[zeek][files][seen_bytes]:int})\t(?:-|%{INT:[file][size]:int})\t(?:-|%{INT:[zeek][files][missing_bytes]:int})\t(?:-|%{INT:[zeek][files][overflow_bytes]:int})\t(?:-|%{BRO_BOOL:[zeek][files][timedout]})\t(?:-|%{BRO_DATA:[zeek][files][parent_fuid]})\t(?:-|%{BRO_DATA:[file][hash][md5]})\t(?:-|%{BRO_DATA:[file][hash][sha1]})\t(?:-|%{BRO_DATA:[file][hash][sha256]})\t(?:-|%{BRO_DATA:[zeek][files][extracted]}) -# :long - %{INT:[zeek][files][seen_bytes]:int} -# :long - %{INT:[file][size]:int} -# :long - %{INT:[zeek][files][missing_bytes]:int} -# :long - %{INT:[zeek][files][overflow_bytes]:int} +BRO_FILES %{NUMBER:timestamp}\t%{NOTSPACE:zeek.files.fuid}\t(?:-|%{IP:server.ip})\t(?:-|%{IP:client.ip})\t(?:-|%{BRO_DATA:zeek.files.session_ids})\t(?:-|%{BRO_DATA:zeek.files.source})\t(?:-|%{INT:zeek.files.depth:int})\t(?:-|%{BRO_DATA:zeek.files.analyzers})\t(?:-|%{BRO_DATA:file.mime_type})\t(?:-|%{BRO_DATA:file.name})\t(?:-|%{NUMBER:zeek.files.duration:float})\t(?:-|%{BRO_DATA:zeek.files.local_orig})\t(?:-|%{BRO_BOOL:zeek.files.is_orig})\t(?:-|%{INT:zeek.files.seen_bytes:long})\t(?:-|%{INT:file.size:long})\t(?:-|%{INT:zeek.files.missing_bytes:long})\t(?:-|%{INT:zeek.files.overflow_bytes:long})\t(?:-|%{BRO_BOOL:zeek.files.timedout})\t(?:-|%{BRO_DATA:zeek.files.parent_fuid})\t(?:-|%{BRO_DATA:file.hash.md5})\t(?:-|%{BRO_DATA:file.hash.sha1})\t(?:-|%{BRO_DATA:file.hash.sha256})\t(?:-|%{BRO_DATA:zeek.files.extracted}) +# :long - %{INT:zeek.files.seen_bytes:int} +# :long - %{INT:file.size:int} +# :long - 
%{INT:zeek.files.missing_bytes:int} +# :long - %{INT:zeek.files.overflow_bytes:int} diff --git a/pygrok/patterns/exim b/pygrok/patterns/exim index 2899b36..dba7950 100644 --- a/pygrok/patterns/exim +++ b/pygrok/patterns/exim @@ -7,20 +7,20 @@ EXIM_MSGID [0-9A-Za-z]{6}-[0-9A-Za-z]{6}-[0-9A-Za-z]{2} # == delivery deferred; temporary problem EXIM_FLAGS (?:<=|=>|->|\*>|\*\*|==|<>|>>) EXIM_DATE (:?%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{TIME}) -EXIM_PID \[%{POSINT:[process][pid]:int}\] +EXIM_PID \[%{POSINT:process.pid:int}\] EXIM_QT ((\d+y)?(\d+w)?(\d+d)?(\d+h)?(\d+m)?(\d+s)?) EXIM_EXCLUDE_TERMS (Message is frozen|(Start|End) queue run| Warning: | retry time not reached | no (IP address|host name) found for (IP address|host) | unexpected disconnection while reading SMTP command | no immediate delivery: |another process is handling this message) -EXIM_REMOTE_HOST (H=(%{NOTSPACE:[source][address]} )?(\(%{NOTSPACE:[exim][log][remote_address]}\) )?\[%{IP:[source][ip]}\](?::%{POSINT:[source][port]:int})?) -EXIM_INTERFACE (I=\[%{IP:[destination][ip]}\](?::%{NUMBER:[destination][port]:int})) -EXIM_PROTOCOL (P=%{NOTSPACE:[network][protocol]}) -EXIM_MSG_SIZE (S=%{NUMBER:[exim][log][message][size]:int}) -EXIM_HEADER_ID (id=%{NOTSPACE:[exim][log][header_id]}) +EXIM_REMOTE_HOST (H=(%{NOTSPACE:source.address} )?(\(%{NOTSPACE:exim.log.remote_address}\) )?\[%{IP:source.ip}\](?::%{POSINT:source.port:int})?) 
+EXIM_INTERFACE (I=\[%{IP:destination.ip}\](?::%{NUMBER:destination.port:int})) +EXIM_PROTOCOL (P=%{NOTSPACE:network.protocol}) +EXIM_MSG_SIZE (S=%{NUMBER:exim.log.message.size:int}) +EXIM_HEADER_ID (id=%{NOTSPACE:exim.log.header_id}) EXIM_QUOTED_CONTENT (?:\\.|[^\\"])* -EXIM_SUBJECT (T="%{EXIM_QUOTED_CONTENT:[exim][log][message][subject]}") +EXIM_SUBJECT (T="%{EXIM_QUOTED_CONTENT:exim.log.message.subject}") EXIM_UNKNOWN_FIELD (?:[A-Za-z0-9]{1,4}=(?:%{QUOTEDSTRING}|%{NOTSPACE})) EXIM_NAMED_FIELDS (?: (?:%{EXIM_REMOTE_HOST}|%{EXIM_INTERFACE}|%{EXIM_PROTOCOL}|%{EXIM_MSG_SIZE}|%{EXIM_HEADER_ID}|%{EXIM_SUBJECT}|%{EXIM_UNKNOWN_FIELD}))* -EXIM_MESSAGE_ARRIVAL %{EXIM_DATE:timestamp} (?:%{EXIM_PID} )?%{EXIM_MSGID:[exim][log][message][id]} (?<[exim][log][flags]><=) (?<[exim][log][status]>[a-z:] )?%{EMAILADDRESS:[exim][log][sender][email]}%{EXIM_NAMED_FIELDS}(?:(?: from ?)? for %{EMAILADDRESS:[exim][log][recipient][email]})? +EXIM_MESSAGE_ARRIVAL %{EXIM_DATE:timestamp} (?:%{EXIM_PID} )?%{EXIM_MSGID:exim.log.message.id} (?<exim.log.flags><=) (?<exim.log.status>[a-z:] )?%{EMAILADDRESS:exim.log.sender.email}%{EXIM_NAMED_FIELDS}(?:(?: from ?)? for %{EMAILADDRESS:exim.log.recipient.email})? 
EXIM %{EXIM_MESSAGE_ARRIVAL} diff --git a/pygrok/patterns/firewalls b/pygrok/patterns/firewalls index 938ea1e..892b3a5 100644 --- a/pygrok/patterns/firewalls +++ b/pygrok/patterns/firewalls @@ -1,10 +1,10 @@ # NetScreen firewall logs -NETSCREENSESSIONLOG %{SYSLOGTIMESTAMP:timestamp} %{IPORHOST:[observer][hostname]} %{NOTSPACE:[observer][name]}: (?<[observer][product]>NetScreen) device_id=%{WORD:[netscreen][device_id]} .*?(system-\w+-%{NONNEGINT:[event][code]}\(%{WORD:[netscreen][session][type]}\))?: start_time="%{DATA:[netscreen][session][start_time]}" duration=%{INT:[netscreen][session][duration]:int} policy_id=%{INT:[netscreen][policy_id]} service=%{DATA:[netscreen][service]} proto=%{INT:[netscreen][protocol_number]:int} src zone=%{WORD:[observer][ingress][zone]} dst zone=%{WORD:[observer][egress][zone]} action=%{WORD:[event][action]} sent=%{INT:[source][bytes]:int} rcvd=%{INT:[destination][bytes]:int} src=%{IPORHOST:[source][address]} dst=%{IPORHOST:[destination][address]}(?: src_port=%{INT:[source][port]:int} dst_port=%{INT:[destination][port]:int})?(?: src-xlated ip=%{IP:[source][nat][ip]} port=%{INT:[source][nat][port]:int} dst-xlated ip=%{IP:[destination][nat][ip]} port=%{INT:[destination][nat][port]:int})?(?: session_id=%{INT:[netscreen][session][id]} reason=%{GREEDYDATA:[netscreen][session][reason]})? 
-# :long - %{INT:[source][bytes]:int} -# :long - %{INT:[destination][bytes]:int} +NETSCREENSESSIONLOG %{SYSLOGTIMESTAMP:timestamp} %{IPORHOST:observer.hostname} %{NOTSPACE:observer.name}: (?<observer.product>NetScreen) device_id=%{WORD:netscreen.device_id} .*?(system-\w+-%{NONNEGINT:event.code}\(%{WORD:netscreen.session.type}\))?: start_time="%{DATA:netscreen.session.start_time}" duration=%{INT:netscreen.session.duration:int} policy_id=%{INT:netscreen.policy_id} service=%{DATA:netscreen.service} proto=%{INT:netscreen.protocol_number:int} src zone=%{WORD:observer.ingress.zone} dst zone=%{WORD:observer.egress.zone} action=%{WORD:event.action} sent=%{INT:source.bytes:long} rcvd=%{INT:destination.bytes:long} src=%{IPORHOST:source.address} dst=%{IPORHOST:destination.address}(?: src_port=%{INT:source.port:int} dst_port=%{INT:destination.port:int})?(?: src-xlated ip=%{IP:source.nat.ip} port=%{INT:source.nat.port:int} dst-xlated ip=%{IP:destination.nat.ip} port=%{INT:destination.nat.port:int})?(?: session_id=%{INT:netscreen.session.id} reason=%{GREEDYDATA:netscreen.session.reason})? +# :long - %{INT:source.bytes:int} +# :long - %{INT:destination.bytes:int} #== Cisco ASA == -CISCO_TAGGED_SYSLOG ^<%{POSINT:[log][syslog][priority]:int}>%{CISCOTIMESTAMP:timestamp}( %{SYSLOGHOST:[host][hostname]})? ?: %%{CISCOTAG:[cisco][asa][tag]}: +CISCO_TAGGED_SYSLOG ^<%{POSINT:log.syslog.priority:int}>%{CISCOTIMESTAMP:timestamp}( %{SYSLOGHOST:host.hostname})? ?: %%{CISCOTAG:cisco.asa.tag}: CISCOTIMESTAMP %{MONTH} +%{MONTHDAY}(?: %{YEAR})? 
%{TIME} CISCOTAG [A-Z0-9]+-%{INT}-(?:[A-Z0-9_]+) # Common Particles @@ -14,98 +14,98 @@ CISCO_DIRECTION Inbound|inbound|Outbound|outbound CISCO_INTERVAL first hit|%{INT}-second interval CISCO_XLATE_TYPE static|dynamic # helpers -CISCO_HITCOUNT_INTERVAL hit-cnt %{INT:[cisco][asa][hit_count]:int} (?:first hit|%{INT:[cisco][asa][interval]:int}-second interval) -CISCO_SRC_IP_USER %{NOTSPACE:[observer][ingress][interface][name]}:%{IP:[source][ip]}(?:\(%{DATA:[source][user][name]}\))? -CISCO_DST_IP_USER %{NOTSPACE:[observer][egress][interface][name]}:%{IP:[destination][ip]}(?:\(%{DATA:[destination][user][name]}\))? -CISCO_SRC_HOST_PORT_USER %{NOTSPACE:[observer][ingress][interface][name]}:(?:(?:%{IP:[source][ip]})|(?:%{HOSTNAME:[source][address]}))(?:/%{INT:[source][port]:int})?(?:\(%{DATA:[source][user][name]}\))? -CISCO_DST_HOST_PORT_USER %{NOTSPACE:[observer][egress][interface][name]}:(?:(?:%{IP:[destination][ip]})|(?:%{HOSTNAME:[destination][address]}))(?:/%{INT:[destination][port]:int})?(?:\(%{DATA:[destination][user][name]}\))? +CISCO_HITCOUNT_INTERVAL hit-cnt %{INT:cisco.asa.hit_count:int} (?:first hit|%{INT:cisco.asa.interval:int}-second interval) +CISCO_SRC_IP_USER %{NOTSPACE:observer.ingress.interface.name}:%{IP:source.ip}(?:\(%{DATA:source.user.name}\))? +CISCO_DST_IP_USER %{NOTSPACE:observer.egress.interface.name}:%{IP:destination.ip}(?:\(%{DATA:destination.user.name}\))? +CISCO_SRC_HOST_PORT_USER %{NOTSPACE:observer.ingress.interface.name}:(?:(?:%{IP:source.ip})|(?:%{HOSTNAME:source.address}))(?:/%{INT:source.port:int})?(?:\(%{DATA:source.user.name}\))? +CISCO_DST_HOST_PORT_USER %{NOTSPACE:observer.egress.interface.name}:(?:(?:%{IP:destination.ip})|(?:%{HOSTNAME:destination.address}))(?:/%{INT:destination.port:int})?(?:\(%{DATA:destination.user.name}\))? 
# ASA-1-104001 -CISCOFW104001 \((?:Primary|Secondary)\) Switching to ACTIVE - %{GREEDYDATA:[event][reason]} +CISCOFW104001 \((?:Primary|Secondary)\) Switching to ACTIVE - %{GREEDYDATA:event.reason} # ASA-1-104002 -CISCOFW104002 \((?:Primary|Secondary)\) Switching to STANDBY - %{GREEDYDATA:[event][reason]} +CISCOFW104002 \((?:Primary|Secondary)\) Switching to STANDBY - %{GREEDYDATA:event.reason} # ASA-1-104003 CISCOFW104003 \((?:Primary|Secondary)\) Switching to FAILED\. # ASA-1-104004 CISCOFW104004 \((?:Primary|Secondary)\) Switching to OK\. # ASA-1-105003 -CISCOFW105003 \((?:Primary|Secondary)\) Monitoring on [Ii]nterface %{NOTSPACE:[network][interface][name]} waiting +CISCOFW105003 \((?:Primary|Secondary)\) Monitoring on [Ii]nterface %{NOTSPACE:network.interface.name} waiting # ASA-1-105004 -CISCOFW105004 \((?:Primary|Secondary)\) Monitoring on [Ii]nterface %{NOTSPACE:[network][interface][name]} normal +CISCOFW105004 \((?:Primary|Secondary)\) Monitoring on [Ii]nterface %{NOTSPACE:network.interface.name} normal # ASA-1-105005 -CISCOFW105005 \((?:Primary|Secondary)\) Lost Failover communications with mate on [Ii]nterface %{NOTSPACE:[network][interface][name]} +CISCOFW105005 \((?:Primary|Secondary)\) Lost Failover communications with mate on [Ii]nterface %{NOTSPACE:network.interface.name} # ASA-1-105008 -CISCOFW105008 \((?:Primary|Secondary)\) Testing [Ii]nterface %{NOTSPACE:[network][interface][name]} +CISCOFW105008 \((?:Primary|Secondary)\) Testing [Ii]nterface %{NOTSPACE:network.interface.name} # ASA-1-105009 -CISCOFW105009 \((?:Primary|Secondary)\) Testing on [Ii]nterface %{NOTSPACE:[network][interface][name]} (?:Passed|Failed) +CISCOFW105009 \((?:Primary|Secondary)\) Testing on [Ii]nterface %{NOTSPACE:network.interface.name} (?:Passed|Failed) # ASA-2-106001 -CISCOFW106001 %{CISCO_DIRECTION:[cisco][asa][network][direction]} %{WORD:[cisco][asa][network][transport]} connection %{CISCO_ACTION:[cisco][asa][outcome]} from %{IP:[source][ip]}/%{INT:[source][port]:int} 
to %{IP:[destination][ip]}/%{INT:[destination][port]:int} flags %{DATA:[cisco][asa][tcp_flags]} on interface %{NOTSPACE:[observer][egress][interface][name]} +CISCOFW106001 %{CISCO_DIRECTION:cisco.asa.network.direction} %{WORD:cisco.asa.network.transport} connection %{CISCO_ACTION:cisco.asa.outcome} from %{IP:source.ip}/%{INT:source.port:int} to %{IP:destination.ip}/%{INT:destination.port:int} flags %{DATA:cisco.asa.tcp_flags} on interface %{NOTSPACE:observer.egress.interface.name} # ASA-2-106006, ASA-2-106007, ASA-2-106010 -CISCOFW106006_106007_106010 %{CISCO_ACTION:[cisco][asa][outcome]} %{CISCO_DIRECTION:[cisco][asa][network][direction]} %{WORD:[cisco][asa][network][transport]} (?:from|src) %{IP:[source][ip]}/%{INT:[source][port]:int}(?:\(%{DATA:[source][user][name]}\))? (?:to|dst) %{IP:[destination][ip]}/%{INT:[destination][port]:int}(?:\(%{DATA:[destination][user][name]}\))? (?:(?:on interface %{NOTSPACE:[observer][egress][interface][name]})|(?:due to %{CISCO_REASON:[event][reason]})) +CISCOFW106006_106007_106010 %{CISCO_ACTION:cisco.asa.outcome} %{CISCO_DIRECTION:cisco.asa.network.direction} %{WORD:cisco.asa.network.transport} (?:from|src) %{IP:source.ip}/%{INT:source.port:int}(?:\(%{DATA:source.user.name}\))? (?:to|dst) %{IP:destination.ip}/%{INT:destination.port:int}(?:\(%{DATA:destination.user.name}\))? 
(?:(?:on interface %{NOTSPACE:observer.egress.interface.name})|(?:due to %{CISCO_REASON:event.reason})) # ASA-3-106014 -CISCOFW106014 %{CISCO_ACTION:[cisco][asa][outcome]} %{CISCO_DIRECTION:[cisco][asa][network][direction]} %{WORD:[cisco][asa][network][transport]} src %{CISCO_SRC_IP_USER} dst %{CISCO_DST_IP_USER}\s?\(type %{INT:[cisco][asa][icmp_type]:int}, code %{INT:[cisco][asa][icmp_code]:int}\) +CISCOFW106014 %{CISCO_ACTION:cisco.asa.outcome} %{CISCO_DIRECTION:cisco.asa.network.direction} %{WORD:cisco.asa.network.transport} src %{CISCO_SRC_IP_USER} dst %{CISCO_DST_IP_USER}\s?\(type %{INT:cisco.asa.icmp_type:int}, code %{INT:cisco.asa.icmp_code:int}\) # ASA-6-106015 -CISCOFW106015 %{CISCO_ACTION:[cisco][asa][outcome]} %{WORD:[cisco][asa][network][transport]} \(%{DATA:[cisco][asa][rule_name]}\) from %{IP:[source][ip]}/%{INT:[source][port]:int} to %{IP:[destination][ip]}/%{INT:[destination][port]:int} flags %{DATA:[cisco][asa][tcp_flags]} on interface %{NOTSPACE:[observer][egress][interface][name]} +CISCOFW106015 %{CISCO_ACTION:cisco.asa.outcome} %{WORD:cisco.asa.network.transport} \(%{DATA:cisco.asa.rule_name}\) from %{IP:source.ip}/%{INT:source.port:int} to %{IP:destination.ip}/%{INT:destination.port:int} flags %{DATA:cisco.asa.tcp_flags} on interface %{NOTSPACE:observer.egress.interface.name} # ASA-1-106021 -CISCOFW106021 %{CISCO_ACTION:[cisco][asa][outcome]} %{WORD:[cisco][asa][network][transport]} reverse path check from %{IP:[source][ip]} to %{IP:[destination][ip]} on interface %{NOTSPACE:[observer][egress][interface][name]} +CISCOFW106021 %{CISCO_ACTION:cisco.asa.outcome} %{WORD:cisco.asa.network.transport} reverse path check from %{IP:source.ip} to %{IP:destination.ip} on interface %{NOTSPACE:observer.egress.interface.name} # ASA-4-106023 -CISCOFW106023 %{CISCO_ACTION:[cisco][asa][outcome]}(?: protocol)? 
%{WORD:[cisco][asa][network][transport]} src %{CISCO_SRC_HOST_PORT_USER} dst %{CISCO_DST_HOST_PORT_USER}( \(type %{INT:[cisco][asa][icmp_type]:int}, code %{INT:[cisco][asa][icmp_code]:int}\))? by access-group "?%{DATA:[cisco][asa][rule_name]}"? \[%{DATA:[@metadata][cisco][asa][hashcode1]}, %{DATA:[@metadata][cisco][asa][hashcode2]}\] +CISCOFW106023 %{CISCO_ACTION:cisco.asa.outcome}(?: protocol)? %{WORD:cisco.asa.network.transport} src %{CISCO_SRC_HOST_PORT_USER} dst %{CISCO_DST_HOST_PORT_USER}( \(type %{INT:cisco.asa.icmp_type:int}, code %{INT:cisco.asa.icmp_code:int}\))? by access-group "?%{DATA:cisco.asa.rule_name}"? \[%{DATA:@metadata.cisco.asa.hashcode1}, %{DATA:@metadata.cisco.asa.hashcode2}\] # ASA-4-106100, ASA-4-106102, ASA-4-106103 -CISCOFW106100_2_3 access-list %{NOTSPACE:[cisco][asa][rule_name]} %{CISCO_ACTION:[cisco][asa][outcome]} %{WORD:[cisco][asa][network][transport]} for user '%{DATA:[user][name]}' %{DATA:[observer][ingress][interface][name]}/%{IP:[source][ip]}\(%{INT:[source][port]:int}\) -> %{DATA:[observer][egress][interface][name]}/%{IP:[destination][ip]}\(%{INT:[destination][port]:int}\) %{CISCO_HITCOUNT_INTERVAL} \[%{DATA:[@metadata][cisco][asa][hashcode1]}, %{DATA:[@metadata][cisco][asa][hashcode2]}\] +CISCOFW106100_2_3 access-list %{NOTSPACE:cisco.asa.rule_name} %{CISCO_ACTION:cisco.asa.outcome} %{WORD:cisco.asa.network.transport} for user '%{DATA:user.name}' %{DATA:observer.ingress.interface.name}/%{IP:source.ip}\(%{INT:source.port:int}\) -> %{DATA:observer.egress.interface.name}/%{IP:destination.ip}\(%{INT:destination.port:int}\) %{CISCO_HITCOUNT_INTERVAL} \[%{DATA:@metadata.cisco.asa.hashcode1}, %{DATA:@metadata.cisco.asa.hashcode2}\] # ASA-5-106100 -CISCOFW106100 access-list %{NOTSPACE:[cisco][asa][rule_name]} %{CISCO_ACTION:[cisco][asa][outcome]} %{WORD:[cisco][asa][network][transport]} %{DATA:[observer][ingress][interface][name]}/%{IP:[source][ip]}\(%{INT:[source][port]:int}\)(?:\(%{DATA:[source][user][name]}\))? 
-> %{DATA:[observer][egress][interface][name]}/%{IP:[destination][ip]}\(%{INT:[destination][port]:int}\)(?:\(%{DATA:[source][user][name]}\))? hit-cnt %{INT:[cisco][asa][hit_count]:int} %{CISCO_INTERVAL} \[%{DATA:[@metadata][cisco][asa][hashcode1]}, %{DATA:[@metadata][cisco][asa][hashcode2]}\] +CISCOFW106100 access-list %{NOTSPACE:cisco.asa.rule_name} %{CISCO_ACTION:cisco.asa.outcome} %{WORD:cisco.asa.network.transport} %{DATA:observer.ingress.interface.name}/%{IP:source.ip}\(%{INT:source.port:int}\)(?:\(%{DATA:source.user.name}\))? -> %{DATA:observer.egress.interface.name}/%{IP:destination.ip}\(%{INT:destination.port:int}\)(?:\(%{DATA:source.user.name}\))? hit-cnt %{INT:cisco.asa.hit_count:int} %{CISCO_INTERVAL} \[%{DATA:@metadata.cisco.asa.hashcode1}, %{DATA:@metadata.cisco.asa.hashcode2}\] # ASA-5-304001 -CISCOFW304001 %{IP:[source][ip]}(?:\(%{DATA:[source][user][name]}\))? Accessed URL %{IP:[destination][ip]}:%{GREEDYDATA:[url][original]} +CISCOFW304001 %{IP:source.ip}(?:\(%{DATA:source.user.name}\))? 
Accessed URL %{IP:destination.ip}:%{GREEDYDATA:url.original} # ASA-6-110002 -CISCOFW110002 %{CISCO_REASON:[event][reason]} for %{WORD:[cisco][asa][network][transport]} from %{DATA:[observer][ingress][interface][name]}:%{IP:[source][ip]}/%{INT:[source][port]:int} to %{IP:[destination][ip]}/%{INT:[destination][port]:int} +CISCOFW110002 %{CISCO_REASON:event.reason} for %{WORD:cisco.asa.network.transport} from %{DATA:observer.ingress.interface.name}:%{IP:source.ip}/%{INT:source.port:int} to %{IP:destination.ip}/%{INT:destination.port:int} # ASA-6-302010 -CISCOFW302010 %{INT:[cisco][asa][connections][in_use]:int} in use, %{INT:[cisco][asa][connections][most_used]:int} most used +CISCOFW302010 %{INT:cisco.asa.connections.in_use:int} in use, %{INT:cisco.asa.connections.most_used:int} most used # ASA-6-302013, ASA-6-302014, ASA-6-302015, ASA-6-302016 -CISCOFW302013_302014_302015_302016 %{CISCO_ACTION:[cisco][asa][outcome]}(?: %{CISCO_DIRECTION:[cisco][asa][network][direction]})? %{WORD:[cisco][asa][network][transport]} connection %{INT:[cisco][asa][connection_id]} for %{NOTSPACE:[observer][ingress][interface][name]}:%{IP:[source][ip]}/%{INT:[source][port]:int}(?: \(%{IP:[source][nat][ip]}/%{INT:[source][nat][port]:int}\))?(?:\(%{DATA:[source][user][name?]}\))? to %{NOTSPACE:[observer][egress][interface][name]}:%{IP:[destination][ip]}/%{INT:[destination][port]:int}( \(%{IP:[destination][nat][ip]}/%{INT:[destination][nat][port]:int}\))?(?:\(%{DATA:[destination][user][name]}\))?( duration %{TIME:[cisco][asa][duration]} bytes %{INT:[network][bytes]:int})?(?: %{CISCO_REASON:[event][reason]})?(?: \(%{DATA:[user][name]}\))? -# :long - %{INT:[network][bytes]:int} +CISCOFW302013_302014_302015_302016 %{CISCO_ACTION:cisco.asa.outcome}(?: %{CISCO_DIRECTION:cisco.asa.network.direction})? 
%{WORD:cisco.asa.network.transport} connection %{INT:cisco.asa.connection_id} for %{NOTSPACE:observer.ingress.interface.name}:%{IP:source.ip}/%{INT:source.port:int}(?: \(%{IP:source.nat.ip}/%{INT:source.nat.port:int}\))?(?:\(%{DATA:source.user.name}\))? to %{NOTSPACE:observer.egress.interface.name}:%{IP:destination.ip}/%{INT:destination.port:int}( \(%{IP:destination.nat.ip}/%{INT:destination.nat.port:int}\))?(?:\(%{DATA:destination.user.name}\))?( duration %{TIME:cisco.asa.duration} bytes %{INT:network.bytes:long})?(?: %{CISCO_REASON:event.reason})?(?: \(%{DATA:user.name}\))? +# :long - %{INT:network.bytes:int} # ASA-6-302020, ASA-6-302021 -CISCOFW302020_302021 %{CISCO_ACTION:[cisco][asa][outcome]}(?: %{CISCO_DIRECTION:[cisco][asa][network][direction]})? %{WORD:[cisco][asa][network][transport]} connection for faddr %{IP:[destination][ip]}/%{INT:[cisco][asa][icmp_seq]:int}(?:\(%{DATA:[destination][user][name]}\))? gaddr %{IP:[source][nat][ip]}/%{INT:[cisco][asa][icmp_type]:int} laddr %{IP:[source][ip]}/%{INT}(?: \(%{DATA:[source][user][name]}\))? +CISCOFW302020_302021 %{CISCO_ACTION:cisco.asa.outcome}(?: %{CISCO_DIRECTION:cisco.asa.network.direction})? %{WORD:cisco.asa.network.transport} connection for faddr %{IP:destination.ip}/%{INT:cisco.asa.icmp_seq:int}(?:\(%{DATA:destination.user.name}\))? gaddr %{IP:source.nat.ip}/%{INT:cisco.asa.icmp_type:int} laddr %{IP:source.ip}/%{INT}(?: \(%{DATA:source.user.name}\))? # ASA-6-305011 -CISCOFW305011 %{CISCO_ACTION:[cisco][asa][outcome]} %{CISCO_XLATE_TYPE} %{WORD:[cisco][asa][network][transport]} translation from %{DATA:[observer][ingress][interface][name]}:%{IP:[source][ip]}(/%{INT:[source][port]:int})?(?:\(%{DATA:[source][user][name]}\))? 
to %{DATA:[observer][egress][interface][name]}:%{IP:[destination][ip]}/%{INT:[destination][port]:int} +CISCOFW305011 %{CISCO_ACTION:cisco.asa.outcome} %{CISCO_XLATE_TYPE} %{WORD:cisco.asa.network.transport} translation from %{DATA:observer.ingress.interface.name}:%{IP:source.ip}(/%{INT:source.port:int})?(?:\(%{DATA:source.user.name}\))? to %{DATA:observer.egress.interface.name}:%{IP:destination.ip}/%{INT:destination.port:int} # ASA-3-313001, ASA-3-313004, ASA-3-313008 -CISCOFW313001_313004_313008 %{CISCO_ACTION:[cisco][asa][outcome]} %{WORD:[cisco][asa][network][transport]} type=%{INT:[cisco][asa][icmp_type]:int}, code=%{INT:[cisco][asa][icmp_code]:int} from %{IP:[source][ip]} on interface %{NOTSPACE:[observer][egress][interface][name]}(?: to %{IP:[destination][ip]})? +CISCOFW313001_313004_313008 %{CISCO_ACTION:cisco.asa.outcome} %{WORD:cisco.asa.network.transport} type=%{INT:cisco.asa.icmp_type:int}, code=%{INT:cisco.asa.icmp_code:int} from %{IP:source.ip} on interface %{NOTSPACE:observer.egress.interface.name}(?: to %{IP:destination.ip})? # ASA-4-313005 -CISCOFW313005 %{CISCO_REASON:[event][reason]} for %{WORD:[cisco][asa][network][transport]} error message: %{WORD} src %{CISCO_SRC_IP_USER} dst %{CISCO_DST_IP_USER} \(type %{INT:[cisco][asa][icmp_type]:int}, code %{INT:[cisco][asa][icmp_code]:int}\) on %{NOTSPACE} interface\.\s+Original IP payload: %{WORD:[cisco][asa][original_ip_payload][network][transport]} src %{IP:[cisco][asa][original_ip_payload][source][ip]}/%{INT:[cisco][asa][original_ip_payload][source][port]:int}(?:\(%{DATA:[cisco][asa][original_ip_payload][source][user][name]}\))? dst %{IP:[cisco][asa][original_ip_payload][destination][ip]}/%{INT:[cisco][asa][original_ip_payload][destination][port]:int}(?:\(%{DATA:[cisco][asa][original_ip_payload][destination][user][name]}\))? 
+CISCOFW313005 %{CISCO_REASON:event.reason} for %{WORD:cisco.asa.network.transport} error message: %{WORD} src %{CISCO_SRC_IP_USER} dst %{CISCO_DST_IP_USER} \(type %{INT:cisco.asa.icmp_type:int}, code %{INT:cisco.asa.icmp_code:int}\) on %{NOTSPACE} interface\.\s+Original IP payload: %{WORD:cisco.asa.original_ip_payload.network.transport} src %{IP:cisco.asa.original_ip_payload.source.ip}/%{INT:cisco.asa.original_ip_payload.source.port:int}(?:\(%{DATA:cisco.asa.original_ip_payload.source.user.name}\))? dst %{IP:cisco.asa.original_ip_payload.destination.ip}/%{INT:cisco.asa.original_ip_payload.destination.port:int}(?:\(%{DATA:cisco.asa.original_ip_payload.destination.user.name}\))? # ASA-5-321001 -CISCOFW321001 Resource '%{DATA:[cisco][asa][resource][name]}' limit of %{POSINT:[cisco][asa][resource][limit]:int} reached for system +CISCOFW321001 Resource '%{DATA:cisco.asa.resource.name}' limit of %{POSINT:cisco.asa.resource.limit:int} reached for system # ASA-4-402117 -CISCOFW402117 %{WORD:[cisco][asa][network][type]}: Received a non-IPSec packet \(protocol=\s?%{WORD:[cisco][asa][network][transport]}\) from %{IP:[source][ip]} to %{IP:[destination][ip]}\.? +CISCOFW402117 %{WORD:cisco.asa.network.type}: Received a non-IPSec packet \(protocol=\s?%{WORD:cisco.asa.network.transport}\) from %{IP:source.ip} to %{IP:destination.ip}\.? # ASA-4-402119 -CISCOFW402119 %{WORD:[cisco][asa][network][type]}: Received an %{WORD:[cisco][asa][ipsec][protocol]} packet \(SPI=\s?%{DATA:[cisco][asa][ipsec][spi]}, sequence number=\s?%{DATA:[cisco][asa][ipsec][seq_num]}\) from %{IP:[source][ip]} \(user=\s?%{DATA:[source][user][name]}\) to %{IP:[destination][ip]} that failed anti-replay checking\.? 
+CISCOFW402119 %{WORD:cisco.asa.network.type}: Received an %{WORD:cisco.asa.ipsec.protocol} packet \(SPI=\s?%{DATA:cisco.asa.ipsec.spi}, sequence number=\s?%{DATA:cisco.asa.ipsec.seq_num}\) from %{IP:source.ip} \(user=\s?%{DATA:source.user.name}\) to %{IP:destination.ip} that failed anti-replay checking\.? # ASA-4-419001 -CISCOFW419001 %{CISCO_ACTION:[cisco][asa][outcome]} %{WORD:[cisco][asa][network][transport]} packet from %{NOTSPACE:[observer][ingress][interface][name]}:%{IP:[source][ip]}/%{INT:[source][port]:int} to %{NOTSPACE:[observer][egress][interface][name]}:%{IP:[destination][ip]}/%{INT:[destination][port]:int}, reason: %{GREEDYDATA:[event][reason]} +CISCOFW419001 %{CISCO_ACTION:cisco.asa.outcome} %{WORD:cisco.asa.network.transport} packet from %{NOTSPACE:observer.ingress.interface.name}:%{IP:source.ip}/%{INT:source.port:int} to %{NOTSPACE:observer.egress.interface.name}:%{IP:destination.ip}/%{INT:destination.port:int}, reason: %{GREEDYDATA:event.reason} # ASA-4-419002 -CISCOFW419002 %{CISCO_REASON:[event][reason]} from %{DATA:[observer][ingress][interface][name]}:%{IP:[source][ip]}/%{INT:[source][port]:int} to %{DATA:[observer][egress][interface][name]}:%{IP:[destination][ip]}/%{INT:[destination][port]:int} with different initial sequence number +CISCOFW419002 %{CISCO_REASON:event.reason} from %{DATA:observer.ingress.interface.name}:%{IP:source.ip}/%{INT:source.port:int} to %{DATA:observer.egress.interface.name}:%{IP:destination.ip}/%{INT:destination.port:int} with different initial sequence number # ASA-4-500004 -CISCOFW500004 %{CISCO_REASON:[event][reason]} for protocol=%{WORD:[cisco][asa][network][transport]}, from %{IP:[source][ip]}/%{INT:[source][port]:int} to %{IP:[destination][ip]}/%{INT:[destination][port]:int} +CISCOFW500004 %{CISCO_REASON:event.reason} for protocol=%{WORD:cisco.asa.network.transport}, from %{IP:source.ip}/%{INT:source.port:int} to %{IP:destination.ip}/%{INT:destination.port:int} # ASA-6-602303, ASA-6-602304 
-CISCOFW602303_602304 %{WORD:[cisco][asa][network][type]}: An %{CISCO_DIRECTION:[cisco][asa][network][direction]} %{DATA:[cisco][asa][ipsec][tunnel_type]} SA \(SPI=\s?%{DATA:[cisco][asa][ipsec][spi]}\) between %{IP:[source][ip]} and %{IP:[destination][ip]} \(user=\s?%{DATA:[source][user][name]}\) has been %{CISCO_ACTION:[cisco][asa][outcome]} +CISCOFW602303_602304 %{WORD:cisco.asa.network.type}: An %{CISCO_DIRECTION:cisco.asa.network.direction} %{DATA:cisco.asa.ipsec.tunnel_type} SA \(SPI=\s?%{DATA:cisco.asa.ipsec.spi}\) between %{IP:source.ip} and %{IP:destination.ip} \(user=\s?%{DATA:source.user.name}\) has been %{CISCO_ACTION:cisco.asa.outcome} # ASA-7-710001, ASA-7-710002, ASA-7-710003, ASA-7-710005, ASA-7-710006 -CISCOFW710001_710002_710003_710005_710006 %{WORD:[cisco][asa][network][transport]} (?:request|access) %{CISCO_ACTION:[cisco][asa][outcome]} from %{IP:[source][ip]}/%{INT:[source][port]:int} to %{DATA:[observer][egress][interface][name]}:%{IP:[destination][ip]}/%{INT:[destination][port]:int} +CISCOFW710001_710002_710003_710005_710006 %{WORD:cisco.asa.network.transport} (?:request|access) %{CISCO_ACTION:cisco.asa.outcome} from %{IP:source.ip}/%{INT:source.port:int} to %{DATA:observer.egress.interface.name}:%{IP:destination.ip}/%{INT:destination.port:int} # ASA-6-713172 -CISCOFW713172 Group = %{DATA:[cisco][asa][source][group]}, IP = %{IP:[source][ip]}, Automatic NAT Detection Status:\s+Remote end\s*%{DATA:[@metadata][cisco][asa][remote_nat]}\s*behind a NAT device\s+This\s+end\s*%{DATA:[@metadata][cisco][asa][local_nat]}\s*behind a NAT device +CISCOFW713172 Group = %{DATA:cisco.asa.source.group}, IP = %{IP:source.ip}, Automatic NAT Detection Status:\s+Remote end\s*%{DATA:@metadata.cisco.asa.remote_nat}\s*behind a NAT device\s+This\s+end\s*%{DATA:@metadata.cisco.asa.local_nat}\s*behind a NAT device # ASA-4-733100 -CISCOFW733100 \[\s*%{DATA:[cisco][asa][burst][object]}\s*\] drop %{DATA:[cisco][asa][burst][id]} exceeded. 
Current burst rate is %{INT:[cisco][asa][burst][current_rate]:int} per second, max configured rate is %{INT:[cisco][asa][burst][configured_rate]:int}; Current average rate is %{INT:[cisco][asa][burst][avg_rate]:int} per second, max configured rate is %{INT:[cisco][asa][burst][configured_avg_rate]:int}; Cumulative total count is %{INT:[cisco][asa][burst][cumulative_count]:int} +CISCOFW733100 \[\s*%{DATA:cisco.asa.burst.object}\s*\] drop %{DATA:cisco.asa.burst.id} exceeded. Current burst rate is %{INT:cisco.asa.burst.current_rate:int} per second, max configured rate is %{INT:cisco.asa.burst.configured_rate:int}; Current average rate is %{INT:cisco.asa.burst.avg_rate:int} per second, max configured rate is %{INT:cisco.asa.burst.configured_avg_rate:int}; Cumulative total count is %{INT:cisco.asa.burst.cumulative_count:int} #== End Cisco ASA == IPTABLES_TCP_FLAGS (CWR |ECE |URG |ACK |PSH |RST |SYN |FIN )* -IPTABLES_TCP_PART (?:SEQ=%{INT:[iptables][tcp][seq]:int}\s+)?(?:ACK=%{INT:[iptables][tcp][ack]:int}\s+)?WINDOW=%{INT:[iptables][tcp][window]:int}\s+RES=0x%{BASE16NUM:[iptables][tcp_reserved_bits]}\s+%{IPTABLES_TCP_FLAGS:[iptables][tcp][flags]} +IPTABLES_TCP_PART (?:SEQ=%{INT:iptables.tcp.seq:int}\s+)?(?:ACK=%{INT:iptables.tcp.ack:int}\s+)?WINDOW=%{INT:iptables.tcp.window:int}\s+RES=0x%{BASE16NUM:iptables.tcp_reserved_bits}\s+%{IPTABLES_TCP_FLAGS:iptables.tcp.flags} IPTABLES4_FRAG (?:(?<= )(?:CE|DF|MF))* -IPTABLES4_PART SRC=%{IPV4:[source][ip]}\s+DST=%{IPV4:[destination][ip]}\s+LEN=(?:%{INT:[iptables][length]:int})?\s+TOS=(?:0|0x%{BASE16NUM:[iptables][tos]})?\s+PREC=(?:0x%{BASE16NUM:[iptables][precedence_bits]})?\s+TTL=(?:%{INT:[iptables][ttl]:int})?\s+ID=(?:%{INT:[iptables][id]})?\s+(?:%{IPTABLES4_FRAG:[iptables][fragment_flags]})?(?:\s+FRAG: %{INT:[iptables][fragment_offset]:int})? 
-IPTABLES6_PART SRC=%{IPV6:[source][ip]}\s+DST=%{IPV6:[destination][ip]}\s+LEN=(?:%{INT:[iptables][length]:int})?\s+TC=(?:0|0x%{BASE16NUM:[iptables][tos]})?\s+HOPLIMIT=(?:%{INT:[iptables][ttl]:int})?\s+FLOWLBL=(?:%{INT:[iptables][flow_label]})? +IPTABLES4_PART SRC=%{IPV4:source.ip}\s+DST=%{IPV4:destination.ip}\s+LEN=(?:%{INT:iptables.length:int})?\s+TOS=(?:0|0x%{BASE16NUM:iptables.tos})?\s+PREC=(?:0x%{BASE16NUM:iptables.precedence_bits})?\s+TTL=(?:%{INT:iptables.ttl:int})?\s+ID=(?:%{INT:iptables.id})?\s+(?:%{IPTABLES4_FRAG:iptables.fragment_flags})?(?:\s+FRAG: %{INT:iptables.fragment_offset:int})? +IPTABLES6_PART SRC=%{IPV6:source.ip}\s+DST=%{IPV6:destination.ip}\s+LEN=(?:%{INT:iptables.length:int})?\s+TC=(?:0|0x%{BASE16NUM:iptables.tos})?\s+HOPLIMIT=(?:%{INT:iptables.ttl:int})?\s+FLOWLBL=(?:%{INT:iptables.flow_label})? -IPTABLES IN=(?:%{NOTSPACE:[observer][ingress][interface][name]})?\s+OUT=(?:%{NOTSPACE:[observer][egress][interface][name]})?\s+(?:MAC=(?:%{COMMONMAC:[destination][mac]})?(?::%{COMMONMAC:[source][mac]})?(?::[A-Fa-f0-9]{2}:[A-Fa-f0-9]{2})?\s+)?(:?%{IPTABLES4_PART}|%{IPTABLES6_PART}).*?PROTO=(?:%{WORD:[network][transport]})?\s+SPT=(?:%{INT:[source][port]:int})?\s+DPT=(?:%{INT:[destination][port]:int})?\s+(?:%{IPTABLES_TCP_PART})? +IPTABLES IN=(?:%{NOTSPACE:observer.ingress.interface.name})?\s+OUT=(?:%{NOTSPACE:observer.egress.interface.name})?\s+(?:MAC=(?:%{COMMONMAC:destination.mac})?(?::%{COMMONMAC:source.mac})?(?::[A-Fa-f0-9]{2}:[A-Fa-f0-9]{2})?\s+)?(:?%{IPTABLES4_PART}|%{IPTABLES6_PART}).*?PROTO=(?:%{WORD:network.transport})?\s+SPT=(?:%{INT:source.port:int})?\s+DPT=(?:%{INT:destination.port:int})?\s+(?:%{IPTABLES_TCP_PART})? 
# Shorewall firewall logs -SHOREWALL (?:%{SYSLOGTIMESTAMP:timestamp}) (?:%{WORD:[observer][hostname]}) .*Shorewall:(?:%{WORD:[shorewall][firewall][type]})?:(?:%{WORD:[shorewall][firewall][action]})?.*%{IPTABLES} +SHOREWALL (?:%{SYSLOGTIMESTAMP:timestamp}) (?:%{WORD:observer.hostname}) .*Shorewall:(?:%{WORD:shorewall.firewall.type})?:(?:%{WORD:shorewall.firewall.action})?.*%{IPTABLES} #== End Shorewall #== SuSE Firewall 2 == -SFW2_LOG_PREFIX SFW2\-INext\-%{NOTSPACE:[suse][firewall][action]} -SFW2 ((?:%{SYSLOGTIMESTAMP:timestamp})|(?:%{TIMESTAMP_ISO8601:timestamp}))\s*%{HOSTNAME:[observer][hostname]}.*?%{SFW2_LOG_PREFIX:[suse][firewall][log_prefix]}\s*%{IPTABLES} +SFW2_LOG_PREFIX SFW2\-INext\-%{NOTSPACE:suse.firewall.action} +SFW2 ((?:%{SYSLOGTIMESTAMP:timestamp})|(?:%{TIMESTAMP_ISO8601:timestamp}))\s*%{HOSTNAME:observer.hostname}.*?%{SFW2_LOG_PREFIX:suse.firewall.log_prefix}\s*%{IPTABLES} #== End SuSE == diff --git a/pygrok/patterns/grok-patterns b/pygrok/patterns/grok-patterns index 4fbd4a4..6f58f3f 100644 --- a/pygrok/patterns/grok-patterns +++ b/pygrok/patterns/grok-patterns @@ -34,7 +34,7 @@ HOSTPORT %{IPORHOST}:%{POSINT} # paths (only absolute paths are matched) PATH (?:%{UNIXPATH}|%{WINPATH}) -UNIXPATH (/[\w_%!$@:.,+~-]*)+ +UNIXPATH (/[[[:alnum:]]_%!$@:.,+~-]*)+ TTY (?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+)) WINPATH (?>[A-Za-z]+:|\\)(?:\\[^\\?*]*)+ URIPROTO [A-Za-z]([A-Za-z0-9+\-.]+)+ @@ -80,16 +80,16 @@ DATESTAMP_EVENTLOG %{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND} # Syslog Dates: Month Day HH:MM:SS SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME} PROG [\x21-\x5a\x5c\x5e-\x7e]+ -SYSLOGPROG %{PROG:[process][name]}(?:\[%{POSINT:[process][pid]:int}\])? +SYSLOGPROG %{PROG:process.name}(?:\[%{POSINT:process.pid:int}\])? 
SYSLOGHOST %{IPORHOST} -SYSLOGFACILITY <%{NONNEGINT:[log][syslog][facility][code]:int}.%{NONNEGINT:[log][syslog][priority]:int}> +SYSLOGFACILITY <%{NONNEGINT:log.syslog.facility.code:int}.%{NONNEGINT:log.syslog.priority:int}> HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT} # Shortcuts QS %{QUOTEDSTRING} # Log formats -SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:[host][hostname]} %{SYSLOGPROG}: +SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:host.hostname} %{SYSLOGPROG}: # Log Levels LOGLEVEL ([Aa]lert|ALERT|[Tt]race|TRACE|[Dd]ebug|DEBUG|[Nn]otice|NOTICE|[Ii]nfo?(?:rmation)?|INFO?(?:RMATION)?|[Ww]arn?(?:ing)?|WARN?(?:ING)?|[Ee]rr?(?:or)?|ERR?(?:OR)?|[Cc]rit?(?:ical)?|CRIT?(?:ICAL)?|[Ff]atal|FATAL|[Ss]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?) diff --git a/pygrok/patterns/haproxy b/pygrok/patterns/haproxy index ae61fd4..f46d4ba 100644 --- a/pygrok/patterns/haproxy +++ b/pygrok/patterns/haproxy @@ -3,8 +3,8 @@ HAPROXYTIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9]) HAPROXYDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{HAPROXYTIME}.%{INT} # Override these default patterns to parse out what is captured in your haproxy.cfg -HAPROXYCAPTUREDREQUESTHEADERS %{DATA:[haproxy][http][request][captured_headers]} -HAPROXYCAPTUREDRESPONSEHEADERS %{DATA:[haproxy][http][response][captured_headers]} +HAPROXYCAPTUREDREQUESTHEADERS %{DATA:haproxy.http.request.captured_headers} +HAPROXYCAPTUREDRESPONSEHEADERS %{DATA:haproxy.http.response.captured_headers} # Example: # These haproxy config lines will add data to the logs that are captured @@ -22,19 +22,19 @@ HAPROXYCAPTUREDRESPONSEHEADERS %{DATA:[haproxy][http][response][captured_headers # capture response header Cache-Control len 200 # capture response header Last-Modified len 200 # -# HAPROXYCAPTUREDREQUESTHEADERS 
%{DATA:[haproxy][http][request][host]}\|%{DATA:[haproxy][http][request][x_forwarded_for]}\|%{DATA:[haproxy][http][request][accept_language]}\|%{DATA:[http][request][referrer]}\|%{DATA:[user_agent][original]} -# HAPROXYCAPTUREDRESPONSEHEADERS %{DATA:[http][response][mime_type]}\|%{DATA:[haproxy][http][response][encoding]}\|%{DATA:[haproxy][http][response][cache_control]}\|%{DATA:[haproxy][http][response][last_modified]} +# HAPROXYCAPTUREDREQUESTHEADERS %{DATA:haproxy.http.request.host}\|%{DATA:haproxy.http.request.x_forwarded_for}\|%{DATA:haproxy.http.request.accept_language}\|%{DATA:http.request.referrer}\|%{DATA:user_agent.original} +# HAPROXYCAPTUREDRESPONSEHEADERS %{DATA:http.response.mime_type}\|%{DATA:haproxy.http.response.encoding}\|%{DATA:haproxy.http.response.cache_control}\|%{DATA:haproxy.http.response.last_modified} -HAPROXYURI (?:%{URIPROTO:[url][scheme]}://)?(?:%{USER:[url][username]}(?::[^@]*)?@)?(?:%{IPORHOST:[url][domain]}(?::%{POSINT:[url][port]:int})?)?(?:%{URIPATH:[url][path]}(?:\?%{URIQUERY:[url][query]})?)? +HAPROXYURI (?:%{URIPROTO:url.scheme}://)?(?:%{USER:url.username}(?::[^@]*)?@)?(?:%{IPORHOST:url.domain}(?::%{POSINT:url.port:int})?)?(?:%{URIPATH:url.path}(?:\?%{URIQUERY:url.query})?)? 
-HAPROXYHTTPREQUESTLINE (?:|(?:%{WORD:[http][request][method]} %{HAPROXYURI:[url][original]}(?: HTTP/%{NUMBER:[http][version]})?)) +HAPROXYHTTPREQUESTLINE (?:|(?:%{WORD:http.request.method} %{HAPROXYURI:url.original}(?: HTTP/%{NUMBER:http.version})?)) # parse a haproxy 'httplog' line -HAPROXYHTTPBASE %{IP:[source][address]}:%{INT:[source][port]:int} \[%{HAPROXYDATE:[haproxy][request_date]}\] %{NOTSPACE:[haproxy][frontend_name]} %{NOTSPACE:[haproxy][backend_name]}/(?:|%{NOTSPACE:[haproxy][server_name]}) (?:-1|%{INT:[haproxy][http][request][time_wait_ms]:int})/(?:-1|%{INT:[haproxy][total_waiting_time_ms]:int})/(?:-1|%{INT:[haproxy][connection_wait_time_ms]:int})/(?:-1|%{INT:[haproxy][http][request][time_wait_without_data_ms]:int})/%{NOTSPACE:[haproxy][total_time_ms]} %{INT:[http][response][status_code]:int} %{INT:[source][bytes]:int} (?:-|%{DATA:[haproxy][http][request][captured_cookie]}) (?:-|%{DATA:[haproxy][http][response][captured_cookie]}) %{NOTSPACE:[haproxy][termination_state]} %{INT:[haproxy][connections][active]:int}/%{INT:[haproxy][connections][frontend]:int}/%{INT:[haproxy][connections][backend]:int}/%{INT:[haproxy][connections][server]:int}/%{INT:[haproxy][connections][retries]:int} %{INT:[haproxy][server_queue]:int}/%{INT:[haproxy][backend_queue]:int}(?: \{%{HAPROXYCAPTUREDREQUESTHEADERS}\}(?: \{%{HAPROXYCAPTUREDRESPONSEHEADERS}\})?)?(?: "%{HAPROXYHTTPREQUESTLINE}"?)? 
-# :long - %{INT:[source][bytes]:int} +HAPROXYHTTPBASE %{IP:source.address}:%{INT:source.port:int} \[%{HAPROXYDATE:haproxy.request_date}\] %{NOTSPACE:haproxy.frontend_name} %{NOTSPACE:haproxy.backend_name}/(?:|%{NOTSPACE:haproxy.server_name}) (?:-1|%{INT:haproxy.http.request.time_wait_ms:int})/(?:-1|%{INT:haproxy.total_waiting_time_ms:int})/(?:-1|%{INT:haproxy.connection_wait_time_ms:int})/(?:-1|%{INT:haproxy.http.request.time_wait_without_data_ms:int})/%{NOTSPACE:haproxy.total_time_ms} %{INT:http.response.status_code:int} %{INT:source.bytes:long} (?:-|%{DATA:haproxy.http.request.captured_cookie}) (?:-|%{DATA:haproxy.http.response.captured_cookie}) %{NOTSPACE:haproxy.termination_state} %{INT:haproxy.connections.active:int}/%{INT:haproxy.connections.frontend:int}/%{INT:haproxy.connections.backend:int}/%{INT:haproxy.connections.server:int}/%{INT:haproxy.connections.retries:int} %{INT:haproxy.server_queue:int}/%{INT:haproxy.backend_queue:int}(?: \{%{HAPROXYCAPTUREDREQUESTHEADERS}\}(?: \{%{HAPROXYCAPTUREDRESPONSEHEADERS}\})?)?(?: "%{HAPROXYHTTPREQUESTLINE}"?)? 
+# :long - %{INT:source.bytes:int} -HAPROXYHTTP (?:%{SYSLOGTIMESTAMP:timestamp}|%{TIMESTAMP_ISO8601:timestamp}) %{IPORHOST:[host][hostname]} %{SYSLOGPROG}: %{HAPROXYHTTPBASE} +HAPROXYHTTP (?:%{SYSLOGTIMESTAMP:timestamp}|%{TIMESTAMP_ISO8601:timestamp}) %{IPORHOST:host.hostname} %{SYSLOGPROG}: %{HAPROXYHTTPBASE} # parse a haproxy 'tcplog' line -HAPROXYTCP (?:%{SYSLOGTIMESTAMP:timestamp}|%{TIMESTAMP_ISO8601:timestamp}) %{IPORHOST:[host][hostname]} %{SYSLOGPROG}: %{IP:[source][address]}:%{INT:[source][port]:int} \[%{HAPROXYDATE:[haproxy][request_date]}\] %{NOTSPACE:[haproxy][frontend_name]} %{NOTSPACE:[haproxy][backend_name]}/(?:|%{NOTSPACE:[haproxy][server_name]}) (?:-1|%{INT:[haproxy][total_waiting_time_ms]:int})/(?:-1|%{INT:[haproxy][connection_wait_time_ms]:int})/%{NOTSPACE:[haproxy][total_time_ms]} %{INT:[source][bytes]:int} %{NOTSPACE:[haproxy][termination_state]} %{INT:[haproxy][connections][active]:int}/%{INT:[haproxy][connections][frontend]:int}/%{INT:[haproxy][connections][backend]:int}/%{INT:[haproxy][connections][server]:int}/%{INT:[haproxy][connections][retries]:int} %{INT:[haproxy][server_queue]:int}/%{INT:[haproxy][backend_queue]:int} -# :long - %{INT:[source][bytes]:int} +HAPROXYTCP (?:%{SYSLOGTIMESTAMP:timestamp}|%{TIMESTAMP_ISO8601:timestamp}) %{IPORHOST:host.hostname} %{SYSLOGPROG}: %{IP:source.address}:%{INT:source.port:int} \[%{HAPROXYDATE:haproxy.request_date}\] %{NOTSPACE:haproxy.frontend_name} %{NOTSPACE:haproxy.backend_name}/(?:|%{NOTSPACE:haproxy.server_name}) (?:-1|%{INT:haproxy.total_waiting_time_ms:int})/(?:-1|%{INT:haproxy.connection_wait_time_ms:int})/%{NOTSPACE:haproxy.total_time_ms} %{INT:source.bytes:long} %{NOTSPACE:haproxy.termination_state} %{INT:haproxy.connections.active:int}/%{INT:haproxy.connections.frontend:int}/%{INT:haproxy.connections.backend:int}/%{INT:haproxy.connections.server:int}/%{INT:haproxy.connections.retries:int} %{INT:haproxy.server_queue:int}/%{INT:haproxy.backend_queue:int} +# :long - %{INT:source.bytes:int} 
diff --git a/pygrok/patterns/httpd b/pygrok/patterns/httpd index 52accd2..9b58e50 100644 --- a/pygrok/patterns/httpd +++ b/pygrok/patterns/httpd @@ -2,14 +2,14 @@ HTTPDUSER %{EMAILADDRESS}|%{USER} HTTPDERROR_DATE %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{YEAR} # Log formats -HTTPD_COMMONLOG %{IPORHOST:[source][address]} (?:-|%{HTTPDUSER:[apache][access][user][identity]}) (?:-|%{HTTPDUSER:[user][name]}) \[%{HTTPDATE:timestamp}\] "(?:%{WORD:[http][request][method]} %{NOTSPACE:[url][original]}(?: HTTP/%{NUMBER:[http][version]})?|%{DATA})" (?:-|%{INT:[http][response][status_code]:int}) (?:-|%{INT:[http][response][body][bytes]:int}) -# :long - %{INT:[http][response][body][bytes]:int} -HTTPD_COMBINEDLOG %{HTTPD_COMMONLOG} "(?:-|%{DATA:[http][request][referrer]})" "(?:-|%{DATA:[user_agent][original]})" +HTTPD_COMMONLOG %{IPORHOST:source.address} (?:-|%{HTTPDUSER:apache.access.user.identity}) (?:-|%{HTTPDUSER:user.name}) \[%{HTTPDATE:timestamp}\] "(?:%{WORD:http.request.method} %{NOTSPACE:url.original}(?: HTTP/%{NUMBER:http.version})?|%{DATA})" (?:-|%{INT:http.response.status_code:int}) (?:-|%{INT:http.response.body.bytes:long}) +# :long - %{INT:http.response.body.bytes:int} +HTTPD_COMBINEDLOG %{HTTPD_COMMONLOG} "(?:-|%{DATA:http.request.referrer})" "(?:-|%{DATA:user_agent.original})" # Error logs -HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:[log][level]}\] (?:\[client %{IPORHOST:[source][address]}\] )?%{GREEDYDATA:message} -HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[(?:%{WORD:[apache][error][module]})?:%{LOGLEVEL:[log][level]}\] \[pid %{POSINT:[process][pid]:int}(:tid %{INT:[process][thread][id]:int})?\](?: \(%{POSINT:[apache][error][proxy][error][code]?}\)%{DATA:[apache][error][proxy][error][message]}:)?(?: \[client %{IPORHOST:[source][address]}(?::%{POSINT:[source][port]:int})?\])?(?: %{DATA:[error][code]}:)? 
%{GREEDYDATA:message} -# :long - %{INT:[process][thread][id]:int} +HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:log.level}\] (?:\[client %{IPORHOST:source.address}\] )?%{GREEDYDATA:message} +HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[(?:%{WORD:apache.error.module})?:%{LOGLEVEL:log.level}\] \[pid %{POSINT:process.pid:long}(:tid %{INT:process.thread.id:int})?\](?: \(%{POSINT:apache.error.proxy.error.code?}\)%{DATA:apache.error.proxy.error.message}:)?(?: \[client %{IPORHOST:source.address}(?::%{POSINT:source.port:int})?\])?(?: %{DATA:error.code}:)? %{GREEDYDATA:message} +# :long - %{INT:process.thread.id:int} HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG} # Deprecated diff --git a/pygrok/patterns/java b/pygrok/patterns/java index 0c51a09..8dd539f 100644 --- a/pygrok/patterns/java +++ b/pygrok/patterns/java @@ -4,7 +4,7 @@ JAVAFILE (?:[a-zA-Z$_0-9. -]+) #Allow special <init>, <clinit> methods JAVAMETHOD (?:(<(?:cl)?init>)|[a-zA-Z$_][a-zA-Z$_0-9]*) #Line number is optional in special cases 'Native method' or 'Unknown source' -JAVASTACKTRACEPART %{SPACE}at %{JAVACLASS:[java][log][origin][class][name]}\.%{JAVAMETHOD:[log][origin][function]}\(%{JAVAFILE:[log][origin][file][name]}(?::%{INT:[log][origin][file][line]:int})?\) +JAVASTACKTRACEPART %{SPACE}at %{JAVACLASS:java.log.origin.class.name}\.%{JAVAMETHOD:log.origin.function}\(%{JAVAFILE:log.origin.file.name}(?::%{INT:log.origin.file.line:int})?\) # Java Logs JAVATHREAD (?:[A-Z]{2}-Processor[\d]+) JAVALOGMESSAGE (?:.*) @@ -12,11 +12,11 @@ JAVALOGMESSAGE (?:.*) # MMM dd, yyyy HH:mm:ss eg: Jan 9, 2014 7:13:13 AM # matches default logging configuration in Tomcat 4.1, 5.0, 5.5, 6.0, 7.0 CATALINA7_DATESTAMP %{MONTH} %{MONTHDAY}, %{YEAR} %{HOUR}:%{MINUTE}:%{SECOND} (?:AM|PM) -CATALINA7_LOG %{CATALINA7_DATESTAMP:timestamp} %{JAVACLASS:[java][log][origin][class][name]}(?: %{JAVAMETHOD:[log][origin][function]})?\s*(?:%{LOGLEVEL:[log][level]}:)? 
%{JAVALOGMESSAGE:message} +CATALINA7_LOG %{CATALINA7_DATESTAMP:timestamp} %{JAVACLASS:java.log.origin.class.name}(?: %{JAVAMETHOD:log.origin.function})?\s*(?:%{LOGLEVEL:log.level}:)? %{JAVALOGMESSAGE:message} # 31-Jul-2020 16:40:38.578 in Tomcat 8.5/9.0 CATALINA8_DATESTAMP %{MONTHDAY}-%{MONTH}-%{YEAR} %{HOUR}:%{MINUTE}:%{SECOND} -CATALINA8_LOG %{CATALINA8_DATESTAMP:timestamp} %{LOGLEVEL:[log][level]} \[%{DATA:[java][log][origin][thread][name]}\] %{JAVACLASS:[java][log][origin][class][name]}\.(?:%{JAVAMETHOD:[log][origin][function]})? %{JAVALOGMESSAGE:message} +CATALINA8_LOG %{CATALINA8_DATESTAMP:timestamp} %{LOGLEVEL:log.level} \[%{DATA:java.log.origin.thread.name}\] %{JAVACLASS:java.log.origin.class.name}\.(?:%{JAVAMETHOD:log.origin.function})? %{JAVALOGMESSAGE:message} CATALINA_DATESTAMP (?:%{CATALINA8_DATESTAMP})|(?:%{CATALINA7_DATESTAMP}) CATALINALOG (?:%{CATALINA8_LOG})|(?:%{CATALINA7_LOG}) @@ -27,7 +27,7 @@ TOMCAT8_LOG %{CATALINA8_LOG} # NOTE: a weird log we started with - not sure what TC version this should match out of the box (due the | delimiters) TOMCATLEGACY_DATESTAMP %{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:%{MINUTE}:%{SECOND}(?: %{ISO8601_TIMEZONE})? 
-TOMCATLEGACY_LOG %{TOMCATLEGACY_DATESTAMP:timestamp} \| %{LOGLEVEL:[log][level]} \| %{JAVACLASS:[java][log][origin][class][name]} - %{JAVALOGMESSAGE:message} +TOMCATLEGACY_LOG %{TOMCATLEGACY_DATESTAMP:timestamp} \| %{LOGLEVEL:log.level} \| %{JAVACLASS:java.log.origin.class.name} - %{JAVALOGMESSAGE:message} TOMCAT_DATESTAMP (?:%{CATALINA8_DATESTAMP})|(?:%{CATALINA7_DATESTAMP})|(?:%{TOMCATLEGACY_DATESTAMP}) diff --git a/pygrok/patterns/junos b/pygrok/patterns/junos index 8869f7a..d23d455 100644 --- a/pygrok/patterns/junos +++ b/pygrok/patterns/junos @@ -3,11 +3,11 @@ RT_FLOW_TAG (?:RT_FLOW_SESSION_CREATE|RT_FLOW_SESSION_CLOSE|RT_FLOW_SESSION_DENY # deprecated legacy name: RT_FLOW_EVENT RT_FLOW_TAG -RT_FLOW1 %{RT_FLOW_TAG:[juniper][srx][tag]}: %{GREEDYDATA:[juniper][srx][reason]}: %{IP:[source][ip]}/%{INT:[source][port]:int}->%{IP:[destination][ip]}/%{INT:[destination][port]:int} %{DATA:[juniper][srx][service_name]} %{IP:[source][nat][ip]}/%{INT:[source][nat][port]:int}->%{IP:[destination][nat][ip]}/%{INT:[destination][nat][port]:int} (?:(?:None)|(?:%{DATA:[juniper][srx][src_nat_rule_name]})) (?:(?:None)|(?:%{DATA:[juniper][srx][dst_nat_rule_name]})) %{INT:[network][iana_number]} %{DATA:[rule][name]} %{DATA:[observer][ingress][zone]} %{DATA:[observer][egress][zone]} %{INT:[juniper][srx][session_id]} \d+\(%{INT:[source][bytes]:int}\) \d+\(%{INT:[destination][bytes]:int}\) %{INT:[juniper][srx][elapsed_time]:int} .* -# :long - %{INT:[source][bytes]:int} -# :long - %{INT:[destination][bytes]:int} +RT_FLOW1 %{RT_FLOW_TAG:juniper.srx.tag}: %{GREEDYDATA:juniper.srx.reason}: %{IP:source.ip}/%{INT:source.port:int}->%{IP:destination.ip}/%{INT:destination.port:int} %{DATA:juniper.srx.service_name} %{IP:source.nat.ip}/%{INT:source.nat.port:int}->%{IP:destination.nat.ip}/%{INT:destination.nat.port:int} (?:(?:None)|(?:%{DATA:juniper.srx.src_nat_rule_name})) (?:(?:None)|(?:%{DATA:juniper.srx.dst_nat_rule_name})) %{INT:network.iana_number} %{DATA:rule.name} 
%{DATA:observer.ingress.zone} %{DATA:observer.egress.zone} %{INT:juniper.srx.session_id} \d+\(%{INT:source.bytes:long}\) \d+\(%{INT:destination.bytes:long}\) %{INT:juniper.srx.elapsed_time:int} .* +# :long - %{INT:source.bytes:int} +# :long - %{INT:destination.bytes:int} -RT_FLOW2 %{RT_FLOW_TAG:[juniper][srx][tag]}: session created %{IP:[source][ip]}/%{INT:[source][port]:int}->%{IP:[destination][ip]}/%{INT:[destination][port]:int} %{DATA:[juniper][srx][service_name]} %{IP:[source][nat][ip]}/%{INT:[source][nat][port]:int}->%{IP:[destination][nat][ip]}/%{INT:[destination][nat][port]:int} (?:(?:None)|(?:%{DATA:[juniper][srx][src_nat_rule_name]})) (?:(?:None)|(?:%{DATA:[juniper][srx][dst_nat_rule_name]})) %{INT:[network][iana_number]} %{DATA:[rule][name]} %{DATA:[observer][ingress][zone]} %{DATA:[observer][egress][zone]} %{INT:[juniper][srx][session_id]} .* +RT_FLOW2 %{RT_FLOW_TAG:juniper.srx.tag}: session created %{IP:source.ip}/%{INT:source.port:int}->%{IP:destination.ip}/%{INT:destination.port:int} %{DATA:juniper.srx.service_name} %{IP:source.nat.ip}/%{INT:source.nat.port:int}->%{IP:destination.nat.ip}/%{INT:destination.nat.port:int} (?:(?:None)|(?:%{DATA:juniper.srx.src_nat_rule_name})) (?:(?:None)|(?:%{DATA:juniper.srx.dst_nat_rule_name})) %{INT:network.iana_number} %{DATA:rule.name} %{DATA:observer.ingress.zone} %{DATA:observer.egress.zone} %{INT:juniper.srx.session_id} .* -RT_FLOW3 %{RT_FLOW_TAG:[juniper][srx][tag]}: session denied %{IP:[source][ip]}/%{INT:[source][port]:int}->%{IP:[destination][ip]}/%{INT:[destination][port]:int} %{DATA:[juniper][srx][service_name]} %{INT:[network][iana_number]}\(\d\) %{DATA:[rule][name]} %{DATA:[observer][ingress][zone]} %{DATA:[observer][egress][zone]} .* +RT_FLOW3 %{RT_FLOW_TAG:juniper.srx.tag}: session denied %{IP:source.ip}/%{INT:source.port:int}->%{IP:destination.ip}/%{INT:destination.port:int} %{DATA:juniper.srx.service_name} %{INT:network.iana_number}\(\d\) %{DATA:rule.name} %{DATA:observer.ingress.zone} 
%{DATA:observer.egress.zone} .* diff --git a/pygrok/patterns/linux-syslog b/pygrok/patterns/linux-syslog index f118794..f2582f5 100644 --- a/pygrok/patterns/linux-syslog +++ b/pygrok/patterns/linux-syslog @@ -1,16 +1,16 @@ SYSLOG5424PRINTASCII [!-~]+ -SYSLOGBASE2 (?:%{SYSLOGTIMESTAMP:timestamp}|%{TIMESTAMP_ISO8601:timestamp})(?: %{SYSLOGFACILITY})?(?: %{SYSLOGHOST:[host][hostname]})?(?: %{SYSLOGPROG}:)? -SYSLOGPAMSESSION %{SYSLOGBASE} (?=%{GREEDYDATA:message})%{WORD:[system][auth][pam][module]}\(%{DATA:[system][auth][pam][origin]}\): session %{WORD:[system][auth][pam][session_state]} for user %{USERNAME:[user][name]}(?: by %{GREEDYDATA})? +SYSLOGBASE2 (?:%{SYSLOGTIMESTAMP:timestamp}|%{TIMESTAMP_ISO8601:timestamp})(?: %{SYSLOGFACILITY})?(?: %{SYSLOGHOST:host.hostname})?(?: %{SYSLOGPROG}:)? +SYSLOGPAMSESSION %{SYSLOGBASE} (?=%{GREEDYDATA:message})%{WORD:system.auth.pam.module}\(%{DATA:system.auth.pam.origin}\): session %{WORD:system.auth.pam.session_state} for user %{USERNAME:user.name}(?: by %{GREEDYDATA})? CRON_ACTION [A-Z ]+ -CRONLOG %{SYSLOGBASE} \(%{USER:[user][name]}\) %{CRON_ACTION:[system][cron][action]} \(%{DATA:message}\) +CRONLOG %{SYSLOGBASE} \(%{USER:user.name}\) %{CRON_ACTION:system.cron.action} \(%{DATA:message}\) SYSLOGLINE %{SYSLOGBASE2} %{GREEDYDATA:message} # IETF 5424 syslog(8) format (see http://www.rfc-editor.org/info/rfc5424) -SYSLOG5424PRI <%{NONNEGINT:[log][syslog][priority]:int}> +SYSLOG5424PRI <%{NONNEGINT:log.syslog.priority:int}> SYSLOG5424SD \[%{DATA}\]+ -SYSLOG5424BASE %{SYSLOG5424PRI}%{NONNEGINT:[system][syslog][version]} +(?:-|%{TIMESTAMP_ISO8601:timestamp}) +(?:-|%{IPORHOST:[host][hostname]}) +(?:-|%{SYSLOG5424PRINTASCII:[process][name]}) +(?:-|%{POSINT:[process][pid]:int}) +(?:-|%{SYSLOG5424PRINTASCII:[event][code]}) +(?:-|%{SYSLOG5424SD:[system][syslog][structured_data]})? 
+SYSLOG5424BASE %{SYSLOG5424PRI}%{NONNEGINT:system.syslog.version} +(?:-|%{TIMESTAMP_ISO8601:timestamp}) +(?:-|%{IPORHOST:host.hostname}) +(?:-|%{SYSLOG5424PRINTASCII:process.name}) +(?:-|%{POSINT:process.pid:int}) +(?:-|%{SYSLOG5424PRINTASCII:event.code}) +(?:-|%{SYSLOG5424SD:system.syslog.structured_data})? SYSLOG5424LINE %{SYSLOG5424BASE} +%{GREEDYDATA:message} diff --git a/pygrok/patterns/mcollective b/pygrok/patterns/mcollective index abf2331..f797cbd 100644 --- a/pygrok/patterns/mcollective +++ b/pygrok/patterns/mcollective @@ -1,4 +1,4 @@ # Remember, these can be multi-line events. -MCOLLECTIVE ., \[%{TIMESTAMP_ISO8601:timestamp} #%{POSINT:[process][pid]:int}\]%{SPACE}%{LOGLEVEL:[log][level]} +MCOLLECTIVE ., \[%{TIMESTAMP_ISO8601:timestamp} #%{POSINT:process.pid:int}\]%{SPACE}%{LOGLEVEL:log.level} MCOLLECTIVEAUDIT %{TIMESTAMP_ISO8601:timestamp}: diff --git a/pygrok/patterns/mongodb b/pygrok/patterns/mongodb index 5a33256..7f1c03d 100644 --- a/pygrok/patterns/mongodb +++ b/pygrok/patterns/mongodb @@ -1,7 +1,7 @@ -MONGO_LOG %{SYSLOGTIMESTAMP:timestamp} \[%{WORD:[mongodb][component]}\] %{GREEDYDATA:message} +MONGO_LOG %{SYSLOGTIMESTAMP:timestamp} \[%{WORD:mongodb.component}\] %{GREEDYDATA:message} MONGO_QUERY \{ (?<={ ).*(?= } ntoreturn:) \} -MONGO_SLOWQUERY %{WORD:[mongodb][profile][op]} %{MONGO_WORDDASH:[mongodb][database]}\.%{MONGO_WORDDASH:[mongodb][collection]} %{WORD}: %{MONGO_QUERY:[mongodb][query][original]} ntoreturn:%{NONNEGINT:[mongodb][profile][ntoreturn]:int} ntoskip:%{NONNEGINT:[mongodb][profile][ntoskip]:int} nscanned:%{NONNEGINT:[mongodb][profile][nscanned]:int}.*? nreturned:%{NONNEGINT:[mongodb][profile][nreturned]:int}.*? 
%{INT:[mongodb][profile][duration]:int}ms +MONGO_SLOWQUERY %{WORD:mongodb.profile.op} %{MONGO_WORDDASH:mongodb.database}\.%{MONGO_WORDDASH:mongodb.collection} %{WORD}: %{MONGO_QUERY:mongodb.query.original} ntoreturn:%{NONNEGINT:mongodb.profile.ntoreturn:int} ntoskip:%{NONNEGINT:mongodb.profile.ntoskip:int} nscanned:%{NONNEGINT:mongodb.profile.nscanned:int}.*? nreturned:%{NONNEGINT:mongodb.profile.nreturned:int}.*? %{INT:mongodb.profile.duration:int}ms MONGO_WORDDASH \b[\w-]+\b MONGO3_SEVERITY \w MONGO3_COMPONENT %{WORD} -MONGO3_LOG %{TIMESTAMP_ISO8601:timestamp} %{MONGO3_SEVERITY:[log][level]} (?:-|%{MONGO3_COMPONENT:[mongodb][component]})%{SPACE}(?:\[%{DATA:[mongodb][context]}\])? %{GREEDYDATA:message} +MONGO3_LOG %{TIMESTAMP_ISO8601:timestamp} %{MONGO3_SEVERITY:log.level} (?:-|%{MONGO3_COMPONENT:mongodb.component})%{SPACE}(?:\[%{DATA:mongodb.context}\])? %{GREEDYDATA:message} diff --git a/pygrok/patterns/nagios b/pygrok/patterns/nagios index c4dbd89..d0a3b42 100644 --- a/pygrok/patterns/nagios +++ b/pygrok/patterns/nagios @@ -68,57 +68,57 @@ NAGIOS_EC_ENABLE_SVC_NOTIFICATIONS ENABLE_SVC_NOTIFICATIONS ############################################### NAGIOS_WARNING Warning:%{SPACE}%{GREEDYDATA:message} -NAGIOS_CURRENT_SERVICE_STATE %{NAGIOS_TYPE_CURRENT_SERVICE_STATE:[nagios][log][type]}: %{DATA:[host][hostname]};%{DATA:[service][name]};%{DATA:[service][state]};%{DATA:[nagios][log][state_type]};%{INT:[nagios][log][attempt]:int};%{GREEDYDATA:message} -NAGIOS_CURRENT_HOST_STATE %{NAGIOS_TYPE_CURRENT_HOST_STATE:[nagios][log][type]}: %{DATA:[host][hostname]};%{DATA:[service][state]};%{DATA:[nagios][log][state_type]};%{INT:[nagios][log][attempt]:int};%{GREEDYDATA:message} +NAGIOS_CURRENT_SERVICE_STATE %{NAGIOS_TYPE_CURRENT_SERVICE_STATE:nagios.log.type}: %{DATA:host.hostname};%{DATA:service.name};%{DATA:service.state};%{DATA:nagios.log.state_type};%{INT:nagios.log.attempt:int};%{GREEDYDATA:message} +NAGIOS_CURRENT_HOST_STATE 
%{NAGIOS_TYPE_CURRENT_HOST_STATE:nagios.log.type}: %{DATA:host.hostname};%{DATA:service.state};%{DATA:nagios.log.state_type};%{INT:nagios.log.attempt:int};%{GREEDYDATA:message} -NAGIOS_SERVICE_NOTIFICATION %{NAGIOS_TYPE_SERVICE_NOTIFICATION:[nagios][log][type]}: %{DATA:[user][name]};%{DATA:[host][hostname]};%{DATA:[service][name]};%{DATA:[service][state]};%{DATA:[nagios][log][notification_command]};%{GREEDYDATA:message} -NAGIOS_HOST_NOTIFICATION %{NAGIOS_TYPE_HOST_NOTIFICATION:[nagios][log][type]}: %{DATA:[user][name]};%{DATA:[host][hostname]};%{DATA:[service][state]};%{DATA:[nagios][log][notification_command]};%{GREEDYDATA:message} +NAGIOS_SERVICE_NOTIFICATION %{NAGIOS_TYPE_SERVICE_NOTIFICATION:nagios.log.type}: %{DATA:user.name};%{DATA:host.hostname};%{DATA:service.name};%{DATA:service.state};%{DATA:nagios.log.notification_command};%{GREEDYDATA:message} +NAGIOS_HOST_NOTIFICATION %{NAGIOS_TYPE_HOST_NOTIFICATION:nagios.log.type}: %{DATA:user.name};%{DATA:host.hostname};%{DATA:service.state};%{DATA:nagios.log.notification_command};%{GREEDYDATA:message} -NAGIOS_SERVICE_ALERT %{NAGIOS_TYPE_SERVICE_ALERT:[nagios][log][type]}: %{DATA:[host][hostname]};%{DATA:[service][name]};%{DATA:[service][state]};%{DATA:[nagios][log][state_type]};%{INT:[nagios][log][attempt]:int};%{GREEDYDATA:message} -NAGIOS_HOST_ALERT %{NAGIOS_TYPE_HOST_ALERT:[nagios][log][type]}: %{DATA:[host][hostname]};%{DATA:[service][state]};%{DATA:[nagios][log][state_type]};%{INT:[nagios][log][attempt]:int};%{GREEDYDATA:message} +NAGIOS_SERVICE_ALERT %{NAGIOS_TYPE_SERVICE_ALERT:nagios.log.type}: %{DATA:host.hostname};%{DATA:service.name};%{DATA:service.state};%{DATA:nagios.log.state_type};%{INT:nagios.log.attempt:int};%{GREEDYDATA:message} +NAGIOS_HOST_ALERT %{NAGIOS_TYPE_HOST_ALERT:nagios.log.type}: %{DATA:host.hostname};%{DATA:service.state};%{DATA:nagios.log.state_type};%{INT:nagios.log.attempt:int};%{GREEDYDATA:message} -NAGIOS_SERVICE_FLAPPING_ALERT 
%{NAGIOS_TYPE_SERVICE_FLAPPING_ALERT:[nagios][log][type]}: %{DATA:[host][hostname]};%{DATA:[service][name]};%{DATA:[service][state]};%{GREEDYDATA:message} -NAGIOS_HOST_FLAPPING_ALERT %{NAGIOS_TYPE_HOST_FLAPPING_ALERT:[nagios][log][type]}: %{DATA:[host][hostname]};%{DATA:[service][state]};%{GREEDYDATA:message} +NAGIOS_SERVICE_FLAPPING_ALERT %{NAGIOS_TYPE_SERVICE_FLAPPING_ALERT:nagios.log.type}: %{DATA:host.hostname};%{DATA:service.name};%{DATA:service.state};%{GREEDYDATA:message} +NAGIOS_HOST_FLAPPING_ALERT %{NAGIOS_TYPE_HOST_FLAPPING_ALERT:nagios.log.type}: %{DATA:host.hostname};%{DATA:service.state};%{GREEDYDATA:message} -NAGIOS_SERVICE_DOWNTIME_ALERT %{NAGIOS_TYPE_SERVICE_DOWNTIME_ALERT:[nagios][log][type]}: %{DATA:[host][hostname]};%{DATA:[service][name]};%{DATA:[service][state]};%{GREEDYDATA:[nagios][log][comment]} -NAGIOS_HOST_DOWNTIME_ALERT %{NAGIOS_TYPE_HOST_DOWNTIME_ALERT:[nagios][log][type]}: %{DATA:[host][hostname]};%{DATA:[service][state]};%{GREEDYDATA:[nagios][log][comment]} +NAGIOS_SERVICE_DOWNTIME_ALERT %{NAGIOS_TYPE_SERVICE_DOWNTIME_ALERT:nagios.log.type}: %{DATA:host.hostname};%{DATA:service.name};%{DATA:service.state};%{GREEDYDATA:nagios.log.comment} +NAGIOS_HOST_DOWNTIME_ALERT %{NAGIOS_TYPE_HOST_DOWNTIME_ALERT:nagios.log.type}: %{DATA:host.hostname};%{DATA:service.state};%{GREEDYDATA:nagios.log.comment} -NAGIOS_PASSIVE_SERVICE_CHECK %{NAGIOS_TYPE_PASSIVE_SERVICE_CHECK:[nagios][log][type]}: %{DATA:[host][hostname]};%{DATA:[service][name]};%{DATA:[service][state]};%{GREEDYDATA:[nagios][log][comment]} -NAGIOS_PASSIVE_HOST_CHECK %{NAGIOS_TYPE_PASSIVE_HOST_CHECK:[nagios][log][type]}: %{DATA:[host][hostname]};%{DATA:[service][state]};%{GREEDYDATA:[nagios][log][comment]} +NAGIOS_PASSIVE_SERVICE_CHECK %{NAGIOS_TYPE_PASSIVE_SERVICE_CHECK:nagios.log.type}: %{DATA:host.hostname};%{DATA:service.name};%{DATA:service.state};%{GREEDYDATA:nagios.log.comment} +NAGIOS_PASSIVE_HOST_CHECK %{NAGIOS_TYPE_PASSIVE_HOST_CHECK:nagios.log.type}: 
%{DATA:host.hostname};%{DATA:service.state};%{GREEDYDATA:nagios.log.comment} -NAGIOS_SERVICE_EVENT_HANDLER %{NAGIOS_TYPE_SERVICE_EVENT_HANDLER:[nagios][log][type]}: %{DATA:[host][hostname]};%{DATA:[service][name]};%{DATA:[service][state]};%{DATA:[nagios][log][state_type]};%{DATA:[nagios][log][event_handler_name]} -NAGIOS_HOST_EVENT_HANDLER %{NAGIOS_TYPE_HOST_EVENT_HANDLER:[nagios][log][type]}: %{DATA:[host][hostname]};%{DATA:[service][state]};%{DATA:[nagios][log][state_type]};%{DATA:[nagios][log][event_handler_name]} +NAGIOS_SERVICE_EVENT_HANDLER %{NAGIOS_TYPE_SERVICE_EVENT_HANDLER:nagios.log.type}: %{DATA:host.hostname};%{DATA:service.name};%{DATA:service.state};%{DATA:nagios.log.state_type};%{DATA:nagios.log.event_handler_name} +NAGIOS_HOST_EVENT_HANDLER %{NAGIOS_TYPE_HOST_EVENT_HANDLER:nagios.log.type}: %{DATA:host.hostname};%{DATA:service.state};%{DATA:nagios.log.state_type};%{DATA:nagios.log.event_handler_name} -NAGIOS_TIMEPERIOD_TRANSITION %{NAGIOS_TYPE_TIMEPERIOD_TRANSITION:[nagios][log][type]}: %{DATA:[service][name]};%{NUMBER:[nagios][log][period_from]:int};%{NUMBER:[nagios][log][period_to]:int} +NAGIOS_TIMEPERIOD_TRANSITION %{NAGIOS_TYPE_TIMEPERIOD_TRANSITION:nagios.log.type}: %{DATA:service.name};%{NUMBER:nagios.log.period_from:int};%{NUMBER:nagios.log.period_to:int} #################### #### External checks #################### #Disable host & service check -NAGIOS_EC_LINE_DISABLE_SVC_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:[nagios][log][type]}: %{NAGIOS_EC_DISABLE_SVC_CHECK:[nagios][log][command]};%{DATA:[host][hostname]};%{DATA:[service][name]} -NAGIOS_EC_LINE_DISABLE_HOST_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:[nagios][log][type]}: %{NAGIOS_EC_DISABLE_HOST_CHECK:[nagios][log][command]};%{DATA:[host][hostname]} +NAGIOS_EC_LINE_DISABLE_SVC_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios.log.type}: %{NAGIOS_EC_DISABLE_SVC_CHECK:nagios.log.command};%{DATA:host.hostname};%{DATA:service.name} +NAGIOS_EC_LINE_DISABLE_HOST_CHECK 
%{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios.log.type}: %{NAGIOS_EC_DISABLE_HOST_CHECK:nagios.log.command};%{DATA:host.hostname} #Enable host & service check -NAGIOS_EC_LINE_ENABLE_SVC_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:[nagios][log][type]}: %{NAGIOS_EC_ENABLE_SVC_CHECK:[nagios][log][command]};%{DATA:[host][hostname]};%{DATA:[service][name]} -NAGIOS_EC_LINE_ENABLE_HOST_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:[nagios][log][type]}: %{NAGIOS_EC_ENABLE_HOST_CHECK:[nagios][log][command]};%{DATA:[host][hostname]} +NAGIOS_EC_LINE_ENABLE_SVC_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios.log.type}: %{NAGIOS_EC_ENABLE_SVC_CHECK:nagios.log.command};%{DATA:host.hostname};%{DATA:service.name} +NAGIOS_EC_LINE_ENABLE_HOST_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios.log.type}: %{NAGIOS_EC_ENABLE_HOST_CHECK:nagios.log.command};%{DATA:host.hostname} #Process host & service check -NAGIOS_EC_LINE_PROCESS_SERVICE_CHECK_RESULT %{NAGIOS_TYPE_EXTERNAL_COMMAND:[nagios][log][type]}: %{NAGIOS_EC_PROCESS_SERVICE_CHECK_RESULT:[nagios][log][command]};%{DATA:[host][hostname]};%{DATA:[service][name]};%{DATA:[service][state]};%{GREEDYDATA:[nagios][log][check_result]} -NAGIOS_EC_LINE_PROCESS_HOST_CHECK_RESULT %{NAGIOS_TYPE_EXTERNAL_COMMAND:[nagios][log][type]}: %{NAGIOS_EC_PROCESS_HOST_CHECK_RESULT:[nagios][log][command]};%{DATA:[host][hostname]};%{DATA:[service][state]};%{GREEDYDATA:[nagios][log][check_result]} +NAGIOS_EC_LINE_PROCESS_SERVICE_CHECK_RESULT %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios.log.type}: %{NAGIOS_EC_PROCESS_SERVICE_CHECK_RESULT:nagios.log.command};%{DATA:host.hostname};%{DATA:service.name};%{DATA:service.state};%{GREEDYDATA:nagios.log.check_result} +NAGIOS_EC_LINE_PROCESS_HOST_CHECK_RESULT %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios.log.type}: %{NAGIOS_EC_PROCESS_HOST_CHECK_RESULT:nagios.log.command};%{DATA:host.hostname};%{DATA:service.state};%{GREEDYDATA:nagios.log.check_result} #Disable host & service notifications -NAGIOS_EC_LINE_DISABLE_HOST_SVC_NOTIFICATIONS 
%{NAGIOS_TYPE_EXTERNAL_COMMAND:[nagios][log][type]}: %{NAGIOS_EC_DISABLE_HOST_SVC_NOTIFICATIONS:[nagios][log][command]};%{GREEDYDATA:[host][hostname]} -NAGIOS_EC_LINE_DISABLE_HOST_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:[nagios][log][type]}: %{NAGIOS_EC_DISABLE_HOST_NOTIFICATIONS:[nagios][log][command]};%{GREEDYDATA:[host][hostname]} -NAGIOS_EC_LINE_DISABLE_SVC_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:[nagios][log][type]}: %{NAGIOS_EC_DISABLE_SVC_NOTIFICATIONS:[nagios][log][command]};%{DATA:[host][hostname]};%{GREEDYDATA:[service][name]} +NAGIOS_EC_LINE_DISABLE_HOST_SVC_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios.log.type}: %{NAGIOS_EC_DISABLE_HOST_SVC_NOTIFICATIONS:nagios.log.command};%{GREEDYDATA:host.hostname} +NAGIOS_EC_LINE_DISABLE_HOST_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios.log.type}: %{NAGIOS_EC_DISABLE_HOST_NOTIFICATIONS:nagios.log.command};%{GREEDYDATA:host.hostname} +NAGIOS_EC_LINE_DISABLE_SVC_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios.log.type}: %{NAGIOS_EC_DISABLE_SVC_NOTIFICATIONS:nagios.log.command};%{DATA:host.hostname};%{GREEDYDATA:service.name} #Enable host & service notifications -NAGIOS_EC_LINE_ENABLE_HOST_SVC_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:[nagios][log][type]}: %{NAGIOS_EC_ENABLE_HOST_SVC_NOTIFICATIONS:[nagios][log][command]};%{GREEDYDATA:[host][hostname]} -NAGIOS_EC_LINE_ENABLE_HOST_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:[nagios][log][type]}: %{NAGIOS_EC_ENABLE_HOST_NOTIFICATIONS:[nagios][log][command]};%{GREEDYDATA:[host][hostname]} -NAGIOS_EC_LINE_ENABLE_SVC_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:[nagios][log][type]}: %{NAGIOS_EC_ENABLE_SVC_NOTIFICATIONS:[nagios][log][command]};%{DATA:[host][hostname]};%{GREEDYDATA:[service][name]} +NAGIOS_EC_LINE_ENABLE_HOST_SVC_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios.log.type}: %{NAGIOS_EC_ENABLE_HOST_SVC_NOTIFICATIONS:nagios.log.command};%{GREEDYDATA:host.hostname} +NAGIOS_EC_LINE_ENABLE_HOST_NOTIFICATIONS 
%{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios.log.type}: %{NAGIOS_EC_ENABLE_HOST_NOTIFICATIONS:nagios.log.command};%{GREEDYDATA:host.hostname} +NAGIOS_EC_LINE_ENABLE_SVC_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios.log.type}: %{NAGIOS_EC_ENABLE_SVC_NOTIFICATIONS:nagios.log.command};%{DATA:host.hostname};%{GREEDYDATA:service.name} #Schedule host & service downtime -NAGIOS_EC_LINE_SCHEDULE_HOST_DOWNTIME %{NAGIOS_TYPE_EXTERNAL_COMMAND:[nagios][log][type]}: %{NAGIOS_EC_SCHEDULE_HOST_DOWNTIME:[nagios][log][command]};%{DATA:[host][hostname]};%{NUMBER:[nagios][log][start_time]};%{NUMBER:[nagios][log][end_time]};%{NUMBER:[nagios][log][fixed]};%{NUMBER:[nagios][log][trigger_id]};%{NUMBER:[nagios][log][duration]:int};%{DATA:[user][name]};%{DATA:[nagios][log][comment]} +NAGIOS_EC_LINE_SCHEDULE_HOST_DOWNTIME %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios.log.type}: %{NAGIOS_EC_SCHEDULE_HOST_DOWNTIME:nagios.log.command};%{DATA:host.hostname};%{NUMBER:nagios.log.start_time};%{NUMBER:nagios.log.end_time};%{NUMBER:nagios.log.fixed};%{NUMBER:nagios.log.trigger_id};%{NUMBER:nagios.log.duration:int};%{DATA:user.name};%{DATA:nagios.log.comment} #End matching line NAGIOSLOGLINE %{NAGIOSTIME} 
(?:%{NAGIOS_WARNING}|%{NAGIOS_CURRENT_SERVICE_STATE}|%{NAGIOS_CURRENT_HOST_STATE}|%{NAGIOS_SERVICE_NOTIFICATION}|%{NAGIOS_HOST_NOTIFICATION}|%{NAGIOS_SERVICE_ALERT}|%{NAGIOS_HOST_ALERT}|%{NAGIOS_SERVICE_FLAPPING_ALERT}|%{NAGIOS_HOST_FLAPPING_ALERT}|%{NAGIOS_SERVICE_DOWNTIME_ALERT}|%{NAGIOS_HOST_DOWNTIME_ALERT}|%{NAGIOS_PASSIVE_SERVICE_CHECK}|%{NAGIOS_PASSIVE_HOST_CHECK}|%{NAGIOS_SERVICE_EVENT_HANDLER}|%{NAGIOS_HOST_EVENT_HANDLER}|%{NAGIOS_TIMEPERIOD_TRANSITION}|%{NAGIOS_EC_LINE_DISABLE_SVC_CHECK}|%{NAGIOS_EC_LINE_ENABLE_SVC_CHECK}|%{NAGIOS_EC_LINE_DISABLE_HOST_CHECK}|%{NAGIOS_EC_LINE_ENABLE_HOST_CHECK}|%{NAGIOS_EC_LINE_PROCESS_HOST_CHECK_RESULT}|%{NAGIOS_EC_LINE_PROCESS_SERVICE_CHECK_RESULT}|%{NAGIOS_EC_LINE_SCHEDULE_HOST_DOWNTIME}|%{NAGIOS_EC_LINE_DISABLE_HOST_SVC_NOTIFICATIONS}|%{NAGIOS_EC_LINE_ENABLE_HOST_SVC_NOTIFICATIONS}|%{NAGIOS_EC_LINE_DISABLE_HOST_NOTIFICATIONS}|%{NAGIOS_EC_LINE_ENABLE_HOST_NOTIFICATIONS}|%{NAGIOS_EC_LINE_DISABLE_SVC_NOTIFICATIONS}|%{NAGIOS_EC_LINE_ENABLE_SVC_NOTIFICATIONS}) diff --git a/pygrok/patterns/postgresql b/pygrok/patterns/postgresql index 07a40a9..cbfd5a6 100644 --- a/pygrok/patterns/postgresql +++ b/pygrok/patterns/postgresql @@ -1,2 +1,2 @@ # Default postgresql pg_log format pattern -POSTGRESQL %{DATESTAMP:timestamp} %{TZ:[event][timezone]} %{DATA:[user][name]} %{GREEDYDATA:[postgresql][log][connection_id]} %{POSINT:[process][pid]:int} +POSTGRESQL %{DATESTAMP:timestamp} %{TZ:event.timezone} %{DATA:user.name} %{GREEDYDATA:postgresql.log.connection_id} %{POSINT:process.pid:int} diff --git a/pygrok/patterns/rails b/pygrok/patterns/rails index 21348a9..81717d9 100644 --- a/pygrok/patterns/rails +++ b/pygrok/patterns/rails @@ -1,13 +1,13 @@ RUUID \h{32} # rails controller with action -RCONTROLLER (?<[rails][controller][class]>[^#]+)#(?<[rails][controller][action]>\w+) +RCONTROLLER (?<rails.controller.class>[^#]+)#(?<rails.controller.action>\w+) # this will often be the only line: -RAILS3HEAD (?m)Started %{WORD:[http][request][method]} "%{URIPATHPARAM:[url][original]}" for 
%{IPORHOST:[source][address]} at (?<timestamp>%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:%{MINUTE}:%{SECOND} %{ISO8601_TIMEZONE}) +RAILS3HEAD (?m)Started %{WORD:http.request.method} "%{URIPATHPARAM:url.original}" for %{IPORHOST:source.address} at (?<timestamp>%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:%{MINUTE}:%{SECOND} %{ISO8601_TIMEZONE}) # for some a strange reason, params are stripped of {} - not sure that's a good idea. -RPROCESSING \W*Processing by %{RCONTROLLER} as (?<[rails][request][format]>\S+)(?:\W*Parameters: {%{DATA:[rails][request][params]}}\W*)? -RAILS3FOOT Completed %{POSINT:[http][response][status_code]:int}%{DATA} in %{NUMBER:[rails][request][duration][total]:float}ms %{RAILS3PROFILE}%{GREEDYDATA} -RAILS3PROFILE (?:\(Views: %{NUMBER:[rails][request][duration][view]:float}ms \| ActiveRecord: %{NUMBER:[rails][request][duration][active_record]:float}ms|\(ActiveRecord: %{NUMBER:[rails][request][duration][active_record]:float}ms)? +RPROCESSING \W*Processing by %{RCONTROLLER} as (?<rails.request.format>\S+)(?:\W*Parameters: {%{DATA:rails.request.params}}\W*)? +RAILS3FOOT Completed %{POSINT:http.response.status_code:int}%{DATA} in %{NUMBER:rails.request.duration.total:float}ms %{RAILS3PROFILE}%{GREEDYDATA} +RAILS3PROFILE (?:\(Views: %{NUMBER:rails.request.duration.view:float}ms \| ActiveRecord: %{NUMBER:rails.request.duration.active_record:float}ms|\(ActiveRecord: %{NUMBER:rails.request.duration.active_record:float}ms)? # putting it all together -RAILS3 %{RAILS3HEAD}(?:%{RPROCESSING})?(?<[rails][request][explain][original]>(?:%{DATA}\n)*)(?:%{RAILS3FOOT})? +RAILS3 %{RAILS3HEAD}(?:%{RPROCESSING})?(?<rails.request.explain.original>(?:%{DATA}\n)*)(?:%{RAILS3FOOT})? 
diff --git a/pygrok/patterns/redis b/pygrok/patterns/redis index 79dacd3..063290e 100644 --- a/pygrok/patterns/redis +++ b/pygrok/patterns/redis @@ -1,3 +1,3 @@ REDISTIMESTAMP %{MONTHDAY} %{MONTH} %{TIME} -REDISLOG \[%{POSINT:[process][pid]:int}\] %{REDISTIMESTAMP:timestamp} \* -REDISMONLOG %{NUMBER:timestamp} \[%{INT:[redis][database][id]} %{IP:[client][ip]}:%{POSINT:[client][port]:int}\] "%{WORD:[redis][command][name]}"\s?%{GREEDYDATA:[redis][command][args]} +REDISLOG \[%{POSINT:process.pid:int}\] %{REDISTIMESTAMP:timestamp} \* +REDISMONLOG %{NUMBER:timestamp} \[%{INT:redis.database.id} %{IP:client.ip}:%{POSINT:client.port:int}\] "%{WORD:redis.command.name}"\s?%{GREEDYDATA:redis.command.args} diff --git a/pygrok/patterns/ruby b/pygrok/patterns/ruby index 01bc35a..2c9a7ce 100644 --- a/pygrok/patterns/ruby +++ b/pygrok/patterns/ruby @@ -1,2 +1,2 @@ RUBY_LOGLEVEL (?:DEBUG|FATAL|ERROR|WARN|INFO) -RUBY_LOGGER [DFEWI], \[%{TIMESTAMP_ISO8601:timestamp} #%{POSINT:[process][pid]:int}\] *%{RUBY_LOGLEVEL:[log][level]} -- +%{DATA:[process][name]}: %{GREEDYDATA:message} +RUBY_LOGGER [DFEWI], \[%{TIMESTAMP_ISO8601:timestamp} #%{POSINT:process.pid:int}\] *%{RUBY_LOGLEVEL:log.level} -- +%{DATA:process.name}: %{GREEDYDATA:message} diff --git a/pygrok/patterns/squid b/pygrok/patterns/squid index b981288..dfff4f6 100644 --- a/pygrok/patterns/squid +++ b/pygrok/patterns/squid @@ -1,6 +1,6 @@ # Pattern squid3 # Documentation of squid3 logs formats can be found at the following link: # http://wiki.squid-cache.org/Features/LogFormat -SQUID3_STATUS (?:%{POSINT:[http][response][status_code]:int}|0|000) -SQUID3 %{NUMBER:timestamp}\s+%{NUMBER:[squid][request][duration]:int}\s%{IP:[source][ip]}\s%{WORD:[event][action]}/%{SQUID3_STATUS}\s%{INT:[http][response][bytes]:int}\s%{WORD:[http][request][method]}\s%{NOTSPACE:[url][original]}\s(?:-|%{NOTSPACE:[user][name]})\s%{WORD:[squid][hierarchy_code]}/(?:-|%{IPORHOST:[destination][address]})\s(?:-|%{NOTSPACE:[http][response][mime_type]}) -# :long 
- %{INT:[http][response][bytes]:int} +SQUID3_STATUS (?:%{POSINT:http.response.status_code:int}|0|000) +SQUID3 %{NUMBER:timestamp}\s+%{NUMBER:squid.request.duration:int}\s%{IP:source.ip}\s%{WORD:event.action}/%{SQUID3_STATUS}\s%{INT:http.response.bytes:long}\s%{WORD:http.request.method}\s%{NOTSPACE:url.original}\s(?:-|%{NOTSPACE:user.name})\s%{WORD:squid.hierarchy_code}/(?:-|%{IPORHOST:destination.address})\s(?:-|%{NOTSPACE:http.response.mime_type}) +# :long - %{INT:http.response.bytes:int} diff --git a/pygrok/patterns/zeek b/pygrok/patterns/zeek index 1af50b2..397e84a 100644 --- a/pygrok/patterns/zeek +++ b/pygrok/patterns/zeek @@ -5,29 +5,29 @@ ZEEK_DATA [^\t]+ # http.log - the 'new' format (compared to BRO_HTTP) # has *version* and *origin* fields added and *filename* replaced with *orig_filenames* + *resp_filenames* -ZEEK_HTTP %{NUMBER:timestamp}\t%{NOTSPACE:[zeek][session_id]}\t%{IP:[source][ip]}\t%{INT:[source][port]:int}\t%{IP:[destination][ip]}\t%{INT:[destination][port]:int}\t%{INT:[zeek][http][trans_depth]:int}\t(?:-|%{WORD:[http][request][method]})\t(?:-|%{ZEEK_DATA:[url][domain]})\t(?:-|%{ZEEK_DATA:[url][original]})\t(?:-|%{ZEEK_DATA:[http][request][referrer]})\t(?:-|%{NUMBER:[http][version]})\t(?:-|%{ZEEK_DATA:[user_agent][original]})\t(?:-|%{ZEEK_DATA:[zeek][http][origin]})\t(?:-|%{NUMBER:[http][request][body][bytes]:int})\t(?:-|%{NUMBER:[http][response][body][bytes]:int})\t(?:-|%{POSINT:[http][response][status_code]:int})\t(?:-|%{DATA:[zeek][http][status_msg]})\t(?:-|%{POSINT:[zeek][http][info_code]:int})\t(?:-|%{DATA:[zeek][http][info_msg]})\t(?:\(empty\)|%{ZEEK_DATA:[zeek][http][tags]})\t(?:-|%{ZEEK_DATA:[url][username]})\t(?:-|%{ZEEK_DATA:[url][password]})\t(?:-|%{ZEEK_DATA:[zeek][http][proxied]})\t(?:-|%{ZEEK_DATA:[zeek][http][orig_fuids]})\t(?:-|%{ZEEK_DATA:[zeek][http][orig_filenames]})\t(?:-|%{ZEEK_DATA:[http][request][mime_type]})\t(?:-|%{ZEEK_DATA:[zeek][http][resp_fuids]})\t(?:-|%{ZEEK_DATA:[zeek][http][resp_filenames]})\t(?:-|%{ZEEK_DATA:[ht
tp][response][mime_type]}) -# :long - %{NUMBER:[http][request][body][bytes]:int} -# :long - %{NUMBER:[http][response][body][bytes]:int} +ZEEK_HTTP %{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{INT:source.port:int}\t%{IP:destination.ip}\t%{INT:destination.port:int}\t%{INT:zeek.http.trans_depth:int}\t(?:-|%{WORD:http.request.method})\t(?:-|%{ZEEK_DATA:url.domain})\t(?:-|%{ZEEK_DATA:url.original})\t(?:-|%{ZEEK_DATA:http.request.referrer})\t(?:-|%{NUMBER:http.version})\t(?:-|%{ZEEK_DATA:user_agent.original})\t(?:-|%{ZEEK_DATA:zeek.http.origin})\t(?:-|%{NUMBER:http.request.body.bytes:long})\t(?:-|%{NUMBER:http.response.body.bytes:long})\t(?:-|%{POSINT:http.response.status_code:int})\t(?:-|%{DATA:zeek.http.status_msg})\t(?:-|%{POSINT:zeek.http.info_code:int})\t(?:-|%{DATA:zeek.http.info_msg})\t(?:\(empty\)|%{ZEEK_DATA:zeek.http.tags})\t(?:-|%{ZEEK_DATA:url.username})\t(?:-|%{ZEEK_DATA:url.password})\t(?:-|%{ZEEK_DATA:zeek.http.proxied})\t(?:-|%{ZEEK_DATA:zeek.http.orig_fuids})\t(?:-|%{ZEEK_DATA:zeek.http.orig_filenames})\t(?:-|%{ZEEK_DATA:http.request.mime_type})\t(?:-|%{ZEEK_DATA:zeek.http.resp_fuids})\t(?:-|%{ZEEK_DATA:zeek.http.resp_filenames})\t(?:-|%{ZEEK_DATA:http.response.mime_type}) +# :long - %{NUMBER:http.request.body.bytes:int} +# :long - %{NUMBER:http.response.body.bytes:int} # dns.log - 'updated' BRO_DNS format (added *zeek.dns.rtt*) -ZEEK_DNS 
%{NUMBER:timestamp}\t%{NOTSPACE:[zeek][session_id]}\t%{IP:[source][ip]}\t%{INT:[source][port]:int}\t%{IP:[destination][ip]}\t%{INT:[destination][port]:int}\t%{WORD:[network][transport]}\t(?:-|%{INT:[dns][id]:int})\t(?:-|%{NUMBER:[zeek][dns][rtt]:float})\t(?:-|%{ZEEK_DATA:[dns][question][name]})\t(?:-|%{INT:[zeek][dns][qclass]:int})\t(?:-|%{ZEEK_DATA:[zeek][dns][qclass_name]})\t(?:-|%{INT:[zeek][dns][qtype]:int})\t(?:-|%{ZEEK_DATA:[dns][question][type]})\t(?:-|%{INT:[zeek][dns][rcode]:int})\t(?:-|%{ZEEK_DATA:[dns][response_code]})\t%{ZEEK_BOOL:[zeek][dns][AA]}\t%{ZEEK_BOOL:[zeek][dns][TC]}\t%{ZEEK_BOOL:[zeek][dns][RD]}\t%{ZEEK_BOOL:[zeek][dns][RA]}\t%{NONNEGINT:[zeek][dns][Z]:int}\t(?:-|%{ZEEK_DATA:[zeek][dns][answers]})\t(?:-|%{DATA:[zeek][dns][TTLs]})\t(?:-|%{ZEEK_BOOL:[zeek][dns][rejected]}) +ZEEK_DNS %{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{INT:source.port:int}\t%{IP:destination.ip}\t%{INT:destination.port:int}\t%{WORD:network.transport}\t(?:-|%{INT:dns.id:int})\t(?:-|%{NUMBER:zeek.dns.rtt:float})\t(?:-|%{ZEEK_DATA:dns.question.name})\t(?:-|%{INT:zeek.dns.qclass:int})\t(?:-|%{ZEEK_DATA:zeek.dns.qclass_name})\t(?:-|%{INT:zeek.dns.qtype:int})\t(?:-|%{ZEEK_DATA:dns.question.type})\t(?:-|%{INT:zeek.dns.rcode:int})\t(?:-|%{ZEEK_DATA:dns.response_code})\t%{ZEEK_BOOL:zeek.dns.AA}\t%{ZEEK_BOOL:zeek.dns.TC}\t%{ZEEK_BOOL:zeek.dns.RD}\t%{ZEEK_BOOL:zeek.dns.RA}\t%{NONNEGINT:zeek.dns.Z:int}\t(?:-|%{ZEEK_DATA:zeek.dns.answers})\t(?:-|%{DATA:zeek.dns.TTLs})\t(?:-|%{ZEEK_BOOL:zeek.dns.rejected}) # conn.log - the 'new' format (requires *zeek.connection.local_resp*, handles `(empty)` as `-` for tunnel_parents, and optional mac adresses) -ZEEK_CONN 
%{NUMBER:timestamp}\t%{NOTSPACE:[zeek][session_id]}\t%{IP:[source][ip]}\t%{INT:[source][port]:int}\t%{IP:[destination][ip]}\t%{INT:[destination][port]:int}\t%{WORD:[network][transport]}\t(?:-|%{ZEEK_DATA:[network][protocol]})\t(?:-|%{NUMBER:[zeek][connection][duration]:float})\t(?:-|%{INT:[zeek][connection][orig_bytes]:int})\t(?:-|%{INT:[zeek][connection][resp_bytes]:int})\t(?:-|%{ZEEK_DATA:[zeek][connection][state]})\t(?:-|%{ZEEK_BOOL:[zeek][connection][local_orig]})\t(?:-|%{ZEEK_BOOL:[zeek][connection][local_resp]})\t(?:-|%{INT:[zeek][connection][missed_bytes]:int})\t(?:-|%{ZEEK_DATA:[zeek][connection][history]})\t(?:-|%{INT:[source][packets]:int})\t(?:-|%{INT:[source][bytes]:int})\t(?:-|%{INT:[destination][packets]:int})\t(?:-|%{INT:[destination][bytes]:int})\t(?:-|%{ZEEK_DATA:[zeek][connection][tunnel_parents]})(?:\t(?:-|%{COMMONMAC:[source][mac]})\t(?:-|%{COMMONMAC:[destination][mac]}))? -# :long - %{INT:[zeek][connection][orig_bytes]:int} -# :long - %{INT:[zeek][connection][resp_bytes]:int} -# :long - %{INT:[zeek][connection][missed_bytes]:int} -# :long - %{INT:[source][packets]:int} -# :long - %{INT:[source][bytes]:int} -# :long - %{INT:[destination][packets]:int} -# :long - %{INT:[destination][bytes]:int} +ZEEK_CONN 
%{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{INT:source.port:int}\t%{IP:destination.ip}\t%{INT:destination.port:int}\t%{WORD:network.transport}\t(?:-|%{ZEEK_DATA:network.protocol})\t(?:-|%{NUMBER:zeek.connection.duration:float})\t(?:-|%{INT:zeek.connection.orig_bytes:long})\t(?:-|%{INT:zeek.connection.resp_bytes:long})\t(?:-|%{ZEEK_DATA:zeek.connection.state})\t(?:-|%{ZEEK_BOOL:zeek.connection.local_orig})\t(?:-|%{ZEEK_BOOL:zeek.connection.local_resp})\t(?:-|%{INT:zeek.connection.missed_bytes:long})\t(?:-|%{ZEEK_DATA:zeek.connection.history})\t(?:-|%{INT:source.packets:long})\t(?:-|%{INT:source.bytes:long})\t(?:-|%{INT:destination.packets:long})\t(?:-|%{INT:destination.bytes:long})\t(?:-|%{ZEEK_DATA:zeek.connection.tunnel_parents})(?:\t(?:-|%{COMMONMAC:source.mac})\t(?:-|%{COMMONMAC:destination.mac}))? +# :long - %{INT:zeek.connection.orig_bytes:int} +# :long - %{INT:zeek.connection.resp_bytes:int} +# :long - %{INT:zeek.connection.missed_bytes:int} +# :long - %{INT:source.packets:int} +# :long - %{INT:source.bytes:int} +# :long - %{INT:destination.packets:int} +# :long - %{INT:destination.bytes:int} # files.log - updated BRO_FILES format (2 new fields added at the end) -ZEEK_FILES_TX_HOSTS (?:-|%{IP:[server][ip]})|(?<[zeek][files][tx_hosts]>%{IP:[server][ip]}(?:[\s,]%{IP})+) -ZEEK_FILES_RX_HOSTS (?:-|%{IP:[client][ip]})|(?<[zeek][files][rx_hosts]>%{IP:[client][ip]}(?:[\s,]%{IP})+) -ZEEK_FILES 
%{NUMBER:timestamp}\t%{NOTSPACE:[zeek][files][fuid]}\t%{ZEEK_FILES_TX_HOSTS}\t%{ZEEK_FILES_RX_HOSTS}\t(?:-|%{ZEEK_DATA:[zeek][files][session_ids]})\t(?:-|%{ZEEK_DATA:[zeek][files][source]})\t(?:-|%{INT:[zeek][files][depth]:int})\t(?:-|%{ZEEK_DATA:[zeek][files][analyzers]})\t(?:-|%{ZEEK_DATA:[file][mime_type]})\t(?:-|%{ZEEK_DATA:[file][name]})\t(?:-|%{NUMBER:[zeek][files][duration]:float})\t(?:-|%{ZEEK_DATA:[zeek][files][local_orig]})\t(?:-|%{ZEEK_BOOL:[zeek][files][is_orig]})\t(?:-|%{INT:[zeek][files][seen_bytes]:int})\t(?:-|%{INT:[file][size]:int})\t(?:-|%{INT:[zeek][files][missing_bytes]:int})\t(?:-|%{INT:[zeek][files][overflow_bytes]:int})\t(?:-|%{ZEEK_BOOL:[zeek][files][timedout]})\t(?:-|%{ZEEK_DATA:[zeek][files][parent_fuid]})\t(?:-|%{ZEEK_DATA:[file][hash][md5]})\t(?:-|%{ZEEK_DATA:[file][hash][sha1]})\t(?:-|%{ZEEK_DATA:[file][hash][sha256]})\t(?:-|%{ZEEK_DATA:[zeek][files][extracted]})(?:\t(?:-|%{ZEEK_BOOL:[zeek][files][extracted_cutoff]})\t(?:-|%{INT:[zeek][files][extracted_size]:int}))? 
-# :long - %{INT:[zeek][files][seen_bytes]:int} -# :long - %{INT:[file][size]:int} -# :long - %{INT:[zeek][files][missing_bytes]:int} -# :long - %{INT:[zeek][files][overflow_bytes]:int} -# :long - %{INT:[zeek][files][extracted_size]:int} +ZEEK_FILES_TX_HOSTS (?:-|%{IP:server.ip})|(?<zeek.files.tx_hosts>%{IP:server.ip}(?:\s,%{IP})+) +ZEEK_FILES_RX_HOSTS (?:-|%{IP:client.ip})|(?<zeek.files.rx_hosts>%{IP:client.ip}(?:\s,%{IP})+) +ZEEK_FILES %{NUMBER:timestamp}\t%{NOTSPACE:zeek.files.fuid}\t%{ZEEK_FILES_TX_HOSTS}\t%{ZEEK_FILES_RX_HOSTS}\t(?:-|%{ZEEK_DATA:zeek.files.session_ids})\t(?:-|%{ZEEK_DATA:zeek.files.source})\t(?:-|%{INT:zeek.files.depth:int})\t(?:-|%{ZEEK_DATA:zeek.files.analyzers})\t(?:-|%{ZEEK_DATA:file.mime_type})\t(?:-|%{ZEEK_DATA:file.name})\t(?:-|%{NUMBER:zeek.files.duration:float})\t(?:-|%{ZEEK_DATA:zeek.files.local_orig})\t(?:-|%{ZEEK_BOOL:zeek.files.is_orig})\t(?:-|%{INT:zeek.files.seen_bytes:long})\t(?:-|%{INT:file.size:long})\t(?:-|%{INT:zeek.files.missing_bytes:long})\t(?:-|%{INT:zeek.files.overflow_bytes:long})\t(?:-|%{ZEEK_BOOL:zeek.files.timedout})\t(?:-|%{ZEEK_DATA:zeek.files.parent_fuid})\t(?:-|%{ZEEK_DATA:file.hash.md5})\t(?:-|%{ZEEK_DATA:file.hash.sha1})\t(?:-|%{ZEEK_DATA:file.hash.sha256})\t(?:-|%{ZEEK_DATA:zeek.files.extracted})(?:\t(?:-|%{ZEEK_BOOL:zeek.files.extracted_cutoff})\t(?:-|%{INT:zeek.files.extracted_size:long}))? 
+# :long - %{INT:zeek.files.seen_bytes:int} +# :long - %{INT:file.size:int} +# :long - %{INT:zeek.files.missing_bytes:int} +# :long - %{INT:zeek.files.overflow_bytes:int} +# :long - %{INT:zeek.files.extracted_size:int} From 87555752b0d2fd675ec9d12c595093f039ae3325 Mon Sep 17 00:00:00 2001 From: Vito Piserchia Date: Mon, 21 Feb 2022 00:38:12 +0100 Subject: [PATCH 03/22] support for both newer dotted style and legacy --- pygrok/pygrok.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pygrok/pygrok.py b/pygrok/pygrok.py index b3da0df..972a9a1 100644 --- a/pygrok/pygrok.py +++ b/pygrok/pygrok.py @@ -98,9 +98,11 @@ def _load_search_pattern(self): py_regex_pattern = self.pattern while True: # Finding all types specified in the groks - m = re.findall(r"%{(\w+):\[?([\w\.?]+)\]?:(\w+)}", py_regex_pattern) + m = re.findall(r"%{(\w+):([\w\.?\[\]]+):(\w+)}", py_regex_pattern) for n in m: - self.type_mapper[n[1]] = n[2] + # accounts for dotted or legacy groups, but not both at the same time + key = '.'.join([f[1] and f[1] or f[0] for f in re.findall("\[(\w*?)\]|(\w+)", n[1])]) + self.type_mapper[key] = n[2] # replace %{pattern_name:custom_name} (or %{pattern_name:custom_name:type} # with regex and regex group name @@ -133,7 +135,7 @@ def _load_search_pattern(self): py_regex_pattern, ) - if re.search("%{\w+(:\[?[\w\.\]\[]+\]?)?}", py_regex_pattern) is None: + if re.search("%{\w+(:[\w\.?\[\]]+)?}", py_regex_pattern) is None: break self.regex_obj = re.compile(py_regex_pattern) @@ -200,3 +202,4 @@ def __str__(self): self.sub_patterns, ) + From d429e73c8f1e3160f3286db483c75addec166313 Mon Sep 17 00:00:00 2001 From: Vito Piserchia Date: Tue, 7 Jun 2022 22:39:46 +0200 Subject: [PATCH 04/22] fix broken cisco patterns, make a change request to ES as well --- pygrok/patterns/firewalls | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pygrok/patterns/firewalls b/pygrok/patterns/firewalls index 892b3a5..9a662d4 100644 --- 
a/pygrok/patterns/firewalls +++ b/pygrok/patterns/firewalls @@ -48,11 +48,11 @@ CISCOFW106015 %{CISCO_ACTION:cisco.asa.outcome} %{WORD:cisco.asa.network.transpo # ASA-1-106021 CISCOFW106021 %{CISCO_ACTION:cisco.asa.outcome} %{WORD:cisco.asa.network.transport} reverse path check from %{IP:source.ip} to %{IP:destination.ip} on interface %{NOTSPACE:observer.egress.interface.name} # ASA-4-106023 -CISCOFW106023 %{CISCO_ACTION:cisco.asa.outcome}(?: protocol)? %{WORD:cisco.asa.network.transport} src %{CISCO_SRC_HOST_PORT_USER} dst %{CISCO_DST_HOST_PORT_USER}( \(type %{INT:cisco.asa.icmp_type:int}, code %{INT:cisco.asa.icmp_code:int}\))? by access-group "?%{DATA:cisco.asa.rule_name}"? \%{DATA:[@metadata.cisco.asa.hashcode1}, %{DATA:@metadata.cisco.asa.hashcode2}\] +CISCOFW106023 %{CISCO_ACTION:cisco.asa.outcome}(?: protocol)? %{WORD:cisco.asa.network.transport} src %{CISCO_SRC_HOST_PORT_USER} dst %{CISCO_DST_HOST_PORT_USER}( \(type %{INT:cisco.asa.icmp_type:int}, code %{INT:cisco.asa.icmp_code:int}\))? by access-group "?%{DATA:cisco.asa.rule_name}"? 
\[%{DATA:@metadata.cisco.asa.hashcode1}, %{DATA:@metadata.cisco.asa.hashcode2}\] # ASA-4-106100, ASA-4-106102, ASA-4-106103 -CISCOFW106100_2_3 access-list %{NOTSPACE:cisco.asa.rule_name} %{CISCO_ACTION:cisco.asa.outcome} %{WORD:cisco.asa.network.transport} for user '%{DATA:user.name}' %{DATA:observer.ingress.interface.name}/%{IP:source.ip}\(%{INT:source.port:int}\) -> %{DATA:observer.egress.interface.name}/%{IP:destination.ip}\(%{INT:destination.port:int}\) %{CISCO_HITCOUNT_INTERVAL} \%{DATA:[@metadata.cisco.asa.hashcode1}, %{DATA:@metadata.cisco.asa.hashcode2}\] +CISCOFW106100_2_3 access-list %{NOTSPACE:cisco.asa.rule_name} %{CISCO_ACTION:cisco.asa.outcome} %{WORD:cisco.asa.network.transport} for user '%{DATA:user.name}' %{DATA:observer.ingress.interface.name}/%{IP:source.ip}\(%{INT:source.port:int}\) -> %{DATA:observer.egress.interface.name}/%{IP:destination.ip}\(%{INT:destination.port:int}\) %{CISCO_HITCOUNT_INTERVAL} \[%{DATA:@metadata.cisco.asa.hashcode1}, %{DATA:@metadata.cisco.asa.hashcode2}\] # ASA-5-106100 -CISCOFW106100 access-list %{NOTSPACE:cisco.asa.rule_name} %{CISCO_ACTION:cisco.asa.outcome} %{WORD:cisco.asa.network.transport} %{DATA:observer.ingress.interface.name}/%{IP:source.ip}\(%{INT:source.port:int}\)(?:\(%{DATA:source.user.name}\))? -> %{DATA:observer.egress.interface.name}/%{IP:destination.ip}\(%{INT:destination.port:int}\)(?:\(%{DATA:source.user.name}\))? hit-cnt %{INT:cisco.asa.hit_count:int} %{CISCO_INTERVAL} \%{DATA:[@metadata.cisco.asa.hashcode1}, %{DATA:@metadata.cisco.asa.hashcode2}\] +CISCOFW106100 access-list %{NOTSPACE:cisco.asa.rule_name} %{CISCO_ACTION:cisco.asa.outcome} %{WORD:cisco.asa.network.transport} %{DATA:observer.ingress.interface.name}/%{IP:source.ip}\(%{INT:source.port:int}\)(?:\(%{DATA:source.user.name}\))? -> %{DATA:observer.egress.interface.name}/%{IP:destination.ip}\(%{INT:destination.port:int}\)(?:\(%{DATA:source.user.name}\))? 
hit-cnt %{INT:cisco.asa.hit_count:int} %{CISCO_INTERVAL} \[%{DATA:@metadata.cisco.asa.hashcode1}, %{DATA:@metadata.cisco.asa.hashcode2}\] # ASA-5-304001 CISCOFW304001 %{IP:source.ip}(?:\(%{DATA:source.user.name}\))? Accessed URL %{IP:destination.ip}:%{GREEDYDATA:url.original} # ASA-6-110002 From 795aa397675f1054d96a78f97c80cb22837318a7 Mon Sep 17 00:00:00 2001 From: Vito Piserchia Date: Tue, 7 Jun 2022 22:40:26 +0200 Subject: [PATCH 05/22] bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1a0c4ab..bbc74e4 100755 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setup( name="pygrok", - version="1.0.2", + version="1.0.3", description="A Python library to parse strings and" + " extract information from structured/unstructured data", long_description=long_desc, From 3a421c3d784b5c0c04b146879e13cc2d2117cfd7 Mon Sep 17 00:00:00 2001 From: Vito Piserchia Date: Tue, 14 Jun 2022 19:18:11 +0200 Subject: [PATCH 06/22] fix a couple of CISCO patterns --- pygrok/patterns/firewalls | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pygrok/patterns/firewalls b/pygrok/patterns/firewalls index 9a662d4..29389ea 100644 --- a/pygrok/patterns/firewalls +++ b/pygrok/patterns/firewalls @@ -48,7 +48,7 @@ CISCOFW106015 %{CISCO_ACTION:cisco.asa.outcome} %{WORD:cisco.asa.network.transpo # ASA-1-106021 CISCOFW106021 %{CISCO_ACTION:cisco.asa.outcome} %{WORD:cisco.asa.network.transport} reverse path check from %{IP:source.ip} to %{IP:destination.ip} on interface %{NOTSPACE:observer.egress.interface.name} # ASA-4-106023 -CISCOFW106023 %{CISCO_ACTION:cisco.asa.outcome}(?: protocol)? %{WORD:cisco.asa.network.transport} src %{CISCO_SRC_HOST_PORT_USER} dst %{CISCO_DST_HOST_PORT_USER}( \(type %{INT:cisco.asa.icmp_type:int}, code %{INT:cisco.asa.icmp_code:int}\))? by access-group "?%{DATA:cisco.asa.rule_name}"? 
\[%{DATA:@metadata.cisco.asa.hashcode1}, %{DATA:@metadata.cisco.asa.hashcode2}\] +CISCOFW106023 %{CISCO_ACTION:cisco.asa.outcome}(?: protocol)? %{WORD:cisco.asa.network.transport} src %{CISCO_SRC_HOST_PORT_USER} dst %{CISCO_DST_HOST_PORT_USER}( \(?type %{INT:cisco.asa.icmp_type:int}, code %{INT:cisco.asa.icmp_code:int},?\)?)? by access-group "?%{DATA:cisco.asa.rule_name}"? \[%{DATA:@metadata.cisco.asa.hashcode1}, %{DATA:@metadata.cisco.asa.hashcode2}\] # ASA-4-106100, ASA-4-106102, ASA-4-106103 CISCOFW106100_2_3 access-list %{NOTSPACE:cisco.asa.rule_name} %{CISCO_ACTION:cisco.asa.outcome} %{WORD:cisco.asa.network.transport} for user '%{DATA:user.name}' %{DATA:observer.ingress.interface.name}/%{IP:source.ip}\(%{INT:source.port:int}\) -> %{DATA:observer.egress.interface.name}/%{IP:destination.ip}\(%{INT:destination.port:int}\) %{CISCO_HITCOUNT_INTERVAL} \[%{DATA:@metadata.cisco.asa.hashcode1}, %{DATA:@metadata.cisco.asa.hashcode2}\] # ASA-5-106100 @@ -60,7 +60,7 @@ CISCOFW110002 %{CISCO_REASON:event.reason} for %{WORD:cisco.asa.network.transpor # ASA-6-302010 CISCOFW302010 %{INT:cisco.asa.connections.in_use:int} in use, %{INT:cisco.asa.connections.most_used:int} most used # ASA-6-302013, ASA-6-302014, ASA-6-302015, ASA-6-302016 -CISCOFW302013_302014_302015_302016 %{CISCO_ACTION:cisco.asa.outcome}(?: %{CISCO_DIRECTION:cisco.asa.network.direction})? %{WORD:cisco.asa.network.transport} connection %{INT:cisco.asa.connection_id} for %{NOTSPACE:observer.ingress.interface.name}:%{IP:source.ip}/%{INT:source.port:int}(?: \(%{IP:source.nat.ip}/%{INT:source.nat.port:int}\))?(?:\(%{DATA:source.user.name?}\))? to %{NOTSPACE:observer.egress.interface.name}:%{IP:destination.ip}/%{INT:destination.port:int}( \(%{IP:destination.nat.ip}/%{INT:destination.nat.port:int}\))?(?:\(%{DATA:destination.user.name}\))?( duration %{TIME:cisco.asa.duration} bytes %{INT:network.bytes:long})?(?: %{CISCO_REASON:event.reason})?(?: \(%{DATA:user.name}\))? 
+CISCOFW302013_302014_302015_302016 %{CISCO_ACTION:cisco.asa.outcome}(?: %{CISCO_DIRECTION:cisco.asa.network.direction})? %{WORD:cisco.asa.network.transport} connection %{INT:cisco.asa.connection_id} for %{NOTSPACE:observer.ingress.interface.name}:%{IP:source.ip}/%{INT:source.port:int}(?: \(%{IP:source.nat.ip}/%{INT:source.nat.port:int}\))?(?:\(%{DATA:source.user.name}\))? to %{NOTSPACE:observer.egress.interface.name}:%{IP:destination.ip}/%{INT:destination.port:int}( \(%{IP:destination.nat.ip}/%{INT:destination.nat.port:int}\))?(?:\(%{DATA:destination.user.name}\))?( duration %{TIME:cisco.asa.duration} bytes %{INT:network.bytes:long})?(?: %{CISCO_REASON:event.reason})?(?: \(%{DATA:user.name}\))? # :long - %{INT:network.bytes:int} # ASA-6-302020, ASA-6-302021 CISCOFW302020_302021 %{CISCO_ACTION:cisco.asa.outcome}(?: %{CISCO_DIRECTION:cisco.asa.network.direction})? %{WORD:cisco.asa.network.transport} connection for faddr %{IP:destination.ip}/%{INT:cisco.asa.icmp_seq:int}(?:\(%{DATA:destination.user.name}\))? gaddr %{IP:source.nat.ip}/%{INT:cisco.asa.icmp_type:int} laddr %{IP:source.ip}/%{INT}(?: \(%{DATA:source.user.name}\))? 
From a493a9dea4957fe0e5a45cec0b89d2b56260fde3 Mon Sep 17 00:00:00 2001 From: Vito Piserchia Date: Tue, 14 Jun 2022 19:18:56 +0200 Subject: [PATCH 07/22] - Add support for @ in the named group patterns - Fix type in pattern usage --- pygrok/pygrok.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pygrok/pygrok.py b/pygrok/pygrok.py index 972a9a1..194f9f7 100644 --- a/pygrok/pygrok.py +++ b/pygrok/pygrok.py @@ -15,7 +15,7 @@ def parse_name(source, allow_numeric=False, allow_group_0=False): raise error("bad character in group name", source.string, source.pos) else: - if not name.replace(".","").isidentifier(): + if not name.replace("@", "").replace(".","").isidentifier(): raise error("character in group name", source.string, source.pos) @@ -98,16 +98,16 @@ def _load_search_pattern(self): py_regex_pattern = self.pattern while True: # Finding all types specified in the groks - m = re.findall(r"%{(\w+):([\w\.?\[\]]+):(\w+)}", py_regex_pattern) + m = re.findall(r"%{(\w+):([@\w\.?\[\]]+):(\w+)}", py_regex_pattern) for n in m: # accounts for dotted or legacy groups, but not both at the same time - key = '.'.join([f[1] and f[1] or f[0] for f in re.findall("\[(\w*?)\]|(\w+)", n[1])]) + key = '.'.join([f[1] and f[1] or f[0] for f in re.findall("\[([@\.\w]*?)\]|([@\.\w]+)", n[1])]) self.type_mapper[key] = n[2] # replace %{pattern_name:custom_name} (or %{pattern_name:custom_name:type} # with regex and regex group name py_regex_pattern = re.sub( - r"%{(\w+):(\[?[\w\]\[\.]+\]?)(?::\w+)?}", + r"%{(\w+):(\[?[@\w\]\[\.]+\]?)(?::\w+)?}", lambda m: "(?P<" + m.group(2).replace("][", ".").replace("[", "").replace("]", "") + ">" @@ -135,7 +135,7 @@ def _load_search_pattern(self): py_regex_pattern, ) - if re.search("%{\w+(:[\w\.?\[\]]+)?}", py_regex_pattern) is None: + if re.search("%{\w+(:[@\w\.?\[\]]+)?}", py_regex_pattern) is None: break self.regex_obj = re.compile(py_regex_pattern) From af4f7d4971270cad983f48bbc1eec1033d581de9 Mon Sep 17 00:00:00 2001 
From: Vito Piserchia Date: Tue, 14 Jun 2022 19:19:17 +0200 Subject: [PATCH 08/22] bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index bbc74e4..2ae5cf4 100755 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setup( name="pygrok", - version="1.0.3", + version="1.0.4", description="A Python library to parse strings and" + " extract information from structured/unstructured data", long_description=long_desc, From 74758c0212e55b631570b25d50a94434a18a3c4d Mon Sep 17 00:00:00 2001 From: Patrick Winter Date: Sat, 22 Feb 2020 21:19:33 +0100 Subject: [PATCH 09/22] Add support for re flags When working with multiline strings, re requires extra flags in order to interpret new line characters differently. e.g. https://docs.python.org/3/library/re.html#re.M With this change you can optionally pass re flags when initializing Grok objects. grok = Grok(pat, flags=re.M | re.S) I've successfully tested this change with both the re and regex package. --- README.md | 6 ++++-- pygrok/pygrok.py | 4 +++- tests/test_pygrok.py | 11 +++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a63e5ee..9a86214 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,8 @@ Some of the pattern you can use are listed here: other patterns such as `IP`, `HOSTNAME`, `URIPATH`, `DATE`, `TIMESTAMP_ISO8601`, `COMMONAPACHELOG`.. ``` +You can also pass re flags to Grok (e.g. `Grok('%{GREEDYDATA:txt}', flags=re.M|re.S)`). + See All patterns [here](./pygrok/patterns) You can also have custom pattern, see [these codes](https://github.com/garyelephant/pygrok/blob/master/tests/test_pygrok.py#L97). @@ -82,14 +84,14 @@ pattern files come from [logstash filter grok's pattern files](https://github.co Contribute --- -* You are encouraged to [fork](https://github.com/garyelephant/pygrok/fork), improve the code, then make a pull request. 
+* You are encouraged to [fork](https://github.com/garyelephant/pygrok/fork), improve the code, then make a pull request. * [Issue tracker](https://github.com/garyelephant/pygrok/issues) Get Help --- mail:garygaowork@gmail.com twitter:@garyelephant - + Contributors --- Thanks to [all contributors](https://github.com/garyelephant/pygrok/graphs/contributors) diff --git a/pygrok/pygrok.py b/pygrok/pygrok.py index 194f9f7..37491eb 100644 --- a/pygrok/pygrok.py +++ b/pygrok/pygrok.py @@ -43,6 +43,7 @@ def __init__( custom_patterns=None, fullmatch=True, match_unnamed_groks=False, + flags=0 ): self.pattern = pattern self.custom_patterns_dir = custom_patterns_dir @@ -50,6 +51,7 @@ def __init__( self.fullmatch = fullmatch custom_patterns = custom_patterns or {} self.match_unnamed_groks = match_unnamed_groks + self.flags = flags custom_pats = {} if custom_patterns_dir is not None: @@ -138,7 +140,7 @@ def _load_search_pattern(self): if re.search("%{\w+(:[@\w\.?\[\]]+)?}", py_regex_pattern) is None: break - self.regex_obj = re.compile(py_regex_pattern) + self.regex_obj = re.compile(py_regex_pattern, flags=self.flags) def _wrap_pattern_name(pat_name): diff --git a/tests/test_pygrok.py b/tests/test_pygrok.py index 2012dc8..040b96b 100644 --- a/tests/test_pygrok.py +++ b/tests/test_pygrok.py @@ -1,4 +1,9 @@ from pygrok import Grok +try: + import regex as re +except ImportError as e: + # If you import re, grok_match can't handle regular expression containing atomic group(?>) + import re def test_one_pat(): @@ -91,6 +96,12 @@ def test_one_pat(): m = grok.match(text) assert m["test_int"] == "/home/username/Downloads/test.sh" + text = 'some multiline\ntext' + pat = '%{GREEDYDATA:txt}' + grok = Grok(pat, flags=re.M | re.S) + m = grok.match(text) + assert m == {'txt': 'some multiline\ntext'}, 'grok match failed:%s, %s' % (text, pat, ) + def test_multiple_pats(): text = 'gary 25 "never quit"' From 72aac3456ed918d89f453394dcb91e547667bfe1 Mon Sep 17 00:00:00 2001 From: LAR Date: Fri, 4 
Feb 2022 11:36:58 +0100 Subject: [PATCH 10/22] fix: fix unixpath --- pygrok/patterns/grok-patterns | 2 +- tests/test_pygrok.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pygrok/patterns/grok-patterns b/pygrok/patterns/grok-patterns index 6f58f3f..230bbf5 100644 --- a/pygrok/patterns/grok-patterns +++ b/pygrok/patterns/grok-patterns @@ -34,7 +34,7 @@ HOSTPORT %{IPORHOST}:%{POSINT} # paths (only absolute paths are matched) PATH (?:%{UNIXPATH}|%{WINPATH}) -UNIXPATH (/[[[:alnum:]]_%!$@:.,+~-]*)+ +UNIXPATH (/[\w_%!$@:.,+~-]*)+ TTY (?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+)) WINPATH (?>[A-Za-z]+:|\\)(?:\\[^\\?*]*)+ URIPROTO [A-Za-z]([A-Za-z0-9+\-.]+)+ diff --git a/tests/test_pygrok.py b/tests/test_pygrok.py index 040b96b..a10c35b 100644 --- a/tests/test_pygrok.py +++ b/tests/test_pygrok.py @@ -102,7 +102,6 @@ def test_one_pat(): m = grok.match(text) assert m == {'txt': 'some multiline\ntext'}, 'grok match failed:%s, %s' % (text, pat, ) - def test_multiple_pats(): text = 'gary 25 "never quit"' pat = "%{WORD:name} %{INT:age} %{QUOTEDSTRING:motto}" From 7d8f9b6b68400a80df0be2c34e0cea68268d7741 Mon Sep 17 00:00:00 2001 From: Vito Piserchia Date: Tue, 14 Jun 2022 22:37:16 +0200 Subject: [PATCH 11/22] fix DeprecationWarning: invalid escape sequence \[ --- tests/test_pygrok.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pygrok.py b/tests/test_pygrok.py index a10c35b..63f5284 100644 --- a/tests/test_pygrok.py +++ b/tests/test_pygrok.py @@ -142,7 +142,7 @@ def test_multiple_pats(): + ' Chrome/36.0.1985.125 Safari/537.36"' ) pat = ( - "%{HOSTNAME:host} %{IP:client_ip} %{NUMBER:delay}s - \[%{DATA:time_stamp}\]" + "%{HOSTNAME:host} %{IP:client_ip} %{NUMBER:delay}s - \\[%{DATA:time_stamp}\\]" + ' "%{WORD:verb} %{URIPATHPARAM:uri_path} HTTP/%{NUMBER:http_ver}" %{INT:http_status} %{INT:bytes} %{QS}' + " %{QS:client}" ) From c6cbce325ea8c563f439fd5adb9c0830a8723ce1 Mon Sep 17 00:00:00 2001 From: Vito Piserchia Date: Tue, 14 Jun 
2022 22:39:36 +0200 Subject: [PATCH 12/22] bump pygrok lib version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2ae5cf4..ad2e519 100755 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setup( name="pygrok", - version="1.0.4", + version="1.0.5", description="A Python library to parse strings and" + " extract information from structured/unstructured data", long_description=long_desc, From ba1c42dcf577e268cfa118ad0bdc77163918a424 Mon Sep 17 00:00:00 2001 From: Vito Piserchia Date: Fri, 17 Jun 2022 14:20:23 +0200 Subject: [PATCH 13/22] validate sub patterns and group name as part of the loading process for a Grok. Avoid endless loop during pattern normalization phase --- pygrok/pygrok.py | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/pygrok/pygrok.py b/pygrok/pygrok.py index 37491eb..510cc86 100644 --- a/pygrok/pygrok.py +++ b/pygrok/pygrok.py @@ -43,7 +43,7 @@ def __init__( custom_patterns=None, fullmatch=True, match_unnamed_groks=False, - flags=0 + flags=0 ): self.pattern = pattern self.custom_patterns_dir = custom_patterns_dir @@ -99,22 +99,33 @@ def _load_search_pattern(self): self.type_mapper = {} py_regex_pattern = self.pattern while True: + # used as safe exit condition + old_py_regex_pattern = py_regex_pattern + # Finding all types specified in the groks - m = re.findall(r"%{(\w+):([@\w\.?\[\]]+):(\w+)}", py_regex_pattern) + m = re.findall(r"%{(\w+):([@\w\.\[\]]+):(\w+)}", py_regex_pattern) for n in m: # accounts for dotted or legacy groups, but not both at the same time - key = '.'.join([f[1] and f[1] or f[0] for f in re.findall("\[([@\.\w]*?)\]|([@\.\w]+)", n[1])]) + key = '.'.join([f[1] and f[1] or f[0] for f in re.findall(r"\[([@\.\w]*?)\]|([@\.\w]+)", n[1])]) self.type_mapper[key] = n[2] # replace %{pattern_name:custom_name} (or %{pattern_name:custom_name:type} # with regex and regex group name + def _validate_sub(r): + """ Validate group names 
and return search substitution """ + s = r.group(2).replace("][", ".").replace("[", "").replace("]", "") + if not re.fullmatch(r'^(@?\w[\.\w]*)+$', s): + raise RuntimeError("Error in group name definition: '%s' ('%s')" % (r.group(2), self.pattern)) + return ("(?P<" + + s + + ">" + + self.predefined_patterns[r.group(1)].regex_str + + ")" + ) + py_regex_pattern = re.sub( - r"%{(\w+):(\[?[@\w\]\[\.]+\]?)(?::\w+)?}", - lambda m: "(?P<" - + m.group(2).replace("][", ".").replace("[", "").replace("]", "") - + ">" - + self.predefined_patterns[m.group(1)].regex_str - + ")", + r"%{(\w+):([@\w\[\]\.?]+)(?::\w+)?}", + _validate_sub, py_regex_pattern, ) @@ -137,9 +148,13 @@ def _load_search_pattern(self): py_regex_pattern, ) - if re.search("%{\w+(:[@\w\.?\[\]]+)?}", py_regex_pattern) is None: + if re.search(r"%{\w+(:[@\w\[\]\.]+)?}", py_regex_pattern) is None: break + # avoid endless loop recursion + if py_regex_pattern == old_py_regex_pattern: + raise RuntimeError("Error in pattern definition: %s" % py_regex_pattern) + self.regex_obj = re.compile(py_regex_pattern, flags=self.flags) From 3e56ca68f616b763bd443c26e5f9a35c61dd873b Mon Sep 17 00:00:00 2001 From: Vito Piserchia Date: Fri, 17 Jun 2022 14:20:51 +0200 Subject: [PATCH 14/22] Test: check all predefined patterns in test --- tests/test_pygrok.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_pygrok.py b/tests/test_pygrok.py index 63f5284..b78f735 100644 --- a/tests/test_pygrok.py +++ b/tests/test_pygrok.py @@ -1,4 +1,5 @@ from pygrok import Grok +import pygrok try: import regex as re except ImportError as e: @@ -260,9 +261,21 @@ def test_match_unnamed(): assert m["HOSTNAME"] == "test.com" +def test_predefined_patterns(): + grok = Grok("%{DATA}") + errors = [] + for pattern in grok.predefined_patterns: + try: + g = Grok("%{"+pattern+"}") + except Exception as e: + errors.append((pattern, str(e))) + assert errors == [] + + if __name__ == "__main__": test_one_pat() test_multiple_pats() 
test_custom_pats() test_custom_pat_files() test_hotloading_pats() + test_predefined_patterns() From 76fca1e8f629661a64334558ed5f46b62fc390b3 Mon Sep 17 00:00:00 2001 From: Vito Piserchia Date: Fri, 17 Jun 2022 14:23:39 +0200 Subject: [PATCH 15/22] update patterns to use int type instead of long --- pygrok/patterns/bacula | 2 +- pygrok/patterns/bro | 6 +++--- pygrok/patterns/firewalls | 4 ++-- pygrok/patterns/haproxy | 4 ++-- pygrok/patterns/httpd | 4 ++-- pygrok/patterns/junos | 2 +- pygrok/patterns/squid | 2 +- pygrok/patterns/zeek | 6 +++--- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/pygrok/patterns/bacula b/pygrok/patterns/bacula index 169defd..6764fdf 100644 --- a/pygrok/patterns/bacula +++ b/pygrok/patterns/bacula @@ -31,7 +31,7 @@ BACULA_LOG_JOBEND Job write elapsed time = %{DATA:bacula.job.elapsed_time}, Tran BACULA_LOG_NOPRUNE_JOBS No Jobs found to prune. BACULA_LOG_NOPRUNE_FILES No Files found to prune. BACULA_LOG_VOLUME_PREVWRITTEN Volume \"?%{BACULA_VOLUME:bacula.volume.name}\"? previously written, moving to end of data. -BACULA_LOG_READYAPPEND Ready to append to end of Volume \"%{BACULA_VOLUME:bacula.volume.name}\" size=%{INT:bacula.volume.size:long} +BACULA_LOG_READYAPPEND Ready to append to end of Volume \"%{BACULA_VOLUME:bacula.volume.name}\" size=%{INT:bacula.volume.size:int} # :long - %{INT:bacula.volume.size:int} BACULA_LOG_CANCELLING Cancelling duplicate JobId=%{INT:bacula.job.other_id}. BACULA_LOG_MARKCANCEL JobId %{INT:bacula.job.id}, Job %{BACULA_JOB:bacula.job.name} marked to be canceled. 
diff --git a/pygrok/patterns/bro b/pygrok/patterns/bro index dc38d5a..711944f 100644 --- a/pygrok/patterns/bro +++ b/pygrok/patterns/bro @@ -5,7 +5,7 @@ BRO_BOOL [TF] BRO_DATA [^\t]+ # http.log - old format (before the Zeek rename) : -BRO_HTTP %{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{INT:source.port:int}\t%{IP:destination.ip}\t%{INT:destination.port:int}\t%{INT:zeek.http.trans_depth:int}\t(?:-|%{WORD:http.request.method})\t(?:-|%{BRO_DATA:url.domain})\t(?:-|%{BRO_DATA:url.original})\t(?:-|%{BRO_DATA:http.request.referrer})\t(?:-|%{BRO_DATA:user_agent.original})\t(?:-|%{NUMBER:http.request.body.bytes:long})\t(?:-|%{NUMBER:http.response.body.bytes:long})\t(?:-|%{POSINT:http.response.status_code:int})\t(?:-|%{DATA:zeek.http.status_msg})\t(?:-|%{POSINT:zeek.http.info_code:int})\t(?:-|%{DATA:zeek.http.info_msg})\t(?:-|%{BRO_DATA:zeek.http.filename})\t(?:\(empty\)|%{BRO_DATA:zeek.http.tags})\t(?:-|%{BRO_DATA:url.username})\t(?:-|%{BRO_DATA:url.password})\t(?:-|%{BRO_DATA:zeek.http.proxied})\t(?:-|%{BRO_DATA:zeek.http.orig_fuids})\t(?:-|%{BRO_DATA:http.request.mime_type})\t(?:-|%{BRO_DATA:zeek.http.resp_fuids})\t(?:-|%{BRO_DATA:http.response.mime_type}) +BRO_HTTP 
%{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{INT:source.port:int}\t%{IP:destination.ip}\t%{INT:destination.port:int}\t%{INT:zeek.http.trans_depth:int}\t(?:-|%{WORD:http.request.method})\t(?:-|%{BRO_DATA:url.domain})\t(?:-|%{BRO_DATA:url.original})\t(?:-|%{BRO_DATA:http.request.referrer})\t(?:-|%{BRO_DATA:user_agent.original})\t(?:-|%{NUMBER:http.request.body.bytes:int})\t(?:-|%{NUMBER:http.response.body.bytes:int})\t(?:-|%{POSINT:http.response.status_code:int})\t(?:-|%{DATA:zeek.http.status_msg})\t(?:-|%{POSINT:zeek.http.info_code:int})\t(?:-|%{DATA:zeek.http.info_msg})\t(?:-|%{BRO_DATA:zeek.http.filename})\t(?:\(empty\)|%{BRO_DATA:zeek.http.tags})\t(?:-|%{BRO_DATA:url.username})\t(?:-|%{BRO_DATA:url.password})\t(?:-|%{BRO_DATA:zeek.http.proxied})\t(?:-|%{BRO_DATA:zeek.http.orig_fuids})\t(?:-|%{BRO_DATA:http.request.mime_type})\t(?:-|%{BRO_DATA:zeek.http.resp_fuids})\t(?:-|%{BRO_DATA:http.response.mime_type}) # :long - %{NUMBER:http.request.body.bytes:int} # :long - %{NUMBER:http.response.body.bytes:int} @@ -13,7 +13,7 @@ BRO_HTTP %{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{IN BRO_DNS %{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{INT:source.port:int}\t%{IP:destination.ip}\t%{INT:destination.port:int}\t%{WORD:network.transport}\t(?:-|%{INT:dns.id:int})\t(?:-|%{BRO_DATA:dns.question.name})\t(?:-|%{INT:zeek.dns.qclass:int})\t(?:-|%{BRO_DATA:zeek.dns.qclass_name})\t(?:-|%{INT:zeek.dns.qtype:int})\t(?:-|%{BRO_DATA:dns.question.type})\t(?:-|%{INT:zeek.dns.rcode:int})\t(?:-|%{BRO_DATA:dns.response_code})\t(?:-|%{BRO_BOOL:zeek.dns.AA})\t(?:-|%{BRO_BOOL:zeek.dns.TC})\t(?:-|%{BRO_BOOL:zeek.dns.RD})\t(?:-|%{BRO_BOOL:zeek.dns.RA})\t(?:-|%{NONNEGINT:zeek.dns.Z:int})\t(?:-|%{BRO_DATA:zeek.dns.answers})\t(?:-|%{DATA:zeek.dns.TTLs})\t(?:-|%{BRO_BOOL:zeek.dns.rejected}) # conn.log - old bro, also supports 'newer' format (optional *zeek.connection.local_resp* flag) compared to non-ecs mode -BRO_CONN 
%{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{INT:source.port:int}\t%{IP:destination.ip}\t%{INT:destination.port:int}\t%{WORD:network.transport}\t(?:-|%{BRO_DATA:network.protocol})\t(?:-|%{NUMBER:zeek.connection.duration:float})\t(?:-|%{INT:zeek.connection.orig_bytes:long})\t(?:-|%{INT:zeek.connection.resp_bytes:long})\t(?:-|%{BRO_DATA:zeek.connection.state})\t(?:-|%{BRO_BOOL:zeek.connection.local_orig})\t(?:(?:-|%{BRO_BOOL:zeek.connection.local_resp})\t)?(?:-|%{INT:zeek.connection.missed_bytes:long})\t(?:-|%{BRO_DATA:zeek.connection.history})\t(?:-|%{INT:source.packets:long})\t(?:-|%{INT:source.bytes:long})\t(?:-|%{INT:destination.packets:long})\t(?:-|%{INT:destination.bytes:long})\t(?:\(empty\)|%{BRO_DATA:zeek.connection.tunnel_parents}) +BRO_CONN %{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{INT:source.port:int}\t%{IP:destination.ip}\t%{INT:destination.port:int}\t%{WORD:network.transport}\t(?:-|%{BRO_DATA:network.protocol})\t(?:-|%{NUMBER:zeek.connection.duration:float})\t(?:-|%{INT:zeek.connection.orig_bytes:int})\t(?:-|%{INT:zeek.connection.resp_bytes:int})\t(?:-|%{BRO_DATA:zeek.connection.state})\t(?:-|%{BRO_BOOL:zeek.connection.local_orig})\t(?:(?:-|%{BRO_BOOL:zeek.connection.local_resp})\t)?(?:-|%{INT:zeek.connection.missed_bytes:int})\t(?:-|%{BRO_DATA:zeek.connection.history})\t(?:-|%{INT:source.packets:int})\t(?:-|%{INT:source.bytes:int})\t(?:-|%{INT:destination.packets:int})\t(?:-|%{INT:destination.bytes:int})\t(?:\(empty\)|%{BRO_DATA:zeek.connection.tunnel_parents}) # :long - %{INT:zeek.connection.orig_bytes:int} # :long - %{INT:zeek.connection.resp_bytes:int} # :long - %{INT:zeek.connection.missed_bytes:int} @@ -23,7 +23,7 @@ BRO_CONN %{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{IN # :long - %{INT:destination.bytes:int} # files.log - old format -BRO_FILES 
%{NUMBER:timestamp}\t%{NOTSPACE:zeek.files.fuid}\t(?:-|%{IP:server.ip})\t(?:-|%{IP:client.ip})\t(?:-|%{BRO_DATA:zeek.files.session_ids})\t(?:-|%{BRO_DATA:zeek.files.source})\t(?:-|%{INT:zeek.files.depth:int})\t(?:-|%{BRO_DATA:zeek.files.analyzers})\t(?:-|%{BRO_DATA:file.mime_type})\t(?:-|%{BRO_DATA:file.name})\t(?:-|%{NUMBER:zeek.files.duration:float})\t(?:-|%{BRO_DATA:zeek.files.local_orig})\t(?:-|%{BRO_BOOL:zeek.files.is_orig})\t(?:-|%{INT:zeek.files.seen_bytes:long})\t(?:-|%{INT:file.size:long})\t(?:-|%{INT:zeek.files.missing_bytes:long})\t(?:-|%{INT:zeek.files.overflow_bytes:long})\t(?:-|%{BRO_BOOL:zeek.files.timedout})\t(?:-|%{BRO_DATA:zeek.files.parent_fuid})\t(?:-|%{BRO_DATA:file.hash.md5})\t(?:-|%{BRO_DATA:file.hash.sha1})\t(?:-|%{BRO_DATA:file.hash.sha256})\t(?:-|%{BRO_DATA:zeek.files.extracted}) +BRO_FILES %{NUMBER:timestamp}\t%{NOTSPACE:zeek.files.fuid}\t(?:-|%{IP:server.ip})\t(?:-|%{IP:client.ip})\t(?:-|%{BRO_DATA:zeek.files.session_ids})\t(?:-|%{BRO_DATA:zeek.files.source})\t(?:-|%{INT:zeek.files.depth:int})\t(?:-|%{BRO_DATA:zeek.files.analyzers})\t(?:-|%{BRO_DATA:file.mime_type})\t(?:-|%{BRO_DATA:file.name})\t(?:-|%{NUMBER:zeek.files.duration:float})\t(?:-|%{BRO_DATA:zeek.files.local_orig})\t(?:-|%{BRO_BOOL:zeek.files.is_orig})\t(?:-|%{INT:zeek.files.seen_bytes:int})\t(?:-|%{INT:file.size:int})\t(?:-|%{INT:zeek.files.missing_bytes:int})\t(?:-|%{INT:zeek.files.overflow_bytes:int})\t(?:-|%{BRO_BOOL:zeek.files.timedout})\t(?:-|%{BRO_DATA:zeek.files.parent_fuid})\t(?:-|%{BRO_DATA:file.hash.md5})\t(?:-|%{BRO_DATA:file.hash.sha1})\t(?:-|%{BRO_DATA:file.hash.sha256})\t(?:-|%{BRO_DATA:zeek.files.extracted}) # :long - %{INT:zeek.files.seen_bytes:int} # :long - %{INT:file.size:int} # :long - %{INT:zeek.files.missing_bytes:int} diff --git a/pygrok/patterns/firewalls b/pygrok/patterns/firewalls index 29389ea..26abb3c 100644 --- a/pygrok/patterns/firewalls +++ b/pygrok/patterns/firewalls @@ -1,5 +1,5 @@ # NetScreen firewall logs -NETSCREENSESSIONLOG 
%{SYSLOGTIMESTAMP:timestamp} %{IPORHOST:observer.hostname} %{NOTSPACE:observer.name}: (?NetScreen) device_id=%{WORD:netscreen.device_id} .*?(system-\w+-%{NONNEGINT:event.code}\(%{WORD:netscreen.session.type}\))?: start_time="%{DATA:netscreen.session.start_time}" duration=%{INT:netscreen.session.duration:int} policy_id=%{INT:netscreen.policy_id} service=%{DATA:netscreen.service} proto=%{INT:netscreen.protocol_number:int} src zone=%{WORD:observer.ingress.zone} dst zone=%{WORD:observer.egress.zone} action=%{WORD:event.action} sent=%{INT:source.bytes:long} rcvd=%{INT:destination.bytes:long} src=%{IPORHOST:source.address} dst=%{IPORHOST:destination.address}(?: src_port=%{INT:source.port:int} dst_port=%{INT:destination.port:int})?(?: src-xlated ip=%{IP:source.nat.ip} port=%{INT:source.nat.port:int} dst-xlated ip=%{IP:destination.nat.ip} port=%{INT:destination.nat.port:int})?(?: session_id=%{INT:netscreen.session.id} reason=%{GREEDYDATA:netscreen.session.reason})? +NETSCREENSESSIONLOG %{SYSLOGTIMESTAMP:timestamp} %{IPORHOST:observer.hostname} %{NOTSPACE:observer.name}: (?NetScreen) device_id=%{WORD:netscreen.device_id} .*?(system-\w+-%{NONNEGINT:event.code}\(%{WORD:netscreen.session.type}\))?: start_time="%{DATA:netscreen.session.start_time}" duration=%{INT:netscreen.session.duration:int} policy_id=%{INT:netscreen.policy_id} service=%{DATA:netscreen.service} proto=%{INT:netscreen.protocol_number:int} src zone=%{WORD:observer.ingress.zone} dst zone=%{WORD:observer.egress.zone} action=%{WORD:event.action} sent=%{INT:source.bytes:int} rcvd=%{INT:destination.bytes:int} src=%{IPORHOST:source.address} dst=%{IPORHOST:destination.address}(?: src_port=%{INT:source.port:int} dst_port=%{INT:destination.port:int})?(?: src-xlated ip=%{IP:source.nat.ip} port=%{INT:source.nat.port:int} dst-xlated ip=%{IP:destination.nat.ip} port=%{INT:destination.nat.port:int})?(?: session_id=%{INT:netscreen.session.id} reason=%{GREEDYDATA:netscreen.session.reason})? 
# :long - %{INT:source.bytes:int} # :long - %{INT:destination.bytes:int} @@ -60,7 +60,7 @@ CISCOFW110002 %{CISCO_REASON:event.reason} for %{WORD:cisco.asa.network.transpor # ASA-6-302010 CISCOFW302010 %{INT:cisco.asa.connections.in_use:int} in use, %{INT:cisco.asa.connections.most_used:int} most used # ASA-6-302013, ASA-6-302014, ASA-6-302015, ASA-6-302016 -CISCOFW302013_302014_302015_302016 %{CISCO_ACTION:cisco.asa.outcome}(?: %{CISCO_DIRECTION:cisco.asa.network.direction})? %{WORD:cisco.asa.network.transport} connection %{INT:cisco.asa.connection_id} for %{NOTSPACE:observer.ingress.interface.name}:%{IP:source.ip}/%{INT:source.port:int}(?: \(%{IP:source.nat.ip}/%{INT:source.nat.port:int}\))?(?:\(%{DATA:source.user.name}\))? to %{NOTSPACE:observer.egress.interface.name}:%{IP:destination.ip}/%{INT:destination.port:int}( \(%{IP:destination.nat.ip}/%{INT:destination.nat.port:int}\))?(?:\(%{DATA:destination.user.name}\))?( duration %{TIME:cisco.asa.duration} bytes %{INT:network.bytes:long})?(?: %{CISCO_REASON:event.reason})?(?: \(%{DATA:user.name}\))? +CISCOFW302013_302014_302015_302016 %{CISCO_ACTION:cisco.asa.outcome}(?: %{CISCO_DIRECTION:cisco.asa.network.direction})? %{WORD:cisco.asa.network.transport} connection %{INT:cisco.asa.connection_id} for %{NOTSPACE:observer.ingress.interface.name}:%{IP:source.ip}/%{INT:source.port:int}(?: \(%{IP:source.nat.ip}/%{INT:source.nat.port:int}\))?(?:\(%{DATA:source.user.name}\))? to %{NOTSPACE:observer.egress.interface.name}:%{IP:destination.ip}/%{INT:destination.port:int}( \(%{IP:destination.nat.ip}/%{INT:destination.nat.port:int}\))?(?:\(%{DATA:destination.user.name}\))?( duration %{TIME:cisco.asa.duration} bytes %{INT:network.bytes:int})?(?: %{CISCO_REASON:event.reason})?(?: \(%{DATA:user.name}\))? # :long - %{INT:network.bytes:int} # ASA-6-302020, ASA-6-302021 CISCOFW302020_302021 %{CISCO_ACTION:cisco.asa.outcome}(?: %{CISCO_DIRECTION:cisco.asa.network.direction})? 
%{WORD:cisco.asa.network.transport} connection for faddr %{IP:destination.ip}/%{INT:cisco.asa.icmp_seq:int}(?:\(%{DATA:destination.user.name}\))? gaddr %{IP:source.nat.ip}/%{INT:cisco.asa.icmp_type:int} laddr %{IP:source.ip}/%{INT}(?: \(%{DATA:source.user.name}\))? diff --git a/pygrok/patterns/haproxy b/pygrok/patterns/haproxy index f46d4ba..59303e6 100644 --- a/pygrok/patterns/haproxy +++ b/pygrok/patterns/haproxy @@ -30,11 +30,11 @@ HAPROXYURI (?:%{URIPROTO:url.scheme}://)?(?:%{USER:url.username}(?::[^@]*)?@)?(? HAPROXYHTTPREQUESTLINE (?:|(?:%{WORD:http.request.method} %{HAPROXYURI:url.original}(?: HTTP/%{NUMBER:http.version})?)) # parse a haproxy 'httplog' line -HAPROXYHTTPBASE %{IP:source.address}:%{INT:source.port:int} \[%{HAPROXYDATE:haproxy.request_date}\] %{NOTSPACE:haproxy.frontend_name} %{NOTSPACE:haproxy.backend_name}/(?:|%{NOTSPACE:haproxy.server_name}) (?:-1|%{INT:haproxy.http.request.time_wait_ms:int})/(?:-1|%{INT:haproxy.total_waiting_time_ms:int})/(?:-1|%{INT:haproxy.connection_wait_time_ms:int})/(?:-1|%{INT:haproxy.http.request.time_wait_without_data_ms:int})/%{NOTSPACE:haproxy.total_time_ms} %{INT:http.response.status_code:int} %{INT:source.bytes:long} (?:-|%{DATA:haproxy.http.request.captured_cookie}) (?:-|%{DATA:haproxy.http.response.captured_cookie}) %{NOTSPACE:haproxy.termination_state} %{INT:haproxy.connections.active:int}/%{INT:haproxy.connections.frontend:int}/%{INT:haproxy.connections.backend:int}/%{INT:haproxy.connections.server:int}/%{INT:haproxy.connections.retries:int} %{INT:haproxy.server_queue:int}/%{INT:haproxy.backend_queue:int}(?: \{%{HAPROXYCAPTUREDREQUESTHEADERS}\}(?: \{%{HAPROXYCAPTUREDRESPONSEHEADERS}\})?)?(?: "%{HAPROXYHTTPREQUESTLINE}"?)? 
+HAPROXYHTTPBASE %{IP:source.address}:%{INT:source.port:int} \[%{HAPROXYDATE:haproxy.request_date}\] %{NOTSPACE:haproxy.frontend_name} %{NOTSPACE:haproxy.backend_name}/(?:|%{NOTSPACE:haproxy.server_name}) (?:-1|%{INT:haproxy.http.request.time_wait_ms:int})/(?:-1|%{INT:haproxy.total_waiting_time_ms:int})/(?:-1|%{INT:haproxy.connection_wait_time_ms:int})/(?:-1|%{INT:haproxy.http.request.time_wait_without_data_ms:int})/%{NOTSPACE:haproxy.total_time_ms} %{INT:http.response.status_code:int} %{INT:source.bytes:int} (?:-|%{DATA:haproxy.http.request.captured_cookie}) (?:-|%{DATA:haproxy.http.response.captured_cookie}) %{NOTSPACE:haproxy.termination_state} %{INT:haproxy.connections.active:int}/%{INT:haproxy.connections.frontend:int}/%{INT:haproxy.connections.backend:int}/%{INT:haproxy.connections.server:int}/%{INT:haproxy.connections.retries:int} %{INT:haproxy.server_queue:int}/%{INT:haproxy.backend_queue:int}(?: \{%{HAPROXYCAPTUREDREQUESTHEADERS}\}(?: \{%{HAPROXYCAPTUREDRESPONSEHEADERS}\})?)?(?: "%{HAPROXYHTTPREQUESTLINE}"?)? 
# :long - %{INT:source.bytes:int} HAPROXYHTTP (?:%{SYSLOGTIMESTAMP:timestamp}|%{TIMESTAMP_ISO8601:timestamp}) %{IPORHOST:host.hostname} %{SYSLOGPROG}: %{HAPROXYHTTPBASE} # parse a haproxy 'tcplog' line -HAPROXYTCP (?:%{SYSLOGTIMESTAMP:timestamp}|%{TIMESTAMP_ISO8601:timestamp}) %{IPORHOST:host.hostname} %{SYSLOGPROG}: %{IP:source.address}:%{INT:source.port:int} \[%{HAPROXYDATE:haproxy.request_date}\] %{NOTSPACE:haproxy.frontend_name} %{NOTSPACE:haproxy.backend_name}/(?:|%{NOTSPACE:haproxy.server_name}) (?:-1|%{INT:haproxy.total_waiting_time_ms:int})/(?:-1|%{INT:haproxy.connection_wait_time_ms:int})/%{NOTSPACE:haproxy.total_time_ms} %{INT:source.bytes:long} %{NOTSPACE:haproxy.termination_state} %{INT:haproxy.connections.active:int}/%{INT:haproxy.connections.frontend:int}/%{INT:haproxy.connections.backend:int}/%{INT:haproxy.connections.server:int}/%{INT:haproxy.connections.retries:int} %{INT:haproxy.server_queue:int}/%{INT:haproxy.backend_queue:int} +HAPROXYTCP (?:%{SYSLOGTIMESTAMP:timestamp}|%{TIMESTAMP_ISO8601:timestamp}) %{IPORHOST:host.hostname} %{SYSLOGPROG}: %{IP:source.address}:%{INT:source.port:int} \[%{HAPROXYDATE:haproxy.request_date}\] %{NOTSPACE:haproxy.frontend_name} %{NOTSPACE:haproxy.backend_name}/(?:|%{NOTSPACE:haproxy.server_name}) (?:-1|%{INT:haproxy.total_waiting_time_ms:int})/(?:-1|%{INT:haproxy.connection_wait_time_ms:int})/%{NOTSPACE:haproxy.total_time_ms} %{INT:source.bytes:int} %{NOTSPACE:haproxy.termination_state} %{INT:haproxy.connections.active:int}/%{INT:haproxy.connections.frontend:int}/%{INT:haproxy.connections.backend:int}/%{INT:haproxy.connections.server:int}/%{INT:haproxy.connections.retries:int} %{INT:haproxy.server_queue:int}/%{INT:haproxy.backend_queue:int} # :long - %{INT:source.bytes:int} diff --git a/pygrok/patterns/httpd b/pygrok/patterns/httpd index 9b58e50..edd23d4 100644 --- a/pygrok/patterns/httpd +++ b/pygrok/patterns/httpd @@ -2,13 +2,13 @@ HTTPDUSER %{EMAILADDRESS}|%{USER} HTTPDERROR_DATE %{DAY} %{MONTH} %{MONTHDAY} 
%{TIME} %{YEAR} # Log formats -HTTPD_COMMONLOG %{IPORHOST:source.address} (?:-|%{HTTPDUSER:apache.access.user.identity}) (?:-|%{HTTPDUSER:user.name}) \[%{HTTPDATE:timestamp}\] "(?:%{WORD:http.request.method} %{NOTSPACE:url.original}(?: HTTP/%{NUMBER:http.version})?|%{DATA})" (?:-|%{INT:http.response.status_code:int}) (?:-|%{INT:http.response.body.bytes:long}) +HTTPD_COMMONLOG %{IPORHOST:source.address} (?:-|%{HTTPDUSER:apache.access.user.identity}) (?:-|%{HTTPDUSER:user.name}) \[%{HTTPDATE:timestamp}\] "(?:%{WORD:http.request.method} %{NOTSPACE:url.original}(?: HTTP/%{NUMBER:http.version})?|%{DATA})" (?:-|%{INT:http.response.status_code:int}) (?:-|%{INT:http.response.body.bytes:int}) # :long - %{INT:http.response.body.bytes:int} HTTPD_COMBINEDLOG %{HTTPD_COMMONLOG} "(?:-|%{DATA:http.request.referrer})" "(?:-|%{DATA:user_agent.original})" # Error logs HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:log.level}\] (?:\[client %{IPORHOST:source.address}\] )?%{GREEDYDATA:message} -HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[(?:%{WORD:apache.error.module})?:%{LOGLEVEL:log.level}\] \[pid %{POSINT:process.pid:long}(:tid %{INT:process.thread.id:int})?\](?: \(%{POSINT:apache.error.proxy.error.code?}\)%{DATA:apache.error.proxy.error.message}:)?(?: \[client %{IPORHOST:source.address}(?::%{POSINT:source.port:int})?\])?(?: %{DATA:error.code}:)? %{GREEDYDATA:message} +HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[(?:%{WORD:apache.error.module})?:%{LOGLEVEL:log.level}\] \[pid %{POSINT:process.pid:int}(:tid %{INT:process.thread.id:int})?\](?: \(%{POSINT:apache.error.proxy.error.code}\)%{DATA:apache.error.proxy.error.message}:)?(?: \[client %{IPORHOST:source.address}(?::%{POSINT:source.port:int})?\])?(?: %{DATA:error.code}:)? 
%{GREEDYDATA:message} # :long - %{INT:process.thread.id:int} HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG} diff --git a/pygrok/patterns/junos b/pygrok/patterns/junos index d23d455..7fc008e 100644 --- a/pygrok/patterns/junos +++ b/pygrok/patterns/junos @@ -3,7 +3,7 @@ RT_FLOW_TAG (?:RT_FLOW_SESSION_CREATE|RT_FLOW_SESSION_CLOSE|RT_FLOW_SESSION_DENY # deprecated legacy name: RT_FLOW_EVENT RT_FLOW_TAG -RT_FLOW1 %{RT_FLOW_TAG:juniper.srx.tag}: %{GREEDYDATA:juniper.srx.reason}: %{IP:source.ip}/%{INT:source.port:int}->%{IP:destination.ip}/%{INT:destination.port:int} %{DATA:juniper.srx.service_name} %{IP:source.nat.ip}/%{INT:source.nat.port:int}->%{IP:destination.nat.ip}/%{INT:destination.nat.port:int} (?:(?:None)|(?:%{DATA:juniper.srx.src_nat_rule_name})) (?:(?:None)|(?:%{DATA:juniper.srx.dst_nat_rule_name})) %{INT:network.iana_number} %{DATA:rule.name} %{DATA:observer.ingress.zone} %{DATA:observer.egress.zone} %{INT:juniper.srx.session_id} \d+\(%{INT:source.bytes:long}\) \d+\(%{INT:destination.bytes:long}\) %{INT:juniper.srx.elapsed_time:int} .* +RT_FLOW1 %{RT_FLOW_TAG:juniper.srx.tag}: %{GREEDYDATA:juniper.srx.reason}: %{IP:source.ip}/%{INT:source.port:int}->%{IP:destination.ip}/%{INT:destination.port:int} %{DATA:juniper.srx.service_name} %{IP:source.nat.ip}/%{INT:source.nat.port:int}->%{IP:destination.nat.ip}/%{INT:destination.nat.port:int} (?:(?:None)|(?:%{DATA:juniper.srx.src_nat_rule_name})) (?:(?:None)|(?:%{DATA:juniper.srx.dst_nat_rule_name})) %{INT:network.iana_number} %{DATA:rule.name} %{DATA:observer.ingress.zone} %{DATA:observer.egress.zone} %{INT:juniper.srx.session_id} \d+\(%{INT:source.bytes:int}\) \d+\(%{INT:destination.bytes:int}\) %{INT:juniper.srx.elapsed_time:int} .* # :long - %{INT:source.bytes:int} # :long - %{INT:destination.bytes:int} diff --git a/pygrok/patterns/squid b/pygrok/patterns/squid index dfff4f6..19dceff 100644 --- a/pygrok/patterns/squid +++ b/pygrok/patterns/squid @@ -2,5 +2,5 @@ # Documentation of squid3 logs formats can be 
found at the following link: # http://wiki.squid-cache.org/Features/LogFormat SQUID3_STATUS (?:%{POSINT:http.response.status_code:int}|0|000) -SQUID3 %{NUMBER:timestamp}\s+%{NUMBER:squid.request.duration:int}\s%{IP:source.ip}\s%{WORD:event.action}/%{SQUID3_STATUS}\s%{INT:http.response.bytes:long}\s%{WORD:http.request.method}\s%{NOTSPACE:url.original}\s(?:-|%{NOTSPACE:user.name})\s%{WORD:squid.hierarchy_code}/(?:-|%{IPORHOST:destination.address})\s(?:-|%{NOTSPACE:http.response.mime_type}) +SQUID3 %{NUMBER:timestamp}\s+%{NUMBER:squid.request.duration:int}\s%{IP:source.ip}\s%{WORD:event.action}/%{SQUID3_STATUS}\s%{INT:http.response.bytes:int}\s%{WORD:http.request.method}\s%{NOTSPACE:url.original}\s(?:-|%{NOTSPACE:user.name})\s%{WORD:squid.hierarchy_code}/(?:-|%{IPORHOST:destination.address})\s(?:-|%{NOTSPACE:http.response.mime_type}) # :long - %{INT:http.response.bytes:int} diff --git a/pygrok/patterns/zeek b/pygrok/patterns/zeek index 397e84a..1ee4a4a 100644 --- a/pygrok/patterns/zeek +++ b/pygrok/patterns/zeek @@ -5,7 +5,7 @@ ZEEK_DATA [^\t]+ # http.log - the 'new' format (compared to BRO_HTTP) # has *version* and *origin* fields added and *filename* replaced with *orig_filenames* + *resp_filenames* -ZEEK_HTTP 
%{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{INT:source.port:int}\t%{IP:destination.ip}\t%{INT:destination.port:int}\t%{INT:zeek.http.trans_depth:int}\t(?:-|%{WORD:http.request.method})\t(?:-|%{ZEEK_DATA:url.domain})\t(?:-|%{ZEEK_DATA:url.original})\t(?:-|%{ZEEK_DATA:http.request.referrer})\t(?:-|%{NUMBER:http.version})\t(?:-|%{ZEEK_DATA:user_agent.original})\t(?:-|%{ZEEK_DATA:zeek.http.origin})\t(?:-|%{NUMBER:http.request.body.bytes:long})\t(?:-|%{NUMBER:http.response.body.bytes:long})\t(?:-|%{POSINT:http.response.status_code:int})\t(?:-|%{DATA:zeek.http.status_msg})\t(?:-|%{POSINT:zeek.http.info_code:int})\t(?:-|%{DATA:zeek.http.info_msg})\t(?:\(empty\)|%{ZEEK_DATA:zeek.http.tags})\t(?:-|%{ZEEK_DATA:url.username})\t(?:-|%{ZEEK_DATA:url.password})\t(?:-|%{ZEEK_DATA:zeek.http.proxied})\t(?:-|%{ZEEK_DATA:zeek.http.orig_fuids})\t(?:-|%{ZEEK_DATA:zeek.http.orig_filenames})\t(?:-|%{ZEEK_DATA:http.request.mime_type})\t(?:-|%{ZEEK_DATA:zeek.http.resp_fuids})\t(?:-|%{ZEEK_DATA:zeek.http.resp_filenames})\t(?:-|%{ZEEK_DATA:http.response.mime_type}) +ZEEK_HTTP 
%{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{INT:source.port:int}\t%{IP:destination.ip}\t%{INT:destination.port:int}\t%{INT:zeek.http.trans_depth:int}\t(?:-|%{WORD:http.request.method})\t(?:-|%{ZEEK_DATA:url.domain})\t(?:-|%{ZEEK_DATA:url.original})\t(?:-|%{ZEEK_DATA:http.request.referrer})\t(?:-|%{NUMBER:http.version})\t(?:-|%{ZEEK_DATA:user_agent.original})\t(?:-|%{ZEEK_DATA:zeek.http.origin})\t(?:-|%{NUMBER:http.request.body.bytes:int})\t(?:-|%{NUMBER:http.response.body.bytes:int})\t(?:-|%{POSINT:http.response.status_code:int})\t(?:-|%{DATA:zeek.http.status_msg})\t(?:-|%{POSINT:zeek.http.info_code:int})\t(?:-|%{DATA:zeek.http.info_msg})\t(?:\(empty\)|%{ZEEK_DATA:zeek.http.tags})\t(?:-|%{ZEEK_DATA:url.username})\t(?:-|%{ZEEK_DATA:url.password})\t(?:-|%{ZEEK_DATA:zeek.http.proxied})\t(?:-|%{ZEEK_DATA:zeek.http.orig_fuids})\t(?:-|%{ZEEK_DATA:zeek.http.orig_filenames})\t(?:-|%{ZEEK_DATA:http.request.mime_type})\t(?:-|%{ZEEK_DATA:zeek.http.resp_fuids})\t(?:-|%{ZEEK_DATA:zeek.http.resp_filenames})\t(?:-|%{ZEEK_DATA:http.response.mime_type}) # :long - %{NUMBER:http.request.body.bytes:int} # :long - %{NUMBER:http.response.body.bytes:int} @@ -13,7 +13,7 @@ ZEEK_HTTP %{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{I ZEEK_DNS 
%{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{INT:source.port:int}\t%{IP:destination.ip}\t%{INT:destination.port:int}\t%{WORD:network.transport}\t(?:-|%{INT:dns.id:int})\t(?:-|%{NUMBER:zeek.dns.rtt:float})\t(?:-|%{ZEEK_DATA:dns.question.name})\t(?:-|%{INT:zeek.dns.qclass:int})\t(?:-|%{ZEEK_DATA:zeek.dns.qclass_name})\t(?:-|%{INT:zeek.dns.qtype:int})\t(?:-|%{ZEEK_DATA:dns.question.type})\t(?:-|%{INT:zeek.dns.rcode:int})\t(?:-|%{ZEEK_DATA:dns.response_code})\t%{ZEEK_BOOL:zeek.dns.AA}\t%{ZEEK_BOOL:zeek.dns.TC}\t%{ZEEK_BOOL:zeek.dns.RD}\t%{ZEEK_BOOL:zeek.dns.RA}\t%{NONNEGINT:zeek.dns.Z:int}\t(?:-|%{ZEEK_DATA:zeek.dns.answers})\t(?:-|%{DATA:zeek.dns.TTLs})\t(?:-|%{ZEEK_BOOL:zeek.dns.rejected}) # conn.log - the 'new' format (requires *zeek.connection.local_resp*, handles `(empty)` as `-` for tunnel_parents, and optional mac adresses) -ZEEK_CONN %{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{INT:source.port:int}\t%{IP:destination.ip}\t%{INT:destination.port:int}\t%{WORD:network.transport}\t(?:-|%{ZEEK_DATA:network.protocol})\t(?:-|%{NUMBER:zeek.connection.duration:float})\t(?:-|%{INT:zeek.connection.orig_bytes:long})\t(?:-|%{INT:zeek.connection.resp_bytes:long})\t(?:-|%{ZEEK_DATA:zeek.connection.state})\t(?:-|%{ZEEK_BOOL:zeek.connection.local_orig})\t(?:-|%{ZEEK_BOOL:zeek.connection.local_resp})\t(?:-|%{INT:zeek.connection.missed_bytes:long})\t(?:-|%{ZEEK_DATA:zeek.connection.history})\t(?:-|%{INT:source.packets:long})\t(?:-|%{INT:source.bytes:long})\t(?:-|%{INT:destination.packets:long})\t(?:-|%{INT:destination.bytes:long})\t(?:-|%{ZEEK_DATA:zeek.connection.tunnel_parents})(?:\t(?:-|%{COMMONMAC:source.mac})\t(?:-|%{COMMONMAC:destination.mac}))? 
+ZEEK_CONN %{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{INT:source.port:int}\t%{IP:destination.ip}\t%{INT:destination.port:int}\t%{WORD:network.transport}\t(?:-|%{ZEEK_DATA:network.protocol})\t(?:-|%{NUMBER:zeek.connection.duration:float})\t(?:-|%{INT:zeek.connection.orig_bytes:int})\t(?:-|%{INT:zeek.connection.resp_bytes:int})\t(?:-|%{ZEEK_DATA:zeek.connection.state})\t(?:-|%{ZEEK_BOOL:zeek.connection.local_orig})\t(?:-|%{ZEEK_BOOL:zeek.connection.local_resp})\t(?:-|%{INT:zeek.connection.missed_bytes:int})\t(?:-|%{ZEEK_DATA:zeek.connection.history})\t(?:-|%{INT:source.packets:int})\t(?:-|%{INT:source.bytes:int})\t(?:-|%{INT:destination.packets:int})\t(?:-|%{INT:destination.bytes:int})\t(?:-|%{ZEEK_DATA:zeek.connection.tunnel_parents})(?:\t(?:-|%{COMMONMAC:source.mac})\t(?:-|%{COMMONMAC:destination.mac}))? # :long - %{INT:zeek.connection.orig_bytes:int} # :long - %{INT:zeek.connection.resp_bytes:int} # :long - %{INT:zeek.connection.missed_bytes:int} @@ -25,7 +25,7 @@ ZEEK_CONN %{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{I # files.log - updated BRO_FILES format (2 new fields added at the end) ZEEK_FILES_TX_HOSTS (?:-|%{IP:server.ip})|(?%{IP:server.ip}(?:\s,%{IP})+) ZEEK_FILES_RX_HOSTS (?:-|%{IP:client.ip})|(?%{IP:client.ip}(?:\s,%{IP})+) -ZEEK_FILES 
%{NUMBER:timestamp}\t%{NOTSPACE:zeek.files.fuid}\t%{ZEEK_FILES_TX_HOSTS}\t%{ZEEK_FILES_RX_HOSTS}\t(?:-|%{ZEEK_DATA:zeek.files.session_ids})\t(?:-|%{ZEEK_DATA:zeek.files.source})\t(?:-|%{INT:zeek.files.depth:int})\t(?:-|%{ZEEK_DATA:zeek.files.analyzers})\t(?:-|%{ZEEK_DATA:file.mime_type})\t(?:-|%{ZEEK_DATA:file.name})\t(?:-|%{NUMBER:zeek.files.duration:float})\t(?:-|%{ZEEK_DATA:zeek.files.local_orig})\t(?:-|%{ZEEK_BOOL:zeek.files.is_orig})\t(?:-|%{INT:zeek.files.seen_bytes:long})\t(?:-|%{INT:file.size:long})\t(?:-|%{INT:zeek.files.missing_bytes:long})\t(?:-|%{INT:zeek.files.overflow_bytes:long})\t(?:-|%{ZEEK_BOOL:zeek.files.timedout})\t(?:-|%{ZEEK_DATA:zeek.files.parent_fuid})\t(?:-|%{ZEEK_DATA:file.hash.md5})\t(?:-|%{ZEEK_DATA:file.hash.sha1})\t(?:-|%{ZEEK_DATA:file.hash.sha256})\t(?:-|%{ZEEK_DATA:zeek.files.extracted})(?:\t(?:-|%{ZEEK_BOOL:zeek.files.extracted_cutoff})\t(?:-|%{INT:zeek.files.extracted_size:long}))? +ZEEK_FILES %{NUMBER:timestamp}\t%{NOTSPACE:zeek.files.fuid}\t%{ZEEK_FILES_TX_HOSTS}\t%{ZEEK_FILES_RX_HOSTS}\t(?:-|%{ZEEK_DATA:zeek.files.session_ids})\t(?:-|%{ZEEK_DATA:zeek.files.source})\t(?:-|%{INT:zeek.files.depth:int})\t(?:-|%{ZEEK_DATA:zeek.files.analyzers})\t(?:-|%{ZEEK_DATA:file.mime_type})\t(?:-|%{ZEEK_DATA:file.name})\t(?:-|%{NUMBER:zeek.files.duration:float})\t(?:-|%{ZEEK_DATA:zeek.files.local_orig})\t(?:-|%{ZEEK_BOOL:zeek.files.is_orig})\t(?:-|%{INT:zeek.files.seen_bytes:int})\t(?:-|%{INT:file.size:int})\t(?:-|%{INT:zeek.files.missing_bytes:int})\t(?:-|%{INT:zeek.files.overflow_bytes:int})\t(?:-|%{ZEEK_BOOL:zeek.files.timedout})\t(?:-|%{ZEEK_DATA:zeek.files.parent_fuid})\t(?:-|%{ZEEK_DATA:file.hash.md5})\t(?:-|%{ZEEK_DATA:file.hash.sha1})\t(?:-|%{ZEEK_DATA:file.hash.sha256})\t(?:-|%{ZEEK_DATA:zeek.files.extracted})(?:\t(?:-|%{ZEEK_BOOL:zeek.files.extracted_cutoff})\t(?:-|%{INT:zeek.files.extracted_size:int}))? 
# :long - %{INT:zeek.files.seen_bytes:int} # :long - %{INT:file.size:int} # :long - %{INT:zeek.files.missing_bytes:int} From eb4dcd0b1c2a82942a126907195e7904ec54e53c Mon Sep 17 00:00:00 2001 From: Vito Piserchia Date: Fri, 17 Jun 2022 14:24:01 +0200 Subject: [PATCH 16/22] Fix patterns from latest logstash --- pygrok/patterns/aws | 11 ++++++++--- pygrok/patterns/exim | 3 +-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/pygrok/patterns/aws b/pygrok/patterns/aws index 35d1467..39d2fb5 100644 --- a/pygrok/patterns/aws +++ b/pygrok/patterns/aws @@ -1,6 +1,6 @@ S3_REQUEST_LINE (?:%{WORD:http.request.method} %{NOTSPACE:url.original}(?: HTTP/%{NUMBER:http.version})?) -S3_ACCESS_LOG %{WORD:aws.s3access.bucket_owner} %{NOTSPACE:aws.s3access.bucket} \%{HTTPDATE:timestamp}\ (?:-|%{IP:client.ip}) (?:-|%{NOTSPACE:client.user.id}) %{NOTSPACE:aws.s3access.request_id} %{NOTSPACE:aws.s3access.operation} (?:-|%{NOTSPACE:aws.s3access.key}) (?:-|"%{S3_REQUEST_LINE:aws.s3access.request_uri}") (?:-|%{INT:http.response.status_code:int}) (?:-|%{NOTSPACE:aws.s3access.error_code}) (?:-|%{INT:aws.s3access.bytes_sent:long}) (?:-|%{INT:aws.s3access.object_size:long}) (?:-|%{INT:aws.s3access.total_time:int}) (?:-|%{INT:aws.s3access.turn_around_time:int}) "(?:-|%{DATA:http.request.referrer})" "(?:-|%{DATA:user_agent.original})" (?:-|%{NOTSPACE:aws.s3access.version_id})(?: (?:-|%{NOTSPACE:aws.s3access.host_id}) (?:-|%{NOTSPACE:aws.s3access.signature_version}) (?:-|%{NOTSPACE:tls.cipher}) (?:-|%{NOTSPACE:aws.s3access.authentication_type}) (?:-|%{NOTSPACE:aws.s3access.host_header}) (?:-|%{NOTSPACE:aws.s3access.tls_version}))? 
+S3_ACCESS_LOG %{WORD:aws.s3access.bucket_owner} %{NOTSPACE:aws.s3access.bucket} \[%{HTTPDATE:timestamp}\] (?:-|%{IP:client.ip}) (?:-|%{NOTSPACE:client.user.id}) %{NOTSPACE:aws.s3access.request_id} %{NOTSPACE:aws.s3access.operation} (?:-|%{NOTSPACE:aws.s3access.key}) (?:-|"%{S3_REQUEST_LINE:aws.s3access.request_uri}") (?:-|%{INT:http.response.status_code:int}) (?:-|%{NOTSPACE:aws.s3access.error_code}) (?:-|%{INT:aws.s3access.bytes_sent:int}) (?:-|%{INT:aws.s3access.object_size:int}) (?:-|%{INT:aws.s3access.total_time:int}) (?:-|%{INT:aws.s3access.turn_around_time:int}) "(?:-|%{DATA:http.request.referrer})" "(?:-|%{DATA:user_agent.original})" (?:-|%{NOTSPACE:aws.s3access.version_id})(?: (?:-|%{NOTSPACE:aws.s3access.host_id}) (?:-|%{NOTSPACE:aws.s3access.signature_version}) (?:-|%{NOTSPACE:tls.cipher}) (?:-|%{NOTSPACE:aws.s3access.authentication_type}) (?:-|%{NOTSPACE:aws.s3access.host_header}) (?:-|%{NOTSPACE:aws.s3access.tls_version}))? # :long - %{INT:aws.s3access.bytes_sent:int} # :long - %{INT:aws.s3access.object_size:int} @@ -13,14 +13,19 @@ ELB_URI %{URIPROTO:url.scheme}://(?:%{USER:url.username}(?::^@*)?@)?(?:%{ELB_URI ELB_REQUEST_LINE (?:%{WORD:http.request.method} %{ELB_URI:url.original}(?: HTTP/%{NUMBER:http.version})?) # pattern supports 'regular' HTTP ELB format -ELB_V1_HTTP_LOG %{TIMESTAMP_ISO8601:timestamp} %{NOTSPACE:aws.elb.name} %{IP:source.ip}:%{INT:source.port:int} (?:-|(?:%{IP:aws.elb.backend.ip}:%{INT:aws.elb.backend.port:int})) (?:-1|%{NUMBER:aws.elb.request_processing_time.sec:float}) (?:-1|%{NUMBER:aws.elb.backend_processing_time.sec:float}) (?:-1|%{NUMBER:aws.elb.response_processing_time.sec:float}) %{INT:http.response.status_code:int} (?:-|%{INT:aws.elb.backend.http.response.status_code:int}) %{INT:http.request.body.bytes:long} %{INT:http.response.body.bytes:long} "%{ELB_REQUEST_LINE}"(?: "(?:-|%{DATA:user_agent.original})" (?:-|%{NOTSPACE:tls.cipher}) (?:-|%{NOTSPACE:aws.elb.ssl_protocol}))? 
+ELB_V1_HTTP_LOG %{TIMESTAMP_ISO8601:timestamp} %{NOTSPACE:aws.elb.name} %{IP:source.ip}:%{INT:source.port:int} (?:-|(?:%{IP:aws.elb.backend.ip}:%{INT:aws.elb.backend.port:int})) (?:-1|%{NUMBER:aws.elb.request_processing_time.sec:float}) (?:-1|%{NUMBER:aws.elb.backend_processing_time.sec:float}) (?:-1|%{NUMBER:aws.elb.response_processing_time.sec:float}) %{INT:http.response.status_code:int} (?:-|%{INT:aws.elb.backend.http.response.status_code:int}) %{INT:http.request.body.bytes:int} %{INT:http.response.body.bytes:int} "%{ELB_REQUEST_LINE}"(?: "(?:-|%{DATA:user_agent.original})" (?:-|%{NOTSPACE:tls.cipher}) (?:-|%{NOTSPACE:aws.elb.ssl_protocol}))? # :long - %{INT:http.request.body.bytes:int} # :long - %{INT:http.response.body.bytes:int} ELB_ACCESS_LOG %{ELB_V1_HTTP_LOG} +# Each edge location is identified by a three-letter code and an arbitrarily assigned number. +# The three-letter IATA code typically represents an airport near the edge location. +# examples: "LHR62-C2", "SFO5-P1", "IND6", "CPT50" +CLOUDFRONT_EDGE_LOCATION [A-Z]{3}[0-9]{1,2}(?:-[A-Z0-9]{2})? 
+ # pattern used to match a shorted format, that's why we have the optional part (starting with *http.version*) at the end -CLOUDFRONT_ACCESS_LOG (?%{YEAR}-%{MONTHNUM}-%{MONTHDAY}\t%{TIME})\t%{WORD:aws.cloudfront.x_edge_location}\t(?:-|%{INT:destination.bytes:long})\t%{IPORHOST:source.ip}\t%{WORD:http.request.method}\t%{HOSTNAME:url.domain}\t%{NOTSPACE:url.path}\t(?:(?:000)|%{INT:http.response.status_code:int})\t(?:-|%{DATA:http.request.referrer})\t%{DATA:user_agent.original}\t(?:-|%{DATA:url.query})\t(?:-|%{DATA:aws.cloudfront.http.request.cookie})\t%{WORD:aws.cloudfront.x_edge_result_type}\t%{NOTSPACE:aws.cloudfront.x_edge_request_id}\t%{HOSTNAME:aws.cloudfront.http.request.host}\t%{URIPROTO:network.protocol}\t(?:-|%{INT:source.bytes:long})\t%{NUMBER:aws.cloudfront.time_taken:float}\t(?:-|%{IP:network.forwarded_ip})\t(?:-|%{DATA:aws.cloudfront.ssl_protocol})\t(?:-|%{NOTSPACE:tls.cipher})\t%{WORD:aws.cloudfront.x_edge_response_result_type}(?:\t(?:-|HTTP/%{NUMBER:http.version})\t(?:-|%{DATA:aws.cloudfront.fle_status})\t(?:-|%{DATA:aws.cloudfront.fle_encrypted_fields})\t%{INT:source.port:int}\t%{NUMBER:aws.cloudfront.time_to_first_byte:float}\t(?:-|%{DATA:aws.cloudfront.x_edge_detailed_result_type})\t(?:-|%{NOTSPACE:http.request.mime_type})\t(?:-|%{INT:aws.cloudfront.http.request.size:long})\t(?:-|%{INT:aws.cloudfront.http.request.range.start:long})\t(?:-|%{INT:aws.cloudfront.http.request.range.end:long}))? 
+CLOUDFRONT_ACCESS_LOG (?%{YEAR}-%{MONTHNUM}-%{MONTHDAY}\t%{TIME})\t%{CLOUDFRONT_EDGE_LOCATION:aws.cloudfront.x_edge_location}\t(?:-|%{INT:destination.bytes:int})\t%{IPORHOST:source.ip}\t%{WORD:http.request.method}\t%{HOSTNAME:url.domain}\t%{NOTSPACE:url.path}\t(?:(?:000)|%{INT:http.response.status_code:int})\t(?:-|%{DATA:http.request.referrer})\t%{DATA:user_agent.original}\t(?:-|%{DATA:url.query})\t(?:-|%{DATA:aws.cloudfront.http.request.cookie})\t%{WORD:aws.cloudfront.x_edge_result_type}\t%{NOTSPACE:aws.cloudfront.x_edge_request_id}\t%{HOSTNAME:aws.cloudfront.http.request.host}\t%{URIPROTO:network.protocol}\t(?:-|%{INT:source.bytes:int})\t%{NUMBER:aws.cloudfront.time_taken:float}\t(?:-|%{IP:network.forwarded_ip})\t(?:-|%{DATA:aws.cloudfront.ssl_protocol})\t(?:-|%{NOTSPACE:tls.cipher})\t%{WORD:aws.cloudfront.x_edge_response_result_type}(?:\t(?:-|HTTP/%{NUMBER:http.version})\t(?:-|%{DATA:aws.cloudfront.fle_status})\t(?:-|%{DATA:aws.cloudfront.fle_encrypted_fields})\t%{INT:source.port:int}\t%{NUMBER:aws.cloudfront.time_to_first_byte:float}\t(?:-|%{DATA:aws.cloudfront.x_edge_detailed_result_type})\t(?:-|%{NOTSPACE:http.request.mime_type})\t(?:-|%{INT:aws.cloudfront.http.request.size:int})\t(?:-|%{INT:aws.cloudfront.http.request.range.start:int})\t(?:-|%{INT:aws.cloudfront.http.request.range.end:int}))? # :long - %{INT:destination.bytes:int} # :long - %{INT:source.bytes:int} # :long - %{INT:aws.cloudfront.http.request.size:int} diff --git a/pygrok/patterns/exim b/pygrok/patterns/exim index dba7950..82d7820 100644 --- a/pygrok/patterns/exim +++ b/pygrok/patterns/exim @@ -10,14 +10,13 @@ EXIM_DATE (:?%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{TIME}) EXIM_PID \[%{POSINT:process.pid:int}\] EXIM_QT ((\d+y)?(\d+w)?(\d+d)?(\d+h)?(\d+m)?(\d+s)?) 
EXIM_EXCLUDE_TERMS (Message is frozen|(Start|End) queue run| Warning: | retry time not reached | no (IP address|host name) found for (IP address|host) | unexpected disconnection while reading SMTP command | no immediate delivery: |another process is handling this message) -EXIM_REMOTE_HOST (H=(%{NOTSPACE:source.address} )?(\(%{NOTSPACE:exim.log.remote_address}\) )?\%{IP:source.ip}\](?::%{POSINT:source.port:int})?) +EXIM_REMOTE_HOST (H=(%{NOTSPACE:source.address} )?(\(%{NOTSPACE:exim.log.remote_address}\) )?\[%{IP:source.ip}\](?::%{POSINT:source.port:int})?) EXIM_INTERFACE (I=\[%{IP:destination.ip}\](?::%{NUMBER:destination.port:int})) EXIM_PROTOCOL (P=%{NOTSPACE:network.protocol}) EXIM_MSG_SIZE (S=%{NUMBER:exim.log.message.size:int}) EXIM_HEADER_ID (id=%{NOTSPACE:exim.log.header_id}) EXIM_QUOTED_CONTENT (?:\\.|[^\\"])* EXIM_SUBJECT (T="%{EXIM_QUOTED_CONTENT:exim.log.message.subject}") - EXIM_UNKNOWN_FIELD (?:[A-Za-z0-9]{1,4}=(?:%{QUOTEDSTRING}|%{NOTSPACE})) EXIM_NAMED_FIELDS (?: (?:%{EXIM_REMOTE_HOST}|%{EXIM_INTERFACE}|%{EXIM_PROTOCOL}|%{EXIM_MSG_SIZE}|%{EXIM_HEADER_ID}|%{EXIM_SUBJECT}|%{EXIM_UNKNOWN_FIELD}))* From 7ecbe0bc24b411c06efe9da43effff7624b1919a Mon Sep 17 00:00:00 2001 From: Vito Piserchia Date: Fri, 17 Jun 2022 14:26:12 +0200 Subject: [PATCH 17/22] dump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ad2e519..7a1a740 100755 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setup( name="pygrok", - version="1.0.5", + version="1.0.6", description="A Python library to parse strings and" + " extract information from structured/unstructured data", long_description=long_desc, From bb23edc335c6c26c606ea69eb64a31498d95d20c Mon Sep 17 00:00:00 2001 From: Vito Piserchia Date: Sat, 18 Jun 2022 09:28:17 +0200 Subject: [PATCH 18/22] fix patterns --- pygrok/patterns/bind | 2 +- pygrok/patterns/firewalls | 2 +- pygrok/patterns/zeek | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/pygrok/patterns/bind b/pygrok/patterns/bind index ec212de..6914c61 100644 --- a/pygrok/patterns/bind +++ b/pygrok/patterns/bind @@ -8,6 +8,6 @@ BIND9_CATEGORY (?:queries) BIND9_QUERYLOGBASE client(:? @0x(?:[0-9A-Fa-f]+))? %{IP:client.ip}#%{POSINT:client.port:int} \(%{GREEDYDATA:bind.log.question.name}\): query: %{GREEDYDATA:dns.question.name} (?IN) %{BIND9_DNSTYPE:dns.question.type}(:? %{DATA:bind.log.question.flags})? \(%{IP:server.ip}\) # for query-logging category and severity are always fixed as "queries: info: " -BIND9_QUERYLOG %{BIND9_TIMESTAMP:timestamp} %{BIND9_CATEGORY:bing.log.category}: %{LOGLEVEL:log.level}: %{BIND9_QUERYLOGBASE} +BIND9_QUERYLOG %{BIND9_TIMESTAMP:timestamp} %{BIND9_CATEGORY:bind.log.category}: %{LOGLEVEL:log.level}: %{BIND9_QUERYLOGBASE} BIND9 %{BIND9_QUERYLOG} diff --git a/pygrok/patterns/firewalls b/pygrok/patterns/firewalls index 26abb3c..1b02df0 100644 --- a/pygrok/patterns/firewalls +++ b/pygrok/patterns/firewalls @@ -100,7 +100,7 @@ IPTABLES4_FRAG (?:(?<= )(?:CE|DF|MF))* IPTABLES4_PART SRC=%{IPV4:source.ip}\s+DST=%{IPV4:destination.ip}\s+LEN=(?:%{INT:iptables.length:int})?\s+TOS=(?:0|0x%{BASE16NUM:iptables.tos})?\s+PREC=(?:0x%{BASE16NUM:iptables.precedence_bits})?\s+TTL=(?:%{INT:iptables.ttl:int})?\s+ID=(?:%{INT:iptables.id})?\s+(?:%{IPTABLES4_FRAG:iptables.fragment_flags})?(?:\s+FRAG: %{INT:iptables.fragment_offset:int})? IPTABLES6_PART SRC=%{IPV6:source.ip}\s+DST=%{IPV6:destination.ip}\s+LEN=(?:%{INT:iptables.length:int})?\s+TC=(?:0|0x%{BASE16NUM:iptables.tos})?\s+HOPLIMIT=(?:%{INT:iptables.ttl:int})?\s+FLOWLBL=(?:%{INT:iptables.flow_label})? 
-IPTABLES IN=(?:%{NOTSPACE:observer.ingress.interface.name})?\s+OUT=(?:%{NOTSPACE:observer.egress.interface.name})?\s+(?:MAC=(?:%{COMMONMAC:destination.mac})?(?::%{COMMONMAC:source.mac})?(?::A-Fa-f0-9{2}:A-Fa-f0-9{2})?\s+)?(:?%{IPTABLES4_PART}|%{IPTABLES6_PART}).*?PROTO=(?:%{WORD:network.transport})?\s+SPT=(?:%{INT:source.port:int})?\s+DPT=(?:%{INT:destination.port:int})?\s+(?:%{IPTABLES_TCP_PART})? +IPTABLES IN=(?:%{NOTSPACE:observer.ingress.interface.name})?\s+OUT=(?:%{NOTSPACE:observer.egress.interface.name})?\s+(?:MAC=(?:%{COMMONMAC:destination.mac})?(?::%{COMMONMAC:source.mac})?(?::[A-Fa-f0-9]{2}:[A-Fa-f0-9]{2})?\s+)?(:?%{IPTABLES4_PART}|%{IPTABLES6_PART}).*?PROTO=(?:%{WORD:network.transport})?\s+SPT=(?:%{INT:source.port:int})?\s+DPT=(?:%{INT:destination.port:int})?\s+(?:%{IPTABLES_TCP_PART})? # Shorewall firewall logs SHOREWALL (?:%{SYSLOGTIMESTAMP:timestamp}) (?:%{WORD:observer.hostname}) .*Shorewall:(?:%{WORD:shorewall.firewall.type})?:(?:%{WORD:shorewall.firewall.action})?.*%{IPTABLES} diff --git a/pygrok/patterns/zeek b/pygrok/patterns/zeek index 1ee4a4a..1a862ea 100644 --- a/pygrok/patterns/zeek +++ b/pygrok/patterns/zeek @@ -23,8 +23,8 @@ ZEEK_CONN %{NUMBER:timestamp}\t%{NOTSPACE:zeek.session_id}\t%{IP:source.ip}\t%{I # :long - %{INT:destination.bytes:int} # files.log - updated BRO_FILES format (2 new fields added at the end) -ZEEK_FILES_TX_HOSTS (?:-|%{IP:server.ip})|(?%{IP:server.ip}(?:\s,%{IP})+) -ZEEK_FILES_RX_HOSTS (?:-|%{IP:client.ip})|(?%{IP:client.ip}(?:\s,%{IP})+) +ZEEK_FILES_TX_HOSTS (?:-|%{IP:server.ip})|(?%{IP:server.ip}(?:[\s,]%{IP})+) +ZEEK_FILES_RX_HOSTS (?:-|%{IP:client.ip})|(?%{IP:client.ip}(?:[\s,]%{IP})+) ZEEK_FILES 
%{NUMBER:timestamp}\t%{NOTSPACE:zeek.files.fuid}\t%{ZEEK_FILES_TX_HOSTS}\t%{ZEEK_FILES_RX_HOSTS}\t(?:-|%{ZEEK_DATA:zeek.files.session_ids})\t(?:-|%{ZEEK_DATA:zeek.files.source})\t(?:-|%{INT:zeek.files.depth:int})\t(?:-|%{ZEEK_DATA:zeek.files.analyzers})\t(?:-|%{ZEEK_DATA:file.mime_type})\t(?:-|%{ZEEK_DATA:file.name})\t(?:-|%{NUMBER:zeek.files.duration:float})\t(?:-|%{ZEEK_DATA:zeek.files.local_orig})\t(?:-|%{ZEEK_BOOL:zeek.files.is_orig})\t(?:-|%{INT:zeek.files.seen_bytes:int})\t(?:-|%{INT:file.size:int})\t(?:-|%{INT:zeek.files.missing_bytes:int})\t(?:-|%{INT:zeek.files.overflow_bytes:int})\t(?:-|%{ZEEK_BOOL:zeek.files.timedout})\t(?:-|%{ZEEK_DATA:zeek.files.parent_fuid})\t(?:-|%{ZEEK_DATA:file.hash.md5})\t(?:-|%{ZEEK_DATA:file.hash.sha1})\t(?:-|%{ZEEK_DATA:file.hash.sha256})\t(?:-|%{ZEEK_DATA:zeek.files.extracted})(?:\t(?:-|%{ZEEK_BOOL:zeek.files.extracted_cutoff})\t(?:-|%{INT:zeek.files.extracted_size:int}))? # :long - %{INT:zeek.files.seen_bytes:int} # :long - %{INT:file.size:int} From 238c2baef6eaded85c43f2524df4b529336901c5 Mon Sep 17 00:00:00 2001 From: Vito Piserchia Date: Sat, 18 Jun 2022 23:10:29 +0200 Subject: [PATCH 19/22] fix pattern CISCOFW733100 --- pygrok/patterns/firewalls | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygrok/patterns/firewalls b/pygrok/patterns/firewalls index 1b02df0..d4009b1 100644 --- a/pygrok/patterns/firewalls +++ b/pygrok/patterns/firewalls @@ -89,7 +89,7 @@ CISCOFW710001_710002_710003_710005_710006 %{WORD:cisco.asa.network.transport} (? # ASA-6-713172 CISCOFW713172 Group = %{DATA:cisco.asa.source.group}, IP = %{IP:source.ip}, Automatic NAT Detection Status:\s+Remote end\s*%{DATA:@metadata.cisco.asa.remote_nat}\s*behind a NAT device\s+This\s+end\s*%{DATA:@metadata.cisco.asa.local_nat}\s*behind a NAT device # ASA-4-733100 -CISCOFW733100 \\s*%{DATA:[cisco.asa.burst.object}\s*\] drop %{DATA:cisco.asa.burst.id} exceeded. 
Current burst rate is %{INT:cisco.asa.burst.current_rate:int} per second, max configured rate is %{INT:cisco.asa.burst.configured_rate:int}; Current average rate is %{INT:cisco.asa.burst.avg_rate:int} per second, max configured rate is %{INT:cisco.asa.burst.configured_avg_rate:int}; Cumulative total count is %{INT:cisco.asa.burst.cumulative_count:int} +CISCOFW733100 \[\s*%{DATA:cisco.asa.burst.object}\s*\] drop %{DATA:cisco.asa.burst.id} exceeded. Current burst rate is %{INT:cisco.asa.burst.current_rate:int} per second, max configured rate is %{INT:cisco.asa.burst.configured_rate:int}; Current average rate is %{INT:cisco.asa.burst.avg_rate:int} per second, max configured rate is %{INT:cisco.asa.burst.configured_avg_rate:int}; Cumulative total count is %{INT:cisco.asa.burst.cumulative_count:int} #== End Cisco ASA == From 4d741f278218bf13adfcac7f4d8397a2ac98ef28 Mon Sep 17 00:00:00 2001 From: Vito Piserchia Date: Sat, 18 Jun 2022 23:10:46 +0200 Subject: [PATCH 20/22] remove deprecated pattern file --- pygrok/patterns/mcollective-patterns | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 pygrok/patterns/mcollective-patterns diff --git a/pygrok/patterns/mcollective-patterns b/pygrok/patterns/mcollective-patterns deleted file mode 100644 index bb2f7f9..0000000 --- a/pygrok/patterns/mcollective-patterns +++ /dev/null @@ -1,4 +0,0 @@ -# Remember, these can be multi-line events. 
-MCOLLECTIVE ., \[%{TIMESTAMP_ISO8601:timestamp} #%{POSINT:pid}\]%{SPACE}%{LOGLEVEL:event_level} - -MCOLLECTIVEAUDIT %{TIMESTAMP_ISO8601:timestamp}: From e36542de9c4fb37cedf36c1e5e48521fa542fe1a Mon Sep 17 00:00:00 2001 From: Vito Piserchia Date: Sat, 18 Jun 2022 23:13:51 +0200 Subject: [PATCH 21/22] dump version --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 7a1a740..1ea9a69 100755 --- a/setup.py +++ b/setup.py @@ -8,11 +8,11 @@ setup( name="pygrok", - version="1.0.6", + version="1.0.7", description="A Python library to parse strings and" + " extract information from structured/unstructured data", long_description=long_desc, - url="https://github.com/garyelephant/pygrok", + url="https://github.com/dreamlabnet/pygrok", author="garyelephant", author_email="garygaowork@gmail.com", license="MIT", @@ -20,6 +20,6 @@ include_package_data=True, zip_safe=True, keywords=["python grok", "regex"], # arbitrary keywords - download_url="https://github.com/garyelephant/pygrok/tarball/v1.0.0", + download_url="https://github.com/dreamlabnet/pygrok/tarball/v1.0.7", install_requires=["regex"], ) From 4db5faafb7dd2ae2f79af2c9179a80251e39b771 Mon Sep 17 00:00:00 2001 From: Maciej Lech Date: Mon, 29 Apr 2024 15:29:27 +0200 Subject: [PATCH 22/22] Use post-release in version --- .gitignore | 1 + setup.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index e78d80c..e0b2d71 100644 --- a/.gitignore +++ b/.gitignore @@ -55,6 +55,7 @@ docs/_build/ # Environments .env .venv +.python-version env/ venv/ ENV/ diff --git a/setup.py b/setup.py index 1ea9a69..e896a68 100755 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setup( name="pygrok", - version="1.0.7", + version="1.0.7.post1", description="A Python library to parse strings and" + " extract information from structured/unstructured data", long_description=long_desc, @@ -20,6 +20,6 @@ include_package_data=True, zip_safe=True, 
keywords=["python grok", "regex"], # arbitrary keywords - download_url="https://github.com/dreamlabnet/pygrok/tarball/v1.0.7", + download_url="https://github.com/dreamlabnet/pygrok/tarball/v1.0.7.post1", install_requires=["regex"], )