diff --git a/README.md b/README.md
index 881edf2..76210b4 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 # Parser
 
 Publisher: Splunk
-Connector Version: 2\.7\.0
+Connector Version: 2\.7\.1
 Product Vendor: Splunk
 Product Name: Parser
 Product Version Supported (regex): "\.\*"
diff --git a/parser.json b/parser.json
index 1657301..b91f65d 100644
--- a/parser.json
+++ b/parser.json
@@ -9,10 +9,10 @@
     "product_name": "Parser",
     "product_version_regex": ".*",
     "publisher": "Splunk",
-    "app_version": "2.7.0",
+    "app_version": "2.7.1",
     "fips_compliant": true,
     "license": "Copyright (c) 2017-2022 Splunk Inc.",
-    "utctime_updated": "2022-02-08T18:41:46.000000Z",
+    "utctime_updated": "2022-03-15T18:28:49.000000Z",
     "package_name": "phantom_parser",
     "main_module": "parser_connector.py",
     "min_phantom_version": "5.1.0",
@@ -93,7 +93,7 @@
         },
         {
             "module": "typing_extensions",
-            "input_file": "wheels/py3/typing_extensions-4.0.1-py3-none-any.whl"
+            "input_file": "wheels/py3/typing_extensions-4.1.1-py3-none-any.whl"
         },
         {
             "module": "urllib3",
@@ -571,4 +571,4 @@
             }
         ]
     }
-}
+}
\ No newline at end of file
diff --git a/parser_email.py b/parser_email.py
index 3bde93c..7949772 100644
--- a/parser_email.py
+++ b/parser_email.py
@@ -25,6 +25,7 @@
 from collections import OrderedDict
 from email.header import decode_header, make_header
 from html import unescape
+from urllib.parse import urlparse
 
 import magic
 import phantom.app as phantom
@@ -101,7 +102,7 @@
 PROC_EMAIL_JSON_EMAIL_HEADERS = "email_headers"
 PROC_EMAIL_CONTENT_TYPE_MESSAGE = "message/rfc822"
 
-URI_REGEX = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
+URI_REGEX = r"h(?:tt|xx)p[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
 EMAIL_REGEX = r"\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b"
 EMAIL_REGEX2 = r'".*"@[A-Z0-9.-]+\.[A-Z]{2,}\b'
 HASH_REGEX = r"\b[0-9a-fA-F]{32}\b|\b[0-9a-fA-F]{40}\b|\b[0-9a-fA-F]{64}\b"
@@ -182,6 +183,20 @@ def _is_ip(input_ip):
     return False
 
 
+def _refang_url(url):
+    parsed = urlparse(url)
+    scheme = parsed.scheme
+
+    # Replace hxxp/hxxps with http/https
+    if scheme == "hxxp":
+        parsed = parsed._replace(scheme='http')
+    elif scheme == "hxxps":
+        parsed = parsed._replace(scheme='https')
+
+    refang_url = parsed.geturl()
+    return refang_url
+
+
 def _clean_url(url):
     url = url.strip('>),.]\r\n')
 
@@ -193,6 +208,7 @@ def _clean_url(url):
     if '>' in url:
         url = url[:url.find('>')]
 
+    url = _refang_url(url)
     return url
 
 
diff --git a/parser_methods.py b/parser_methods.py
index 2ea6f4d..8c98b1e 100644
--- a/parser_methods.py
+++ b/parser_methods.py
@@ -18,6 +18,7 @@
 import sys
 import zipfile
 from html import unescape
+from urllib.parse import urlparse
 
 import pdfminer
 from bs4 import BeautifulSoup, UnicodeDammit
@@ -50,7 +51,7 @@
 }
 
 
-URI_REGEX = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
+URI_REGEX = r"h(?:tt|xx)p[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
 EMAIL_REGEX = r"\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b"
 EMAIL_REGEX2 = r'".*"@[A-Z0-9.-]+\.[A-Z]{2,}\b'
 HASH_REGEX = r"\b[0-9a-fA-F]{32}\b|\b[0-9a-fA-F]{40}\b|\b[0-9a-fA-F]{64}\b"
@@ -109,6 +110,20 @@ def is_ipv6(input_ip):
     return bool(re.match(IPV6_REGEX, input_ip))
 
 
+def _refang_url(url):
+    parsed = urlparse(url)
+    scheme = parsed.scheme
+
+    # Replace hxxp/hxxps with http/https
+    if scheme == "hxxp":
+        parsed = parsed._replace(scheme='http')
+    elif scheme == "hxxps":
+        parsed = parsed._replace(scheme='https')
+
+    refang_url = parsed.geturl()
+    return refang_url
+
+
 def _clean_url(url):
     url = url.strip('>),.]\r\n')
 
@@ -120,6 +135,7 @@ def _clean_url(url):
     if '>' in url:
         url = url[:url.find('>')]
 
+    url = _refang_url(url)
     return url
 
 
diff --git a/release_notes/2.7.1.md b/release_notes/2.7.1.md
new file mode 100644
index 0000000..56bd00b
--- /dev/null
+++ b/release_notes/2.7.1.md
@@ -0,0 +1 @@
+* Extract defanged URL Code Implementation [PAPP-24845]
\ No newline at end of file
diff --git a/release_notes/release_notes.html b/release_notes/release_notes.html
index 3f72750..e279aa8 100644
--- a/release_notes/release_notes.html
+++ b/release_notes/release_notes.html
@@ -1,5 +1,9 @@
-Parser Release Notes - Published by Splunk February 21, 2022
+Parser Release Notes - Published by Splunk March 17, 2022

+Version 2.7.1 - Released March 17, 2022
 Version 2.7.0 - Released February 21, 2022
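
Below is a minimal, self-contained sketch of the defanged-URL handling this change introduces. It mirrors the _refang_url helper added to parser_email.py and parser_methods.py; the standalone script, the example URLs, and the assertions are illustrative assumptions only and are not part of the connector or its tests.

# Illustrative sketch only -- mirrors the _refang_url helper added in this
# change; not the connector's code path or test suite.
from urllib.parse import urlparse


def _refang_url(url):
    """Convert a defanged hxxp/hxxps scheme back to http/https."""
    parsed = urlparse(url)
    if parsed.scheme == "hxxp":
        parsed = parsed._replace(scheme="http")
    elif parsed.scheme == "hxxps":
        parsed = parsed._replace(scheme="https")
    return parsed.geturl()


if __name__ == "__main__":
    # Defanged URLs (now matched by the widened URI_REGEX) are refanged
    # before being returned by _clean_url.
    assert _refang_url("hxxps://example.com/path?a=1") == "https://example.com/path?a=1"
    assert _refang_url("hxxp://203.0.113.5/login") == "http://203.0.113.5/login"
    # Already-clean URLs pass through unchanged.
    assert _refang_url("https://example.com/") == "https://example.com/"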