diff --git a/README.md b/README.md
index 881edf2..76210b4 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
# Parser
Publisher: Splunk
-Connector Version: 2\.7\.0
+Connector Version: 2\.7\.1
Product Vendor: Splunk
Product Name: Parser
Product Version Supported (regex): "\.\*"
diff --git a/parser.json b/parser.json
index 1657301..b91f65d 100644
--- a/parser.json
+++ b/parser.json
@@ -9,10 +9,10 @@
"product_name": "Parser",
"product_version_regex": ".*",
"publisher": "Splunk",
- "app_version": "2.7.0",
+ "app_version": "2.7.1",
"fips_compliant": true,
"license": "Copyright (c) 2017-2022 Splunk Inc.",
- "utctime_updated": "2022-02-08T18:41:46.000000Z",
+ "utctime_updated": "2022-03-15T18:28:49.000000Z",
"package_name": "phantom_parser",
"main_module": "parser_connector.py",
"min_phantom_version": "5.1.0",
@@ -93,7 +93,7 @@
},
{
"module": "typing_extensions",
- "input_file": "wheels/py3/typing_extensions-4.0.1-py3-none-any.whl"
+ "input_file": "wheels/py3/typing_extensions-4.1.1-py3-none-any.whl"
},
{
"module": "urllib3",
@@ -571,4 +571,4 @@
}
]
}
-}
+}
\ No newline at end of file
diff --git a/parser_email.py b/parser_email.py
index 3bde93c..7949772 100644
--- a/parser_email.py
+++ b/parser_email.py
@@ -25,6 +25,7 @@
from collections import OrderedDict
from email.header import decode_header, make_header
from html import unescape
+from urllib.parse import urlparse
import magic
import phantom.app as phantom
@@ -101,7 +102,7 @@
PROC_EMAIL_JSON_EMAIL_HEADERS = "email_headers"
PROC_EMAIL_CONTENT_TYPE_MESSAGE = "message/rfc822"
-URI_REGEX = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
+URI_REGEX = r"h(?:tt|xx)p[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
EMAIL_REGEX = r"\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b"
EMAIL_REGEX2 = r'".*"@[A-Z0-9.-]+\.[A-Z]{2,}\b'
HASH_REGEX = r"\b[0-9a-fA-F]{32}\b|\b[0-9a-fA-F]{40}\b|\b[0-9a-fA-F]{64}\b"
@@ -182,6 +183,20 @@ def _is_ip(input_ip):
return False
+def _refang_url(url):
+ parsed = urlparse(url)
+ scheme = parsed.scheme
+
+ # Replace hxxp/hxxps with http/https
+ if scheme == "hxxp":
+ parsed = parsed._replace(scheme='http')
+ elif scheme == "hxxps":
+ parsed = parsed._replace(scheme='https')
+
+ refang_url = parsed.geturl()
+ return refang_url
+
+
def _clean_url(url):
url = url.strip('>),.]\r\n')
@@ -193,6 +208,7 @@ def _clean_url(url):
if '>' in url:
url = url[:url.find('>')]
+ url = _refang_url(url)
return url
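
For context only (not part of the patch): the new `_refang_url` helper normalizes a defanged `hxxp`/`hxxps` scheme back to `http`/`https` and leaves the rest of the URL untouched, so host-level defanging such as `example[.]com` would pass through as-is. A minimal standalone sketch of that behavior, using made-up sample URLs:

```python
from urllib.parse import urlparse

def _refang_url(url):
    # Mirror of the patched helper: swap a defanged hxxp/hxxps scheme
    # back to http/https; everything after the scheme is left as-is.
    parsed = urlparse(url)
    if parsed.scheme == "hxxp":
        parsed = parsed._replace(scheme="http")
    elif parsed.scheme == "hxxps":
        parsed = parsed._replace(scheme="https")
    return parsed.geturl()

assert _refang_url("hxxp://evil.example/a?b=1") == "http://evil.example/a?b=1"
assert _refang_url("hxxps://evil.example/") == "https://evil.example/"
assert _refang_url("https://splunk.com/") == "https://splunk.com/"  # unchanged
```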
diff --git a/parser_methods.py b/parser_methods.py
index 2ea6f4d..8c98b1e 100644
--- a/parser_methods.py
+++ b/parser_methods.py
@@ -18,6 +18,7 @@
import sys
import zipfile
from html import unescape
+from urllib.parse import urlparse
import pdfminer
from bs4 import BeautifulSoup, UnicodeDammit
@@ -50,7 +51,7 @@
}
-URI_REGEX = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
+URI_REGEX = r"h(?:tt|xx)p[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
EMAIL_REGEX = r"\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b"
EMAIL_REGEX2 = r'".*"@[A-Z0-9.-]+\.[A-Z]{2,}\b'
HASH_REGEX = r"\b[0-9a-fA-F]{32}\b|\b[0-9a-fA-F]{40}\b|\b[0-9a-fA-F]{64}\b"
@@ -109,6 +110,20 @@ def is_ipv6(input_ip):
return bool(re.match(IPV6_REGEX, input_ip))
+def _refang_url(url):
+ parsed = urlparse(url)
+ scheme = parsed.scheme
+
+ # Replace hxxp/hxxps with http/https
+ if scheme == "hxxp":
+ parsed = parsed._replace(scheme='http')
+ elif scheme == "hxxps":
+ parsed = parsed._replace(scheme='https')
+
+ refang_url = parsed.geturl()
+ return refang_url
+
+
def _clean_url(url):
url = url.strip('>),.]\r\n')
@@ -120,6 +135,7 @@ def _clean_url(url):
if '>' in url:
url = url[:url.find('>')]
+ url = _refang_url(url)
return url
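
The `URI_REGEX` change above is what lets defanged URLs reach `_clean_url` in the first place: `h(?:tt|xx)p[s]?` now matches `http`, `https`, `hxxp`, and `hxxps`. A small self-contained check of the pattern, using made-up sample text and hostnames:

```python
import re

# Same pattern as in the patch: now also matches hxxp/hxxps schemes.
URI_REGEX = r"h(?:tt|xx)p[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"

text = "Report: hxxps://bad.example/payload was linked from https://ok.example/doc."
print(re.findall(URI_REGEX, text))
# ['hxxps://bad.example/payload', 'https://ok.example/doc.']
# _clean_url() later strips trailing punctuation (the '.' on the second hit)
# and calls _refang_url() to turn the hxxps:// hit back into https://.
```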
diff --git a/release_notes/2.7.1.md b/release_notes/2.7.1.md
new file mode 100644
index 0000000..56bd00b
--- /dev/null
+++ b/release_notes/2.7.1.md
@@ -0,0 +1 @@
+* Extract defanged URL Code Implementation [PAPP-24845]
\ No newline at end of file
diff --git a/release_notes/release_notes.html b/release_notes/release_notes.html
index 3f72750..e279aa8 100644
--- a/release_notes/release_notes.html
+++ b/release_notes/release_notes.html
@@ -1,5 +1,9 @@
-Parser Release Notes - Published by Splunk February 21, 2022
+Parser Release Notes - Published by Splunk March 17, 2022
+Version 2.7.1 - Released March 17, 2022
+