Skip to content

Commit

Permalink
Merge pull request #25 from splunk-soar-connectors/next
Browse files Browse the repository at this point in the history
Merging next to main for release 2.7.1
  • Loading branch information
mpan-splunk authored Mar 17, 2022
2 parents 0376d85 + 6c7bf67 commit 3be57e3
Show file tree
Hide file tree
Showing 8 changed files with 45 additions and 8 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Parser

Publisher: Splunk
Connector Version: 2\.7\.0
Connector Version: 2\.7\.1
Product Vendor: Splunk
Product Name: Parser
Product Version Supported (regex): "\.\*"
Expand Down
8 changes: 4 additions & 4 deletions parser.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
"product_name": "Parser",
"product_version_regex": ".*",
"publisher": "Splunk",
"app_version": "2.7.0",
"app_version": "2.7.1",
"fips_compliant": true,
"license": "Copyright (c) 2017-2022 Splunk Inc.",
"utctime_updated": "2022-02-08T18:41:46.000000Z",
"utctime_updated": "2022-03-15T18:28:49.000000Z",
"package_name": "phantom_parser",
"main_module": "parser_connector.py",
"min_phantom_version": "5.1.0",
Expand Down Expand Up @@ -93,7 +93,7 @@
},
{
"module": "typing_extensions",
"input_file": "wheels/py3/typing_extensions-4.0.1-py3-none-any.whl"
"input_file": "wheels/py3/typing_extensions-4.1.1-py3-none-any.whl"
},
{
"module": "urllib3",
Expand Down Expand Up @@ -571,4 +571,4 @@
}
]
}
}
}
18 changes: 17 additions & 1 deletion parser_email.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from collections import OrderedDict
from email.header import decode_header, make_header
from html import unescape
from urllib.parse import urlparse

import magic
import phantom.app as phantom
Expand Down Expand Up @@ -101,7 +102,7 @@
PROC_EMAIL_JSON_EMAIL_HEADERS = "email_headers"
PROC_EMAIL_CONTENT_TYPE_MESSAGE = "message/rfc822"

URI_REGEX = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
URI_REGEX = r"h(?:tt|xx)p[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
EMAIL_REGEX = r"\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b"
EMAIL_REGEX2 = r'".*"@[A-Z0-9.-]+\.[A-Z]{2,}\b'
HASH_REGEX = r"\b[0-9a-fA-F]{32}\b|\b[0-9a-fA-F]{40}\b|\b[0-9a-fA-F]{64}\b"
Expand Down Expand Up @@ -182,6 +183,20 @@ def _is_ip(input_ip):
return False


def _refang_url(url):
parsed = urlparse(url)
scheme = parsed.scheme

# Replace hxxp/hxxps with http/https
if scheme == "hxxp":
parsed = parsed._replace(scheme='http')
elif scheme == "hxxps":
parsed = parsed._replace(scheme='https')

refang_url = parsed.geturl()
return refang_url


def _clean_url(url):
url = url.strip('>),.]\r\n')

Expand All @@ -193,6 +208,7 @@ def _clean_url(url):
if '>' in url:
url = url[:url.find('>')]

url = _refang_url(url)
return url


Expand Down
18 changes: 17 additions & 1 deletion parser_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import sys
import zipfile
from html import unescape
from urllib.parse import urlparse

import pdfminer
from bs4 import BeautifulSoup, UnicodeDammit
Expand Down Expand Up @@ -50,7 +51,7 @@
}


URI_REGEX = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
URI_REGEX = r"h(?:tt|xx)p[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
EMAIL_REGEX = r"\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b"
EMAIL_REGEX2 = r'".*"@[A-Z0-9.-]+\.[A-Z]{2,}\b'
HASH_REGEX = r"\b[0-9a-fA-F]{32}\b|\b[0-9a-fA-F]{40}\b|\b[0-9a-fA-F]{64}\b"
Expand Down Expand Up @@ -109,6 +110,20 @@ def is_ipv6(input_ip):
return bool(re.match(IPV6_REGEX, input_ip))


def _refang_url(url):
parsed = urlparse(url)
scheme = parsed.scheme

# Replace hxxp/hxxps with http/https
if scheme == "hxxp":
parsed = parsed._replace(scheme='http')
elif scheme == "hxxps":
parsed = parsed._replace(scheme='https')

refang_url = parsed.geturl()
return refang_url


def _clean_url(url):
url = url.strip('>),.]\r\n')

Expand All @@ -120,6 +135,7 @@ def _clean_url(url):
if '>' in url:
url = url[:url.find('>')]

url = _refang_url(url)
return url


Expand Down
1 change: 1 addition & 0 deletions release_notes/2.7.1.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* Added support for extracting defanged URLs (hxxp/hxxps) [PAPP-24845]
6 changes: 5 additions & 1 deletion release_notes/release_notes.html
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
<b>Parser Release Notes - Published by Splunk February 21, 2022</b>
<b>Parser Release Notes - Published by Splunk March 17, 2022</b>
<br><br>
<b>Version 2.7.1 - Released March 17, 2022</b>
<ul>
<li>Added support for extracting defanged URLs (hxxp/hxxps) [PAPP-24845]</li>
</ul>
<b>Version 2.7.0 - Released February 21, 2022</b>
<ul>
<li>Added validation for the extraction of URL artifacts [PAPP-23789]</li>
Expand Down
Binary file removed wheels/py3/typing_extensions-4.0.1-py3-none-any.whl
Binary file not shown.
Binary file not shown.

0 comments on commit 3be57e3

Please sign in to comment.