Skip to content

Commit

Permalink
Merge pull request #942 from openzim/fix_secret_removal
Browse files Browse the repository at this point in the history
Fix secret removal which was also removing carriage returns and not supporting multiple URLs in single string
  • Loading branch information
benoit74 authored Mar 12, 2024
2 parents 05fb9cd + 6effbe8 commit 7c68b32
Show file tree
Hide file tree
Showing 3 changed files with 596 additions and 240 deletions.
2 changes: 1 addition & 1 deletion dispatcher/backend/src/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
SLACK_ICON = os.getenv("SLACK_ICON")

# string to replace hidden secrets with
SECRET_REPLACEMENT = "********" # nosec
SECRET_REPLACEMENT = "--------" # nosec

# ###
# workers whitelist management
Expand Down
49 changes: 35 additions & 14 deletions dispatcher/backend/src/routes/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
from typing import Any, Dict, List
from urllib.parse import parse_qs, urlparse
from urllib.parse import parse_qs, urlencode, urlsplit, urlunsplit

from common.constants import SECRET_REPLACEMENT
from common.schemas.models import ScheduleConfigSchema
Expand All @@ -26,7 +26,7 @@ def has_dict_sub_key(data: Dict[str, Any], keys: List[str]):
def remove_secrets_from_response(response: dict):
"""replaces or removes (in-place) all occurences of secrets"""
remove_secret_flags(response)
remove_s3_secrets(response)
remove_url_secrets(response)


def remove_secret_flags(response: dict):
Expand Down Expand Up @@ -87,24 +87,45 @@ def remove_secret_flag_from_command(
] = f'--{field_name}="{SECRET_REPLACEMENT}"'


def remove_s3_secrets(response: dict):
"""remove keyId and secretAccessKey query params from any URL we might find"""
def remove_url_secrets(response: dict):
"""remove secrets from any URL we might find
For now we remove:
- password if passed in the network location
- keyId and secretAccessKey query params (probably used for S3)
"""
for key in response.keys():
if not response[key]:
continue
if isinstance(response[key], dict):
remove_s3_secrets(response[key])
remove_url_secrets(response[key])
else:
if not isinstance(response[key], str) or "://" not in response[key]:
continue
url = urlparse(response[key])
response[key] = url._replace(
query="&".join(
[
f"{key}={value}"
for key, values in parse_qs(url.query).items()
if str(key).lower() not in ["keyid", "secretaccesskey"]
for url in [word for word in response[key].split() if "://" in word]:
urlparts = urlsplit(url)
newquery = urlencode(
{
key: (
value
if str(key).lower() not in ["keyid", "secretaccesskey"]
else SECRET_REPLACEMENT
)
for key, values in parse_qs(urlparts.query).items()
for value in values
]
},
doseq=True,
)
newnetloc: str | None = urlparts.netloc
if newnetloc and urlparts.password:
newnetloc = newnetloc.replace(urlparts.password, SECRET_REPLACEMENT)
secured_url = urlunsplit(
(
urlparts.scheme,
newnetloc,
urlparts.path,
newquery,
urlparts.fragment,
)
)
).geturl()
response[key] = response[key].replace(url, secured_url)
Loading

0 comments on commit 7c68b32

Please sign in to comment.