From 6d8ffe5e6d4eb1633c5569242df8ee8913af810b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mi=C5=82osz=20Skaza?= Date: Thu, 25 Jan 2024 14:28:22 +0100 Subject: [PATCH 1/2] use sha1sum to compare files when syncing and verifying --- ctfcli/cli/challenges.py | 7 +- ctfcli/core/challenge.py | 296 +++++++++++++++++++++-------------- ctfcli/utils/hashing.py | 14 ++ tests/core/test_challenge.py | 7 +- 4 files changed, 201 insertions(+), 123 deletions(-) create mode 100644 ctfcli/utils/hashing.py diff --git a/ctfcli/cli/challenges.py b/ctfcli/cli/challenges.py index e4035e2..c284f00 100644 --- a/ctfcli/cli/challenges.py +++ b/ctfcli/cli/challenges.py @@ -14,7 +14,7 @@ from ctfcli.core.challenge import Challenge from ctfcli.core.config import Config from ctfcli.core.deployment import get_deployment_handler -from ctfcli.core.exceptions import ChallengeException, LintException +from ctfcli.core.exceptions import ChallengeException, LintException, RemoteChallengeNotFound from ctfcli.utils.git import get_git_repo_head_branch log = logging.getLogger("ctfcli.cli.challenges") @@ -774,8 +774,9 @@ def healthcheck(self, challenge: Optional[str] = None) -> int: ) return 1 - challenge_data = Challenge.load_installed_challenge(challenge_id) - if not challenge_data: + try: + challenge_data = Challenge.load_installed_challenge(challenge_id) + except RemoteChallengeNotFound: click.secho(f"Could not load data for challenge '{challenge_instance}'.", fg="red") return 1 diff --git a/ctfcli/core/challenge.py b/ctfcli/core/challenge.py index 046add6..b5347d4 100644 --- a/ctfcli/core/challenge.py +++ b/ctfcli/core/challenge.py @@ -17,6 +17,7 @@ ) from ctfcli.core.image import Image from ctfcli.utils.tools import strings +from ctfcli.utils.hashing import hash_file def str_presenter(dumper, data): @@ -53,16 +54,16 @@ class Challenge(dict): ] @staticmethod - def load_installed_challenge(challenge_id) -> Optional[Dict]: + def load_installed_challenge(challenge_id) -> Dict: api = API() r = api.get(f"/api/v1/challenges/{challenge_id}") if not r.ok: - return + raise RemoteChallengeNotFound(f"Could not load challenge with id={challenge_id}") installed_challenge = r.json().get("data", None) if not installed_challenge: - return + raise RemoteChallengeNotFound(f"Could not load challenge with id={challenge_id}") return installed_challenge @@ -202,7 +203,6 @@ def _get_initial_challenge_payload(self, ignore: Tuple[str] = ()) -> Dict: return challenge_payload - # Flag delete/create def _delete_existing_flags(self): remote_flags = self.api.get("/api/v1/flags").json()["data"] for flag in remote_flags: @@ -224,7 +224,6 @@ def _create_flags(self): r = self.api.post("/api/v1/flags", json=flag_payload) r.raise_for_status() - # Topic delete/create def _delete_existing_topics(self): remote_topics = self.api.get(f"/api/v1/challenges/{self.challenge_id}/topics").json()["data"] for topic in remote_topics: @@ -243,7 +242,6 @@ def _create_topics(self): ) r.raise_for_status() - # Tag delete/create def _delete_existing_tags(self): remote_tags = self.api.get("/api/v1/tags").json()["data"] for tag in remote_tags: @@ -259,31 +257,40 @@ def _create_tags(self): ) r.raise_for_status() - # File delete/create - def _delete_existing_files(self): - remote_challenge = self.load_installed_challenge(self.challenge_id) + def _delete_file(self, remote_location: str): remote_files = self.api.get("/api/v1/files?type=challenge").json()["data"] for remote_file in remote_files: - for utilized_file in remote_challenge["files"]: - if remote_file["location"] in utilized_file: - r = self.api.delete(f"/api/v1/files/{remote_file['id']}") - r.raise_for_status() + if remote_file["location"] == remote_location: + r = self.api.delete(f"/api/v1/files/{remote_file['id']}") + r.raise_for_status() + + def _create_file(self, local_path: Path): + new_file = ("file", open(local_path, mode="rb")) + file_payload = {"challenge_id": self.challenge_id, "type": "challenge"} + + # Specifically use data= here to send multipart/form-data + r = self.api.post("/api/v1/files", files=[new_file], data=file_payload) + r.raise_for_status() - def _create_files(self): + # Close the file handle + new_file[1].close() + + def _create_all_files(self): new_files = [] for challenge_file in self["files"]: new_files.append(("file", open(self.challenge_directory / challenge_file, mode="rb"))) files_payload = {"challenge_id": self.challenge_id, "type": "challenge"} + # Specifically use data= here to send multipart/form-data r = self.api.post("/api/v1/files", files=new_files, data=files_payload) r.raise_for_status() + # Close the file handles for file_payload in new_files: file_payload[1].close() - # Hint delete/create def _delete_existing_hints(self): remote_hints = self.api.get("/api/v1/hints").json()["data"] for hint in remote_hints: @@ -309,7 +316,6 @@ def _create_hints(self): r = self.api.post("/api/v1/hints", json=hint_payload) r.raise_for_status() - # Required challenges def _set_required_challenges(self): remote_challenges = self.load_installed_challenges() required_challenges = [] @@ -340,6 +346,109 @@ def _set_required_challenges(self): r = self.api.patch(f"/api/v1/challenges/{self.challenge_id}", json=requirements_payload) r.raise_for_status() + # Compare challenge requirements, will resolve all IDs to names + def _compare_challenge_requirements(self, r1: List[Union[str, int]], r2: List[Union[str, int]]) -> bool: + remote_challenges = self.load_installed_challenges() + + def normalize_requirements(requirements): + normalized = [] + for r in requirements: + if type(r) == int: + for remote_challenge in remote_challenges: + if remote_challenge["id"] == r: + normalized.append(remote_challenge["name"]) + break + else: + normalized.append(r) + + return normalized + + return normalize_requirements(r1) == normalize_requirements(r2) + + # Normalize challenge data from the API response to match challenge.yml + # It will remove any extra fields from the remote, as well as expand external references + # that have to be fetched separately (e.g., files, flags, hints, etc.) + # Note: files won't be included for two reasons: + # 1. To avoid downloading them unnecessarily, e.g., when they are ignored + # 2. Because it's dependent on the implementation whether to save them (mirror) or just compare (verify) + def _normalize_challenge(self, challenge_data: Dict[str, Any]): + challenge = {} + + copy_keys = ["name", "category", "value", "type", "state", "connection_info"] + for key in copy_keys: + if key in challenge_data: + challenge[key] = challenge_data[key] + + challenge["description"] = challenge_data["description"].strip().replace("\r\n", "\n").replace("\t", "") + challenge["attempts"] = challenge_data["max_attempts"] + + for key in ["initial", "decay", "minimum"]: + if key in challenge_data: + if "extra" not in challenge: + challenge["extra"] = {} + + challenge["extra"][key] = challenge_data[key] + + # Add flags + r = self.api.get(f"/api/v1/challenges/{self.challenge_id}/flags") + r.raise_for_status() + flags = r.json()["data"] + challenge["flags"] = [ + f["content"] + if f["type"] == "static" and (f["data"] is None or f["data"] == "") + else {"content": f["content"].strip().replace("\r\n", "\n"), "type": f["type"], "data": f["data"]} + for f in flags + ] + + # Add tags + r = self.api.get(f"/api/v1/challenges/{self.challenge_id}/tags") + r.raise_for_status() + tags = r.json()["data"] + challenge["tags"] = [t["value"] for t in tags] + + # Add hints + r = self.api.get(f"/api/v1/challenges/{self.challenge_id}/hints") + r.raise_for_status() + hints = r.json()["data"] + # skipping pre-requisites for hints because they are not supported in ctfcli + challenge["hints"] = [ + {"content": h["content"], "cost": h["cost"]} if h["cost"] > 0 else h["content"] for h in hints + ] + + # Add topics + r = self.api.get(f"/api/v1/challenges/{self.challenge_id}/topics") + r.raise_for_status() + topics = r.json()["data"] + challenge["topics"] = [t["value"] for t in topics] + + # Add requirements + r = self.api.get(f"/api/v1/challenges/{self.challenge_id}/requirements") + r.raise_for_status() + requirements = (r.json().get("data") or {}).get("prerequisites", []) + if len(requirements) > 0: + # Prefer challenge names over IDs + r = self.api.get("/api/v1/challenges") + r.raise_for_status() + challenges = r.json()["data"] + challenge["requirements"] = [c["name"] for c in challenges if c["id"] in requirements] + + return challenge + + # Create a dictionary of remote files in { basename: {"url": "", "location": ""} } format + def _normalize_remote_files(self, remote_files: List[str]) -> Dict[str, Dict[str, str]]: + normalized = {} + for f in remote_files: + file_parts = f.split("?token=")[0].split("/") + normalized[file_parts[-1]] = {"url": f, "location": f"{file_parts[-2]}/{file_parts[-1]}"} + + return normalized + + # Create a dictionary of sha1sums in { location: sha1sum } format + def _get_files_sha1sums(self) -> Dict[str, str]: + r = self.api.get("/api/v1/files?type=challenge") + r.raise_for_status() + return {f["location"]: f.get("sha1sum", None) for f in r.json()["data"]} + def sync(self, ignore: Tuple[str] = ()) -> None: challenge = self @@ -391,9 +500,37 @@ def sync(self, ignore: Tuple[str] = ()) -> None: # Create / Upload files if "files" not in ignore: - self._delete_existing_files() - if challenge.get("files"): - self._create_files() + # Get basenames of local files to compare against remote files + local_files = {f.split("/")[-1]: f for f in self.get("files", [])} + remote_files = self._normalize_remote_files(remote_challenge.get("files", [])) + + # Delete remote files which are no longer defined locally + for remote_file in remote_files: + if remote_file not in local_files: + self._delete_file(remote_files[remote_file]["location"]) + + # Only check for file changes if there are files to upload + if local_files: + sha1sums = self._get_files_sha1sums() + for local_file_name in local_files: + # Creating a new file + if local_file_name not in remote_files: + self._create_file(self.challenge_directory / local_files[local_file_name]) + continue + + # Updating an existing file + # sha1sum is present in CTFd 3.7+, use it instead of always re-uploading the file if possible + remote_file_sha1sum = sha1sums[remote_files[local_file_name]["location"]] + if remote_file_sha1sum is not None: + with open(self.challenge_directory / local_files[local_file_name], "rb") as lf: + local_file_sha1sum = hash_file(lf) + + if local_file_sha1sum == remote_file_sha1sum: + continue + + # if sha1sums are not present, or the hashes are different, re-upload the file + self._delete_file(remote_files[local_file_name]["location"]) + self._create_file(self.challenge_directory / local_files[local_file_name]) # Update hints if "hints" not in ignore: @@ -472,7 +609,7 @@ def create(self, ignore: Tuple[str] = ()) -> None: # Upload files if challenge.get("files") and "files" not in ignore: - self._create_files() + self._create_all_files() # Add hints if challenge.get("hints") and "hints" not in ignore: @@ -565,94 +702,6 @@ def lint(self, skip_hadolint=False, flag_format="flag{") -> bool: return True - # Compare challenge requirements, will resolve all IDs to names - def _compare_challenge_requirements(self, r1: List[Union[str, int]], r2: List[Union[str, int]]) -> bool: - remote_challenges = self.load_installed_challenges() - - def normalize_requirements(requirements): - normalized = [] - for r in requirements: - if type(r) == int: - for remote_challenge in remote_challenges: - if remote_challenge["id"] == r: - normalized.append(remote_challenge["name"]) - break - else: - normalized.append(r) - - return normalized - - return normalize_requirements(r1) == normalize_requirements(r2) - - # Normalize challenge data from the API response to match challenge.yml - # It will remove any extra fields from the remote, as well as expand external references - # that have to be fetched separately (e.g., files, flags, hints, etc.) - # Note: files won't be included for two reasons: - # 1. To avoid downloading them unnecessarily, e.g., when they are ignored - # 2. Because it's dependent on the implementation whether to save them (mirror) or just compare (verify) - def _normalize_challenge(self, challenge_data: Dict[str, Any]): - challenge = {} - - copy_keys = ["name", "category", "value", "type", "state", "connection_info"] - for key in copy_keys: - if key in challenge_data: - challenge[key] = challenge_data[key] - - challenge["description"] = challenge_data["description"].strip().replace("\r\n", "\n").replace("\t", "") - challenge["attempts"] = challenge_data["max_attempts"] - - for key in ["initial", "decay", "minimum"]: - if key in challenge_data: - if "extra" not in challenge: - challenge["extra"] = {} - - challenge["extra"][key] = challenge_data[key] - - # Add flags - r = self.api.get(f"/api/v1/challenges/{self.challenge_id}/flags") - r.raise_for_status() - flags = r.json()["data"] - challenge["flags"] = [ - f["content"] - if f["type"] == "static" and (f["data"] is None or f["data"] == "") - else {"content": f["content"].strip().replace("\r\n", "\n"), "type": f["type"], "data": f["data"]} - for f in flags - ] - - # Add tags - r = self.api.get(f"/api/v1/challenges/{self.challenge_id}/tags") - r.raise_for_status() - tags = r.json()["data"] - challenge["tags"] = [t["value"] for t in tags] - - # Add hints - r = self.api.get(f"/api/v1/challenges/{self.challenge_id}/hints") - r.raise_for_status() - hints = r.json()["data"] - # skipping pre-requisites for hints because they are not supported in ctfcli - challenge["hints"] = [ - {"content": h["content"], "cost": h["cost"]} if h["cost"] > 0 else h["content"] for h in hints - ] - - # Add topics - r = self.api.get(f"/api/v1/challenges/{self.challenge_id}/topics") - r.raise_for_status() - topics = r.json()["data"] - challenge["topics"] = [t["value"] for t in topics] - - # Add requirements - r = self.api.get(f"/api/v1/challenges/{self.challenge_id}/requirements") - r.raise_for_status() - requirements = (r.json().get("data") or {}).get("prerequisites", []) - if len(requirements) > 0: - # Prefer challenge names over IDs - r = self.api.get("/api/v1/challenges") - r.raise_for_status() - challenges = r.json()["data"] - challenge["requirements"] = [c["name"] for c in challenges if c["id"] in requirements] - - return challenge - def mirror(self, files_directory_name: str = "dist", ignore: Tuple[str] = ()) -> None: self._load_challenge_id() remote_challenge = self.load_installed_challenge(self.challenge_id) @@ -726,24 +775,41 @@ def verify(self, ignore: Tuple[str] = ()) -> bool: # Handle a special case for files, unless they are ignored if "files" not in ignore: - local_files = {Path(f).name: f for f in challenge.get("files", [])} - remote_files = {f.split("/")[-1].split("?token=")[0]: f for f in remote_challenge["files"]} + # Check if files defined in challenge.yml are present + try: + self._validate_files() + local_files = {Path(f).name: f for f in challenge.get("files", [])} + except InvalidChallengeFile: + return False + remote_files = self._normalize_remote_files(remote_challenge["files"]) # Check if there are no extra local files for local_file in local_files: if local_file not in remote_files: return False + sha1sums = self._get_files_sha1sums() # Check if all remote files are present locally - for remote_file in remote_files: - if remote_file not in local_files: + for remote_file_name in remote_files: + if remote_file_name not in local_files: return False - # Check if the remote files are the same as local - r = self.api.get(remote_files[remote_file]) + # sha1sum is present in CTFd 3.7+, use it instead of downloading the file if possible + remote_file_sha1sum = sha1sums[remote_files[remote_file_name]["location"]] + if remote_file_sha1sum is not None: + with open(self.challenge_directory / local_files[remote_file_name], "rb") as lf: + local_file_sha1sum = hash_file(lf) + + if local_file_sha1sum != remote_file_sha1sum: + return False + + return True + + # If sha1sum is not present, download the file and compare the contents + r = self.api.get(remote_files[remote_file_name]["url"]) r.raise_for_status() remote_file_contents = r.content - local_file_contents = (self.challenge_directory / local_files[remote_file]).read_bytes() + local_file_contents = (self.challenge_directory / local_files[remote_file_name]).read_bytes() if remote_file_contents != local_file_contents: return False diff --git a/ctfcli/utils/hashing.py b/ctfcli/utils/hashing.py new file mode 100644 index 0000000..cf96f6c --- /dev/null +++ b/ctfcli/utils/hashing.py @@ -0,0 +1,14 @@ +import hashlib + + +def hash_file(fp, algo="sha1"): + fp.seek(0) + if algo == "sha1": + h = hashlib.sha1() # nosec + # https://stackoverflow.com/a/64730457 + while chunk := fp.read(1024): + h.update(chunk) + fp.seek(0) + return h.hexdigest() + else: + raise NotImplementedError diff --git a/tests/core/test_challenge.py b/tests/core/test_challenge.py index 184437a..d7f39e2 100644 --- a/tests/core/test_challenge.py +++ b/tests/core/test_challenge.py @@ -168,7 +168,6 @@ def test_updates_simple_properties(self, mock_api_constructor: MagicMock, *args, call("/api/v1/flags"), call("/api/v1/challenges/1/topics"), call("/api/v1/tags"), - call("/api/v1/files?type=challenge"), call("/api/v1/hints"), ], any_order=True, @@ -210,7 +209,6 @@ def test_updates_attempts(self, mock_api_constructor: MagicMock, *args, **kwargs call("/api/v1/flags"), call("/api/v1/challenges/1/topics"), call("/api/v1/tags"), - call("/api/v1/files?type=challenge"), call("/api/v1/hints"), ], any_order=True, @@ -258,7 +256,6 @@ def test_updates_extra_properties(self, mock_api_constructor: MagicMock, *args, call("/api/v1/flags"), call("/api/v1/challenges/1/topics"), call("/api/v1/tags"), - call("/api/v1/files?type=challenge"), call("/api/v1/hints"), ], any_order=True, @@ -790,7 +787,6 @@ def test_defaults_to_standard_challenge_type(self, mock_api_constructor: MagicMo call("/api/v1/flags"), call("/api/v1/challenges/1/topics"), call("/api/v1/tags"), - call("/api/v1/files?type=challenge"), call("/api/v1/hints"), ], any_order=True, @@ -830,7 +826,6 @@ def test_defaults_to_visible_state(self, mock_api_constructor: MagicMock, *args, call("/api/v1/flags"), call("/api/v1/challenges/1/topics"), call("/api/v1/tags"), - call("/api/v1/files?type=challenge"), call("/api/v1/hints"), ], any_order=True, @@ -964,6 +959,8 @@ def test_updates_multiple_attributes_at_once(self, mock_api_constructor: MagicMo call().raise_for_status(), call("/api/v1/files", files=ANY, data={"challenge_id": 1, "type": "challenge"}), call().raise_for_status(), + call("/api/v1/files", files=ANY, data={"challenge_id": 1, "type": "challenge"}), + call().raise_for_status(), call("/api/v1/hints", json={"content": "free hint", "cost": 0, "challenge_id": 1}), call().raise_for_status(), ] From 1078eb456d27f317d30cb7d073f57ca119f74441 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mi=C5=82osz=20Skaza?= Date: Thu, 25 Jan 2024 14:29:26 +0100 Subject: [PATCH 2/2] make format --- ctfcli/cli/challenges.py | 6 +++++- ctfcli/core/challenge.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/ctfcli/cli/challenges.py b/ctfcli/cli/challenges.py index c284f00..b56525c 100644 --- a/ctfcli/cli/challenges.py +++ b/ctfcli/cli/challenges.py @@ -14,7 +14,11 @@ from ctfcli.core.challenge import Challenge from ctfcli.core.config import Config from ctfcli.core.deployment import get_deployment_handler -from ctfcli.core.exceptions import ChallengeException, LintException, RemoteChallengeNotFound +from ctfcli.core.exceptions import ( + ChallengeException, + LintException, + RemoteChallengeNotFound, +) from ctfcli.utils.git import get_git_repo_head_branch log = logging.getLogger("ctfcli.cli.challenges") diff --git a/ctfcli/core/challenge.py b/ctfcli/core/challenge.py index b5347d4..4a94fa4 100644 --- a/ctfcli/core/challenge.py +++ b/ctfcli/core/challenge.py @@ -2,7 +2,7 @@ import subprocess from os import PathLike from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Tuple, Union import click import yaml @@ -16,8 +16,8 @@ RemoteChallengeNotFound, ) from ctfcli.core.image import Image -from ctfcli.utils.tools import strings from ctfcli.utils.hashing import hash_file +from ctfcli.utils.tools import strings def str_presenter(dumper, data):