From 78165e294f3ae731a892b6321e960a3a04948f2a Mon Sep 17 00:00:00 2001 From: Tarek Bouali Date: Thu, 11 Apr 2024 07:54:33 +0200 Subject: [PATCH 1/5] add --disable-placeholder option --- paramspider/main.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/paramspider/main.py b/paramspider/main.py index 38ff9f5..f53fc7a 100644 --- a/paramspider/main.py +++ b/paramspider/main.py @@ -55,13 +55,15 @@ def clean_url(url): return parsed_url.geturl() -def clean_urls(urls, extensions, placeholder): +def clean_urls(urls, extensions, placeholder, disable_placeholder): """ Clean a list of URLs by removing unnecessary parameters and query strings. Args: urls (list): List of URLs to clean. extensions (list): List of file extensions to check against. + placeholder (str): Default placeholder for parameter values. + disable_placeholder (bool): Flag to indicate whether to update parameter values with the default placeholder. Returns: list: List of cleaned URLs. @@ -72,13 +74,16 @@ def clean_urls(urls, extensions, placeholder): if not has_extension(cleaned_url, extensions): parsed_url = urlparse(cleaned_url) query_params = parse_qs(parsed_url.query) - cleaned_params = {key: placeholder for key in query_params} + if not disable_placeholder: + cleaned_params = {key: placeholder for key in query_params} + else: + cleaned_params = query_params cleaned_query = urlencode(cleaned_params, doseq=True) cleaned_url = parsed_url._replace(query=cleaned_query).geturl() cleaned_urls.add(cleaned_url) return list(cleaned_urls) -def fetch_and_clean_urls(domain, extensions, stream_output,proxy, placeholder): +def fetch_and_clean_urls(domain, extensions, stream_output, proxy, placeholder, disable_placeholder): """ Fetch and clean URLs related to a specific domain from the Wayback Machine. @@ -86,18 +91,21 @@ def fetch_and_clean_urls(domain, extensions, stream_output,proxy, placeholder): domain (str): The domain name to fetch URLs for. 
extensions (list): List of file extensions to check against. stream_output (bool): True to stream URLs to the terminal. + proxy (str): Proxy address for web requests. + placeholder (str): Default placeholder for parameter values. + disable_placeholder (bool): Flag to indicate whether to update parameter values with the default placeholder. Returns: None """ logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Fetching URLs for {Fore.CYAN + domain + Style.RESET_ALL}") wayback_uri = f"https://web.archive.org/cdx/search/cdx?url={domain}/*&output=txt&collapse=urlkey&fl=original&page=/" - response = client.fetch_url_content(wayback_uri,proxy) + response = client.fetch_url_content(wayback_uri, proxy) urls = response.text.split() logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Found {Fore.GREEN + str(len(urls)) + Style.RESET_ALL} URLs for {Fore.CYAN + domain + Style.RESET_ALL}") - cleaned_urls = clean_urls(urls, extensions, placeholder) + cleaned_urls = clean_urls(urls, extensions, placeholder, disable_placeholder) logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Cleaning URLs for {Fore.CYAN + domain + Style.RESET_ALL}") logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Found {Fore.GREEN + str(len(cleaned_urls)) + Style.RESET_ALL} URLs after cleaning") logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Extracting URLs with parameters") @@ -137,8 +145,9 @@ def main(): parser.add_argument("-d", "--domain", help="Domain name to fetch related URLs for.") parser.add_argument("-l", "--list", help="File containing a list of domain names.") parser.add_argument("-s", "--stream", action="store_true", help="Stream URLs on the terminal.") - parser.add_argument("--proxy", help="Set the proxy address for web requests.",default=None) - parser.add_argument("-p", "--placeholder", help="placeholder for parameter values", default="FUZZ") + parser.add_argument("--proxy", help="Set the proxy address for web requests.", default=None) + parser.add_argument("-p", "--placeholder", 
help="Placeholder for parameter values", default="FUZZ") + parser.add_argument("-db", "--disable-placeholder", action="store_true", help="Disable updating parameter values with the default placeholder.") args = parser.parse_args() if not args.domain and not args.list: @@ -147,6 +156,9 @@ def main(): if args.domain and args.list: parser.error("Please provide either the -d option or the -l option, not both.") + if args.placeholder and args.disable_placeholder: + parser.error("Please provide either the --placeholder option or the --disable-placeholder option, not both.") + if args.list: with open(args.list, "r") as f: domains = [line.strip().lower().replace('https://', '').replace('http://', '') for line in f.readlines()] @@ -158,11 +170,11 @@ def main(): extensions = HARDCODED_EXTENSIONS if args.domain: - fetch_and_clean_urls(domain, extensions, args.stream, args.proxy, args.placeholder) + fetch_and_clean_urls(domain, extensions, args.stream, args.proxy, args.placeholder, args.disable_placeholder) if args.list: for domain in domains: - fetch_and_clean_urls(domain, extensions, args.stream,args.proxy, args.placeholder) + fetch_and_clean_urls(domain, extensions, args.stream, args.proxy, args.placeholder, args.disable_placeholder) if __name__ == "__main__": - main() \ No newline at end of file + main() From 1efe371a9e9fd425038f8e89b3a6d991daad9ddc Mon Sep 17 00:00:00 2001 From: Tarek Bouali Date: Thu, 11 Apr 2024 07:58:04 +0200 Subject: [PATCH 2/5] Update README.md --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index d5c9048..ace11aa 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,12 @@ Here are a few examples of how to use `paramspider`: paramspider -d example.com -p '">

<h1>reflection</h1>

' ``` +- Disable updating parameter values with a placeholder: + + ```sh + paramspider -d example.com -db' +``` + ## Contributing Contributions are welcome! If you'd like to contribute to `paramspider`, please follow these steps: From 3762c9da671f8bd59ddabebe91d8f6e239655d07 Mon Sep 17 00:00:00 2001 From: Tarek Bouali Date: Thu, 11 Apr 2024 07:59:27 +0200 Subject: [PATCH 3/5] Fix description: Disable updating parameter values with a placeholder. --- paramspider/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paramspider/main.py b/paramspider/main.py index f53fc7a..f0ab04e 100644 --- a/paramspider/main.py +++ b/paramspider/main.py @@ -147,7 +147,7 @@ def main(): parser.add_argument("-s", "--stream", action="store_true", help="Stream URLs on the terminal.") parser.add_argument("--proxy", help="Set the proxy address for web requests.", default=None) parser.add_argument("-p", "--placeholder", help="Placeholder for parameter values", default="FUZZ") - parser.add_argument("-db", "--disable-placeholder", action="store_true", help="Disable updating parameter values with the default placeholder.") + parser.add_argument("-db", "--disable-placeholder", action="store_true", help="Disable updating parameter values with a placeholder.") args = parser.parse_args() if not args.domain and not args.list: From a59fdf38de33160d8764dd18e42f4582fc8fde0d Mon Sep 17 00:00:00 2001 From: Tarek Bouali Date: Thu, 11 Apr 2024 08:07:24 +0200 Subject: [PATCH 4/5] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ace11aa..5e328f4 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ Here are a few examples of how to use `paramspider`: - Disable updating parameter values with a placeholder: ```sh - paramspider -d example.com -db' + paramspider -d example.com -db ``` ## Contributing From eceead218360549159773d73cbd0890207840d30 Mon Sep 17 00:00:00 2001 From: Tarek Bouali Date: Fri, 12 Apr 2024 
15:56:20 +0200 Subject: [PATCH 5/5] fix typo: db -> dp --- paramspider/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paramspider/main.py b/paramspider/main.py index f0ab04e..609db7b 100644 --- a/paramspider/main.py +++ b/paramspider/main.py @@ -147,7 +147,7 @@ def main(): parser.add_argument("-s", "--stream", action="store_true", help="Stream URLs on the terminal.") parser.add_argument("--proxy", help="Set the proxy address for web requests.", default=None) parser.add_argument("-p", "--placeholder", help="Placeholder for parameter values", default="FUZZ") - parser.add_argument("-db", "--disable-placeholder", action="store_true", help="Disable updating parameter values with a placeholder.") + parser.add_argument("-dp", "--disable-placeholder", action="store_true", help="Disable updating parameter values with a placeholder.") args = parser.parse_args() if not args.domain and not args.list: