Skip to content

Commit

Permalink
fix(scan): resolve detection of the first endpoint in the initiate sc…
Browse files Browse the repository at this point in the history
…an task

- Replace HTTPx first scan by nmap, then launch HTTPx with discovered port
- Create a reusable function to launch nmap on the fly
- Add parsing to get ports and services from Nmap output
- Add more logs to debug scans while running
- Remove the HTTP CRAWL global var; Nmap is the default to retrieve the first endpoint (the starting point for all the other tasks)
- Adjust the is_alive parameter for tasks that need alive endpoints
- Fix S3 scanner source file not found
- Add more checks to prevent errors and scan crash
- Refactor Endpoint saving for better logic and fewer errors
- Improve URLs validation
  • Loading branch information
psyray committed Nov 22, 2024
1 parent a3918b7 commit 023e36b
Show file tree
Hide file tree
Showing 6 changed files with 593 additions and 260 deletions.
15 changes: 12 additions & 3 deletions web/reNgine/celery_custom_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,10 @@ def __call__(self, *args, **kwargs):

# Create ScanActivity for this task and send start scan notifs
if self.track:
logger.warning(f'Task {self.task_name} is RUNNING')
if self.domain:
logger.warning(f'Task {self.task_name} for {self.subdomain.name if self.subdomain else self.domain.name} is RUNNING')
else:
logger.warning(f'Task {self.task_name} is RUNNING')
self.create_scan_activity()

if RENGINE_CACHE_ENABLED:
Expand All @@ -119,7 +122,10 @@ def __call__(self, *args, **kwargs):
if result and result != b'null':
self.status = SUCCESS_TASK
if RENGINE_RECORD_ENABLED and self.track:
logger.warning(f'Task {self.task_name} status is SUCCESS (CACHED)')
if self.domain:
logger.warning(f'Task {self.task_name} for {self.subdomain.name if self.subdomain else self.domain.name} status is SUCCESS (CACHED)')
else:
logger.warning(f'Task {self.task_name} status is SUCCESS (CACHED)')
self.update_scan_activity()
return json.loads(result)

Expand Down Expand Up @@ -150,7 +156,10 @@ def __call__(self, *args, **kwargs):
self.write_results()

if RENGINE_RECORD_ENABLED and self.track:
msg = f'Task {self.task_name} status is {self.status_str}'
if self.domain:
msg = f'Task {self.task_name} for {self.subdomain.name if self.subdomain else self.domain.name} status is {self.status_str}'
else:
msg = f'Task {self.task_name} status is {self.status_str}'
msg += f' | Error: {self.error}' if self.error else ''
logger.warning(msg)
self.update_scan_activity()
Expand Down
83 changes: 74 additions & 9 deletions web/reNgine/common_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,45 @@ def extract_path_from_url(url):

return reconstructed_url

def is_valid_url(url):
    """Check if a URL is valid, including both full URLs and host:port format.

    Accepted forms are full URLs with an http:// or https:// scheme, bare
    domains, bare IPv4/IPv6 addresses, and ``host:port`` pairs. An IPv6 host
    combined with a port must be bracketed (``[::1]:8080``), since its own
    colons cannot otherwise be told apart from the port separator.

    Args:
        url (str): URL to validate (https://domain.com or domain.com:port)

    Returns:
        bool: True if valid URL, False otherwise
    """
    logger.debug(f'Validating URL: {url}')

    # Nothing to validate: None, an empty string or a non-string value
    if not url or not isinstance(url, str):
        return False

    # Handle URLs with scheme (http://, https://)
    if url.startswith(('http://', 'https://')):
        # NOTE(review): validators is expected to return a falsy error object
        # (not False) on failure; coerce so callers always receive a bool.
        return bool(validators.url(url))

    # Handle domain:port format
    try:
        host = url
        ipv6_only = False

        # A bare IPv6 address contains colons that are not port separators,
        # so only split off a port when the whole string is not an IPv6.
        if ':' in url and not validators.ipv6(url):
            host, port = url.rsplit(':', 1)
            # Validate port
            port = int(port)
            if not 1 <= port <= 65535:
                logger.debug(f'Invalid port number: {port}')
                return False
            # Brackets are only meaningful around an IPv6 literal
            if host.startswith('[') and host.endswith(']'):
                host = host[1:-1]
                ipv6_only = True

        # Validate domain
        if ipv6_only:
            host_is_valid = validators.ipv6(host)
        else:
            host_is_valid = (
                validators.domain(host)
                or validators.ipv4(host)
                or validators.ipv6(host)
            )

        if host_is_valid:
            logger.debug(f'Valid domain/IP found: {host}')
            return True

        logger.debug(f'Invalid domain/IP: {host}')
        return False

    except (ValueError, ValidationError) as e:
        logger.debug(f'Validation error: {str(e)}')
        return False

#-------#
# Utils #
#-------#
Expand Down Expand Up @@ -878,7 +917,7 @@ def get_task_cache_key(func_name, *args, **kwargs):


def get_output_file_name(scan_history_id, subscan_id, filename):
title = f'#{scan_history_id}'
title = f'{scan_history_id}'
if subscan_id:
title += f'-{subscan_id}'
title += f'_{filename}'
Expand Down Expand Up @@ -925,21 +964,28 @@ def get_nmap_cmd(
script=None,
script_args=None,
max_rate=None,
service_detection=True,
flags=[]):
if not cmd:
cmd = 'nmap'

# Initialize base options
options = {
"-sV": service_detection,
"-p": ports,
"--max-rate": max_rate,
"-oX": output_file,
"--script": script,
"--script-args": script_args,
"--max-rate": max_rate,
"-oX": output_file
}

if not cmd:
cmd = 'nmap'
# Update options with nmap specific parameters
options.update({
"-sV": "",
"-p": ports,
})

# Build command with options
cmd = _build_cmd(cmd, options, flags)

# Add input source
if not input_file:
cmd += f" {host}" if host else ""
else:
Expand Down Expand Up @@ -1352,4 +1398,23 @@ def get_ips_from_cidr_range(target):
return [str(ip) for ip in ipaddress.IPv4Network(target)]
except ValueError:
logger.error(f'{target} is not a valid CIDR range. Skipping.')
return []
return []

def get_http_crawl_value(engine, config):
    """Get HTTP crawl value from config.

    The subscan ``config`` takes precedence. When it is missing or does not
    define the setting, the value is read from the engine's YAML
    configuration, falling back to ``DEFAULT_ENABLE_HTTP_CRAWL``.

    Args:
        engine: EngineType object
        config: Configuration dictionary or None

    Returns:
        bool: True if HTTP crawl is enabled (the configured value is
        returned as-is, without coercion)
    """
    # subscan engine value
    enable_http_crawl = config.get(ENABLE_HTTP_CRAWL) if config else None
    if enable_http_crawl is None:
        # scan engine value; an empty YAML document loads as None, so fall
        # back to an empty mapping to let the default apply instead of
        # raising AttributeError on .get()
        yaml_config = yaml.safe_load(engine.yaml_configuration) or {}
        enable_http_crawl = yaml_config.get(ENABLE_HTTP_CRAWL, DEFAULT_ENABLE_HTTP_CRAWL)
    logger.debug(f'Enable HTTP crawl: {enable_http_crawl}')
    return enable_http_crawl
2 changes: 1 addition & 1 deletion web/reNgine/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
DOMAIN_NAME = env('DOMAIN_NAME', default='localhost:8000')
TEMPLATE_DEBUG = env.bool('TEMPLATE_DEBUG', default=False)
SECRET_FILE = os.path.join(RENGINE_HOME, 'secret')
DEFAULT_ENABLE_HTTP_CRAWL = env.bool('DEFAULT_ENABLE_HTTP_CRAWL', default=True)
DEFAULT_ENABLE_HTTP_CRAWL = env.bool('DEFAULT_ENABLE_HTTP_CRAWL', default=False)
DEFAULT_RATE_LIMIT = env.int('DEFAULT_RATE_LIMIT', default=150) # requests / second
DEFAULT_HTTP_TIMEOUT = env.int('DEFAULT_HTTP_TIMEOUT', default=5) # seconds
DEFAULT_RETRIES = env.int('DEFAULT_RETRIES', default=1)
Expand Down
Loading

0 comments on commit 023e36b

Please sign in to comment.