Skip to content

Commit

Permalink
Support usage with queue
Browse files (browse the repository at this point in the history)
  • Loading branch information
ajinabraham committed Nov 14, 2024
1 parent ce0f68f commit 1d75514
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 36 deletions.
2 changes: 1 addition & 1 deletion libsast/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,7 +12,7 @@
__title__ = 'libsast'
__authors__ = 'Ajin Abraham'
__copyright__ = f'Copyright {year} Ajin Abraham, opensecurity.in'
__version__ = '3.1.1'
__version__ = '3.1.2'
__version_info__ = tuple(int(i) for i in __version__.split('.'))
__all__ = [
'Scanner',
Expand Down
24 changes: 15 additions & 9 deletions libsast/core_matcher/choice_matcher.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(self, options: dict) -> None:
self.scan_rules = get_rules(options.get('choice_rules'))
self.show_progress = options.get('show_progress')
self.cpu = options.get('cpu_core')
self.queue = options.get('queue')
self.alternative_path = options.get('alternative_path')
exts = options.get('choice_extensions')
self.exts = [ext.lower() for ext in exts] if exts else []
Expand Down Expand Up @@ -65,15 +66,20 @@ def read_file_contents(self, paths: list) -> list:

def regex_scan(self, file_contents) -> list:
"""Process regex matches on the file contents."""
# Use ProcessPoolExecutor for regex processing
with ProcessPoolExecutor(max_workers=self.cpu) as cpu_executor:

results = []
for content in file_contents:
# Process Choice Matcher on the file contents
process_future = cpu_executor.submit(
self.choice_matcher, content)
results.append(process_future.result())
if self.queue:
# Use billiard's pool for regex (support queues)
from billiard import Pool
with Pool(processes=self.cpu) as pool:
# Run regex on file data
results = pool.map(
self.choice_matcher,
file_contents)
else:
# Use ProcessPoolExecutor for regex processing
with ProcessPoolExecutor(max_workers=self.cpu) as cpu_executor:
results = list(cpu_executor.map(
self.choice_matcher,
file_contents))

self.add_finding(results)
return self.findings
Expand Down
42 changes: 18 additions & 24 deletions libsast/core_matcher/pattern_matcher.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -25,6 +25,7 @@ def __init__(self, options: dict) -> None:
self.scan_rules = get_rules(options.get('match_rules'))
self.show_progress = options.get('show_progress')
self.cpu = options.get('cpu_core')
self.queue = options.get('queue')
exts = options.get('match_extensions')
self.exts = [ext.lower() for ext in exts] if exts else []
self.findings = {}
Expand Down Expand Up @@ -62,30 +63,23 @@ def read_file_contents(self, paths: list) -> list:

def regex_scan(self, file_contents: list) -> dict:
"""Scan file(s) content."""
import time
from billiard import Pool
start_time = time.time()

# Use a ProcessPool for CPU-bound regex
with ProcessPoolExecutor(max_workers=self.cpu) as cpu_executor:

# Run regex on file data
results = cpu_executor.map(
self.pattern_matcher,
file_contents,
)
endtime = time.time()
print(f"Execution ProcPool time: {endtime - start_time:.2f} seconds")

with Pool(processes=self.cpu) as cpu_pool:
# Use billiard's map to distribute file_contents to self.pattern_matcher
results = cpu_pool.map(
self.pattern_matcher,
file_contents,
)
endtime = time.time()
print(f"Execution Billiard time: {endtime - start_time:.2f} seconds")
start_time = time.time()
if self.queue:
# Use billiard's pool for CPU-bound regex (support queues)
from billiard import Pool
with Pool(processes=self.cpu) as cpu_executor:
# Run regex on file data
results = cpu_executor.map(
self.pattern_matcher,
file_contents,
)
else:
# Use a ProcessPool for CPU-bound regex
with ProcessPoolExecutor(max_workers=self.cpu) as cpu_executor:
# Run regex on file data
results = cpu_executor.map(
self.pattern_matcher,
file_contents,
)

# Compile findings
self.add_finding(results)
Expand Down
1 change: 1 addition & 0 deletions libsast/scanner.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -26,6 +26,7 @@ def __init__(self, options: dict, paths: list) -> None:
'ignore_paths': [],
'show_progress': False,
'cpu_core': 1,
'queue': False,
# Overwrite with options from invocation
**(options or {}),
}
Expand Down
13 changes: 12 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "libsast"
version = "3.1.1"
version = "3.1.2"
description = "A generic SAST library built on top of semgrep and regex"
keywords = ["libsast", "SAST", "Python SAST", "SAST API", "Regex SAST", "Pattern Matcher"]
authors = ["Ajin Abraham <[email protected]>"]
Expand All @@ -27,6 +27,7 @@ python = "^3.8"
requests = "*"
pyyaml = ">=6.0"
semgrep = {version = "1.86.0", markers = "sys_platform != 'win32'"}
billiard = "^4.2.1"

[tool.poetry.group.dev.dependencies]
bandit = "*"
Expand Down

0 comments on commit 1d75514

Please sign in to comment.