Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Configurable settings for downloads #255

Merged
merged 6 commits into from
Jan 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CveXplore/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.3.20.dev18
0.3.20.dev23
10 changes: 10 additions & 0 deletions CveXplore/common/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,5 +150,15 @@ class Configuration(object):

MAX_DOWNLOAD_WORKERS = int(os.getenv("MAX_DOWNLOAD_WORKERS", 10))

# This factor determines the number of simultaneous requests made towards the NIST API:
# the fixed number of client requests (30) is divided by the sem factor, so the lower
# it is set, the more simultaneous requests are made.
# The default of 0.0 means "not set"; the downloader then falls back to its own factor.
DOWNLOAD_SEM_FACTOR = float(
os.getenv("DOWNLOAD_SEM_FACTOR", 0.0)
) # if set, should be set >=0.6
# Lower and upper bound (in seconds) of the random pause taken after each download request.
DOWNLOAD_SLEEP_MIN = float(os.getenv("DOWNLOAD_SLEEP_MIN", 0.5))
DOWNLOAD_SLEEP_MAX = float(os.getenv("DOWNLOAD_SLEEP_MAX", 2.5))
# Number of iterations used when the downloader builds one batch of work. Kept as the raw
# environment string (None when unset); the downloader converts it to int itself and
# re-raises ValueError when the value is not an integer.
DOWNLOAD_BATCH_RANGE = os.getenv("DOWNLOAD_BATCH_RANGE", None)

def __repr__(self):
    """Return the fixed tag that identifies this configuration object in logs and the REPL."""
    # Plain literal: the previous f-string had no placeholders to interpolate.
    return "<< CveXploreConfiguration >>"
40 changes: 31 additions & 9 deletions CveXplore/core/nvd_nist/nvd_nist_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import math
import random
import time
import uuid
from collections import namedtuple
from datetime import datetime, timedelta
from json import JSONDecodeError
Expand Down Expand Up @@ -338,10 +339,13 @@ def __init__(self, api_data: ApiData):
self._current_index = api_data.start_index
self.api_data = api_data

self.sem_factor = 6
if self.config.DOWNLOAD_SEM_FACTOR != 0.0:
self.sem_factor = self.config.DOWNLOAD_SEM_FACTOR
else:
self.sem_factor = 6

if not self.api_data.api_handle.api_key_limit:
self.sem_factor = 0.6
if not self.api_data.api_handle.api_key_limit:
self.sem_factor = 0.6

self.logger.debug(f"Using sem factor: {self.sem_factor}")

Expand Down Expand Up @@ -375,10 +379,20 @@ def __next__(self):

self.workload = []

if self.api_data.api_handle.api_key_limit:
batch_range = 5
if self.config.DOWNLOAD_BATCH_RANGE is None:
if self.api_data.api_handle.api_key_limit:
batch_range = 5
else:
batch_range = 45
else:
batch_range = 45
try:
batch_range = int(self.config.DOWNLOAD_BATCH_RANGE)
except ValueError:
self.logger.error(
f"Invalid value for DOWNLOAD_BATCH_RANGE, {self.config.DOWNLOAD_BATCH_RANGE} "
f"cannot be converted into an integer..."
)
raise

for i in range(batch_range):
if not self.first_iteration:
Expand Down Expand Up @@ -419,11 +433,12 @@ def process_async(self):

@retry(retry_policy)
async def fetch(self, session: aiohttp.ClientSession, url: str):
request_id = uuid.uuid4()
try:
async with session.get(
url, proxy=self.config.HTTP_PROXY_STRING
) as response:
self.logger.debug(f"Sending request to url: {url}")
self.logger.debug(f"[{request_id}] Sending request to url: {url}")
if response.status == 200:
data = await response.json()
if "format" in data:
Expand Down Expand Up @@ -462,8 +477,15 @@ async def fetch(self, session: aiohttp.ClientSession, url: str):
except ContentTypeError:
return ApiDataRetrievalFailed(url)
finally:
self.logger.debug(f"Finished request to url: {url}")
time.sleep(self.sem_factor / 2)
random_sleep = round(
random.SystemRandom().uniform(
self.config.DOWNLOAD_SLEEP_MIN, self.config.DOWNLOAD_SLEEP_MAX
),
1,
)
self.logger.debug(f"[{request_id}] Sleeping for {random_sleep} secs...")
await asyncio.sleep(random_sleep)
self.logger.debug(f"[{request_id}] Finished request")

async def fetch_all(self, loop):
sem = asyncio.Semaphore(math.ceil(30 / self.sem_factor))
Expand Down
Loading