cve-search · P-T-I · Jan 23, 2024 · Dec 27, 2023 · Dec 27, 2023 · Dec 27, 2023
diff --git a/CveXplore/VERSION b/CveXplore/VERSION
@@ -1 +1 @@
-0.3.20.dev18
+0.3.20.dev23
diff --git a/CveXplore/common/config.py b/CveXplore/common/config.py
@@ -150,5 +150,15 @@ class Configuration(object):
 
  MAX_DOWNLOAD_WORKERS = int(os.getenv("MAX_DOWNLOAD_WORKERS", 10))
 
+ # This factor determines the number of simultaneous requests made towards the NIST API;
+ # the fixed number of client requests (30) is divided by the sem factor, so the lower
+ # it is set, the more simultaneous requests are made.
+ DOWNLOAD_SEM_FACTOR = float(
+ os.getenv("DOWNLOAD_SEM_FACTOR", 0.0)
+ ) # if set, should be set >=0.6
+ DOWNLOAD_SLEEP_MIN = float(os.getenv("DOWNLOAD_SLEEP_MIN", 0.5))
+ DOWNLOAD_SLEEP_MAX = float(os.getenv("DOWNLOAD_SLEEP_MAX", 2.5))
+ DOWNLOAD_BATCH_RANGE = os.getenv("DOWNLOAD_BATCH_RANGE", None)
+
  def __repr__(self):
  return f"<< CveXploreConfiguration >>"
diff --git a/CveXplore/core/nvd_nist/nvd_nist_api.py b/CveXplore/core/nvd_nist/nvd_nist_api.py
@@ -4,6 +4,7 @@
 import math
 import random
 import time
+import uuid
 from collections import namedtuple
 from datetime import datetime, timedelta
 from json import JSONDecodeError
@@ -338,10 +339,13 @@ def __init__(self, api_data: ApiData):
  self._current_index = api_data.start_index
  self.api_data = api_data
 
- self.sem_factor = 6
+ if self.config.DOWNLOAD_SEM_FACTOR != 0.0:
+ self.sem_factor = self.config.DOWNLOAD_SEM_FACTOR
+ else:
+ self.sem_factor = 6
 
- if not self.api_data.api_handle.api_key_limit:
- self.sem_factor = 0.6
+  if not self.api_data.api_handle.api_key_limit:
+  self.sem_factor = 0.6
 
  self.logger.debug(f"Using sem factor: {self.sem_factor}")
 
@@ -375,10 +379,20 @@ def __next__(self):
 
  self.workload = []
 
- if self.api_data.api_handle.api_key_limit:
- batch_range = 5
+ if self.config.DOWNLOAD_BATCH_RANGE is None:
+ if self.api_data.api_handle.api_key_limit:
+ batch_range = 5
+ else:
+ batch_range = 45
  else:
- batch_range = 45
+ try:
+ batch_range = int(self.config.DOWNLOAD_BATCH_RANGE)
+ except ValueError:
+ self.logger.error(
+ f"Invalid value for DOWNLOAD_BATCH_RANGE, {self.config.DOWNLOAD_BATCH_RANGE} "
+ f"cannot be converted into an integer..."
+ )
+ raise
 
  for i in range(batch_range):
  if not self.first_iteration:
@@ -419,11 +433,12 @@ def process_async(self):
 
  @retry(retry_policy)
  async def fetch(self, session: aiohttp.ClientSession, url: str):
+ request_id = uuid.uuid4()
  try:
  async with session.get(
  url, proxy=self.config.HTTP_PROXY_STRING
  ) as response:
- self.logger.debug(f"Sending request to url: {url}")
+ self.logger.debug(f"[{request_id}] Sending request to url: {url}")
  if response.status == 200:
  data = await response.json()
  if "format" in data:
@@ -462,8 +477,15 @@ async def fetch(self, session: aiohttp.ClientSession, url: str):
  except ContentTypeError:
  return ApiDataRetrievalFailed(url)
  finally:
- self.logger.debug(f"Finished request to url: {url}")
- time.sleep(self.sem_factor / 2)
+ random_sleep = round(
+ random.SystemRandom().uniform(
+ self.config.DOWNLOAD_SLEEP_MIN, self.config.DOWNLOAD_SLEEP_MAX
+ ),
+ 1,
+ )
+ self.logger.debug(f"[{request_id}] Sleeping for {random_sleep} secs...")
+ await asyncio.sleep(random_sleep)
+ self.logger.debug(f"[{request_id}] Finished request")
 
  async def fetch_all(self, loop):
  sem = asyncio.Semaphore(math.ceil(30 / self.sem_factor))