From 3759480ea74e6fb6e727ae6801404779b9238158 Mon Sep 17 00:00:00 2001 From: Paul Tikken Date: Thu, 18 Jan 2024 13:27:33 +0000 Subject: [PATCH 1/4] [CveXplore-257] additional version parsing for better cpe matching results --- CveXplore/.schema_version | 2 +- CveXplore/VERSION | 2 +- .../database_maintenance/sources_process.py | 79 +++++++++++++++++-- .../single_cve_download_and_proces.py | 24 ------ 4 files changed, 76 insertions(+), 31 deletions(-) delete mode 100644 debug_scripts/single_cve_download_and_proces.py diff --git a/CveXplore/.schema_version b/CveXplore/.schema_version index 78215650..d38695d7 100644 --- a/CveXplore/.schema_version +++ b/CveXplore/.schema_version @@ -1,4 +1,4 @@ { - "version": "1.7", + "version": "1.8", "rebuild_needed": true } diff --git a/CveXplore/VERSION b/CveXplore/VERSION index 071e17fe..a6b633f4 100644 --- a/CveXplore/VERSION +++ b/CveXplore/VERSION @@ -1 +1 @@ -0.3.20.dev18 \ No newline at end of file +0.3.20.dev19 \ No newline at end of file diff --git a/CveXplore/core/database_maintenance/sources_process.py b/CveXplore/core/database_maintenance/sources_process.py index dfa7b639..a5e97d24 100644 --- a/CveXplore/core/database_maintenance/sources_process.py +++ b/CveXplore/core/database_maintenance/sources_process.py @@ -57,12 +57,81 @@ def padded_version(version: str): if version == "-" or version == "": return version else: + # normalizing edge cases: + version = version.replace('\\(', ".").replace("\\)", ".").rstrip(".") + ret_list = [] - for v in version.split("."): - try: - ret_list.append(f"{int(v):05d}") - except ValueError: - ret_list.append(v.rjust(5, "0")) + + splitted_version = version.split(".") + # perform check if last part of version can be cast to an int + try: + int(splitted_version[-1]) + # can be cast to an int, proceed 'normally' + for v in splitted_version: + try: + ret_list.append(f"{int(v):05d}") + except ValueError: + ret_list.append(v.rjust(5, "0")) + except ValueError: + # last part of the version cannot be cast to an int, so this means it's either a string or a + # string combined with an integer; handle accordingly + + # first handle all version identifiers leading upto the last part + if len(splitted_version) > 1: + for i in range(len(splitted_version) - 1): + try: + ret_list.append(f"{int(splitted_version[i]):05d}") + except ValueError: + ret_list.append(splitted_version[i].rjust(5, "0")) + + # handle the last part + # check if the last entry is smaller than 5 characters, if so just use that... + if len(splitted_version[-1]) > 5: + try: + ret_list.append(f"{int(splitted_version[-1]):05d}") + except ValueError: + ret_list.append(splitted_version[-1].rjust(5, "0")) + # check is last entry consists only of alphanumeric characters + elif splitted_version[-1].isalpha(): + ret_list.append(splitted_version[-1].rjust(5, "0")) + else: + loop_i = 0 + loop_count = len(splitted_version[-1]) + + # int/str combined value; handle accordingly + while loop_i < loop_count: + current_i = loop_i + # probably digit; so check; + if splitted_version[-1][loop_i].isdigit(): + try: + ret_list.append(f"{int(splitted_version[-1][loop_i]):05d}") + except ValueError: + ret_list.append(splitted_version[-1][loop_i].rjust(5, "0")) + finally: + # perform check if anything that follows consists only of string characters + if splitted_version[-1][loop_i + 1:].isalpha(): + ret_list.append( + splitted_version[-1][loop_i + 1:].rjust(5, "0") + ) + # no point proceeding; just break + break + loop_i += 1 + else: + # ok so probably last part of version identifier is a string; add that with a loop + version_string = "" + try: + while splitted_version[-1][loop_i].isalpha(): + version_string += splitted_version[-1][loop_i] + loop_i += 1 + except IndexError: + # finished splitted_version variable; just pass + loop_i += 1 + pass + + ret_list.append(version_string.rjust(5, "0")) + + if loop_i == current_i: + loop_i += 1 return ".".join(ret_list) diff --git a/debug_scripts/single_cve_download_and_proces.py b/debug_scripts/single_cve_download_and_proces.py deleted file mode 100644 index 9a34a70b..00000000 --- a/debug_scripts/single_cve_download_and_proces.py +++ /dev/null @@ -1,24 +0,0 @@ -import json -import os - -from CveXplore.common.config import Configuration -from CveXplore.core.database_maintenance.sources_process import CVEDownloads -from CveXplore.core.nvd_nist.nvd_nist_api import NvdNistApi - -config = Configuration - -os.environ["MONGODB_CON_DETAILS"] = json.dumps( - { - "host": f"{config.DATASOURCE_PROTOCOL}://{config.MONGODB_HOST}:{config.MONGODB_PORT}" - } -) - -cvd = CVEDownloads() - -nvd = NvdNistApi() - -cvd.is_update = False - -data = nvd.call(nvd.methods.GET, resource={"cveId": "CVE-2016-2148"}, data=1) - -cvd.process_the_item(data["vulnerabilities"][0]) From f96d7ab555b412fe4c4f171ceeae4a856e902f40 Mon Sep 17 00:00:00 2001 From: Paul Tikken Date: Thu, 18 Jan 2024 14:11:17 +0000 Subject: [PATCH 2/4] [CveXplore-257] centralized padded version and stem method to NVDAPIHANDLER class --- .../core/database_maintenance/api_handlers.py | 92 +++++++++++++++ .../database_maintenance/sources_process.py | 107 ------------------ 2 files changed, 92 insertions(+), 107 deletions(-) diff --git a/CveXplore/core/database_maintenance/api_handlers.py b/CveXplore/core/database_maintenance/api_handlers.py index 01d94055..99d9ae0f 100644 --- a/CveXplore/core/database_maintenance/api_handlers.py +++ b/CveXplore/core/database_maintenance/api_handlers.py @@ -34,6 +34,98 @@ def process_item(self, item: dict): # ).entry return item + @staticmethod + def stem(cpe_uri: str): + cpe_stem = cpe_uri.split(":") + return ":".join(cpe_stem[:5]) + + @staticmethod + def padded_version(version: str): + if version == "-" or version == "": + return version + else: + # normalizing edge cases: + version = version.replace("\\(", ".").replace("\\)", ".").rstrip(".") + + ret_list = [] + + splitted_version = version.split(".") + # perform check if last part of version can be cast to an int + try: + int(splitted_version[-1]) + # can be cast to an int, proceed 'normally' + for v in splitted_version: + try: + ret_list.append(f"{int(v):05d}") + except ValueError: + ret_list.append(v.rjust(5, "0")) + except ValueError: + # last part of the version cannot be cast to an int, so this means it's either a string or a + # string combined with an integer; handle accordingly + + # first handle all version identifiers leading upto the last part + if len(splitted_version) > 1: + for i in range(len(splitted_version) - 1): + try: + ret_list.append(f"{int(splitted_version[i]):05d}") + except ValueError: + ret_list.append(splitted_version[i].rjust(5, "0")) + + # handle the last part + # check if the last entry is smaller than 5 characters, if so just use that... + if len(splitted_version[-1]) > 5: + try: + ret_list.append(f"{int(splitted_version[-1]):05d}") + except ValueError: + ret_list.append(splitted_version[-1].rjust(5, "0")) + # check is last entry consists only of alphanumeric characters + elif splitted_version[-1].isalpha(): + ret_list.append(splitted_version[-1].rjust(5, "0")) + else: + loop_i = 0 + loop_count = len(splitted_version[-1]) + + # int/str combined value; handle accordingly + while loop_i < loop_count: + current_i = loop_i + # probably digit; so check; + if splitted_version[-1][loop_i].isdigit(): + try: + ret_list.append( + f"{int(splitted_version[-1][loop_i]):05d}" + ) + except ValueError: + ret_list.append( + splitted_version[-1][loop_i].rjust(5, "0") + ) + finally: + # perform check if anything that follows consists only of string characters + if splitted_version[-1][loop_i + 1 :].isalpha(): + ret_list.append( + splitted_version[-1][loop_i + 1 :].rjust(5, "0") + ) + # no point proceeding; just break + break + loop_i += 1 + else: + # ok so probably last part of version identifier is a string; add that with a loop + version_string = "" + try: + while splitted_version[-1][loop_i].isalpha(): + version_string += splitted_version[-1][loop_i] + loop_i += 1 + except IndexError: + # finished splitted_version variable; just pass + loop_i += 1 + pass + + ret_list.append(version_string.rjust(5, "0")) + + if loop_i == current_i: + loop_i += 1 + + return ".".join(ret_list) + @abstractmethod def process_the_item(self, *args): raise NotImplementedError diff --git a/CveXplore/core/database_maintenance/sources_process.py b/CveXplore/core/database_maintenance/sources_process.py index a5e97d24..86398c80 100644 --- a/CveXplore/core/database_maintenance/sources_process.py +++ b/CveXplore/core/database_maintenance/sources_process.py @@ -47,94 +47,6 @@ def __init__(self): def file_to_queue(self, *args): pass - @staticmethod - def stem(cpe_uri: str): - cpe_stem = cpe_uri.split(":") - return ":".join(cpe_stem[:5]) - - @staticmethod - def padded_version(version: str): - if version == "-" or version == "": - return version - else: - # normalizing edge cases: - version = version.replace('\\(', ".").replace("\\)", ".").rstrip(".") - - ret_list = [] - - splitted_version = version.split(".") - # perform check if last part of version can be cast to an int - try: - int(splitted_version[-1]) - # can be cast to an int, proceed 'normally' - for v in splitted_version: - try: - ret_list.append(f"{int(v):05d}") - except ValueError: - ret_list.append(v.rjust(5, "0")) - except ValueError: - # last part of the version cannot be cast to an int, so this means it's either a string or a - # string combined with an integer; handle accordingly - - # first handle all version identifiers leading upto the last part - if len(splitted_version) > 1: - for i in range(len(splitted_version) - 1): - try: - ret_list.append(f"{int(splitted_version[i]):05d}") - except ValueError: - ret_list.append(splitted_version[i].rjust(5, "0")) - - # handle the last part - # check if the last entry is smaller than 5 characters, if so just use that... - if len(splitted_version[-1]) > 5: - try: - ret_list.append(f"{int(splitted_version[-1]):05d}") - except ValueError: - ret_list.append(splitted_version[-1].rjust(5, "0")) - # check is last entry consists only of alphanumeric characters - elif splitted_version[-1].isalpha(): - ret_list.append(splitted_version[-1].rjust(5, "0")) - else: - loop_i = 0 - loop_count = len(splitted_version[-1]) - - # int/str combined value; handle accordingly - while loop_i < loop_count: - current_i = loop_i - # probably digit; so check; - if splitted_version[-1][loop_i].isdigit(): - try: - ret_list.append(f"{int(splitted_version[-1][loop_i]):05d}") - except ValueError: - ret_list.append(splitted_version[-1][loop_i].rjust(5, "0")) - finally: - # perform check if anything that follows consists only of string characters - if splitted_version[-1][loop_i + 1:].isalpha(): - ret_list.append( - splitted_version[-1][loop_i + 1:].rjust(5, "0") - ) - # no point proceeding; just break - break - loop_i += 1 - else: - # ok so probably last part of version identifier is a string; add that with a loop - version_string = "" - try: - while splitted_version[-1][loop_i].isalpha(): - version_string += splitted_version[-1][loop_i] - loop_i += 1 - except IndexError: - # finished splitted_version variable; just pass - loop_i += 1 - pass - - ret_list.append(version_string.rjust(5, "0")) - - if loop_i == current_i: - loop_i += 1 - - return ".".join(ret_list) - @staticmethod def parse_cpe_version(cpename: str): cpe_list = cpename.split(":") @@ -459,25 +371,6 @@ def get_vendor_product(cpeUri: str): product = cpeUri.split(":")[4] return vendor, product - @staticmethod - def stem(cpeUri: str): - cpeArr = cpeUri.split(":") - return ":".join(cpeArr[:5]) - - @staticmethod - def padded_version(version: str): - if version == "-" or version == "": - return version - else: - ret_list = [] - for v in version.split("."): - try: - ret_list.append(f"{int(v):05d}") - except ValueError: - ret_list.append(v.rjust(5, "0")) - - return ".".join(ret_list) - def file_to_queue(self, *args): pass From aba2681d60377c4025998a0de100cc307a6ccc09 Mon Sep 17 00:00:00 2001 From: Paul Tikken Date: Thu, 18 Jan 2024 15:38:56 +0000 Subject: [PATCH 3/4] [CveXplore-253] loggers updated --- CveXplore/VERSION | 2 +- .../database_maintenance/download_handler.py | 23 +++++++++++------- .../core/database_maintenance/main_updater.py | 3 +++ .../database_maintenance/update_base_class.py | 24 ++++++++++++------- .../core/logging/handlers/cve_explore_rfh.py | 6 +++++ .../logging/handlers/cve_explore_stream.py | 6 +++++ 6 files changed, 46 insertions(+), 18 deletions(-) create mode 100644 CveXplore/core/logging/handlers/cve_explore_rfh.py create mode 100644 CveXplore/core/logging/handlers/cve_explore_stream.py diff --git a/CveXplore/VERSION b/CveXplore/VERSION index a6b633f4..fdfcea3c 100644 --- a/CveXplore/VERSION +++ b/CveXplore/VERSION @@ -1 +1 @@ -0.3.20.dev19 \ No newline at end of file +0.3.20.dev20 \ No newline at end of file diff --git a/CveXplore/core/database_maintenance/download_handler.py b/CveXplore/core/database_maintenance/download_handler.py index 972338f6..ff011c3d 100644 --- a/CveXplore/core/database_maintenance/download_handler.py +++ b/CveXplore/core/database_maintenance/download_handler.py @@ -15,7 +15,6 @@ from datetime import timedelta from io import BytesIO from itertools import islice -from logging.handlers import RotatingFileHandler from shutil import copy from typing import Tuple @@ -30,6 +29,8 @@ from CveXplore.core.general.utils import sanitize from CveXplore.core.worker_queue.worker_q import WorkerQueue from ..database_indexer.db_indexer import DatabaseIndexer +from ..logging.handlers.cve_explore_rfh import CveExploreUpdateRfhHandler +from ..logging.handlers.cve_explore_stream import CveExploreUpdateStreamHandler from ..logging.logger_class import AppLogger from ...database.connection.database_connection import DatabaseConnection @@ -91,15 +92,12 @@ def __init__( crf = None - cli = logging.StreamHandler(stream=sys.stdout) + cli = CveExploreUpdateStreamHandler(stream=sys.stdout) cli.setFormatter(self.formatter) cli.setLevel(logging.INFO) - if self.config.LOGGING_FILE_PATH != "": - if not os.path.exists(self.config.LOGGING_FILE_PATH): - os.makedirs(self.config.LOGGING_FILE_PATH) - - crf = RotatingFileHandler( + if self.config.LOGGING_TO_FILE: + crf = CveExploreUpdateRfhHandler( filename=f"{self.config.LOGGING_FILE_PATH}/{self.config.LOGGING_UPDATE_FILE_NAME}", maxBytes=self.config.LOGGING_MAX_FILE_SIZE, backupCount=self.config.LOGGING_BACKLOG, @@ -107,7 +105,16 @@ def __init__( crf.setLevel(logging.DEBUG) crf.setFormatter(self.formatter) - if not len(self.logger.handlers): + if len(self.logger.handlers) > 0: + for handler in self.logger.handlers: + # add the handlers to the logger + # makes sure no duplicate handlers are added + if not isinstance(handler, CveExploreUpdateRfhHandler): + if crf is not None: + self.logger.addHandler(crf) + if not isinstance(handler, CveExploreUpdateStreamHandler): + self.logger.addHandler(cli) + else: if crf is not None: self.logger.addHandler(crf) self.logger.addHandler(cli) diff --git a/CveXplore/core/database_maintenance/main_updater.py b/CveXplore/core/database_maintenance/main_updater.py index 2bcac939..20462ad7 100644 --- a/CveXplore/core/database_maintenance/main_updater.py +++ b/CveXplore/core/database_maintenance/main_updater.py @@ -53,6 +53,9 @@ def __init__(self, datasource: DatabaseConnectionBase): self.do_initialize = False + def __repr__(self): + return f"<>" + def validate_schema(self): return self.schema_checker.validate_schema() diff --git a/CveXplore/core/database_maintenance/update_base_class.py b/CveXplore/core/database_maintenance/update_base_class.py index fdb1851d..fc77e191 100644 --- a/CveXplore/core/database_maintenance/update_base_class.py +++ b/CveXplore/core/database_maintenance/update_base_class.py @@ -1,9 +1,9 @@ import logging -import os import sys -from logging.handlers import RotatingFileHandler from CveXplore.common.config import Configuration +from CveXplore.core.logging.handlers.cve_explore_rfh import CveExploreUpdateRfhHandler +from CveXplore.core.logging.handlers.cve_explore_stream import CveExploreUpdateStreamHandler class UpdateBaseClass(object): @@ -21,15 +21,12 @@ def __init__(self, logger_name: str): crf = None - cli = logging.StreamHandler(stream=sys.stdout) + cli = CveExploreUpdateStreamHandler(stream=sys.stdout) cli.setFormatter(self.formatter) cli.setLevel(logging.INFO) - if self.config.LOGGING_FILE_PATH != "": - if not os.path.exists(self.config.LOGGING_FILE_PATH): - os.makedirs(self.config.LOGGING_FILE_PATH) - - crf = RotatingFileHandler( + if self.config.LOGGING_TO_FILE: + crf = CveExploreUpdateRfhHandler( filename=f"{self.config.LOGGING_FILE_PATH}/{self.config.LOGGING_UPDATE_FILE_NAME}", maxBytes=self.config.LOGGING_MAX_FILE_SIZE, backupCount=self.config.LOGGING_BACKLOG, @@ -37,7 +34,16 @@ def __init__(self, logger_name: str): crf.setLevel(logging.DEBUG) crf.setFormatter(self.formatter) - if not len(self.logger.handlers): + if len(self.logger.handlers) > 0: + for handler in self.logger.handlers: + # add the handlers to the logger + # makes sure no duplicate handlers are added + if not isinstance(handler, CveExploreUpdateRfhHandler): + if crf is not None: + self.logger.addHandler(crf) + if not isinstance(handler, CveExploreUpdateStreamHandler): + self.logger.addHandler(cli) + else: if crf is not None: self.logger.addHandler(crf) self.logger.addHandler(cli) diff --git a/CveXplore/core/logging/handlers/cve_explore_rfh.py b/CveXplore/core/logging/handlers/cve_explore_rfh.py new file mode 100644 index 00000000..d09645b8 --- /dev/null +++ b/CveXplore/core/logging/handlers/cve_explore_rfh.py @@ -0,0 +1,6 @@ +from logging.handlers import RotatingFileHandler + + +class CveExploreUpdateRfhHandler(RotatingFileHandler): + def __init__(self, **kwargs): + super().__init__(**kwargs) diff --git a/CveXplore/core/logging/handlers/cve_explore_stream.py b/CveXplore/core/logging/handlers/cve_explore_stream.py new file mode 100644 index 00000000..5bed145d --- /dev/null +++ b/CveXplore/core/logging/handlers/cve_explore_stream.py @@ -0,0 +1,6 @@ +from logging import StreamHandler + + +class CveExploreUpdateStreamHandler(StreamHandler): + def __init__(self, **kwargs): + super().__init__(**kwargs) From c6e3a8723994f52bd3b021d7c63ede984a2664f0 Mon Sep 17 00:00:00 2001 From: Paul Tikken Date: Thu, 18 Jan 2024 15:55:48 +0000 Subject: [PATCH 4/4] [CveXplore-253] loggers updated --- CveXplore/VERSION | 2 +- .../core/database_maintenance/download_handler.py | 8 +++++--- .../core/database_maintenance/update_base_class.py | 12 ++++++++---- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/CveXplore/VERSION b/CveXplore/VERSION index fdfcea3c..2ec281db 100644 --- a/CveXplore/VERSION +++ b/CveXplore/VERSION @@ -1 +1 @@ -0.3.20.dev20 \ No newline at end of file +0.3.20.dev21 \ No newline at end of file diff --git a/CveXplore/core/database_maintenance/download_handler.py b/CveXplore/core/database_maintenance/download_handler.py index ff011c3d..95b6da5f 100644 --- a/CveXplore/core/database_maintenance/download_handler.py +++ b/CveXplore/core/database_maintenance/download_handler.py @@ -82,7 +82,8 @@ def __init__( self.logger = logging.getLogger(logger_name) - self.logger.removeHandler(self.logger.handlers[0]) + if len(self.logger.handlers) == 1: + self.logger.removeHandler(self.logger.handlers[0]) self.logger.propagate = False @@ -109,10 +110,11 @@ def __init__( for handler in self.logger.handlers: # add the handlers to the logger # makes sure no duplicate handlers are added - if not isinstance(handler, CveExploreUpdateRfhHandler): + if not isinstance( + handler, CveExploreUpdateRfhHandler + ) and not isinstance(handler, CveExploreUpdateStreamHandler): if crf is not None: self.logger.addHandler(crf) - if not isinstance(handler, CveExploreUpdateStreamHandler): self.logger.addHandler(cli) else: if crf is not None: diff --git a/CveXplore/core/database_maintenance/update_base_class.py b/CveXplore/core/database_maintenance/update_base_class.py index fc77e191..cfea149c 100644 --- a/CveXplore/core/database_maintenance/update_base_class.py +++ b/CveXplore/core/database_maintenance/update_base_class.py @@ -3,7 +3,9 @@ from CveXplore.common.config import Configuration from CveXplore.core.logging.handlers.cve_explore_rfh import CveExploreUpdateRfhHandler -from CveXplore.core.logging.handlers.cve_explore_stream import CveExploreUpdateStreamHandler +from CveXplore.core.logging.handlers.cve_explore_stream import ( + CveExploreUpdateStreamHandler, +) class UpdateBaseClass(object): @@ -11,7 +13,8 @@ def __init__(self, logger_name: str): self.config = Configuration self.logger = logging.getLogger(logger_name) - self.logger.removeHandler(self.logger.handlers[0]) + if len(self.logger.handlers) == 1: + self.logger.removeHandler(self.logger.handlers[0]) self.logger.propagate = False @@ -38,10 +41,11 @@ def __init__(self, logger_name: str): for handler in self.logger.handlers: # add the handlers to the logger # makes sure no duplicate handlers are added - if not isinstance(handler, CveExploreUpdateRfhHandler): + if not isinstance( + handler, CveExploreUpdateRfhHandler + ) and not isinstance(handler, CveExploreUpdateStreamHandler): if crf is not None: self.logger.addHandler(crf) - if not isinstance(handler, CveExploreUpdateStreamHandler): self.logger.addHandler(cli) else: if crf is not None: