diff --git a/examples/minimal_example/main.py b/examples/minimal_example/main.py
index 50dbda3..b581e54 100644
--- a/examples/minimal_example/main.py
+++ b/examples/minimal_example/main.py
@@ -10,6 +10,9 @@
 """
 
+import sys
+import logging
+
 from pathlib import Path
 from json import load, dumps
 from argparse import ArgumentParser
@@ -18,24 +21,57 @@
 from my_parsers import time_parser, yml_parser
 
+stderr = logging.StreamHandler(stream=sys.stderr)
+
+simple_format = logging.Formatter("%(levelname)s : %(message)s")
+info_format = logging.Formatter("%(levelname)s : %(module)s : %(message)s")
+full_format = logging.Formatter(
+    "\n%(name)s | %(asctime)s | %(levelname)s : %(levelno)s |"
+    + " %(filename)s : %(funcName)s : %(lineno)s | %(processName)s : %(process)d | %(message)s\n"
+)
+
+LOG = logging.getLogger(__name__)
+
 arg_parser = ArgumentParser()
 arg_parser.add_argument("--verbosity", type=str, default="info")
 args = arg_parser.parse_args()
 
 
+def set_level(level: str) -> None:
+    """
+    Configure the root logger level and the stderr handler format.
+
+    Arguments:
+        level: logging level as string, available levels: warning, info, debug.
+
+    Raises:
+        ValueError: if an unknown logging level is given.
+    """
+    if level == "warning":
+        stderr.setFormatter(simple_format)
+        logging.basicConfig(level=logging.WARNING, handlers=(stderr,))
+    elif level == "info":
+        stderr.setFormatter(info_format)
+        logging.basicConfig(level=logging.INFO, handlers=(stderr,))
+    elif level == "debug":
+        stderr.setFormatter(full_format)
+        logging.basicConfig(level=logging.DEBUG, handlers=(stderr,))
+    else:
+        raise ValueError(f"Incorrect logging level: {level}")
+
+
 if __name__ == "__main__":
+    set_level(args.verbosity)
     config_path = Path(__file__).parent.resolve() / "config.json"
     if not config_path.is_file():
         raise FileNotFoundError(f"No config file at: {config_path}")
 
     with config_path.open("r") as f:
         config = load(f)
-    config["verbosity"] = args.verbosity
 
     arch = Archivist(path="metadata_archive.tar", parsers=[time_parser(), yml_parser()], **config)
 
     arch.parse()
     arch.export()
 
-    print("\nResulting metadata:")
-    print(dumps(arch.get_metadata(), indent=4))
+    LOG.info("Resulting metadata:\n%s", dumps(arch.get_metadata(), indent=4))
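Each example now configures logging at its entry point instead of passing a `verbosity` option through the `Archivist` config. Condensed, the pattern every example script follows is sketched below (for orientation only, not part of the patch):

    import logging
    import sys

    LOG = logging.getLogger(__name__)

    handler = logging.StreamHandler(stream=sys.stderr)
    handler.setFormatter(logging.Formatter("%(levelname)s : %(message)s"))
    logging.basicConfig(level=logging.INFO, handlers=(handler,))

    LOG.info("routed through the root handler configured above")

Because `logging.basicConfig` does nothing once the root logger already has handlers, each script calls its `set_level` helper exactly once, before any library code emits records.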
diff --git a/examples/namelists_example/main.py b/examples/namelists_example/main.py
index 65eac33..7cc113f 100644
--- a/examples/namelists_example/main.py
+++ b/examples/namelists_example/main.py
@@ -11,6 +11,9 @@
 """
 
+import sys
+import logging
+
 from pathlib import Path
 from json import load, dumps
 from argparse import ArgumentParser
@@ -19,24 +22,57 @@
 from my_parser import nml_parser
 
+stderr = logging.StreamHandler(stream=sys.stderr)
+
+simple_format = logging.Formatter("%(levelname)s : %(message)s")
+info_format = logging.Formatter("%(levelname)s : %(module)s : %(message)s")
+full_format = logging.Formatter(
+    "\n%(name)s | %(asctime)s | %(levelname)s : %(levelno)s |"
+    + " %(filename)s : %(funcName)s : %(lineno)s | %(processName)s : %(process)d | %(message)s\n"
+)
+
+LOG = logging.getLogger(__name__)
+
 arg_parser = ArgumentParser()
 arg_parser.add_argument("--verbosity", type=str, default="info")
 args = arg_parser.parse_args()
 
 
+def set_level(level: str) -> None:
+    """
+    Configure the root logger level and the stderr handler format.
+
+    Arguments:
+        level: logging level as string, available levels: warning, info, debug.
+
+    Raises:
+        ValueError: if an unknown logging level is given.
+    """
+    if level == "warning":
+        stderr.setFormatter(simple_format)
+        logging.basicConfig(level=logging.WARNING, handlers=(stderr,))
+    elif level == "info":
+        stderr.setFormatter(info_format)
+        logging.basicConfig(level=logging.INFO, handlers=(stderr,))
+    elif level == "debug":
+        stderr.setFormatter(full_format)
+        logging.basicConfig(level=logging.DEBUG, handlers=(stderr,))
+    else:
+        raise ValueError(f"Incorrect logging level: {level}")
+
+
 if __name__ == "__main__":
+    set_level(args.verbosity)
     config_path = Path(__file__).parent.resolve() / "config.json"
     if not config_path.is_file():
         raise FileNotFoundError(f"No config file at: {config_path}")
 
     with config_path.open("r") as f:
         config = load(f)
-    config["verbosity"] = args.verbosity
 
     arch = Archivist(path="metadata_archive.tar", parsers=nml_parser(), **config)
 
     arch.parse()
     arch.export()
 
-    print("\nResulting metadata:")
-    print(dumps(arch.get_metadata(), indent=4))
+    LOG.info("Resulting metadata:\n%s", dumps(arch.get_metadata(), indent=4))
diff --git a/examples/ncdump_example/main.py b/examples/ncdump_example/main.py
index f1259da..9d2aa8d 100644
--- a/examples/ncdump_example/main.py
+++ b/examples/ncdump_example/main.py
@@ -8,6 +8,9 @@
 """
 
+import sys
+import logging
+
 from pathlib import Path
 from json import load, dumps
 from argparse import ArgumentParser
@@ -16,24 +19,57 @@
 from my_parser import ncdump_hs_parser
 
+stderr = logging.StreamHandler(stream=sys.stderr)
+
+simple_format = logging.Formatter("%(levelname)s : %(message)s")
+info_format = logging.Formatter("%(levelname)s : %(module)s : %(message)s")
+full_format = logging.Formatter(
+    "\n%(name)s | %(asctime)s | %(levelname)s : %(levelno)s |"
+    + " %(filename)s : %(funcName)s : %(lineno)s | %(processName)s : %(process)d | %(message)s\n"
+)
+
+LOG = logging.getLogger(__name__)
+
 arg_parser = ArgumentParser()
 arg_parser.add_argument("--verbosity", type=str, default="info")
 args = arg_parser.parse_args()
 
 
+def set_level(level: str) -> None:
+    """
+    Configure the root logger level and the stderr handler format.
+
+    Arguments:
+        level: logging level as string, available levels: warning, info, debug.
+
+    Raises:
+        ValueError: if an unknown logging level is given.
+    """
+    if level == "warning":
+        stderr.setFormatter(simple_format)
+        logging.basicConfig(level=logging.WARNING, handlers=(stderr,))
+    elif level == "info":
+        stderr.setFormatter(info_format)
+        logging.basicConfig(level=logging.INFO, handlers=(stderr,))
+    elif level == "debug":
+        stderr.setFormatter(full_format)
+        logging.basicConfig(level=logging.DEBUG, handlers=(stderr,))
+    else:
+        raise ValueError(f"Incorrect logging level: {level}")
+
+
 if __name__ == "__main__":
+    set_level(args.verbosity)
     config_path = Path(__file__).parent.resolve() / "config.json"
     if not config_path.is_file():
         raise FileNotFoundError(f"No config file at: {config_path}")
 
     with config_path.open("r") as f:
         config = load(f)
-    config["verbosity"] = args.verbosity
 
     arch = Archivist(path="metadata_archive.tar", parsers=ncdump_hs_parser(), **config)
 
     arch.parse()
     arch.export()
 
-    print("\nResulting metadata:")
-    print(dumps(arch.get_metadata(), indent=4))
+    LOG.info("Resulting metadata:\n%s", dumps(arch.get_metadata(), indent=4))
diff --git a/examples/schema_example1/main.py b/examples/schema_example1/main.py
index d64711f..7abb113 100644
--- a/examples/schema_example1/main.py
+++ b/examples/schema_example1/main.py
@@ -11,6 +11,9 @@
 """
 
+import sys
+import logging
+
 from pathlib import Path
 from json import dumps, dump
 from argparse import ArgumentParser
@@ -19,11 +22,21 @@
 from my_parsers import time_parser, yml_parser
 
+stderr = logging.StreamHandler(stream=sys.stderr)
+
+simple_format = logging.Formatter("%(levelname)s : %(message)s")
+info_format = logging.Formatter("%(levelname)s : %(module)s : %(message)s")
+full_format = logging.Formatter(
+    "\n%(name)s | %(asctime)s | %(levelname)s : %(levelno)s |"
+    + " %(filename)s : %(funcName)s : %(lineno)s | %(processName)s : %(process)d | %(message)s\n"
+)
+
+LOG = logging.getLogger(__name__)
+
 arg_parser = ArgumentParser()
 arg_parser.add_argument("--verbosity", type=str, default="info")
 args = arg_parser.parse_args()
 
-
 my_schema = {
     "$schema": "https://json-schema.org/draft/2020-12/schema",
     "description": "my example schema",
@@ -45,7 +58,31 @@
 }
 
 
+def set_level(level: str) -> None:
+    """
+    Configure the root logger level and the stderr handler format.
+
+    Arguments:
+        level: logging level as string, available levels: warning, info, debug.
+
+    Raises:
+        ValueError: if an unknown logging level is given.
+    """
+    if level == "warning":
+        stderr.setFormatter(simple_format)
+        logging.basicConfig(level=logging.WARNING, handlers=(stderr,))
+    elif level == "info":
+        stderr.setFormatter(info_format)
+        logging.basicConfig(level=logging.INFO, handlers=(stderr,))
+    elif level == "debug":
+        stderr.setFormatter(full_format)
+        logging.basicConfig(level=logging.DEBUG, handlers=(stderr,))
+    else:
+        raise ValueError(f"Incorrect logging level: {level}")
+
+
 if __name__ == "__main__":
+    set_level(args.verbosity)
     arch = Archivist(
         path="metadata_archive.tar",
         parsers=[time_parser(), yml_parser()],
@@ -55,17 +92,14 @@
         output_file="metadata.json",
         overwrite=True,
         auto_cleanup=True,
-        verbosity=args.verbosity,
     )
 
     arch.parse()
     arch.export()
 
-    print("\nResulting schema:")
     formatted_schema = arch.get_formatted_schema()
-    print(dumps(formatted_schema, indent=4))
+    LOG.info("Resulting schema:\n%s", dumps(formatted_schema, indent=4))
 
     with Path("schema.json").open("w") as f:
         dump(formatted_schema, f, indent=4)
 
-    print("\nResulting metadata:")
-    print(dumps(arch.get_metadata(), indent=4))
+    LOG.info("Resulting metadata:\n%s", dumps(arch.get_metadata(), indent=4))
diff --git a/examples/schema_example2/main.py b/examples/schema_example2/main.py
index edfbe38..0bc4d2c 100644
--- a/examples/schema_example2/main.py
+++ b/examples/schema_example2/main.py
@@ -11,6 +11,9 @@
 """
 
+import sys
+import logging
+
 from pathlib import Path
 from json import dumps, dump
 from argparse import ArgumentParser
@@ -24,11 +27,21 @@
 )
 
+stderr = logging.StreamHandler(stream=sys.stderr)
+
+simple_format = logging.Formatter("%(levelname)s : %(message)s")
+info_format = logging.Formatter("%(levelname)s : %(module)s : %(message)s")
+full_format = logging.Formatter(
+    "\n%(name)s | %(asctime)s | %(levelname)s : %(levelno)s |"
+    + " %(filename)s : %(funcName)s : %(lineno)s | %(processName)s : %(process)d | %(message)s\n"
+)
+
+LOG = logging.getLogger(__name__)
+
 arg_parser = ArgumentParser()
 arg_parser.add_argument("--verbosity", type=str, default="info")
 args = arg_parser.parse_args()
 
-
 my_schema = {
     "$schema": "https://json-schema.org/draft/2020-12/schema",
     "description": "my example schema 2",
@@ -70,7 +83,31 @@
 }
 
 
+def set_level(level: str) -> None:
+    """
+    Configure the root logger level and the stderr handler format.
+
+    Arguments:
+        level: logging level as string, available levels: warning, info, debug.
+
+    Raises:
+        ValueError: if an unknown logging level is given.
+    """
+    if level == "warning":
+        stderr.setFormatter(simple_format)
+        logging.basicConfig(level=logging.WARNING, handlers=(stderr,))
+    elif level == "info":
+        stderr.setFormatter(info_format)
+        logging.basicConfig(level=logging.INFO, handlers=(stderr,))
+    elif level == "debug":
+        stderr.setFormatter(full_format)
+        logging.basicConfig(level=logging.DEBUG, handlers=(stderr,))
+    else:
+        raise ValueError(f"Incorrect logging level: {level}")
+
+
 if __name__ == "__main__":
+    set_level(args.verbosity)
     arch = Archivist(
         path="metadata_archive.tar",
         parsers=[
@@ -85,7 +122,6 @@
         output_file="metadata.json",
         overwrite=True,
         auto_cleanup=True,
-        verbosity=args.verbosity,
         add_description=True,
         add_type=True,
     )
@@ -93,11 +129,9 @@
 
     arch.parse()
     arch.export()
 
-    print("\nResulting schema:")
     formatted_schema = arch.get_formatted_schema()
-    print(dumps(formatted_schema, indent=4))
+    LOG.info("Resulting schema:\n%s", dumps(formatted_schema, indent=4))
 
     with Path("schema.json").open("w") as f:
         dump(formatted_schema, f, indent=4)
 
-    print("\nResulting metadata:")
-    print(dumps(arch.get_metadata(), indent=4))
+    LOG.info("Resulting metadata:\n%s", dumps(arch.get_metadata(), indent=4))
diff --git a/examples/schema_example3/main.py b/examples/schema_example3/main.py
index c2a801f..2d0044a 100644
--- a/examples/schema_example3/main.py
+++ b/examples/schema_example3/main.py
@@ -11,6 +11,9 @@
 """
 
+import sys
+import logging
+
 from pathlib import Path
 from json import dumps, dump
 from argparse import ArgumentParser
@@ -24,11 +27,21 @@
 )
 
+stderr = logging.StreamHandler(stream=sys.stderr)
+
+simple_format = logging.Formatter("%(levelname)s : %(message)s")
+info_format = logging.Formatter("%(levelname)s : %(module)s : %(message)s")
+full_format = logging.Formatter(
+    "\n%(name)s | %(asctime)s | %(levelname)s : %(levelno)s |"
+    + " %(filename)s : %(funcName)s : %(lineno)s | %(processName)s : %(process)d | %(message)s\n"
+)
+
+LOG = logging.getLogger(__name__)
+
 arg_parser = ArgumentParser()
 arg_parser.add_argument("--verbosity", type=str, default="info")
 args = arg_parser.parse_args()
 
-
 my_schema = {
     "$schema": "https://json-schema.org/draft/2020-12/schema",
     "description": "my example schema 3",
@@ -69,7 +82,31 @@
 }
 
 
+def set_level(level: str) -> None:
+    """
+    Configure the root logger level and the stderr handler format.
+
+    Arguments:
+        level: logging level as string, available levels: warning, info, debug.
+
+    Raises:
+        ValueError: if an unknown logging level is given.
+    """
+    if level == "warning":
+        stderr.setFormatter(simple_format)
+        logging.basicConfig(level=logging.WARNING, handlers=(stderr,))
+    elif level == "info":
+        stderr.setFormatter(info_format)
+        logging.basicConfig(level=logging.INFO, handlers=(stderr,))
+    elif level == "debug":
+        stderr.setFormatter(full_format)
+        logging.basicConfig(level=logging.DEBUG, handlers=(stderr,))
+    else:
+        raise ValueError(f"Incorrect logging level: {level}")
+
+
 if __name__ == "__main__":
+    set_level(args.verbosity)
     arch = Archivist(
         path="metadata_archive.tar",
         parsers=[
@@ -84,17 +121,14 @@
         output_file="metadata.json",
         overwrite=True,
         auto_cleanup=True,
-        verbosity=args.verbosity,
     )
 
     arch.parse()
     arch.export()
 
-    print("\nResulting schema:")
     formatted_schema = arch.get_formatted_schema()
-    print(dumps(formatted_schema, indent=4))
+    LOG.info("Resulting schema:\n%s", dumps(formatted_schema, indent=4))
 
     with Path("schema.json").open("w") as f:
         dump(formatted_schema, f, indent=4)
 
-    print("\nResulting metadata:")
-    print(dumps(arch.get_metadata(), indent=4))
+    LOG.info("Resulting metadata:\n%s", dumps(arch.get_metadata(), indent=4))
diff --git a/examples/schema_example4/main.py b/examples/schema_example4/main.py
index e60643d..436c14c 100644
--- a/examples/schema_example4/main.py
+++ b/examples/schema_example4/main.py
@@ -11,6 +11,9 @@
 """
 
+import sys
+import logging
+
 from pathlib import Path
 from json import dumps, dump
 from argparse import ArgumentParser
@@ -19,11 +22,21 @@
 from my_parsers import time_parser, yml_parser
 
+stderr = logging.StreamHandler(stream=sys.stderr)
+
+simple_format = logging.Formatter("%(levelname)s : %(message)s")
+info_format = logging.Formatter("%(levelname)s : %(module)s : %(message)s")
+full_format = logging.Formatter(
+    "\n%(name)s | %(asctime)s | %(levelname)s : %(levelno)s |"
+    + " %(filename)s : %(funcName)s : %(lineno)s | %(processName)s : %(process)d | %(message)s\n"
+)
+
+LOG = logging.getLogger(__name__)
+
 arg_parser = ArgumentParser()
 arg_parser.add_argument("--verbosity", type=str, default="info")
 args = arg_parser.parse_args()
 
-
 my_schema = {
     "$schema": "https://json-schema.org/draft/2020-12/schema",
     "description": "my example schema",
@@ -74,7 +87,31 @@
 }
 
 
+def set_level(level: str) -> None:
+    """
+    Configure the root logger level and the stderr handler format.
+
+    Arguments:
+        level: logging level as string, available levels: warning, info, debug.
+
+    Raises:
+        ValueError: if an unknown logging level is given.
+    """
+    if level == "warning":
+        stderr.setFormatter(simple_format)
+        logging.basicConfig(level=logging.WARNING, handlers=(stderr,))
+    elif level == "info":
+        stderr.setFormatter(info_format)
+        logging.basicConfig(level=logging.INFO, handlers=(stderr,))
+    elif level == "debug":
+        stderr.setFormatter(full_format)
+        logging.basicConfig(level=logging.DEBUG, handlers=(stderr,))
+    else:
+        raise ValueError(f"Incorrect logging level: {level}")
+
+
 if __name__ == "__main__":
+    set_level(args.verbosity)
     arch = Archivist(
         path="raw_metadata",
         parsers=[time_parser(), yml_parser()],
@@ -84,7 +121,6 @@
         overwrite=True,
         lazy_load=True,
         auto_cleanup=True,
-        verbosity=args.verbosity,
         add_description=True,
         add_type=True,
     )
@@ -92,11 +128,9 @@
 
     arch.parse()
     arch.export()
 
-    print("\nResulting schema:")
     formatted_schema = arch.get_formatted_schema()
-    print(dumps(formatted_schema, indent=4))
+    LOG.info("Resulting schema:\n%s", dumps(formatted_schema, indent=4))
 
     with Path("schema.json").open("w") as f:
         dump(formatted_schema, f, indent=4)
 
-    print("\nResulting metadata:")
-    print(dumps(arch.get_metadata(), indent=4))
+    LOG.info("Resulting metadata:\n%s", dumps(arch.get_metadata(), indent=4))
diff --git a/examples/schema_example4/schema_tutorial.ipynb b/examples/schema_example4/schema_tutorial.ipynb
index c46718c..b5562e5 100644
--- a/examples/schema_example4/schema_tutorial.ipynb
+++ b/examples/schema_example4/schema_tutorial.ipynb
@@ -23,7 +23,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 3,
+    "execution_count": 1,
     "id": "740f7145",
     "metadata": {},
     "outputs": [
@@ -65,7 +65,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 1,
+    "execution_count": 2,
     "id": "3a4f8fc5-878d-4e7b-93f4-ae851f8eccb6",
     "metadata": {},
     "outputs": [],
@@ -94,7 +94,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 2,
+    "execution_count": 3,
     "id": "e5f30a5a",
     "metadata": {},
     "outputs": [
@@ -127,7 +127,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 3,
+    "execution_count": 4,
     "id": "f40534b1",
     "metadata": {},
     "outputs": [
@@ -162,7 +162,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 4,
+    "execution_count": 5,
     "id": "45ffb6a9-a966-47df-b417-2f14e7cabb04",
     "metadata": {},
     "outputs": [],
@@ -311,7 +311,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 5,
+    "execution_count": 6,
     "id": "55647207",
     "metadata": {},
     "outputs": [],
@@ -412,7 +412,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 8,
+    "execution_count": 7,
     "id": "18c4cbcd",
     "metadata": {},
     "outputs": [
@@ -447,18 +447,6 @@
     "    },\n",
     "    {\n",
     "     \"$ref\": \"#/$defs/yml_parser\"\n",
-    "    },\n",
-    "    {\n",
-    "     \"$ref\": \"#/$defs/time_parser\"\n",
-    "    },\n",
-    "    {\n",
-    "     \"$ref\": \"#/$defs/yml_parser\"\n",
-    "    },\n",
-    "    {\n",
-    "     \"$ref\": \"#/$defs/time_parser\"\n",
-    "    },\n",
-    "    {\n",
-    "     \"$ref\": \"#/$defs/yml_parser\"\n",
     "    }\n",
     "   ]\n",
     "  }\n",
@@ -550,7 +538,6 @@
     "    schema=my_schema,\n",
     "    add_description=True,\n",
     "    add_type=True,\n",
-    "    verbosity=\"warning\",\n",
     ")\n",
     "\n",
     "arch.parse()\n",
diff --git a/src/metadata_archivist/archivist.py b/src/metadata_archivist/archivist.py
index 840ed3b..f87abbb 100644
--- a/src/metadata_archivist/archivist.py
+++ b/src/metadata_archivist/archivist.py
@@ -12,6 +12,8 @@
 """
 
+import logging
+
 from shutil import rmtree
 from copy import deepcopy
 from typing import Union, Iterable, Optional
@@ -20,7 +22,9 @@
 from metadata_archivist.exporter import Exporter
 from metadata_archivist.explorer import Explorer
 from metadata_archivist.formatter import Formatter
-from metadata_archivist.logger import LOG, set_level, is_debug
+
+
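+# Module-level logger; its level and output are inherited from whatever root
+# logging configuration the embedding application sets up.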
+LOG = logging.getLogger(__name__)
 
 # Default configuration parameters for the Archivist class:
 # "extraction_directory": string path to extraction directory (not used if exploring a directory). Default "." .
@@ -30,7 +34,6 @@
 # "overwrite": control boolean to allow overwriting existing metadata file. Default True .
 # "auto_cleanup": control boolean to clean up after generating metadata.
 #                 Deletes extracted files and parsed files if lazy loading. Default True .
-# "verbosity": string value of verbosity level. Default "info" .
 # "add_description": control boolean to add schema description attributes to resulting metadata. Default True .
 # "add_type": control boolean to add schema type attributes to resulting metadata. Default False .
 # "output_format": "string value of metadata file output format. Default "JSON" .
@@ -41,7 +44,6 @@
     "lazy_load": False,
     "overwrite": True,
     "auto_cleanup": True,
-    "verbosity": "info",
     "add_description": False,
     "add_type": False,
     "output_format": "JSON",
@@ -109,13 +111,6 @@ def _init_config(self, **kwargs) -> None:
         self.config = deepcopy(DEFAULT_CONFIG)
         key_list = list(self.config.keys())
 
-        # Init logger object with verbose configuration
-        if "verbosity" in kwargs:
-            if set_level(kwargs["verbosity"]):
-                self.config["verbosity"] = kwargs["verbosity"]
-                key_list.remove("verbosity")
-            kwargs.pop("verbosity", None)
-
         # Init rest of config params
         for key, value in kwargs.items():
             if key in self.config:
@@ -127,13 +122,12 @@ def _init_config(self, **kwargs) -> None:
             else:
                 LOG.warning("Unused argument '%s'", key)
 
-        if is_debug():
-            for key in key_list:
-                LOG.debug(
-                    "No argument found for '%s' initializing by default '%s'",
-                    key,
-                    str(self.config[key]),
-                )
+        for key in key_list:
+            LOG.debug(
+                "No argument found for '%s' initializing by default '%s'",
+                key,
+                str(self.config[key]),
+            )
 
     def parse(self) -> None:
         """
diff --git a/src/metadata_archivist/explorer.py b/src/metadata_archivist/explorer.py
index d2fc3cc..8010166 100644
--- a/src/metadata_archivist/explorer.py
+++ b/src/metadata_archivist/explorer.py
@@ -12,6 +12,8 @@
 """
 
+import logging
+
 from pathlib import Path
 from functools import partial
 from zipfile import is_zipfile
@@ -19,9 +21,11 @@
 from typing import List, Tuple, Union
 from tarfile import is_tarfile, TarFile, open as t_open
 
-from metadata_archivist.logger import LOG
 from metadata_archivist.helper_functions import pattern_parts_match, check_dir
 
 
+LOG = logging.getLogger(__name__)
+
 # Accepted archive file formats
 _ACCEPTED_FORMATS = list(TarFile.OPEN_METH.keys()) + ["tgz", "txz", "tbz", "tbz2"]
diff --git a/src/metadata_archivist/export_rules.py b/src/metadata_archivist/export_rules.py
index fae0b12..14c8949 100644
--- a/src/metadata_archivist/export_rules.py
+++ b/src/metadata_archivist/export_rules.py
@@ -21,12 +21,15 @@
 """
 
+import logging
+
 from pathlib import Path
 from typing import Callable
 from json import dump as j_dump
 from pickle import dump as p_dump, HIGHEST_PROTOCOL
 
-from metadata_archivist.logger import LOG
+
+LOG = logging.getLogger(__name__)
 
 try:
     from yaml import dump as y_dump
diff --git a/src/metadata_archivist/exporter.py b/src/metadata_archivist/exporter.py
index a7bd6a7..233c179 100644
--- a/src/metadata_archivist/exporter.py
+++ b/src/metadata_archivist/exporter.py
@@ -12,11 +12,15 @@
 """
 
-from metadata_archivist.logger import LOG
+import logging
+
 from metadata_archivist.helper_functions import check_dir
 from metadata_archivist.export_rules import EXPORT_RULES, register_export_rule
 
 
+LOG = logging.getLogger(__name__)
+
+
 class Exporter:
     """
     Convenience class for handling different export formats.
diff --git a/src/metadata_archivist/formatter.py b/src/metadata_archivist/formatter.py
index 7726566..23537be 100644
--- a/src/metadata_archivist/formatter.py
+++ b/src/metadata_archivist/formatter.py
@@ -14,15 +14,14 @@
 """
 
+import logging
+
 from pathlib import Path
 from copy import deepcopy
 from json import load, dumps
-from hashlib import sha3_256
-from pickle import dumps as p_dumps, HIGHEST_PROTOCOL
 from typing import Optional, List, Iterable, NoReturn, Union, Tuple
 
 from metadata_archivist.parser import AParser
-from metadata_archivist.logger import LOG, is_debug
 from metadata_archivist import helper_classes as helpers
 from metadata_archivist.formatting_rules import (
     FORMATTING_RULES,
@@ -36,6 +35,9 @@
 )
 
 
+LOG = logging.getLogger(__name__)
+
+
 class Formatter:
     """
     A Formatter creates a metadata object (dict) that
@@ -435,12 +437,11 @@ def _update_metadata_tree_with_schema(
 
         # Check the length of the recursion result and and existence of node
         if len(recursion_result) > 1 or node not in recursion_result:
-            if is_debug():
-                LOG.debug(
-                    "current metadata tree = %s\nrecursion results = %s",
-                    dumps(tree, indent=4, default=vars),
-                    dumps(recursion_result, indent=4, default=vars),
-                )
+            LOG.debug(
+                "current metadata tree = %s\nrecursion results = %s",
+                dumps(tree, indent=4, default=vars),
+                dumps(recursion_result, indent=4, default=vars),
+            )
             raise RuntimeError("Malformed recursion result when processing regex context")
 
         # If the current node is equal to the key in the interpreted schema i.e. last iteration of loop
@@ -455,12 +456,11 @@ def _update_metadata_tree_with_schema(
 
         # If the break is never reached an error has ocurred
         else:
-            if is_debug():
-                LOG.debug(
-                    "current metadata tree = %s\nrecursion results = %s",
-                    dumps(tree, indent=4, default=vars),
-                    dumps(recursion_result, indent=4, default=vars),
-                )
+            LOG.debug(
+                "current metadata tree = %s\nrecursion results = %s",
+                dumps(tree, indent=4, default=vars),
+                dumps(recursion_result, indent=4, default=vars),
+            )
             raise RuntimeError("Malformed metadata tree when processing regex context")
 
         # Else we add a new entry to the tree using the recursion results
@@ -551,20 +551,18 @@ def _combine(
     if formatter1.config != formatter2.config:
         for key, value in formatter1.config:
             if key not in formatter2.config:
-                if is_debug():
-                    LOG.debug(
-                        "formatter1.config = %s\nformatter2.config = %s",
-                        dumps(formatter1.config, indent=4, default=vars),
-                        dumps(formatter2.config, indent=4, default=vars),
-                    )
+                LOG.debug(
+                    "formatter1.config = %s\nformatter2.config = %s",
+                    dumps(formatter1.config, indent=4, default=vars),
+                    dumps(formatter2.config, indent=4, default=vars),
+                )
                 raise KeyError("key mismatch in Formatter.combine.")
             if value != formatter2.config[key]:
-                if is_debug():
-                    LOG.debug(
-                        "formatter1.config = %s\nformatter2.config = %s",
-                        dumps(formatter1.config, indent=4, default=vars),
-                        dumps(formatter2.config, indent=4, default=vars),
-                    )
+                LOG.debug(
+                    "formatter1.config = %s\nformatter2.config = %s",
+                    dumps(formatter1.config, indent=4, default=vars),
+                    dumps(formatter2.config, indent=4, default=vars),
+                )
                 raise ValueError("Value mismatch in Formatter.combine.")
 
     # If different reference but same content then copy content to new config
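The removed `is_debug()` pre-checks guarded the relatively costly `dumps(...)` pretty-printing. `LOG.debug` defers its `%`-formatting, but the call arguments are still evaluated eagerly, so where that cost matters the standard-library equivalent of the old guard would be (illustrative, not applied by this patch):

    if LOG.isEnabledFor(logging.DEBUG):
        LOG.debug("current metadata tree = %s", dumps(tree, indent=4, default=vars))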
diff --git a/src/metadata_archivist/formatting_rules.py b/src/metadata_archivist/formatting_rules.py
index 23b589e..1302dd3 100644
--- a/src/metadata_archivist/formatting_rules.py
+++ b/src/metadata_archivist/formatting_rules.py
@@ -29,11 +29,12 @@
 """
 
+import logging
+
 from json import dumps
 from typing import Callable
 from typing import Union, TYPE_CHECKING
 
-from metadata_archivist.logger import LOG, is_debug
 from metadata_archivist.helper_classes import SchemaEntry
 from metadata_archivist.helper_functions import (
     pattern_parts_match,
@@ -47,6 +48,9 @@
     from metadata_archivist.formatter import Formatter
 
 
+LOG = logging.getLogger(__name__)
+
+
 def _format_parser_id_rule(
     formatter: "Formatter",
     interpreted_schema: SchemaEntry,
@@ -66,12 +70,11 @@ def _format_parser_id_rule(
     # Currently only one parser reference per entry is allowed
     # and if a reference exists it must be the only content in the entry
     if len(interpreted_schema.items()) > 1:
-        if is_debug():
-            LOG.debug(
-                "schema entry key '%s'\nschema entry content = %s",
-                interpreted_schema.key,
-                dumps(interpreted_schema, indent=4, default=vars),
-            )
+        LOG.debug(
+            "schema entry key '%s'\nschema entry content = %s",
+            interpreted_schema.key,
+            dumps(interpreted_schema, indent=4, default=vars),
+        )
         raise RuntimeError("Invalid SchemaEntry content.")
 
     # Get parser and its cache
@@ -91,12 +94,11 @@ def _format_parser_id_rule(
         if parsed_metadata is None:
             parsed_metadata = {}
         elif not isinstance(parsed_metadata, dict):
-            if is_debug():
-                LOG.debug(
-                    "parsed metadata = %s\ncontext = %s",
-                    dumps(parsed_metadata, indent=4, default=vars),
-                    dumps(interpreted_schema.context, indent=4, default=vars),
-                )
+            LOG.debug(
+                "parsed metadata = %s\ncontext = %s",
+                dumps(parsed_metadata, indent=4, default=vars),
+                dumps(interpreted_schema.context, indent=4, default=vars),
+            )
             raise TypeError("Incorrect parsed_metadata type.")
 
         # We skip the last element as it represents the node name of the parsed metadata
@@ -118,12 +120,11 @@ def _format_parser_id_rule(
         if parsed_metadata is None:
             parsed_metadata = {}
         elif not isinstance(parsed_metadata, dict):
-            if is_debug():
-                LOG.debug(
-                    "parsed metadata = %s\ncontext = %s",
-                    dumps(parsed_metadata, indent=4, default=vars),
-                    dumps(interpreted_schema.context, indent=4, default=vars),
-                )
+            LOG.debug(
+                "parsed metadata = %s\ncontext = %s",
+                dumps(parsed_metadata, indent=4, default=vars),
+                dumps(interpreted_schema.context, indent=4, default=vars),
+            )
             raise TypeError("Incorrect parsed_metadata type.")
 
         # In this case the name of the file should be taken into account in the context path
@@ -159,22 +160,20 @@ def _format_parser_id_rule(
         unpack = parsing_context["unpack"]
         if isinstance(unpack, bool):
             if not unpack:
-                if is_debug():
-                    LOG.debug(
-                        "parsing context = %s",
-                        dumps(parsing_context, indent=4, default=vars),
-                    )
+                LOG.debug(
+                    "parsing context = %s",
+                    dumps(parsing_context, indent=4, default=vars),
+                )
                 raise ValueError("Incorrect unpacking configuration in !parsing context: unpack=False.")
 
             metadata = unpack_nested_value(metadata)
 
         elif isinstance(unpack, int):
             if unpack == 0:
-                if is_debug():
-                    LOG.debug(
-                        "parsing context = %s",
-                        dumps(parsing_context, indent=4, default=vars),
-                    )
+                LOG.debug(
+                    "parsing context = %s",
+                    dumps(parsing_context, indent=4, default=vars),
+                )
                 raise ValueError("Incorrect unpacking configuration in !parsing context: unpack=0.")
 
             metadata = unpack_nested_value(metadata, unpack)
@@ -226,8 +225,7 @@ def _format_calculate_rule(
         raise TypeError("Incorrect value type found while formatting calculation")
 
     if not all(key in value for key in ["expression", "variables"]):
-        if is_debug():
-            LOG.debug("!calculate directive value = %s", dumps(value, indent=4, default=vars))
+        LOG.debug("!calculate directive value = %s", dumps(value, indent=4, default=vars))
%s", dumps(value, indent=4, default=vars)) raise RuntimeError("Malformed !calculate entry found while formatting calculation.") add_description = kwargs.pop("add_description", False) @@ -247,8 +245,7 @@ def _format_calculate_rule( ) raise TypeError("Incorrect variable type found while formatting calculation.") if not len(entry.items()) == 1: - if is_debug(): - LOG.debug("entry content = %s", dumps(entry, indent=4, default=vars)) + LOG.debug("entry content = %s", dumps(entry, indent=4, default=vars)) raise ValueError("Incorrect variable entry found while formatting calculation.") parsing_values[variable] = _format_parser_id_rule(formatter, entry, branch, entry["!parser_id"], **kwargs) diff --git a/src/metadata_archivist/helper_classes.py b/src/metadata_archivist/helper_classes.py index 5d62e08..5bc9385 100644 --- a/src/metadata_archivist/helper_classes.py +++ b/src/metadata_archivist/helper_classes.py @@ -16,6 +16,8 @@ """ +import logging + from json import dumps from pathlib import Path from hashlib import sha3_256 @@ -24,7 +26,6 @@ from pickle import loads as p_loads, dumps as p_dumps, HIGHEST_PROTOCOL -from metadata_archivist.logger import LOG, is_debug from metadata_archivist.helper_functions import merge_dicts, IGNORED_ITERABLE_KEYWORDS from metadata_archivist.interpretation_rules import ( INTERPRETATION_RULES, @@ -32,6 +33,9 @@ ) +LOG = logging.getLogger(__name__) + + class CacheEntry: """ Convenience class for storing parsing results. @@ -80,8 +84,7 @@ def load_metadata(self) -> dict: if self.metadata is None: if self._digest is None: - if is_debug(): - LOG.debug("CacheEntry = %s", dumps(self, indent=4, default=vars)) + LOG.debug("CacheEntry = %s", dumps(self, indent=4, default=vars)) raise RuntimeError("Metadata has not been cached yet.") with self.meta_path.open("rb", encoding=None) as f: @@ -93,8 +96,7 @@ def load_metadata(self) -> dict: raise ValueError("Encoded pickle has been tampered with.") if self.metadata is None: - if is_debug(): - LOG.debug("CacheEntry = %s", dumps(self, indent=4, default=vars)) + LOG.debug("CacheEntry = %s", dumps(self, indent=4, default=vars)) raise RuntimeError("Failed to load metadata from CacheEntry.") return self.metadata @@ -469,12 +471,10 @@ def __init__(self, schema: dict) -> None: LOG.debug("schema type '%s' , expected type '%s'", str(type(schema)), str(dict)) raise TypeError("Incorrect schema used for iterator.") if "properties" not in schema or not isinstance(schema["properties"], dict): - if is_debug(): - LOG.debug("schema = %s", dumps(schema, indent=4, default=vars)) + LOG.debug("schema = %s", dumps(schema, indent=4, default=vars)) raise ValueError("Incorrect schema structure, root is expected to contain properties dictionary.") if "$defs" not in schema or not isinstance(schema["$defs"], dict): - if is_debug(): - LOG.debug("schema = %s", dumps(schema, indent=4, default=vars)) + LOG.debug("schema = %s", dumps(schema, indent=4, default=vars)) raise ValueError("Incorrect schema structure, root is expected to contain $defs dictionary.") self.schema = schema @@ -537,11 +537,10 @@ def interpret_schema( # rules must be defined in individual items of the properties, hence a parent key should # always be present. 
         if _parent_key is None:
-            if is_debug():
-                LOG.debug(
-                    "current structure = %s",
-                    dumps(_relative_root, indent=4, default=vars),
-                )
+            LOG.debug(
+                "current structure = %s",
+                dumps(_relative_root, indent=4, default=vars),
+            )
             raise RuntimeError("Cannot interpret rule without parent key.")
 
         _relative_root = INTERPRETATION_RULES[key](self, val, key, _parent_key, _relative_root)
@@ -577,21 +576,15 @@ def generate(self) -> SchemaEntry:
         Returns:
             self contained SchemaEntry
         """
-        if is_debug():
-            # Passing through dumps for pretty printing,
-            # however can be costly, so checking if debug is enabled first
-            LOG.debug("Initial structure = %s", dumps(self.schema, indent=4, default=vars))
+        LOG.debug("Initial structure = %s", dumps(self.schema, indent=4, default=vars))
 
         if self.structure.is_empty():
             self.structure = self.interpret_schema(self.schema["properties"])
 
-        if is_debug():
-            # Passing through dumps for pretty printing,
-            # however can be costly, so checking if debug is enabled first
-            LOG.debug(
-                "Interpreted structure = %s",
-                dumps(self.structure, indent=4, default=vars),
-            )
+        LOG.debug(
+            "Interpreted structure = %s",
+            dumps(self.structure, indent=4, default=vars),
+        )
 
         return self.structure
diff --git a/src/metadata_archivist/helper_functions.py b/src/metadata_archivist/helper_functions.py
index 9df8c23..a1bf08c 100644
--- a/src/metadata_archivist/helper_functions.py
+++ b/src/metadata_archivist/helper_functions.py
@@ -21,6 +21,8 @@
 """
 
+import logging
+
 from json import dumps
 from re import fullmatch
 from pathlib import Path
@@ -28,8 +30,8 @@
 from collections.abc import Iterable
 from typing import Optional, Any, Tuple
 
-from metadata_archivist.logger import LOG, is_debug
 
+LOG = logging.getLogger(__name__)
 
 # List of ignored JSON schema iterable keys
 IGNORED_ITERABLE_KEYWORDS = [
@@ -110,12 +112,11 @@ def update_dict_with_parts(target_dict: dict, value: Any, parts: list) -> None:
         if part not in relative_root:
             relative_root[part] = {}
         elif not isinstance(relative_root[part], dict):
-            if is_debug():
-                LOG.debug(
-                    "key %s\nrelative root = %s",
-                    part,
-                    dumps(relative_root, indent=4, default=vars),
-                )
+            LOG.debug(
+                "key %s\nrelative root = %s",
+                part,
+                dumps(relative_root, indent=4, default=vars),
+            )
             raise RuntimeError("Duplicate key with incorrect found while updating tree with path hierarchy.")
         relative_root = relative_root[part]
 
     relative_root[parts[-1]] = value
@@ -229,14 +230,12 @@ def deep_get_from_schema(schema: dict, keys: list) -> Any:
         except StopIteration:
             pass
 
-        if is_debug():
-            LOG.debug("schema = %s", dumps(schema, indent=4, default=vars))
-            LOG.debug("keys = %s", dumps(keys, indent=4, default=vars))
-        raise StopIteration("Iterated through schema without finding corresponding keys.")
-
-    if is_debug():
         LOG.debug("schema = %s", dumps(schema, indent=4, default=vars))
         LOG.debug("keys = %s", dumps(keys, indent=4, default=vars))
+        raise StopIteration("Iterated through schema without finding corresponding keys.")
+
+    LOG.debug("schema = %s", dumps(schema, indent=4, default=vars))
+    LOG.debug("keys = %s", dumps(keys, indent=4, default=vars))
     raise StopIteration("No key found for corresponding schema.")
 
 
@@ -263,8 +262,7 @@ def pattern_parts_match(pattern_parts: list, actual_parts: list, context: Optional
         if fullmatch(r"\{\w+\}", part) and context is not None:
             # !varname and regexp should always be in context in this case
             if "!varname" not in context or "regexp" not in context:
-                if is_debug():
-                    LOG.debug("context = %s", dumps(context, indent=4, default=vars))
+                LOG.debug("context = %s", dumps(context, indent=4, default=vars))
%s", dumps(context, indent=4, default=vars)) raise RuntimeError("Badly structured context for pattern matching.") # Match against same index element in file path @@ -300,22 +298,20 @@ def unpack_nested_value(iterable: Any, level: Optional[int] = None) -> Any: if not isinstance(iterable, Iterable): if level is not None and level > 0: - if is_debug(): - LOG.debug( - "level %i\niterable = %s", - level, - dumps(iterable, indent=4, default=vars), - ) - raise RuntimeError("Cannot further unpack iterable.") - return iterable - - if len(iterable) > 1 and (level is None or level > 0): - if is_debug(): LOG.debug( "level %i\niterable = %s", level, dumps(iterable, indent=4, default=vars), ) + raise RuntimeError("Cannot further unpack iterable.") + return iterable + + if len(iterable) > 1 and (level is None or level > 0): + LOG.debug( + "level %i\niterable = %s", + level, + dumps(iterable, indent=4, default=vars), + ) raise IndexError("Multiple branching possible when unpacking nested value.") if level is not None: @@ -505,14 +501,13 @@ def add_info_from_schema( schema_entry = deep_get_from_schema(schema, key_list + [key]) except StopIteration: LOG.warning("No schema entry found for metadata value '%s'", key) - if is_debug(): - LOG.debug( - "key '%s' , value '%s'\nmetadata = %s\nschema = %s", - str(key), - str(value), - dumps(metadata, indent=4, default=vars), - dumps(schema, indent=4, default=vars), - ) + LOG.debug( + "key '%s' , value '%s'\nmetadata = %s\nschema = %s", + str(key), + str(value), + dumps(metadata, indent=4, default=vars), + dumps(schema, indent=4, default=vars), + ) if schema_entry is not None: if add_description: new_value["description"] = schema_entry["description"] diff --git a/src/metadata_archivist/interpretation_rules.py b/src/metadata_archivist/interpretation_rules.py index 73eca56..c2164b1 100644 --- a/src/metadata_archivist/interpretation_rules.py +++ b/src/metadata_archivist/interpretation_rules.py @@ -27,17 +27,21 @@ """ +import logging + from re import sub from json import dumps from typing import Callable -from typing import Optional, Union, TYPE_CHECKING +from typing import Union, TYPE_CHECKING -from metadata_archivist.logger import LOG, is_debug from metadata_archivist.helper_functions import math_check if TYPE_CHECKING: from metadata_archivist.helper_classes import SchemaInterpreter, SchemaEntry + +LOG = logging.getLogger(__name__) + # Constants for schema specific/special values to be considered when parsing. _KNOWN_REFS = [ "#/$defs/", @@ -91,8 +95,7 @@ def _interpret_varname_directive_rule( ) -> "SchemaEntry": # Check if regex context is present in current entry if "useRegex" not in entry.context: - if is_debug(): - LOG.debug("SchemaEntry context = %s", dumps(entry.context, indent=4, default=vars)) + LOG.debug("SchemaEntry context = %s", dumps(entry.context, indent=4, default=vars)) raise RuntimeError("Contextless !varname found.") # Add a !varname context which contains the name to use # and to which expression it corresponds to. 
@@ -110,12 +113,11 @@ def _interpret_reference_rule(
 ) -> "SchemaEntry":
     # Check if reference is well formed against knowledge base
     if not any(prop_val.startswith(ss) for ss in _KNOWN_REFS):
-        if is_debug():
-            LOG.debug(
-                "Reference item ('%s' , %s)",
-                prop_key,
-                dumps(prop_val, indent=4, default=vars),
-            )
+        LOG.debug(
+            "Reference item ('%s' , %s)",
+            prop_key,
+            dumps(prop_val, indent=4, default=vars),
+        )
         raise ValueError("Malformed reference prop_value.")
 
     # Get schema definitions
@@ -169,12 +171,11 @@ def _interpret_calculate_directive_rule(
     # Requires referenced parsers to return numerical values.
     # references can be supplemented with !parsing directives to properly select value.
     if not all(key in prop_val for key in ["expression", "variables"]):
-        if is_debug():
-            LOG.debug(
-                "Directive item ('%s' , %s)",
-                prop_key,
-                dumps(prop_val, indent=4, default=vars),
-            )
+        LOG.debug(
+            "Directive item ('%s' , %s)",
+            prop_key,
+            dumps(prop_val, indent=4, default=vars),
+        )
         raise ValueError("Malformed !calculate directive.")
 
     expression = prop_val["expression"]
@@ -194,13 +195,12 @@ def _interpret_calculate_directive_rule(
         raise TypeError("Incorrect variables type in !calculate directive.")
 
     if len(variable_names) != len(variables):
-        if is_debug():
-            LOG.debug(
-                "Expression '%s' , expression variables '%s' , defined variables = %s",
-                expression,
-                str(variable_names),
-                dumps(variables, indent=4, default=vars),
-            )
+        LOG.debug(
+            "Expression '%s' , expression variables '%s' , defined variables = %s",
+            expression,
+            str(variable_names),
+            dumps(variables, indent=4, default=vars),
+        )
         raise RuntimeError("Variables count mismatch in !calculate directive.")
 
     # At this point we check if each variable entry corresponds to a reference to a Parser
@@ -220,8 +220,7 @@ def _interpret_calculate_directive_rule(
         raise TypeError("Incorrect variable type in !calculate directive.")
 
         if not "$ref" in value:
-            if is_debug():
-                LOG.debug("Variable content = %s", dumps(value, indent=4, default=vars))
+            LOG.debug("Variable content = %s", dumps(value, indent=4, default=vars))
            raise RuntimeError("Variable does not reference a Parser in !calculate directive.")
 
     # We create a SchemaEntry in the context to be specially handled by the Formatter
diff --git a/src/metadata_archivist/logger.py b/src/metadata_archivist/logger.py
deleted file mode 100644
index 500a715..0000000
--- a/src/metadata_archivist/logger.py
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-
-Internally used logging class extension.
-
-Initializes logger from logging module.
-Sets custom message formatter.
-
-exports:
-    LOG: logging object.
-    set_level: function to change logging level via strings.
-    is_debug: function to test if logging object is in debug level.
-
-Authors: Jose V., Matthias K.
-
-"""
-
-import sys
-import logging
-
-_stderr = logging.StreamHandler(stream=sys.stderr)
-
-_simple_format = logging.Formatter("%(levelname)s : %(message)s")
-_info_format = logging.Formatter("%(levelname)s : %(module)s : %(message)s")
-_full_format = logging.Formatter(
-    "\n%(name)s | %(asctime)s | %(levelname)s : %(levelno)s |"
-    + " %(filename)s : %(funcName)s : %(lineno)s | %(processName)s : %(process)d | %(message)s\n"
-)
-
-LOG = logging.getLogger(__name__)
-LOG.addHandler(_stderr)
-LOG.setLevel(logging.INFO)
-
-
-def set_level(level: str) -> bool:
-    """
-    Function used to set LOG object logging level.
-
-    Arguments:
-        level: logging level as string, available levels: warning, info, debug.
-
-    Returns:
-        success boolean.
-    """
-    if level == "warning":
-        LOG.setLevel(logging.WARNING)
-        _stderr.setFormatter(_simple_format)
-    elif level == "info":
-        LOG.setLevel(logging.INFO)
-        _stderr.setFormatter(_info_format)
-    elif level == "debug":
-        LOG.setLevel(logging.DEBUG)
-        _stderr.setFormatter(_full_format)
-    else:
-        LOG.warning(
-            "Trying to set incorrect logging level '%s', staying at current level.",
-            level,
-        )
-        return False
-
-    return True
-
-
-def is_debug() -> bool:
-    """Status function which returns true if logging level is defined to DEBUG."""
-    return LOG.level == logging.DEBUG
diff --git a/src/metadata_archivist/parser.py b/src/metadata_archivist/parser.py
index 6ef70cd..5b69769 100644
--- a/src/metadata_archivist/parser.py
+++ b/src/metadata_archivist/parser.py
@@ -12,16 +12,20 @@
 """
 
+import logging
+
 from pathlib import Path
 from typing import TYPE_CHECKING
 from abc import ABC, abstractmethod
 
-from metadata_archivist.logger import LOG
 from metadata_archivist.helper_functions import pattern_parts_match
 
 if TYPE_CHECKING:
     from metadata_archivist.formatter import Formatter
 
+
+LOG = logging.getLogger(__name__)
+
 # Try to load jsonschema package components for validation
 # In case of failure, validation is disabled
 try:
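With `metadata_archivist.logger` deleted, the library no longer installs handlers or sets levels on import; output is controlled entirely by the application through standard `logging` configuration. A minimal sketch of what replaces the removed `verbosity` option (format string borrowed from the examples, everything else illustrative):

    import logging

    logging.basicConfig(format="%(levelname)s : %(module)s : %(message)s")
    logging.getLogger("metadata_archivist").setLevel(logging.DEBUG)

Because all module loggers are created via `logging.getLogger(__name__)`, they share the `metadata_archivist` parent, so a single `setLevel` call on that name tunes the whole library without touching other packages.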