diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 0bdd3b1..44ff703 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -36,4 +36,16 @@ jobs: - run: poetry install --no-interaction - - run: poetry run pytest + - name: Check formatting + uses: astral-sh/ruff-action@v1 + with: + args: "format --check" + src: "./src" + + - name: Lint code + uses: astral-sh/ruff-action@v1 + with: + src: "./src" + + - name: Test + run: poetry run pytest diff --git a/README.md b/README.md index aefcf8b..9073264 100644 --- a/README.md +++ b/README.md @@ -229,6 +229,37 @@ installed, you should be able to run: $ poetry run ptw . --now --clear +### Running the linter for code style issues: + + $ poetry run ruff check + + [The `ruff` tool](https://docs.astral.sh/ruff/linter/) will check + the source code for conformity with various style rules. Some of + these can be fixed by `ruff` itself, and if so, the output will + describe how to automatically fix these issues. + + The CI/CD pipeline will run these checks whenever new commits are + pushed to GitHub, and the results will be available in the GitHub + Actions output. + +### Running the code formatter + + $ poetry run ruff format + + [The `ruff` tool](https://docs.astral.sh/ruff/formatter/) will check + the source code for conformity with source code formatting rules. It + will also fix any issues it finds and leave the changes uncommitted + so you can review the changes prior to adding them to the codebase. + + As with the linter, the CI/CD pipeline will run the formatter when + commits are pushed to GitHub. + +### Ruff integration with your editor + + Rather than running `ruff` manually from the commandline, it can be + integrated with the editor of your choice. See the + [ruff editor integration](https://docs.astral.sh/ruff/editors/) guide. + ### Releasing * Update the CHANGELOG to include details of the changes included in the new diff --git a/pyproject.toml b/pyproject.toml index a30262b..482e648 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,6 +70,9 @@ replace = 'v{new_version}' requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" +[tool.ruff.lint] +select = ["E", "F", "I"] + [tool.mypy] files = ["src", "tests"] diff --git a/src/nsidc/metgen/aws.py b/src/nsidc/metgen/aws.py index 7d7b2e9..0b0012d 100644 --- a/src/nsidc/metgen/aws.py +++ b/src/nsidc/metgen/aws.py @@ -1,8 +1,8 @@ import boto3 - KINESIS_PARTITION_KEY = "metgenc-duck" + def kinesis_stream_exists(stream_name): """ Predicate which determines if a Kinesis stream with the given name exists @@ -10,23 +10,23 @@ def kinesis_stream_exists(stream_name): """ client = boto3.client("kinesis") try: - summary = client.describe_stream_summary(StreamName=stream_name) + client.describe_stream_summary(StreamName=stream_name) return True - except Exception as e: + except Exception: return False + def post_to_kinesis(stream_name, cnm_message): """ Posts a message to a Kinesis stream. 
""" client = boto3.client("kinesis") result = client.put_record( - StreamName=stream_name, - Data=cnm_message, - PartitionKey=KINESIS_PARTITION_KEY + StreamName=stream_name, Data=cnm_message, PartitionKey=KINESIS_PARTITION_KEY ) - return result['ShardId'] + return result["ShardId"] + def staging_bucket_exists(bucket_name): """ @@ -37,9 +37,10 @@ def staging_bucket_exists(bucket_name): try: client.head_bucket(Bucket=bucket_name) return True - except Exception as e: + except Exception: return False + def stage_file(s3_bucket_name, object_name, *, data=None, file=None): """ Stages data into an s3 bucket at a given path. diff --git a/src/nsidc/metgen/cli.py b/src/nsidc/metgen/cli.py index f25d150..6c4ecc3 100644 --- a/src/nsidc/metgen/cli.py +++ b/src/nsidc/metgen/cli.py @@ -2,12 +2,10 @@ import click -from nsidc.metgen import config -from nsidc.metgen import metgen -from nsidc.metgen import constants +from nsidc.metgen import config, constants, metgen +LOGGER = logging.getLogger("metgenc") -LOGGER = logging.getLogger('metgenc') @click.group(epilog="For detailed help on each command, run: metgenc COMMAND --help") def cli(): @@ -16,72 +14,135 @@ def cli(): Cumulus, and post CNM messages.""" pass + @cli.command() -@click.option('-c', '--config', help='Path to configuration file to create or replace') +@click.option("-c", "--config", help="Path to configuration file to create or replace") def init(config): """Populates a configuration file based on user input.""" click.echo(metgen.banner()) config = metgen.init_config(config) - click.echo(f'Initialized the metgen configuration file {config}') + click.echo(f"Initialized the metgen configuration file {config}") + @cli.command() -@click.option('-c', '--config', 'config_filename', help='Path to configuration file to display', required=True) +@click.option( + "-c", + "--config", + "config_filename", + help="Path to configuration file to display", + required=True, +) def info(config_filename): """Summarizes the contents of a configuration file.""" click.echo(metgen.banner()) - configuration = config.configuration(config.config_parser_factory(config_filename), {}) + configuration = config.configuration( + config.config_parser_factory(config_filename), {} + ) metgen.init_logging(configuration) configuration.show() + @cli.command() -@click.option('-c', '--config', 'config_filename', help='Path to configuration file', required=True) -@click.option('-t', '--type', 'content_type', help='JSON content type', default='cnm', show_default=True) +@click.option( + "-c", + "--config", + "config_filename", + help="Path to configuration file", + required=True, +) +@click.option( + "-t", + "--type", + "content_type", + help="JSON content type", + default="cnm", + show_default=True, +) def validate(config_filename, content_type): """Validates the contents of local JSON files.""" click.echo(metgen.banner()) - configuration = config.configuration(config.config_parser_factory(config_filename), {}) + configuration = config.configuration( + config.config_parser_factory(config_filename), {} + ) metgen.init_logging(configuration) metgen.validate(configuration, content_type) + @cli.command() -@click.option('-c', '--config', 'config_filename', required=True, - help='Path to configuration file') -@click.option('-d', '--dry-run', is_flag=True, required=False, default=None, - help='Don\'t stage files on S3 or publish messages to Kinesis') -@click.option('-e', '--env', help='environment', - default=constants.DEFAULT_CUMULUS_ENVIRONMENT, show_default=True) -@click.option('-n', 
'--number', metavar='count', required=False, - default=constants.DEFAULT_NUMBER, help="Process at most 'count' granules.") -@click.option('-wc', '--write-cnm', is_flag=True, required=False, default=None, - help="Write CNM messages to files.") -@click.option('-o', '--overwrite', is_flag=True, required=False, default=None, - help="Overwrite existing UMM-G files.") +@click.option( + "-c", + "--config", + "config_filename", + help="Path to configuration file", + required=True, +) +@click.option( + "-d", + "--dry-run", + is_flag=True, + required=False, + default=None, + help="Don't stage files on S3 or publish messages to Kinesis", +) +@click.option( + "-e", + "--env", + help="environment", + default=constants.DEFAULT_CUMULUS_ENVIRONMENT, + show_default=True, +) +@click.option( + "-n", + "--number", + help="Process at most 'count' granules.", + metavar="count", + required=False, + default=constants.DEFAULT_NUMBER, +) +@click.option( + "-wc", + "--write-cnm", + is_flag=True, + required=False, + default=None, + help="Write CNM messages to files.", +) +@click.option( + "-o", + "--overwrite", + is_flag=True, + required=False, + default=None, + help="Overwrite existing UMM-G files.", +) def process(config_filename, dry_run, env, number, write_cnm, overwrite): """Processes science data files based on configuration file contents.""" click.echo(metgen.banner()) overrides = { - 'dry_run': dry_run, - 'number': number, - 'overwrite_ummg': overwrite, - 'write_cnm_file': write_cnm, + "dry_run": dry_run, + "number": number, + "overwrite_ummg": overwrite, + "write_cnm_file": write_cnm, } try: - configuration = config.configuration(config.config_parser_factory(config_filename), overrides, env) + configuration = config.configuration( + config.config_parser_factory(config_filename), overrides, env + ) metgen.init_logging(configuration) configuration.show() config.validate(configuration) metgen.process(configuration) except config.ValidationError as e: - logger = logging.getLogger('metgenc') + logger = logging.getLogger("metgenc") logger.error("\nThe configuration is invalid:") for error in e.errors: logger.error(f" * {error}") exit(1) except Exception as e: - logger = logging.getLogger('metgenc') + logger = logging.getLogger("metgenc") logger.error("\nUnable to process data: " + str(e)) exit(1) - click.echo(f'Processing complete') + click.echo("Processing complete") if __name__ == "__main__": diff --git a/src/nsidc/metgen/config.py b/src/nsidc/metgen/config.py index b40449a..ec37c94 100644 --- a/src/nsidc/metgen/config.py +++ b/src/nsidc/metgen/config.py @@ -1,13 +1,10 @@ import configparser import dataclasses -from datetime import datetime, timezone import logging import os.path from pathlib import Path -import uuid -from nsidc.metgen import aws -from nsidc.metgen import constants +from nsidc.metgen import aws, constants class ValidationError(Exception): @@ -16,6 +13,7 @@ class ValidationError(Exception): def __init__(self, errors): self.errors = errors + @dataclasses.dataclass class Config: environment: str @@ -34,23 +32,27 @@ class Config: dry_run: bool def show(self): - # TODO add section headings in the right spot (if we think we need them in the output) - LOGGER = logging.getLogger('metgenc') - LOGGER.info('') - LOGGER.info('Using configuration:') - for k,v in self.__dict__.items(): - LOGGER.info(f' + {k}: {v}') + # TODO: add section headings in the right spot + # (if we think we need them in the output) + LOGGER = logging.getLogger("metgenc") + LOGGER.info("") + LOGGER.info("Using configuration:") + for 
k, v in self.__dict__.items(): + LOGGER.info(f" + {k}: {v}") if self.dry_run: - LOGGER.info('') - LOGGER.info('Note: The dry-run option was included, so no files will be staged and no CNM messages published.') - LOGGER.info('') + LOGGER.info("") + LOGGER.info( + """Note: The dry-run option was included, so no files will be\ + staged and no CNM messages published.""" + ) + LOGGER.info("") def ummg_path(self): return Path(self.local_output_dir, self.ummg_dir) def cnm_path(self): - return Path(self.local_output_dir, 'cnm') + return Path(self.local_output_dir, "cnm") def config_parser_factory(configuration_file): @@ -58,20 +60,24 @@ def config_parser_factory(configuration_file): Returns a ConfigParser by reading the specified file. """ if configuration_file is None or not os.path.exists(configuration_file): - raise ValueError(f'Unable to find configuration file {configuration_file}') - cfg_parser = configparser.ConfigParser(interpolation=configparser.ExtendedInterpolation()) + raise ValueError(f"Unable to find configuration file {configuration_file}") + cfg_parser = configparser.ConfigParser( + interpolation=configparser.ExtendedInterpolation() + ) # If the config parser gets no value (empty string), interpret it as False - cfg_parser.BOOLEAN_STATES |= [('', False)] + cfg_parser.BOOLEAN_STATES |= [("", False)] cfg_parser.read(configuration_file) return cfg_parser -def _get_configuration_value(environment, section, name, value_type, config_parser, overrides): +def _get_configuration_value( + environment, section, name, value_type, config_parser, overrides +): """ Returns a value from the provided config parser; any value for the key that is provided in the 'overrides' dictionary will take precedence. """ - vars = { 'environment': environment } + vars = {"environment": environment} if overrides.get(name) is None: if value_type is bool: return config_parser.getboolean(section, name) @@ -83,62 +89,137 @@ def _get_configuration_value(environment, section, name, value_type, config_pars else: return overrides.get(name) -def configuration(config_parser, overrides, environment=constants.DEFAULT_CUMULUS_ENVIRONMENT): + +def configuration( + config_parser, overrides, environment=constants.DEFAULT_CUMULUS_ENVIRONMENT +): """ Returns a valid Config object that is populated from the provided config parser based on the 'environment', and with values overriden with anything provided in 'overrides'. 
""" - config_parser['DEFAULT'] = { - 'kinesis_stream_name': constants.DEFAULT_STAGING_KINESIS_STREAM, - 'staging_bucket_name': constants.DEFAULT_STAGING_BUCKET_NAME, - 'write_cnm_file': constants.DEFAULT_WRITE_CNM_FILE, - 'overwrite_ummg': constants.DEFAULT_OVERWRITE_UMMG, - 'checksum_type': constants.DEFAULT_CHECKSUM_TYPE, - 'number': constants.DEFAULT_NUMBER, - 'dry_run': constants.DEFAULT_DRY_RUN, + config_parser["DEFAULT"] = { + "kinesis_stream_name": constants.DEFAULT_STAGING_KINESIS_STREAM, + "staging_bucket_name": constants.DEFAULT_STAGING_BUCKET_NAME, + "write_cnm_file": constants.DEFAULT_WRITE_CNM_FILE, + "overwrite_ummg": constants.DEFAULT_OVERWRITE_UMMG, + "checksum_type": constants.DEFAULT_CHECKSUM_TYPE, + "number": constants.DEFAULT_NUMBER, + "dry_run": constants.DEFAULT_DRY_RUN, } try: return Config( environment, - _get_configuration_value(environment, 'Source', 'data_dir', str, config_parser, overrides), - _get_configuration_value(environment, 'Collection', 'auth_id', str, config_parser, overrides), - _get_configuration_value(environment, 'Collection', 'version', int, config_parser, overrides), - _get_configuration_value(environment, 'Collection', 'provider', str, config_parser, overrides), - _get_configuration_value(environment, 'Destination', 'local_output_dir', str, config_parser, overrides), - _get_configuration_value(environment, 'Destination', 'ummg_dir', str, config_parser, overrides), - _get_configuration_value(environment, 'Destination', 'kinesis_stream_name', str, config_parser, overrides), - _get_configuration_value(environment, 'Destination', 'staging_bucket_name', str, config_parser, overrides), - _get_configuration_value(environment, 'Destination', 'write_cnm_file', bool, config_parser, overrides), - _get_configuration_value(environment, 'Destination', 'overwrite_ummg', bool, config_parser, overrides), - _get_configuration_value(environment, 'Settings', 'checksum_type', str, config_parser, overrides), - _get_configuration_value(environment, 'Settings', 'number', int, config_parser, overrides), - _get_configuration_value(environment, 'Settings', 'dry_run', bool, config_parser, overrides), + _get_configuration_value( + environment, "Source", "data_dir", str, config_parser, overrides + ), + _get_configuration_value( + environment, "Collection", "auth_id", str, config_parser, overrides + ), + _get_configuration_value( + environment, "Collection", "version", int, config_parser, overrides + ), + _get_configuration_value( + environment, "Collection", "provider", str, config_parser, overrides + ), + _get_configuration_value( + environment, + "Destination", + "local_output_dir", + str, + config_parser, + overrides, + ), + _get_configuration_value( + environment, "Destination", "ummg_dir", str, config_parser, overrides + ), + _get_configuration_value( + environment, + "Destination", + "kinesis_stream_name", + str, + config_parser, + overrides, + ), + _get_configuration_value( + environment, + "Destination", + "staging_bucket_name", + str, + config_parser, + overrides, + ), + _get_configuration_value( + environment, + "Destination", + "write_cnm_file", + bool, + config_parser, + overrides, + ), + _get_configuration_value( + environment, + "Destination", + "overwrite_ummg", + bool, + config_parser, + overrides, + ), + _get_configuration_value( + environment, "Settings", "checksum_type", str, config_parser, overrides + ), + _get_configuration_value( + environment, "Settings", "number", int, config_parser, overrides + ), + _get_configuration_value( + environment, "Settings", 
"dry_run", bool, config_parser, overrides + ), ) except Exception as e: - raise Exception('Unable to read the configuration file', e) + raise Exception("Unable to read the configuration file", e) + def validate(configuration): """ Validates each value in the configuration. """ validations = [ - ['data_dir', lambda dir: os.path.exists(dir), - 'The data_dir does not exist.'], - ['local_output_dir', lambda dir: os.path.exists(dir), - 'The local_output_dir does not exist.'], - # ['ummg_dir', lambda dir: os.path.exists(dir), - # 'The ummg_dir does not exist.'], ## validate "local_output_dir/ummg_dir" as part of issue-71 - ['kinesis_stream_name', lambda name: configuration.dry_run or aws.kinesis_stream_exists(name), - 'The kinesis stream does not exist.'], - ['staging_bucket_name', lambda name: configuration.dry_run or aws.staging_bucket_exists(name), - 'The staging bucket does not exist.'], - ['number', lambda number: 0 < number, - 'The number of granules to process must be positive.'], + [ + "data_dir", + lambda dir: os.path.exists(dir), + "The data_dir does not exist.", + ], + [ + "local_output_dir", + lambda dir: os.path.exists(dir), + "The local_output_dir does not exist.", + ], + # TODO: validate "local_output_dir/ummg_dir" as part of issue-71 + # [ + # "ummg_dir", + # lambda dir: os.path.exists(dir), + # "The ummg_dir does not exist." + # ], + [ + "kinesis_stream_name", + lambda name: aws.kinesis_stream_exists(name), + "The kinesis stream does not exist.", + ], + [ + "staging_bucket_name", + lambda name: aws.staging_bucket_exists(name), + "The staging bucket does not exist.", + ], + [ + "number", + lambda number: 0 < number, + "The number of granules to process must be positive.", + ], + ] + errors = [ + msg for name, fn, msg in validations if not fn(getattr(configuration, name)) ] - errors = [msg for name, fn, msg in validations if not fn(getattr(configuration, name))] if len(errors) == 0: return True else: raise ValidationError(errors) - diff --git a/src/nsidc/metgen/constants.py b/src/nsidc/metgen/constants.py index ac4ab45..5560946 100644 --- a/src/nsidc/metgen/constants.py +++ b/src/nsidc/metgen/constants.py @@ -1,32 +1,47 @@ # Default configuration values -DEFAULT_CUMULUS_ENVIRONMENT = 'uat' -DEFAULT_STAGING_KINESIS_STREAM = 'nsidc-cumulus-${environment}-external_notification' -DEFAULT_STAGING_BUCKET_NAME = 'nsidc-cumulus-${environment}-ingest-staging' +DEFAULT_CUMULUS_ENVIRONMENT = "uat" +DEFAULT_STAGING_KINESIS_STREAM = "nsidc-cumulus-${environment}-external_notification" +DEFAULT_STAGING_BUCKET_NAME = "nsidc-cumulus-${environment}-ingest-staging" DEFAULT_WRITE_CNM_FILE = False DEFAULT_OVERWRITE_UMMG = False -DEFAULT_CHECKSUM_TYPE = 'SHA256' +DEFAULT_CHECKSUM_TYPE = "SHA256" DEFAULT_NUMBER = 1000000 DEFAULT_DRY_RUN = False # JSON schema locations and versions -CNM_JSON_SCHEMA = ('nsidc.metgen.json-schema', 'cumulus_sns_schema.json') -CNM_JSON_SCHEMA_VERSION = '1.6.1' +CNM_JSON_SCHEMA = ("nsidc.metgen.json-schema", "cumulus_sns_schema.json") +CNM_JSON_SCHEMA_VERSION = "1.6.1" # Configuration sections -SOURCE_SECTION_NAME = 'Source' -COLLECTION_SECTION_NAME = 'Collection' -DESTINATION_SECTION_NAME = 'Destination' -SETTINGS_SECTION_NAME = 'Settings' +SOURCE_SECTION_NAME = "Source" +COLLECTION_SECTION_NAME = "Collection" +DESTINATION_SECTION_NAME = "Destination" +SETTINGS_SECTION_NAME = "Settings" # Spatial coverage DEFAULT_SPATIAL_AXIS_SIZE = 6 # Templates -CNM_BODY_TEMPLATE = ('nsidc.metgen.templates', 'cnm_body_template.json') -CNM_FILES_TEMPLATE = ('nsidc.metgen.templates', 
'cnm_files_template.json') -UMMG_BODY_TEMPLATE = ('nsidc.metgen.templates', 'ummg_body_template.json') -UMMG_TEMPORAL_SINGLE_TEMPLATE = ('nsidc.metgen.templates', 'ummg_temporal_single_template.json') -UMMG_TEMPORAL_RANGE_TEMPLATE = ('nsidc.metgen.templates', 'ummg_temporal_range_template.json') -UMMG_SPATIAL_GPOLYGON_TEMPLATE = ('nsidc.metgen.templates', 'ummg_horizontal_gpolygon_template.json') -UMMG_SPATIAL_POINT_TEMPLATE = ('nsidc.metgen.templates', 'ummg_horizontal_point_template.json') -UMMG_SPATIAL_RECTANGLE_TEMPLATE = ('nsidc.metgen.templates', 'ummg_horizontal_rectangle_template.json') +CNM_BODY_TEMPLATE = ("nsidc.metgen.templates", "cnm_body_template.json") +CNM_FILES_TEMPLATE = ("nsidc.metgen.templates", "cnm_files_template.json") +UMMG_BODY_TEMPLATE = ("nsidc.metgen.templates", "ummg_body_template.json") +UMMG_TEMPORAL_SINGLE_TEMPLATE = ( + "nsidc.metgen.templates", + "ummg_temporal_single_template.json", +) +UMMG_TEMPORAL_RANGE_TEMPLATE = ( + "nsidc.metgen.templates", + "ummg_temporal_range_template.json", +) +UMMG_SPATIAL_GPOLYGON_TEMPLATE = ( + "nsidc.metgen.templates", + "ummg_horizontal_gpolygon_template.json", +) +UMMG_SPATIAL_POINT_TEMPLATE = ( + "nsidc.metgen.templates", + "ummg_horizontal_point_template.json", +) +UMMG_SPATIAL_RECTANGLE_TEMPLATE = ( + "nsidc.metgen.templates", + "ummg_horizontal_rectangle_template.json", +) diff --git a/src/nsidc/metgen/metgen.py b/src/nsidc/metgen/metgen.py index 6ab454d..c79e653 100644 --- a/src/nsidc/metgen/metgen.py +++ b/src/nsidc/metgen/metgen.py @@ -2,27 +2,23 @@ import dataclasses import datetime as dt import hashlib -from importlib.resources import open_text import json -import jsonschema import logging import os.path import sys -from typing import Callable +import uuid +from importlib.resources import open_text from pathlib import Path from string import Template -import uuid +from typing import Callable +import jsonschema from funcy import all, filter, partial, rcompose, take from pyfiglet import Figlet from returns.maybe import Maybe from rich.prompt import Confirm, Prompt -from nsidc.metgen import aws -from nsidc.metgen import config -from nsidc.metgen import constants -from nsidc.metgen import netcdf_reader - +from nsidc.metgen import aws, config, constants, netcdf_reader # ------------------------------------------------------------------- CONSOLE_FORMAT = "%(message)s" @@ -32,11 +28,12 @@ # Top-level functions which expose operations to the CLI # ------------------------------------------------------------------- + def init_logging(configuration: config.Config): """ Initialize the logger for metgenc. """ - logger = logging.getLogger('metgenc') + logger = logging.getLogger("metgenc") logger.setLevel(logging.DEBUG) console_handler = logging.StreamHandler(sys.stdout) @@ -49,78 +46,135 @@ def init_logging(configuration: config.Config): logfile_handler.setFormatter(logging.Formatter(LOGFILE_FORMAT)) logger.addHandler(logfile_handler) + def banner(): """ Displays the name of this utility using incredible ASCII-art. """ - f = Figlet(font='slant') - return f.renderText('metgenc') + f = Figlet(font="slant") + return f.renderText("metgenc") + # TODO require a non-blank input for elements that have no default value def init_config(configuration_file): """ - Prompts the user for configuration values and then creates a valid configuration file. + Prompts the user for configuration values and then creates a valid + configuration file. 
""" - print("""This utility will create a granule metadata configuration file by prompting """ - """you for values for each of the configuration parameters.""") + print( + """This utility will create a granule metadata configuration file by prompting + you for values for each of the configuration parameters.""" + ) print() # prompt for config file name if it's not provided if not configuration_file: - configuration_file = Prompt.ask("configuration file name", default="example.ini") + configuration_file = Prompt.ask( + "configuration file name", default="example.ini" + ) # TODO check file name is safe else: - print(f'Creating configuration file {configuration_file}') + print(f"Creating configuration file {configuration_file}") print() - if (os.path.exists(configuration_file)): - print(f'WARNING: The {configuration_file} already exists.') + if os.path.exists(configuration_file): + print(f"WARNING: The {configuration_file} already exists.") overwrite = Confirm.ask("Overwrite?") if not overwrite: - print('Not overwriting existing file. Exiting.') + print("Not overwriting existing file. Exiting.") exit(1) cfg_parser = configparser.ConfigParser() print() - print(f'{constants.SOURCE_SECTION_NAME} Data Parameters') - print('--------------------------------------------------') + print(f"{constants.SOURCE_SECTION_NAME} Data Parameters") + print("--------------------------------------------------") cfg_parser.add_section(constants.SOURCE_SECTION_NAME) - cfg_parser.set(constants.SOURCE_SECTION_NAME, "data_dir", Prompt.ask("Data directory", default="data")) + cfg_parser.set( + constants.SOURCE_SECTION_NAME, + "data_dir", + Prompt.ask("Data directory", default="data"), + ) print() print() - print(f'{constants.COLLECTION_SECTION_NAME} Parameters') - print('--------------------------------------------------') + print(f"{constants.COLLECTION_SECTION_NAME} Parameters") + print("--------------------------------------------------") cfg_parser.add_section(constants.COLLECTION_SECTION_NAME) - cfg_parser.set(constants.COLLECTION_SECTION_NAME, "auth_id", Prompt.ask("Authoritative ID")) + cfg_parser.set( + constants.COLLECTION_SECTION_NAME, "auth_id", Prompt.ask("Authoritative ID") + ) cfg_parser.set(constants.COLLECTION_SECTION_NAME, "version", Prompt.ask("Version")) - cfg_parser.set(constants.COLLECTION_SECTION_NAME, "provider", Prompt.ask("Provider")) + cfg_parser.set( + constants.COLLECTION_SECTION_NAME, "provider", Prompt.ask("Provider") + ) print() print() - print(f'{constants.DESTINATION_SECTION_NAME} Parameters') - print('--------------------------------------------------') + print(f"{constants.DESTINATION_SECTION_NAME} Parameters") + print("--------------------------------------------------") cfg_parser.add_section(constants.DESTINATION_SECTION_NAME) - cfg_parser.set(constants.DESTINATION_SECTION_NAME, "local_output_dir", Prompt.ask("Local output directory", default="output")) - cfg_parser.set(constants.DESTINATION_SECTION_NAME, "ummg_dir", Prompt.ask("Local UMM-G output directory (relative to local output directory)", default="ummg")) - cfg_parser.set(constants.DESTINATION_SECTION_NAME, "kinesis_stream_name", Prompt.ask("Kinesis stream name", default=constants.DEFAULT_STAGING_KINESIS_STREAM)) - cfg_parser.set(constants.DESTINATION_SECTION_NAME, "staging_bucket_name", Prompt.ask("Cumulus s3 bucket name", default=constants.DEFAULT_STAGING_BUCKET_NAME)) - cfg_parser.set(constants.DESTINATION_SECTION_NAME, "write_cnm_file", Prompt.ask("Write CNM messages to files? 
(True/False)", default=constants.DEFAULT_WRITE_CNM_FILE)) - cfg_parser.set(constants.DESTINATION_SECTION_NAME, "overwrite_ummg", Prompt.ask("Overwrite existing UMM-G files? (True/False)", default=constants.DEFAULT_OVERWRITE_UMMG)) + cfg_parser.set( + constants.DESTINATION_SECTION_NAME, + "local_output_dir", + Prompt.ask("Local output directory", default="output"), + ) + cfg_parser.set( + constants.DESTINATION_SECTION_NAME, + "ummg_dir", + Prompt.ask( + "Local UMM-G output directory (relative to local output directory)", + default="ummg", + ), + ) + cfg_parser.set( + constants.DESTINATION_SECTION_NAME, + "kinesis_stream_name", + Prompt.ask( + "Kinesis stream name", default=constants.DEFAULT_STAGING_KINESIS_STREAM + ), + ) + cfg_parser.set( + constants.DESTINATION_SECTION_NAME, + "staging_bucket_name", + Prompt.ask( + "Cumulus s3 bucket name", default=constants.DEFAULT_STAGING_BUCKET_NAME + ), + ) + cfg_parser.set( + constants.DESTINATION_SECTION_NAME, + "write_cnm_file", + Prompt.ask( + "Write CNM messages to files? (True/False)", + default=constants.DEFAULT_WRITE_CNM_FILE, + ), + ) + cfg_parser.set( + constants.DESTINATION_SECTION_NAME, + "overwrite_ummg", + Prompt.ask( + "Overwrite existing UMM-G files? (True/False)", + default=constants.DEFAULT_OVERWRITE_UMMG, + ), + ) print() - print(f'{constants.SETTINGS_SECTION_NAME} Parameters') - print('--------------------------------------------------') + print(f"{constants.SETTINGS_SECTION_NAME} Parameters") + print("--------------------------------------------------") cfg_parser.add_section(constants.SETTINGS_SECTION_NAME) - cfg_parser.set(constants.SETTINGS_SECTION_NAME, "checksum_type", Prompt.ask("Checksum type", default=constants.DEFAULT_CHECKSUM_TYPE)) + cfg_parser.set( + constants.SETTINGS_SECTION_NAME, + "checksum_type", + Prompt.ask("Checksum type", default=constants.DEFAULT_CHECKSUM_TYPE), + ) print() - print(f'Saving new configuration: {configuration_file}') + print(f"Saving new configuration: {configuration_file}") with open(configuration_file, "tw") as file: cfg_parser.write(file) return configuration_file + def prepare_output_dirs(configuration): """ Generate paths to ummg and cnm output directories. 
@@ -128,35 +182,41 @@ def prepare_output_dirs(configuration): TODO: create local_output_dir, ummg_dir, and cnm subdir if they don't exist """ ummg_path = Path(configuration.local_output_dir, configuration.ummg_dir) - cnm_path = Path(configuration.local_output_dir, 'cnm') + cnm_path = Path(configuration.local_output_dir, "cnm") if configuration.overwrite_ummg: scrub_json_files(ummg_path) return (ummg_path, cnm_path) + def scrub_json_files(path): - print(f'Removing existing files in {path}') - for file_path in path.glob('*.json'): + print(f"Removing existing files in {path}") + for file_path in path.glob("*.json"): try: if os.path.isfile(file_path) or os.path.islink(file_path): os.unlink(file_path) except Exception as e: - print('Failed to delete %s: %s' % (file_path, e)) + print("Failed to delete %s: %s" % (file_path, e)) + # ------------------------------------------------------------------- # Data structures for processing Granules and recording results # ------------------------------------------------------------------- + @dataclasses.dataclass class Collection: """Collection info required to ingest a granule""" + auth_id: str version: int + @dataclasses.dataclass class Granule: """Granule to ingest""" + producer_granule_id: str collection: Maybe[Collection] = Maybe.empty data_filenames: list[str] = dataclasses.field(default_factory=list) @@ -165,26 +225,32 @@ class Granule: uuid: Maybe[str] = Maybe.empty cnm_message: Maybe[str] = Maybe.empty + @dataclasses.dataclass class Action: """An audit of a single action performed on a Granule""" + name: str successful: bool message: str startDatetime: Maybe[dt.datetime] = Maybe.empty endDatetime: Maybe[dt.datetime] = Maybe.empty + @dataclasses.dataclass class Ledger: """An audit of the Actions performed on a Granule""" + granule: Granule actions: list[Action] = dataclasses.field(default_factory=list) successful: bool = False startDatetime: Maybe[dt.datetime] = Maybe.empty endDatetime: Maybe[dt.datetime] = Maybe.empty + # ------------------------------------------------------------------- + def process(configuration: config.Config) -> None: """ Process all Granules and record the results and summary. @@ -193,15 +259,15 @@ def process(configuration: config.Config) -> None: # Ordered list of operations to perform on each granule operations = [ - granule_collection, - prepare_granule, - find_existing_ummg, - create_ummg, - stage_files if not configuration.dry_run else null_operation, - create_cnm, - write_cnm, - publish_cnm if not configuration.dry_run else null_operation, - ] + granule_collection, + prepare_granule, + find_existing_ummg, + create_ummg, + stage_files if not configuration.dry_run else null_operation, + create_cnm, + write_cnm, + publish_cnm if not configuration.dry_run else null_operation, + ] # Bind the configuration to each operation configured_operations = [partial(fn, configuration) for fn in operations] @@ -211,24 +277,23 @@ def process(configuration: config.Config) -> None: # The complete pipeline of actions initializes a Ledger, performs all the # operations, finalizes a Ledger, and logs the details of the Ledger. - pipeline = rcompose( - start_ledger, - *recorded_operations, - end_ledger, - log_ledger - ) + pipeline = rcompose(start_ledger, *recorded_operations, end_ledger, log_ledger) # Find all of the input granule files, limit the size of the list based # on the configuration, and execute the pipeline on each of the granules. 
- candidate_granules = [Granule(p.name, data_filenames=[str(p)]) - for p in Path(configuration.data_dir).glob('*.nc')] + candidate_granules = [ + Granule(p.name, data_filenames=[str(p)]) + for p in Path(configuration.data_dir).glob("*.nc") + ] granules = take(configuration.number, candidate_granules) results = [pipeline(g) for g in granules] summarize_results(results) + # ------------------------------------------------------------------- + def recorder(fn: Callable[[Granule], Granule], ledger: Ledger) -> Ledger: """ Higher-order function that, given a granule operation function and a @@ -237,7 +302,7 @@ def recorder(fn: Callable[[Granule], Granule], ledger: Ledger) -> Ledger: """ # Execute the operation and record the result successful = True - message = '' + message = "" start = dt.datetime.now() new_granule = None try: @@ -249,31 +314,30 @@ def recorder(fn: Callable[[Granule], Granule], ledger: Ledger) -> Ledger: # Store the result in the Ledger new_actions = ledger.actions.copy() - fn_name = fn.func.__name__ if hasattr(fn, 'func') else fn.__name__ + fn_name = fn.func.__name__ if hasattr(fn, "func") else fn.__name__ new_actions.append( Action( fn_name, successful=successful, message=message, startDatetime=start, - endDatetime=end + endDatetime=end, ) ) return dataclasses.replace( ledger, granule=new_granule if new_granule else ledger.granule, - actions=new_actions + actions=new_actions, ) + def start_ledger(granule: Granule) -> Ledger: """ Start a new Ledger of the operations on the given Granule. """ - return Ledger( - granule, - startDatetime=dt.datetime.now() - ) + return Ledger(granule, startDatetime=dt.datetime.now()) + def end_ledger(ledger: Ledger) -> Ledger: """ @@ -282,16 +346,19 @@ def end_ledger(ledger: Ledger) -> Ledger: return dataclasses.replace( ledger, endDatetime=dt.datetime.now(), - successful=all([a.successful for a in ledger.actions]) + successful=all([a.successful for a in ledger.actions]), ) + # ------------------------------------------------------------------- # Granule Operations # ------------------------------------------------------------------- + def null_operation(configuration: config.Config, granule: Granule) -> Granule: return granule + def granule_collection(configuration: config.Config, granule: Granule) -> Granule: """ Find the Granule's Collection and add it to the Granule. @@ -300,28 +367,32 @@ def granule_collection(configuration: config.Config, granule: Granule) -> Granul # collection information from CMR once, then associate it with each # granule. return dataclasses.replace( - granule, - collection=Collection(configuration.auth_id, configuration.version) + granule, collection=Collection(configuration.auth_id, configuration.version) ) + def prepare_granule(configuration: config.Config, granule: Granule) -> Granule: """ Prepare the Granule for creating metadata and submitting it. """ return dataclasses.replace( - granule, + granule, submission_time=dt.datetime.now(dt.timezone.utc).isoformat(), - uuid=str(uuid.uuid4()) + uuid=str(uuid.uuid4()), ) + def find_existing_ummg(configuration: config.Config, granule: Granule) -> Granule: - ummg_filename = configuration.ummg_path().joinpath(granule.producer_granule_id + '.json') + ummg_filename = configuration.ummg_path().joinpath( + granule.producer_granule_id + ".json" + ) if ummg_filename.exists(): return dataclasses.replace(granule, ummg_filename=ummg_filename) else: return granule + def create_ummg(configuration: config.Config, granule: Granule) -> Granule: """ Create the UMM-G file for the Granule. 
@@ -330,7 +401,9 @@ def create_ummg(configuration: config.Config, granule: Granule) -> Granule: if granule.ummg_filename != Maybe.empty and not configuration.overwrite_ummg: return granule - ummg_file_path = configuration.ummg_path().joinpath(granule.producer_granule_id + '.json') + ummg_file_path = configuration.ummg_path().joinpath( + granule.producer_granule_id + ".json" + ) # Populated metadata_details dict looks like: # { @@ -339,7 +412,8 @@ def create_ummg(configuration: config.Config, granule: Granule) -> Granule: # 'production_date_time' => iso datetime string, # 'temporal' => an array of one (data represent a single point in time) # or two (data cover a time range) datetime strings - # 'geometry' => { 'points': a string representation of one or more lat/lon pairs } + # 'geometry' => { 'points': a string representation of one or more + # lat/lon pairs } # } # } metadata_details = {} @@ -348,24 +422,20 @@ def create_ummg(configuration: config.Config, granule: Granule) -> Granule: # Collapse information about (possibly) multiple files into a granule summary. summary = metadata_summary(metadata_details) - summary['spatial_extent'] = populate_spatial(summary['geometry']) - summary['temporal_extent'] = populate_temporal(summary['temporal']) + summary["spatial_extent"] = populate_spatial(summary["geometry"]) + summary["temporal_extent"] = populate_temporal(summary["temporal"]) # Populate the body template body = ummg_body_template().safe_substitute( - dataclasses.asdict(granule) - | dataclasses.asdict(granule.collection) - | summary + dataclasses.asdict(granule) | dataclasses.asdict(granule.collection) | summary ) # Save it all in a file. with open(ummg_file_path, "tw") as f: print(body, file=f) - return dataclasses.replace( - granule, - ummg_filename=ummg_file_path - ) + return dataclasses.replace(granule, ummg_filename=ummg_file_path) + def stage_files(configuration: config.Config, granule: Granule) -> Granule: """ @@ -375,11 +445,12 @@ def stage_files(configuration: config.Config, granule: Granule) -> Granule: for fn in stuff: filename = os.path.basename(fn) bucket_path = s3_object_path(granule, filename) - with open(fn, 'rb') as f: + with open(fn, "rb") as f: aws.stage_file(configuration.staging_bucket_name, bucket_path, file=f) return granule + def create_cnm(configuration: config.Config, granule: Granule) -> Granule: """ Create a CNM submission message for the Granule. 
@@ -389,38 +460,48 @@ def create_cnm(configuration: config.Config, granule: Granule) -> Granule: populated_file_templates = [] granule_files = { - 'data': granule.data_filenames, - 'metadata': [granule.ummg_filename] + "data": granule.data_filenames, + "metadata": [granule.ummg_filename], } for type, files in granule_files.items(): for file in files: - populated_file_templates.append(json.loads(files_template.safe_substitute( - cnms_file_json_parts(configuration.staging_bucket_name, - granule, - file, - type)))) + populated_file_templates.append( + json.loads( + files_template.safe_substitute( + cnms_file_json_parts( + configuration.staging_bucket_name, granule, file, type + ) + ) + ) + ) return dataclasses.replace( granule, - cnm_message = body_template.safe_substitute( - dataclasses.asdict(granule) - | dataclasses.asdict(granule.collection) - | dataclasses.asdict(configuration) - | { 'file_content': json.dumps(populated_file_templates), - 'cnm_schema_version': constants.CNM_JSON_SCHEMA_VERSION } - ) + cnm_message=body_template.safe_substitute( + dataclasses.asdict(granule) + | dataclasses.asdict(granule.collection) + | dataclasses.asdict(configuration) + | { + "file_content": json.dumps(populated_file_templates), + "cnm_schema_version": constants.CNM_JSON_SCHEMA_VERSION, + } + ), ) + def write_cnm(configuration: config.Config, granule: Granule) -> Granule: """ Write a CNM message to a file. """ if configuration.write_cnm_file: - cnm_file = configuration.cnm_path().joinpath(granule.producer_granule_id + '.cnm.json') + cnm_file = configuration.cnm_path().joinpath( + granule.producer_granule_id + ".cnm.json" + ) with open(cnm_file, "tw") as f: print(granule.cnm_message, file=f) return granule + def publish_cnm(configuration: config.Config, granule: Granule) -> Granule: """ Publish a CNM message to a Kinesis stream. @@ -429,10 +510,12 @@ def publish_cnm(configuration: config.Config, granule: Granule) -> Granule: aws.post_to_kinesis(stream_name, granule.cnm_message) return granule + # ------------------------------------------------------------------- # Logging functions # ------------------------------------------------------------------- + def log_ledger(ledger: Ledger) -> Ledger: """Log a Ledger of the operations performed on a Granule.""" logger = logging.getLogger("metgenc") @@ -442,7 +525,7 @@ def log_ledger(ledger: Ledger) -> Ledger: logger.info(f" * Start : {ledger.startDatetime}") logger.info(f" * End : {ledger.endDatetime}") logger.info(f" * Successful : {ledger.successful}") - logger.debug(f" * Actions:") + logger.debug(" * Actions:") for a in ledger.actions: logger.debug(f" + Name: {a.name}") logger.debug(f" Start : {a.startDatetime}") @@ -452,6 +535,7 @@ def log_ledger(ledger: Ledger) -> Ledger: logger.debug(f" Reason : {a.message}") return ledger + def summarize_results(ledgers: list[Ledger]) -> None: """ Log a summary of the operations performed on all Granules. 
@@ -474,55 +558,63 @@ def summarize_results(ledgers: list[Ledger]) -> None: logger.info(f"Successful: {successful_count}") logger.info(f"Failed : {failed_count}") + # ------------------------------------------------------------------- # Utility functions # ------------------------------------------------------------------- + def cnms_file_json_parts(staging_bucket_name, granule, file, file_type): file_mapping = dict() file_name = os.path.basename(file) - file_mapping['file_size'] = os.path.getsize(file) - file_mapping['file_type'] = file_type - file_mapping['checksum'] = checksum(file) - file_mapping['file_name'] = file_name - file_mapping['staging_uri'] = s3_url(staging_bucket_name, granule, file_name) + file_mapping["file_size"] = os.path.getsize(file) + file_mapping["file_type"] = file_type + file_mapping["checksum"] = checksum(file) + file_mapping["file_name"] = file_name + file_mapping["staging_uri"] = s3_url(staging_bucket_name, granule, file_name) return file_mapping + def s3_url(staging_bucket_name, granule, filename): """ Returns the full s3 URL for the given file name. """ object_path = s3_object_path(granule, filename) - return f's3://{staging_bucket_name}/{object_path}' + return f"s3://{staging_bucket_name}/{object_path}" + def s3_object_path(granule, filename): """ Returns the full s3 object path for the granule """ - prefix = Template('external/${auth_id}/${version}/${uuid}/').safe_substitute({ - 'auth_id': granule.collection.auth_id, - 'version': granule.collection.version, - 'uuid': granule.uuid - }) + prefix = Template("external/${auth_id}/${version}/${uuid}/").safe_substitute( + { + "auth_id": granule.collection.auth_id, + "version": granule.collection.version, + "uuid": granule.uuid, + } + ) return prefix + filename + # size is a sum of all associated data file sizes. # all other attributes use the values from the first data file entry. def metadata_summary(details): default = list(details.values())[0] return { - 'size_in_bytes': sum([x['size_in_bytes'] for x in details.values()]), - 'production_date_time': default['production_date_time'], - 'temporal': default['temporal'], - 'geometry': default['geometry'] + "size_in_bytes": sum([x["size_in_bytes"] for x in details.values()]), + "production_date_time": default["production_date_time"], + "temporal": default["temporal"], + "geometry": default["geometry"], } + def checksum(file): BUF_SIZE = 65536 sha256 = hashlib.sha256() - with open(file, 'rb') as f: + with open(file, "rb") as f: while True: data = f.read(BUF_SIZE) if not data: @@ -531,6 +623,7 @@ def checksum(file): return sha256.hexdigest() + # TODO: Use the GranuleSpatialRepresentation value in the collection metadata # to determine the expected spatial type. See Issue #15. For now, default to # a Gpolygon. 
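For a quick illustration of what the `s3_object_path` and `s3_url` helpers above produce, here is a minimal sketch; the auth_id, version, uuid, and bucket values echo the fixtures used in the tests later in this diff and are not real data.

    from string import Template

    # Hypothetical granule values, for illustration only.
    values = {"auth_id": "NSIDC-TEST666", "version": 3, "uuid": "abcd-1234-wxyz-0987"}
    prefix = Template("external/${auth_id}/${version}/${uuid}/").safe_substitute(values)
    print(f"s3://duck-test-bucket/{prefix}granule.nc")
    # -> s3://duck-test-bucket/external/NSIDC-TEST666/3/abcd-1234-wxyz-0987/granule.nc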
@@ -539,39 +632,49 @@ def populate_spatial(spatial_values): # { 'points': string representation of an array of {lon: lat:} dicts } return ummg_spatial_gpolygon_template().safe_substitute(spatial_values) + def populate_temporal(datetime_values): if len(datetime_values) > 1: - return ummg_temporal_range_template().safe_substitute({ - 'begin_date_time': datetime_values[0], - 'end_date_time': datetime_values[1]}) + return ummg_temporal_range_template().safe_substitute( + {"begin_date_time": datetime_values[0], "end_date_time": datetime_values[1]} + ) else: - return ummg_temporal_single_template().safe_substitute({ - 'date_time': datetime_values[0]}) + return ummg_temporal_single_template().safe_substitute( + {"date_time": datetime_values[0]} + ) + def ummg_body_template(): return initialize_template(constants.UMMG_BODY_TEMPLATE) + def ummg_temporal_single_template(): return initialize_template(constants.UMMG_TEMPORAL_SINGLE_TEMPLATE) + def ummg_temporal_range_template(): return initialize_template(constants.UMMG_TEMPORAL_RANGE_TEMPLATE) + def ummg_spatial_gpolygon_template(): return initialize_template(constants.UMMG_SPATIAL_GPOLYGON_TEMPLATE) + def cnms_body_template(): return initialize_template(constants.CNM_BODY_TEMPLATE) + def cnms_files_template(): return initialize_template(constants.CNM_FILES_TEMPLATE) + def initialize_template(resource_location): with open_text(*resource_location) as template_file: template_str = template_file.read() return Template(template_str) + def validate(configuration, content_type): """ Validate local JSON files @@ -579,42 +682,47 @@ def validate(configuration, content_type): output_file_path = file_type_path(configuration, content_type) schema_resource_location = schema_file_path(content_type) - logger = logging.getLogger('metgenc') - logger.info('') + logger = logging.getLogger("metgenc") + logger.info("") logger.info(f"Validating files in {output_file_path}...") with open_text(*schema_resource_location) as sf: schema = json.load(sf) # loop through all files and validate each one - for json_file in output_file_path.glob('*.json'): + for json_file in output_file_path.glob("*.json"): apply_schema(schema, json_file) logger.info("Validations complete.") return True + def file_type_path(configuration, content_type): match content_type: - case 'cnm': + case "cnm": return configuration.cnm_path() case _: - return '' + return "" + def schema_file_path(content_type): match content_type: - case 'cnm': + case "cnm": return constants.CNM_JSON_SCHEMA case _: - return '' + return "" + def apply_schema(schema, json_file): - logger = logging.getLogger('metgenc') + logger = logging.getLogger("metgenc") with open(json_file) as jf: json_content = json.load(jf) try: jsonschema.validate(instance=json_content, schema=schema) logger.info(f"Validated {json_file}") except jsonschema.exceptions.ValidationError as err: - logger.error(f'Validation failed for "{err.validator}" in {json_file}: {err.validator_value}') + logger.error( + f"""Validation failed for "{err.validator}"\ + in {json_file}: {err.validator_value}""" + ) return True - diff --git a/src/nsidc/metgen/netcdf_reader.py b/src/nsidc/metgen/netcdf_reader.py index 61d7061..f32209b 100644 --- a/src/nsidc/metgen/netcdf_reader.py +++ b/src/nsidc/metgen/netcdf_reader.py @@ -1,14 +1,14 @@ import json import os.path -import xarray as xr from datetime import timezone -from dateutil.parser import parse -from pyproj import CRS -from pyproj import Transformer +import xarray as xr +from dateutil.parser import parse +from pyproj import 
CRS, Transformer from nsidc.metgen import constants + def extract_metadata(netcdf_path): """ Read the content at netcdf_path and return a structure with temporal coverage @@ -18,21 +18,23 @@ def extract_metadata(netcdf_path): # TODO: handle errors if any needed attributes don't exist. netcdf = xr.open_dataset(netcdf_path, decode_coords="all") - return { - 'size_in_bytes': os.path.getsize(netcdf_path), - 'production_date_time': ensure_iso(netcdf.attrs['date_modified']), - 'temporal': time_range(netcdf), - 'geometry': {'points': json.dumps(spatial_values(netcdf))} + return { + "size_in_bytes": os.path.getsize(netcdf_path), + "production_date_time": ensure_iso(netcdf.attrs["date_modified"]), + "temporal": time_range(netcdf), + "geometry": {"points": json.dumps(spatial_values(netcdf))}, } + def time_range(netcdf): """Return an array of datetime strings""" datetimes = [] - datetimes.append(ensure_iso(netcdf.attrs['time_coverage_start'])) - datetimes.append(ensure_iso(netcdf.attrs['time_coverage_end'])) + datetimes.append(ensure_iso(netcdf.attrs["time_coverage_start"])) + datetimes.append(ensure_iso(netcdf.attrs["time_coverage_end"])) return datetimes + def spatial_values(netcdf): """ Return an array of dicts, each dict representing one lat/lon pair like so: @@ -49,15 +51,20 @@ def spatial_values(netcdf): crs_4326 = CRS.from_epsg(4326) xformer = Transformer.from_crs(data_crs, crs_4326, always_xy=True) - # Adding padding should give us values that match up to the netcdf.attrs.geospatial_bounds - pad = abs(float(netcdf.crs.GeoTransform.split()[1]))/2 + # Adding padding should give us values that match up to the + # netcdf.attrs.geospatial_bounds + pad = abs(float(netcdf.crs.GeoTransform.split()[1])) / 2 xdata = [x - pad if x < 0 else x + pad for x in netcdf.x.data] ydata = [y - pad if y < 0 else y + pad for y in netcdf.y.data] # Extract the perimeter points and transform to lon, lat perimeter = [xformer.transform(x, y) for (x, y) in thinned_perimeter(xdata, ydata)] - return [{'Longitude': round(lon, 8), 'Latitude': round(lat, 8)} for (lon, lat) in perimeter] + return [ + {"Longitude": round(lon, 8), "Latitude": round(lat, 8)} + for (lon, lat) in perimeter + ] + def thinned_perimeter(xdata, ydata): """ @@ -71,16 +78,25 @@ def thinned_perimeter(xdata, ydata): # Pull out just the perimeter of the grid, counter-clockwise direction, # starting at top left. # xindex[0], yindex[0]..yindex[-2] - left = [(x,y) for x in xdata[:1] for i in yindices[:ylen-1] for y in [ydata[i]]] + left = [(x, y) for x in xdata[:1] for i in yindices[: ylen - 1] for y in [ydata[i]]] # xindex[0]..xindex[-2], yindex[-1] - bottom = [(x,y) for i in xindices[:xlen-1] for x in [xdata[i]] for y in ydata[-1:]] + bottom = [ + (x, y) for i in xindices[: xlen - 1] for x in [xdata[i]] for y in ydata[-1:] + ] # xindex[-1], yindex[-1]..yindex[1] - right = [(x,y) for x in xdata[-1:] for i in yindices[ylen-1:0:-1] for y in [ydata[i]]] + right = [ + (x, y) + for x in xdata[-1:] + for i in yindices[ylen - 1 : 0 : -1] + for y in [ydata[i]] + ] # xindex[-1]..xindex[0], yindex[0] - top = [(x,y) for i in xindices[xlen-1::-1] for x in [xdata[i]] for y in ydata[:1]] + top = [ + (x, y) for i in xindices[xlen - 1 :: -1] for x in [xdata[i]] for y in ydata[:1] + ] # The last point should already be the same as the first, given that top # uses all of the xindices, but just in case... 
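To make the four comprehensions above concrete, here is a tiny standalone walk-through for a 4 x 3 grid. It assumes `xindices`/`yindices` come from `index_subset` (shown in the next hunk) and that `xlen`/`ylen` are their lengths; those definitions sit outside the hunks in this patch, so this is a sketch of the idea rather than a copy of the real function.

    xdata = [0.0, 1.0, 2.0, 3.0]        # 4 columns
    ydata = [10.0, 11.0, 12.0]          # 3 rows
    xindices = list(range(len(xdata)))  # index_subset(4) -> [0, 1, 2, 3] (short axis: keep all)
    yindices = list(range(len(ydata)))  # index_subset(3) -> [0, 1, 2]
    xlen, ylen = len(xindices), len(yindices)

    left = [(x, y) for x in xdata[:1] for i in yindices[: ylen - 1] for y in [ydata[i]]]
    bottom = [(x, y) for i in xindices[: xlen - 1] for x in [xdata[i]] for y in ydata[-1:]]
    right = [(x, y) for x in xdata[-1:] for i in yindices[ylen - 1 : 0 : -1] for y in [ydata[i]]]
    top = [(x, y) for i in xindices[xlen - 1 :: -1] for x in [xdata[i]] for y in ydata[:1]]

    perimeter = left + bottom + right + top
    # left   -> [(0.0, 10.0), (0.0, 11.0)]                           down the left edge
    # bottom -> [(0.0, 12.0), (1.0, 12.0), (2.0, 12.0)]              along the bottom
    # right  -> [(3.0, 12.0), (3.0, 11.0)]                           up the right edge
    # top    -> [(3.0, 10.0), (2.0, 10.0), (1.0, 10.0), (0.0, 10.0)] back to the start
    assert perimeter[0] == perimeter[-1]  # the ring closes without the extra append

Each edge drops its final point so corners are not repeated, and `top` ends back at the starting corner, which is why the `top.append(left[0])` in the next hunk is only a safety net.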
@@ -88,7 +104,8 @@ def thinned_perimeter(xdata, ydata): top.append(left[0]) # concatenate the "sides" and return the perimeter points - return(left + bottom + right + top) + return left + bottom + right + top + def index_subset(original_length): """ @@ -96,16 +113,23 @@ def index_subset(original_length): somewhat arbitrary, and approximately evenly spaced, additional number of indices in between the beginning and end. """ - if(original_length > 6): - return [round(index*count*.2) for count in range(constants.DEFAULT_SPATIAL_AXIS_SIZE) for index in [original_length - 1]] + if original_length > 6: + return [ + round(index * count * 0.2) + for count in range(constants.DEFAULT_SPATIAL_AXIS_SIZE) + for index in [original_length - 1] + ] else: return list(range(original_length)) + def ensure_iso(datetime_str): """ Parse ISO-standard datetime strings without a timezone identifier. """ iso_obj = parse(datetime_str) - return iso_obj.replace(tzinfo=timezone.utc) \ - .isoformat(timespec='milliseconds') \ + return ( + iso_obj.replace(tzinfo=timezone.utc) + .isoformat(timespec="milliseconds") .replace("+00:00", "Z") + ) diff --git a/tests/test_aws.py b/tests/test_aws.py index fbb0633..9bf0014 100644 --- a/tests/test_aws.py +++ b/tests/test_aws.py @@ -1,15 +1,12 @@ import json import os from tempfile import TemporaryFile -from unittest.mock import mock_open, patch, Mock import boto3 -from moto import mock_aws import pytest - +from moto import mock_aws from nsidc.metgen import aws - # Unit tests for the 'aws' module functions. # # The test boundary is the aws module's interface with the AWS library's boto3 @@ -18,6 +15,7 @@ # correct parameters, correctly handle their return values, and handle any # exceptions they may throw. + @pytest.fixture(scope="module") def aws_credentials(): """Mocked AWS Credentials for moto.""" @@ -27,26 +25,27 @@ def aws_credentials(): os.environ["AWS_SESSION_TOKEN"] = "testing" os.environ["AWS_DEFAULT_REGION"] = "us-west-2" + @pytest.fixture def kinesis(aws_credentials): """A mocked Kinesis client.""" with mock_aws(): yield boto3.client("kinesis", region_name="us-west-2") + @pytest.fixture def kinesis_stream_summary(kinesis): """Create a Kinesis stream and return its summary info.""" kinesis.create_stream(StreamName="duck-test-stream", ShardCount=1) summary = kinesis.describe_stream_summary(StreamName="duck-test-stream") - return summary['StreamDescriptionSummary'] + return summary["StreamDescriptionSummary"] + @pytest.fixture def test_message(): """Returns a JSON string for testing.""" - return json.dumps({ - 'foo': 333, - 'bar': 'xyzzy' - }) + return json.dumps({"foo": 333, "bar": "xyzzy"}) + @pytest.fixture def s3(aws_credentials): @@ -54,18 +53,18 @@ def s3(aws_credentials): with mock_aws(): yield boto3.client("s3") + @pytest.fixture def s3_bucket(s3): """Create an S3 buket and return the bucket name.""" bucket_name = "duck-test-bucket" - response = s3.create_bucket( - Bucket=bucket_name, - CreateBucketConfiguration={ - 'LocationConstraint': 'us-west-2' - }, + s3.create_bucket( + Bucket=bucket_name, + CreateBucketConfiguration={"LocationConstraint": "us-west-2"}, ) return bucket_name + @pytest.fixture def science_data(): return """ @@ -74,89 +73,116 @@ def science_data(): bar """ + def test_kinesis_stream_exists_for_valid_name(kinesis_stream_summary): stream_name = "duck-test-stream" assert aws.kinesis_stream_exists(stream_name) + def test_kinesis_stream_exists_for_invalid_name(kinesis_stream_summary): stream_name = "xyzzy" assert not aws.kinesis_stream_exists(stream_name) 
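These two tests lean on the fact that `kinesis_stream_exists` (aws.py, near the top of this diff) maps a failed `describe_stream_summary` call to `False`. As written it swallows every exception, including credential or throttling errors. A narrower variant is sketched below; it assumes that catching botocore's generated `ResourceNotFoundException` is the behavior actually wanted, a refinement this patch does not itself make.

    import boto3


    def kinesis_stream_exists(stream_name):
        """
        Predicate which determines if a Kinesis stream with the given name
        exists; only a missing stream maps to False, other client errors
        propagate to the caller.
        """
        client = boto3.client("kinesis")
        try:
            client.describe_stream_summary(StreamName=stream_name)
            return True
        except client.exceptions.ResourceNotFoundException:
            return False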
+ def test_post_to_kinesis(kinesis_stream_summary, test_message): """Given a Kinesis stream name and a message, it should post successfully.""" - stream_name = kinesis_stream_summary['StreamName'] + stream_name = kinesis_stream_summary["StreamName"] success = aws.post_to_kinesis(stream_name, test_message) assert type(success) is str + def test_post_to_kinesis_returns_shard_id(kinesis_stream_summary, test_message): - """Given a Kinesis stream name and a test message, the response should include the shard id.""" - stream_name = kinesis_stream_summary['StreamName'] + """ + Given a Kinesis stream name and a test message, the response should include + the shard id. + """ + stream_name = kinesis_stream_summary["StreamName"] result = aws.post_to_kinesis(stream_name, test_message) assert "shardId" in result + def test_post_to_kinesis_with_invalid_stream_name(kinesis_stream_summary, test_message): - """Given an invalid Kinesis stream name and a message, it should raise an exception.""" + """ + Given an invalid Kinesis stream name and a message, it should raise an + exception. + """ invalid_stream_name = "abcd-1234-wxyz-0987" with pytest.raises(Exception): aws.post_to_kinesis(invalid_stream_name, test_message) + def test_post_to_kinesis_with_empty_message(kinesis_stream_summary): - """Given a Kinesis stream name, it should raise an exception when posting an empty message.""" - stream_name = kinesis_stream_summary['StreamName'] + """ + Given a Kinesis stream name, it should raise an exception when posting + an empty message. + """ + stream_name = kinesis_stream_summary["StreamName"] with pytest.raises(Exception): aws.post_to_kinesis(stream_name, None) + def test_stage_data_to_s3(s3, s3_bucket, science_data): object_name = "/external/NSIDC-TEST666/3/abcd-1234-wxyz-0987/science-data.bin" aws.stage_file(s3_bucket, object_name, data=science_data) s3_object = s3.get_object( - Bucket=s3_bucket, - Key=object_name, + Bucket=s3_bucket, + Key=object_name, ) - object_lines = [line.decode(encoding="utf-8") for line in s3_object['Body'].readlines()] + object_lines = [ + line.decode(encoding="utf-8") for line in s3_object["Body"].readlines() + ] object_data = "".join(object_lines) assert object_data == science_data + def test_stage_data_to_s3_with_invalid_bucket_name(s3_bucket, science_data): bucket_name = "xyzzy" object_name = "/external/NSIDC-TEST666/3/abcd-1234-wxyz-0987/science-data.bin" with pytest.raises(Exception): aws.stage_file(bucket_name, object_name, data=science_data) + def test_stage_data_to_s3_with_missing_object_name(s3, s3_bucket, science_data): with pytest.raises(Exception): aws.stage_file(s3_bucket, None, data=science_data) + def test_stage_data_to_s3_with_no_data(s3, s3_bucket): object_name = "/external/NSIDC-TEST666/3/abcd-1234-wxyz-0987/science-data.bin" with pytest.raises(Exception): aws.stage_file(s3_bucket, object_name, data=None) + def test_stage_file_to_s3(s3, s3_bucket, science_data): with TemporaryFile() as source_file: - source_file.write(science_data.encode('UTF-8')) + source_file.write(science_data.encode("UTF-8")) source_file.seek(0) object_name = "/external/NSIDC-TEST666/3/abcd-1234-wxyz-0987/science-data.bin" aws.stage_file(s3_bucket, object_name, file=source_file) s3_object = s3.get_object( - Bucket=s3_bucket, - Key=object_name, + Bucket=s3_bucket, + Key=object_name, ) - object_lines = [line.decode(encoding="utf-8") for line in s3_object['Body'].readlines()] + object_lines = [ + line.decode(encoding="utf-8") for line in s3_object["Body"].readlines() + ] object_data = 
"".join(object_lines) assert object_data == science_data + def test_stage_file_requires_data_or_file(s3_bucket): with pytest.raises(Exception): - aws.stage_file(s3_bucket, 'foo') + aws.stage_file(s3_bucket, "foo") + def test_staging_bucket_exists_for_valid_name(s3_bucket): bucket_name = "duck-test-bucket" assert aws.staging_bucket_exists(bucket_name) + def test_staging_bucket_exists_for_invalid_name(s3_bucket): bucket_name = "xyzzy" assert not aws.staging_bucket_exists(bucket_name) diff --git a/tests/test_cli.py b/tests/test_cli.py index 7540de3..cf92b8a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,11 +1,9 @@ from unittest.mock import patch -from click.testing import CliRunner import pytest - +from click.testing import CliRunner from nsidc.metgen.cli import cli - # Unit tests for the 'cli' module functions. # # The test boundary is the cli module's interface with the metgen module, so in @@ -14,53 +12,64 @@ # parameters, correctly handle their return values, and handle any exceptions # they may throw. + @pytest.fixture def cli_runner(): return CliRunner() + def test_without_subcommand(cli_runner): result = cli_runner.invoke(cli) assert result.exit_code == 0 - assert 'Usage' in result.output - assert 'Commands' in result.output - for subcommand in ['info', 'init', 'process']: + assert "Usage" in result.output + assert "Commands" in result.output + for subcommand in ["info", "init", "process"]: assert subcommand in result.output + def test_help(cli_runner): - result = cli_runner.invoke(cli, ['--help']) + result = cli_runner.invoke(cli, ["--help"]) assert result.exit_code == 0 + def test_info_requires_config(cli_runner): - result = cli_runner.invoke(cli, ['info']) + result = cli_runner.invoke(cli, ["info"]) assert result.exit_code != 0 + def test_info_with_config(cli_runner): - result = cli_runner.invoke(cli, ['info', '--config', './example/modscg.ini']) + result = cli_runner.invoke(cli, ["info", "--config", "./example/modscg.ini"]) assert result.exit_code == 0 -@patch('nsidc.metgen.config.Config.show') + +@patch("nsidc.metgen.config.Config.show") def test_info_with_config_summarizes(mock, cli_runner): - result = cli_runner.invoke(cli, ['info', '--config', './example/modscg.ini']) + result = cli_runner.invoke(cli, ["info", "--config", "./example/modscg.ini"]) assert mock.called assert result.exit_code == 0 -@patch('nsidc.metgen.metgen.process') + +@patch("nsidc.metgen.metgen.process") def test_process_requires_config(mock, cli_runner): - result = cli_runner.invoke(cli, ['process']) + result = cli_runner.invoke(cli, ["process"]) assert not mock.called assert result.exit_code != 0 -@patch('nsidc.metgen.config.validate') -@patch('nsidc.metgen.metgen.process') + +@patch("nsidc.metgen.config.validate") +@patch("nsidc.metgen.metgen.process") def test_process_with_config_calls_process(mock_validate, mock_process, cli_runner): - result = cli_runner.invoke(cli, ['process', '--config', './example/modscg.ini']) + cli_runner.invoke(cli, ["process", "--config", "./example/modscg.ini"]) assert mock_process.called -@patch('nsidc.metgen.config.validate') -@patch('nsidc.metgen.metgen.process') + +@patch("nsidc.metgen.config.validate") +@patch("nsidc.metgen.metgen.process") def test_process_with_granule_limit(mock_validate, mock_process, cli_runner): number_files = 2 - result = cli_runner.invoke(cli, ['process', '-n', str(number_files), '--config', './example/modscg.ini']) + result = cli_runner.invoke( + cli, ["process", "-n", str(number_files), "--config", "./example/modscg.ini"] + ) assert 
mock_process.called args = mock_process.call_args.args @@ -69,53 +78,70 @@ def test_process_with_granule_limit(mock_validate, mock_process, cli_runner): assert configuration.number == number_files assert result.exit_code == 0 -@patch('nsidc.metgen.config.configuration') -@patch('nsidc.metgen.metgen.process') -@patch('nsidc.metgen.config.validate') -def test_process_with_no_write_cnm(mock_validate, process_mock, configuration_mock, cli_runner): - result = cli_runner.invoke(cli, ['process', '--config', './example/modscg.ini']) + +@patch("nsidc.metgen.config.configuration") +@patch("nsidc.metgen.metgen.process") +@patch("nsidc.metgen.config.validate") +def test_process_with_no_write_cnm( + mock_validate, process_mock, configuration_mock, cli_runner +): + result = cli_runner.invoke(cli, ["process", "--config", "./example/modscg.ini"]) assert configuration_mock.called args = configuration_mock.call_args.args overrides = args[1] - assert overrides['write_cnm_file'] == None + assert overrides["write_cnm_file"] is None assert result.exit_code == 0 -@patch('nsidc.metgen.config.configuration') -@patch('nsidc.metgen.metgen.process') -@patch('nsidc.metgen.config.validate') -def test_process_with_write_cnm(mock_validate, process_mock, configuration_mock, cli_runner): - result = cli_runner.invoke(cli, ['process', '-wc', '--config', './example/modscg.ini']) + +@patch("nsidc.metgen.config.configuration") +@patch("nsidc.metgen.metgen.process") +@patch("nsidc.metgen.config.validate") +def test_process_with_write_cnm( + mock_validate, process_mock, configuration_mock, cli_runner +): + result = cli_runner.invoke( + cli, ["process", "-wc", "--config", "./example/modscg.ini"] + ) assert configuration_mock.called args = configuration_mock.call_args.args overrides = args[1] - assert overrides['write_cnm_file'] == True + assert overrides["write_cnm_file"] assert result.exit_code == 0 -@patch('nsidc.metgen.config.configuration') -@patch('nsidc.metgen.metgen.process') -@patch('nsidc.metgen.config.validate') -def test_process_with_no_overwrite(mock_validate, process_mock, configuration_mock, cli_runner): - result = cli_runner.invoke(cli, ['process', '--config', './example/modscg.ini']) + +@patch("nsidc.metgen.config.configuration") +@patch("nsidc.metgen.metgen.process") +@patch("nsidc.metgen.config.validate") +def test_process_with_no_overwrite( + mock_validate, process_mock, configuration_mock, cli_runner +): + result = cli_runner.invoke(cli, ["process", "--config", "./example/modscg.ini"]) assert configuration_mock.called args = configuration_mock.call_args.args overrides = args[1] - assert overrides['overwrite_ummg'] == None + assert overrides["overwrite_ummg"] is None assert result.exit_code == 0 -@patch('nsidc.metgen.config.configuration') -@patch('nsidc.metgen.metgen.process') -@patch('nsidc.metgen.config.validate') -def test_process_with_overwrite(mock_validate, process_mock, configuration_mock, cli_runner): - result = cli_runner.invoke(cli, ['process', '-o', '--config', './example/modscg.ini']) + +@patch("nsidc.metgen.config.configuration") +@patch("nsidc.metgen.metgen.process") +@patch("nsidc.metgen.config.validate") +def test_process_with_overwrite( + mock_validate, process_mock, configuration_mock, cli_runner +): + result = cli_runner.invoke( + cli, ["process", "-o", "--config", "./example/modscg.ini"] + ) assert configuration_mock.called args = configuration_mock.call_args.args overrides = args[1] - assert overrides['overwrite_ummg'] == True + assert overrides["overwrite_ummg"] assert result.exit_code == 
0 + # TODO: When process raises an exception, cli handles it and displays a message # and has non-zero exit code diff --git a/tests/test_config.py b/tests/test_config.py index f07951f..f9815b1 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,12 +1,9 @@ -from configparser import ConfigParser, ExtendedInterpolation import dataclasses +from configparser import ConfigParser, ExtendedInterpolation from unittest.mock import patch import pytest - -from nsidc.metgen import config -from nsidc.metgen import constants - +from nsidc.metgen import config, constants # Unit tests for the 'config' module functions. # @@ -16,45 +13,42 @@ # call them with the correct parameters, correctly handle their return values, # and handle any exceptions they may throw. + @pytest.fixture def expected_keys(): - return set(['environment', - 'data_dir', - 'auth_id', - 'version', - 'provider', - 'local_output_dir', - 'ummg_dir', - 'kinesis_stream_name', - 'staging_bucket_name', - 'write_cnm_file', - 'overwrite_ummg', - 'checksum_type', - 'number', - 'dry_run',]) + return set( + [ + "environment", + "data_dir", + "auth_id", + "version", + "provider", + "local_output_dir", + "ummg_dir", + "kinesis_stream_name", + "staging_bucket_name", + "write_cnm_file", + "overwrite_ummg", + "checksum_type", + "number", + "dry_run", + ] + ) + @pytest.fixture def cfg_parser(): cp = ConfigParser(interpolation=ExtendedInterpolation()) - cp['Source'] = { - 'data_dir': '/data/example' - } - cp['Collection'] = { - 'auth_id': 'DATA-0001', - 'version': 42, - 'provider': 'FOO' - } - cp['Destination'] = { - 'local_output_dir': '/output/here', - 'ummg_dir': 'ummg', - 'kinesis_stream_name': "xyzzy-${environment}-stream", - 'staging_bucket_name': "xyzzy-${environment}-bucket", - 'write_cnm_file': False - } - cp['Settings'] = { - 'checksum_type': 'SHA256', - 'number': 1 + cp["Source"] = {"data_dir": "/data/example"} + cp["Collection"] = {"auth_id": "DATA-0001", "version": 42, "provider": "FOO"} + cp["Destination"] = { + "local_output_dir": "/output/here", + "ummg_dir": "ummg", + "kinesis_stream_name": "xyzzy-${environment}-stream", + "staging_bucket_name": "xyzzy-${environment}-bucket", + "write_cnm_file": False, } + cp["Settings"] = {"checksum_type": "SHA256", "number": 1} return cp @@ -62,47 +56,51 @@ def test_config_parser_without_filename(): with pytest.raises(ValueError): config.config_parser_factory(None) -@patch('nsidc.metgen.metgen.os.path.exists', return_value = True) + +@patch("nsidc.metgen.metgen.os.path.exists", return_value=True) def test_config_parser_return_type(mock): - result = config.config_parser_factory('foo.ini') + result = config.config_parser_factory("foo.ini") assert isinstance(result, ConfigParser) -@patch('nsidc.metgen.metgen.os.path.exists', return_value = True) + +@patch("nsidc.metgen.metgen.os.path.exists", return_value=True) def test_config_parser_handles_empty_strings_for_booleans(mock): - cp = config.config_parser_factory('foo.ini') - cp['foo'] = { - 'success': '' - } - assert cp.getboolean('foo', 'success') == False + cp = config.config_parser_factory("foo.ini") + cp["foo"] = {"success": ""} + assert not cp.getboolean("foo", "success") + def test_config_from_config_parser(cfg_parser): cfg = config.configuration(cfg_parser, {}, constants.DEFAULT_CUMULUS_ENVIRONMENT) assert isinstance(cfg, config.Config) + def test_config_with_no_write_cnm(cfg_parser, expected_keys): cfg = config.configuration(cfg_parser, {}, constants.DEFAULT_CUMULUS_ENVIRONMENT) config_keys = set(cfg.__dict__) assert len(config_keys - 
expected_keys) == 0 - assert cfg.environment == 'uat' - assert cfg.data_dir == '/data/example' - assert cfg.auth_id == 'DATA-0001' - assert cfg.kinesis_stream_name == 'xyzzy-uat-stream' + assert cfg.environment == "uat" + assert cfg.data_dir == "/data/example" + assert cfg.auth_id == "DATA-0001" + assert cfg.kinesis_stream_name == "xyzzy-uat-stream" assert not cfg.write_cnm_file + def test_config_with_write_cnm(cfg_parser, expected_keys): - cfg_parser.set("Destination", "write_cnm_file", 'True') + cfg_parser.set("Destination", "write_cnm_file", "True") cfg = config.configuration(cfg_parser, {}) config_keys = set(cfg.__dict__) assert len(config_keys - expected_keys) == 0 - assert cfg.data_dir == '/data/example' - assert cfg.auth_id == 'DATA-0001' - assert cfg.kinesis_stream_name == 'xyzzy-uat-stream' - assert cfg.environment == 'uat' - assert cfg.write_cnm_file == True + assert cfg.data_dir == "/data/example" + assert cfg.auth_id == "DATA-0001" + assert cfg.kinesis_stream_name == "xyzzy-uat-stream" + assert cfg.environment == "uat" + assert cfg.write_cnm_file + def test_config_with_no_overwrite_ummg(cfg_parser, expected_keys): cfg = config.configuration(cfg_parser, {}, constants.DEFAULT_CUMULUS_ENVIRONMENT) @@ -111,37 +109,59 @@ def test_config_with_no_overwrite_ummg(cfg_parser, expected_keys): assert len(config_keys - expected_keys) == 0 assert not cfg.overwrite_ummg + def test_config_with_overwrite_ummg(cfg_parser, expected_keys): - cfg_parser.set("Destination", "overwrite_ummg", 'True') + cfg_parser.set("Destination", "overwrite_ummg", "True") cfg = config.configuration(cfg_parser, {}) config_keys = set(cfg.__dict__) assert len(config_keys - expected_keys) == 0 - assert cfg.overwrite_ummg == True + assert cfg.overwrite_ummg + def test_get_configuration_value(cfg_parser): environment = constants.DEFAULT_CUMULUS_ENVIRONMENT - result = config._get_configuration_value(environment, "Source", "data_dir", str, cfg_parser, {}) + result = config._get_configuration_value( + environment, "Source", "data_dir", str, cfg_parser, {} + ) assert result == cfg_parser.get("Source", "data_dir") + def test_get_configuration_value_with_override(cfg_parser): environment = constants.DEFAULT_CUMULUS_ENVIRONMENT - overrides = { 'data_dir': 'foobar' } - result = config._get_configuration_value(environment, "Source", "data_dir", str, cfg_parser, overrides) - assert result == overrides['data_dir'] + overrides = {"data_dir": "foobar"} + result = config._get_configuration_value( + environment, "Source", "data_dir", str, cfg_parser, overrides + ) + assert result == overrides["data_dir"] + def test_get_configuration_value_interpolates_the_environment(cfg_parser): environment = constants.DEFAULT_CUMULUS_ENVIRONMENT - result = config._get_configuration_value(environment, "Destination", "kinesis_stream_name", str, cfg_parser, {}) + result = config._get_configuration_value( + environment, "Destination", "kinesis_stream_name", str, cfg_parser, {} + ) assert result == "xyzzy-uat-stream" -@pytest.mark.parametrize("section,option,expected", [ - ("Destination", "kinesis_stream_name", f"nsidc-cumulus-{constants.DEFAULT_CUMULUS_ENVIRONMENT}-external_notification"), - ("Destination", "staging_bucket_name", f"nsidc-cumulus-{constants.DEFAULT_CUMULUS_ENVIRONMENT}-ingest-staging"), + +@pytest.mark.parametrize( + "section,option,expected", + [ + ( + "Destination", + "kinesis_stream_name", + f"nsidc-cumulus-{constants.DEFAULT_CUMULUS_ENVIRONMENT}-external_notification", + ), + ( + "Destination", + "staging_bucket_name", + 
f"nsidc-cumulus-{constants.DEFAULT_CUMULUS_ENVIRONMENT}-ingest-staging", + ), ("Destination", "write_cnm_file", constants.DEFAULT_WRITE_CNM_FILE), ("Settings", "checksum_type", constants.DEFAULT_CHECKSUM_TYPE), ("Settings", "number", constants.DEFAULT_NUMBER), - ]) + ], +) def test_configuration_has_good_defaults(cfg_parser, section, option, expected): cfg_parser.remove_option(section, option) result = config.configuration(cfg_parser, {}, constants.DEFAULT_CUMULUS_ENVIRONMENT) @@ -149,17 +169,18 @@ def test_configuration_has_good_defaults(cfg_parser, section, option, expected): assert result_dict[option] == expected -@patch('nsidc.metgen.metgen.os.path.exists', return_value = True) -@patch('nsidc.metgen.metgen.aws.kinesis_stream_exists', return_value = True) -@patch('nsidc.metgen.metgen.aws.staging_bucket_exists', return_value = True) +@patch("nsidc.metgen.metgen.os.path.exists", return_value=True) +@patch("nsidc.metgen.metgen.aws.kinesis_stream_exists", return_value=True) +@patch("nsidc.metgen.metgen.aws.staging_bucket_exists", return_value=True) def test_validate_with_valid_checks(m1, m2, m3, cfg_parser): cfg = config.configuration(cfg_parser, {}) valid = config.validate(cfg) - assert valid == True + assert valid + -@patch('nsidc.metgen.metgen.os.path.exists', return_value = False) -@patch('nsidc.metgen.metgen.aws.kinesis_stream_exists', return_value = False) -@patch('nsidc.metgen.metgen.aws.staging_bucket_exists', return_value = False) +@patch("nsidc.metgen.metgen.os.path.exists", return_value=False) +@patch("nsidc.metgen.metgen.aws.kinesis_stream_exists", return_value=False) +@patch("nsidc.metgen.metgen.aws.staging_bucket_exists", return_value=False) def test_validate_with_invalid_checks(m1, m2, m3, cfg_parser): cfg = config.configuration(cfg_parser, {}) with pytest.raises(config.ValidationError) as exc_info: diff --git a/tests/test_metgen.py b/tests/test_metgen.py index f04ccf2..f6a619a 100644 --- a/tests/test_metgen.py +++ b/tests/test_metgen.py @@ -1,12 +1,9 @@ -from configparser import ConfigParser import datetime as dt from unittest.mock import patch -from funcy import identity, partial import pytest - -from nsidc.metgen import config -from nsidc.metgen import metgen +from funcy import identity, partial +from nsidc.metgen import config, metgen # Unit tests for the 'metgen' module functions. # @@ -16,133 +13,155 @@ # metgen functions call them with the correct parameters, correctly handle # their return values, and handle any exceptions they may throw. 
+ @pytest.fixture def granule_metadata_list(): return { - 'first_id': { - 'size_in_bytes': 100, - 'production_date_time': 'then', - 'temporal': 'now', - 'geometry': 'big' + "first_id": { + "size_in_bytes": 100, + "production_date_time": "then", + "temporal": "now", + "geometry": "big", + }, + "second_id": { + "size_in_bytes": 200, + "production_date_time": "before", + "temporal": "after", + "geometry": "small", }, - 'second_id': { - 'size_in_bytes': 200, - 'production_date_time': 'before', - 'temporal': 'after', - 'geometry': 'small' - } } + @pytest.fixture def one_granule_metadata(): return { - 'first_id': { - 'size_in_bytes': 150, - 'production_date_time': 'then', - 'temporal': 'now', - 'geometry': 'big' + "first_id": { + "size_in_bytes": 150, + "production_date_time": "then", + "temporal": "now", + "geometry": "big", } } + @pytest.fixture def fake_config(): - return config.Config('uat', 'data', 'auth_id', 'version', - 'foobar', 'output', 'ummg', 'stream', - 'bucket', True, True, 'sha', 3) + return config.Config( + "uat", + "data", + "auth_id", + "version", + "foobar", + "output", + "ummg", + "stream", + "bucket", + True, + True, + "sha", + 3, + ) + def test_banner(): assert len(metgen.banner()) > 0 + def test_gets_single_file_size(one_granule_metadata): summary = metgen.metadata_summary(one_granule_metadata) - assert summary['size_in_bytes'] == 150 + assert summary["size_in_bytes"] == 150 + def test_sums_multiple_file_sizes(granule_metadata_list): summary = metgen.metadata_summary(granule_metadata_list) - assert summary['size_in_bytes'] == 300 + assert summary["size_in_bytes"] == 300 + def test_uses_first_file_as_default(granule_metadata_list): summary = metgen.metadata_summary(granule_metadata_list) - assert summary['production_date_time'] == 'then' - assert summary['temporal'] == 'now' - assert summary['geometry'] == 'big' + assert summary["production_date_time"] == "then" + assert summary["temporal"] == "now" + assert summary["geometry"] == "big" + def test_returns_only_gpolygon(): - result = metgen.populate_spatial({'points': 'some list of points'}) + result = metgen.populate_spatial({"points": "some list of points"}) assert "GPolygons" in result + def test_returns_single_datetime(): result = metgen.populate_temporal([123]) assert '"SingleDateTime": "123"' in result + def test_returns_datetime_range(): result = metgen.populate_temporal([123, 456]) - assert 'RangeDateTime' in result + assert "RangeDateTime" in result assert '"BeginningDateTime": "123"' in result assert '"EndingDateTime": "456"' in result + def test_s3_object_path_has_no_leading_slash(): - granule = metgen.Granule( - 'foo', - metgen.Collection('ABCD', 2), - uuid='abcd-1234' - ) - expected = 'external/ABCD/2/abcd-1234/xyzzy.bin' - assert metgen.s3_object_path(granule, 'xyzzy.bin') == expected + granule = metgen.Granule("foo", metgen.Collection("ABCD", 2), uuid="abcd-1234") + expected = "external/ABCD/2/abcd-1234/xyzzy.bin" + assert metgen.s3_object_path(granule, "xyzzy.bin") == expected + def test_s3_url_simple_case(): - staging_bucket_name = 'xyzzy-bucket' - granule = metgen.Granule( - 'foo', - metgen.Collection('ABCD', 2), - uuid='abcd-1234' - ) - expected = 's3://xyzzy-bucket/external/ABCD/2/abcd-1234/xyzzy.bin' - assert metgen.s3_url(staging_bucket_name, granule, 'xyzzy.bin') == expected + staging_bucket_name = "xyzzy-bucket" + granule = metgen.Granule("foo", metgen.Collection("ABCD", 2), uuid="abcd-1234") + expected = "s3://xyzzy-bucket/external/ABCD/2/abcd-1234/xyzzy.bin" + assert 
metgen.s3_url(staging_bucket_name, granule, "xyzzy.bin") == expected + @patch("nsidc.metgen.metgen.dt.datetime") def test_start_ledger(mock_datetime): now = dt.datetime(2099, 7, 4, 10, 11, 12) mock_datetime.now.return_value = now - granule = metgen.Granule('abcd-1234') + granule = metgen.Granule("abcd-1234") actual = metgen.start_ledger(granule) assert actual.granule == granule assert actual.startDatetime == now + @patch("nsidc.metgen.metgen.dt.datetime") def test_end_ledger(mock_datetime): now = dt.datetime(2099, 7, 4, 10, 11, 12) mock_datetime.now.return_value = now - granule = metgen.Granule('abcd-1234') - ledger = metgen.Ledger(granule, [metgen.Action('foo', True, '')], startDatetime = now) + granule = metgen.Granule("abcd-1234") + ledger = metgen.Ledger(granule, [metgen.Action("foo", True, "")], startDatetime=now) actual = metgen.end_ledger(ledger) assert actual.granule == granule - assert actual.successful == True + assert actual.successful assert actual.startDatetime == now assert actual.endDatetime == now + @patch("nsidc.metgen.metgen.dt.datetime") def test_end_ledger_with_unsuccessful_actions(mock_datetime): now = dt.datetime(2099, 7, 4, 10, 11, 12) mock_datetime.now.return_value = now - granule = metgen.Granule('abcd-1234') - ledger = metgen.Ledger(granule, - [metgen.Action('foo', False, ''), metgen.Action('bar', False, 'Oops')], - startDatetime = now) + granule = metgen.Granule("abcd-1234") + ledger = metgen.Ledger( + granule, + [metgen.Action("foo", False, ""), metgen.Action("bar", False, "Oops")], + startDatetime=now, + ) actual = metgen.end_ledger(ledger) assert actual.granule == granule - assert actual.successful == False + assert not actual.successful assert actual.startDatetime == now assert actual.endDatetime == now + def test_recorder(): - granule = metgen.Granule('abcd-1234') + granule = metgen.Granule("abcd-1234") ledger = metgen.start_ledger(granule) new_ledger = partial(metgen.recorder, identity)(ledger) @@ -150,9 +169,11 @@ def test_recorder(): assert new_ledger.granule == ledger.granule assert len(new_ledger.actions) == 1 + def test_recorder_with_failing_operation(): - granule = metgen.Granule('abcd-1234') + granule = metgen.Granule("abcd-1234") ledger = metgen.start_ledger(granule) + def failing_op(): raise Exception() @@ -160,4 +181,4 @@ def failing_op(): assert new_ledger.granule == ledger.granule assert len(new_ledger.actions) == 1 - assert new_ledger.actions[0].successful == False + assert not new_ledger.actions[0].successful diff --git a/tests/test_netcdf_reader.py b/tests/test_netcdf_reader.py index 91ba63a..b6d7e9c 100644 --- a/tests/test_netcdf_reader.py +++ b/tests/test_netcdf_reader.py @@ -1,9 +1,5 @@ -from unittest.mock import patch - import pytest - -from nsidc.metgen import constants -from nsidc.metgen import netcdf_reader +from nsidc.metgen import constants, netcdf_reader # Unit tests for the 'netcdf_reader' module functions. # @@ -13,49 +9,87 @@ # call them with the correct parameters, correctly handle their return values, # and handle any exceptions they may throw. 
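For context on the perimeter-size expectations in the tests that follow, here is a small standalone sketch of the `index_subset` thinning from the `netcdf_reader.py` hunk above. It is not the module itself: the helper name and the 10-point axis are illustrative, and it assumes `constants.DEFAULT_SPATIAL_AXIS_SIZE` is 6, which is consistent with the `> 6` guard and the `DEFAULT_SPATIAL_AXIS_SIZE * 4 - 3` length asserted below.

    # Hedged sketch of the index-thinning logic, assuming the axis-size
    # constant is 6; a 10-point axis is used purely as an example.
    DEFAULT_SPATIAL_AXIS_SIZE = 6


    def index_subset_sketch(original_length):
        # Keep the first and last indices plus roughly evenly spaced ones between.
        if original_length > 6:
            last = original_length - 1
            return [
                round(last * count * 0.2)
                for count in range(DEFAULT_SPATIAL_AXIS_SIZE)
            ]
        return list(range(original_length))


    print(index_subset_sketch(10))  # [0, 2, 4, 5, 7, 9] -> 6 indices per axis
    # With 6 points kept per side, the four sides of the thinned perimeter
    # presumably share their corner points and the polygon is closed by
    # repeating the first point, which would account for the
    # (DEFAULT_SPATIAL_AXIS_SIZE * 4) - 3 == 21 length asserted below.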
+ @pytest.fixture def xdata(): return list(range(0, 6, 2)) + @pytest.fixture def ydata(): return list(range(0, 25, 5)) + @pytest.fixture def big_xdata(): return list(range(0, 20, 2)) + @pytest.fixture def big_ydata(): return list(range(0, 50, 5)) + def test_large_grids_are_thinned(big_xdata, big_ydata): result = netcdf_reader.thinned_perimeter(big_xdata, big_ydata) assert len(result) == (constants.DEFAULT_SPATIAL_AXIS_SIZE * 4) - 3 + def test_small_grids_are_not_thinned(xdata, ydata): result = netcdf_reader.thinned_perimeter(xdata, ydata) assert len(result) == (len(xdata) * 2) + (len(ydata) * 2) - 3 + def test_perimeter_is_closed_polygon(xdata, ydata): result = netcdf_reader.thinned_perimeter(xdata, ydata) assert result[0] == result[-1] + def test_no_other_duplicate_values(big_xdata, big_ydata): result = netcdf_reader.thinned_perimeter(big_xdata, big_ydata) result_set = set(result) assert len(result_set) == len(result) - 1 -@pytest.mark.parametrize("input,expected", [ - pytest.param('2001-01-01', '2001-01-01T00:00:00.000Z', id="Date and no time"), - pytest.param('2001-01-01 18:59:59', '2001-01-01T18:59:59.000Z', id="Date with time"), - pytest.param('2001-01-01 18:59.5', '2001-01-01T18:59:30.000Z', id="Datetime and fractional minutes"), - pytest.param('2001-01-01 18:59.500', '2001-01-01T18:59:30.000Z', id="Datetime and zero padded fractional minutes"), - pytest.param('2001-01-01 18:59.34', '2001-01-01T18:59:20.000Z', id="Datetime and other fractional minutes value"), - pytest.param('2001-01-01 18:59.999', '2001-01-01T18:59:59.000Z', id="Datetime and other fractional minutes value"), - pytest.param('2001-01-01 18:59:20.666', '2001-01-01T18:59:20.666Z', id="Datetime and fractional seconds"), - pytest.param('2001-01-01 18:59', '2001-01-01T18:59:00.000Z', id="Datetime and hours/minutes"), -]) + +@pytest.mark.parametrize( + "input,expected", + [ + pytest.param("2001-01-01", "2001-01-01T00:00:00.000Z", id="Date and no time"), + pytest.param( + "2001-01-01 18:59:59", "2001-01-01T18:59:59.000Z", id="Date with time" + ), + pytest.param( + "2001-01-01 18:59.5", + "2001-01-01T18:59:30.000Z", + id="Datetime and fractional minutes", + ), + pytest.param( + "2001-01-01 18:59.500", + "2001-01-01T18:59:30.000Z", + id="Datetime and zero padded fractional minutes", + ), + pytest.param( + "2001-01-01 18:59.34", + "2001-01-01T18:59:20.000Z", + id="Datetime and other fractional minutes value", + ), + pytest.param( + "2001-01-01 18:59.999", + "2001-01-01T18:59:59.000Z", + id="Datetime and other fractional minutes value", + ), + pytest.param( + "2001-01-01 18:59:20.666", + "2001-01-01T18:59:20.666Z", + id="Datetime and fractional seconds", + ), + pytest.param( + "2001-01-01 18:59", + "2001-01-01T18:59:00.000Z", + id="Datetime and hours/minutes", + ), + ], +) def test_correctly_reads_date_time_strings(input, expected): result = netcdf_reader.ensure_iso(input) assert result == expected