From 3d0125c1df879e1da6c9bff08a48293724d279db Mon Sep 17 00:00:00 2001 From: "Kevin W. Beam" Date: Tue, 26 Nov 2024 17:32:19 -0700 Subject: [PATCH 1/3] Add the --dry-run cli option --- src/nsidc/metgen/cli.py | 24 ++++++++++++++++-------- src/nsidc/metgen/config.py | 6 ++++++ src/nsidc/metgen/metgen.py | 7 +++++-- 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/src/nsidc/metgen/cli.py b/src/nsidc/metgen/cli.py index 55432e4..f25d150 100644 --- a/src/nsidc/metgen/cli.py +++ b/src/nsidc/metgen/cli.py @@ -44,18 +44,26 @@ def validate(config_filename, content_type): metgen.validate(configuration, content_type) @cli.command() -@click.option('-c', '--config', 'config_filename', help='Path to configuration file', required=True) -@click.option('-e', '--env', help='environment', default=constants.DEFAULT_CUMULUS_ENVIRONMENT, show_default=True) -@click.option('-n', '--number', help="Process at most 'count' granules.", metavar='count', required=False, default=constants.DEFAULT_NUMBER) -@click.option('-wc', '--write-cnm', is_flag=True, required=False, default=None, help="Write CNM messages to files.") -@click.option('-o', '--overwrite', is_flag=True, required=False, default=None, help="Overwrite existing UMM-G files.") -def process(config_filename, env, overwrite, write_cnm, number): +@click.option('-c', '--config', 'config_filename', required=True, + help='Path to configuration file') +@click.option('-d', '--dry-run', is_flag=True, required=False, default=None, + help='Don\'t stage files on S3 or publish messages to Kinesis') +@click.option('-e', '--env', help='environment', + default=constants.DEFAULT_CUMULUS_ENVIRONMENT, show_default=True) +@click.option('-n', '--number', metavar='count', required=False, + default=constants.DEFAULT_NUMBER, help="Process at most 'count' granules.") +@click.option('-wc', '--write-cnm', is_flag=True, required=False, default=None, + help="Write CNM messages to files.") +@click.option('-o', '--overwrite', is_flag=True, required=False, default=None, + help="Overwrite existing UMM-G files.") +def process(config_filename, dry_run, env, number, write_cnm, overwrite): """Processes science data files based on configuration file contents.""" click.echo(metgen.banner()) overrides = { - 'write_cnm_file': write_cnm, + 'dry_run': dry_run, + 'number': number, 'overwrite_ummg': overwrite, - 'number': number + 'write_cnm_file': write_cnm, } try: configuration = config.configuration(config.config_parser_factory(config_filename), overrides, env) diff --git a/src/nsidc/metgen/config.py b/src/nsidc/metgen/config.py index 499fd6c..a448be6 100644 --- a/src/nsidc/metgen/config.py +++ b/src/nsidc/metgen/config.py @@ -31,6 +31,7 @@ class Config: overwrite_ummg: bool checksum_type: str number: int + dry_run: bool def show(self): # TODO add section headings in the right spot (if we think we need them in the output) @@ -40,6 +41,10 @@ def show(self): for k,v in self.__dict__.items(): LOGGER.info(f' + {k}: {v}') + if self.dry_run: + LOGGER.info('') + LOGGER.info('Note: The dry-run option was included, so no files will be staged and no CNM messages published.') + def ummg_path(self): return Path(self.local_output_dir, self.ummg_dir) @@ -106,6 +111,7 @@ def configuration(config_parser, overrides, environment=constants.DEFAULT_CUMULU _get_configuration_value(environment, 'Destination', 'overwrite_ummg', bool, config_parser, overrides), _get_configuration_value(environment, 'Settings', 'checksum_type', str, config_parser, overrides), _get_configuration_value(environment, 'Settings', 'number', int, config_parser, overrides), + _get_configuration_value(environment, 'Settings', 'dry_run', bool, config_parser, overrides), ) except Exception as e: return Exception('Unable to read the configuration file', e) diff --git a/src/nsidc/metgen/metgen.py b/src/nsidc/metgen/metgen.py index ea1c34f..42631ce 100644 --- a/src/nsidc/metgen/metgen.py +++ b/src/nsidc/metgen/metgen.py @@ -196,10 +196,10 @@ def process(configuration: config.Config) -> None: prepare_granule, find_existing_ummg, create_ummg, - stage_files, + stage_files if not configuration.dry_run else null_operation, create_cnm, write_cnm, - publish_cnm, + publish_cnm if not configuration.dry_run else null_operation, ] # Bind the configuration to each operation @@ -288,6 +288,9 @@ def end_ledger(ledger: Ledger) -> Ledger: # Granule Operations # ------------------------------------------------------------------- +def null_operation(configuration: config.Config, granule: Granule) -> Granule: + return granule + def granule_collection(configuration: config.Config, granule: Granule) -> Granule: """ Find the Granule's Collection and add it to the Granule. From 7ddfbf646dadbabc780b75532e09c83ea4a62862 Mon Sep 17 00:00:00 2001 From: "Kevin W. Beam" Date: Wed, 27 Nov 2024 11:31:14 -0700 Subject: [PATCH 2/3] Fix failing tests after adding dry_run --- src/nsidc/metgen/config.py | 3 ++- src/nsidc/metgen/constants.py | 1 + tests/test_config.py | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/nsidc/metgen/config.py b/src/nsidc/metgen/config.py index a448be6..1639a44 100644 --- a/src/nsidc/metgen/config.py +++ b/src/nsidc/metgen/config.py @@ -95,6 +95,7 @@ def configuration(config_parser, overrides, environment=constants.DEFAULT_CUMULU 'overwrite_ummg': constants.DEFAULT_OVERWRITE_UMMG, 'checksum_type': constants.DEFAULT_CHECKSUM_TYPE, 'number': constants.DEFAULT_NUMBER, + 'dry_run': constants.DEFAULT_DRY_RUN, } try: return Config( @@ -114,7 +115,7 @@ def configuration(config_parser, overrides, environment=constants.DEFAULT_CUMULU _get_configuration_value(environment, 'Settings', 'dry_run', bool, config_parser, overrides), ) except Exception as e: - return Exception('Unable to read the configuration file', e) + raise Exception('Unable to read the configuration file', e) def validate(configuration): """ diff --git a/src/nsidc/metgen/constants.py b/src/nsidc/metgen/constants.py index fcfa51b..4bcfe70 100644 --- a/src/nsidc/metgen/constants.py +++ b/src/nsidc/metgen/constants.py @@ -6,6 +6,7 @@ DEFAULT_OVERWRITE_UMMG = False DEFAULT_CHECKSUM_TYPE = 'SHA256' DEFAULT_NUMBER = 1000000 +DEFAULT_DRY_RUN = False # JSON schema locations and versions CNM_JSON_SCHEMA = 'src/nsidc/metgen/json-schema/cumulus_sns_schema.json' diff --git a/tests/test_config.py b/tests/test_config.py index c70e7c7..f07951f 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -30,7 +30,8 @@ def expected_keys(): 'write_cnm_file', 'overwrite_ummg', 'checksum_type', - 'number']) + 'number', + 'dry_run',]) @pytest.fixture def cfg_parser(): From 18a34944cc8a341d6c13e5ec7206f9559d2231d1 Mon Sep 17 00:00:00 2001 From: "Kevin W. Beam" Date: Wed, 27 Nov 2024 11:42:46 -0700 Subject: [PATCH 3/3] Don't validate AWS params in dry_run mode --- src/nsidc/metgen/config.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/nsidc/metgen/config.py b/src/nsidc/metgen/config.py index 1639a44..b40449a 100644 --- a/src/nsidc/metgen/config.py +++ b/src/nsidc/metgen/config.py @@ -44,6 +44,7 @@ def show(self): if self.dry_run: LOGGER.info('') LOGGER.info('Note: The dry-run option was included, so no files will be staged and no CNM messages published.') + LOGGER.info('') def ummg_path(self): return Path(self.local_output_dir, self.ummg_dir) @@ -122,12 +123,18 @@ def validate(configuration): Validates each value in the configuration. """ validations = [ - ['data_dir', lambda dir: os.path.exists(dir), 'The data_dir does not exist.'], - ['local_output_dir', lambda dir: os.path.exists(dir), 'The local_output_dir does not exist.'], - # ['ummg_dir', lambda dir: os.path.exists(dir), 'The ummg_dir does not exist.'], ## validate "local_output_dir/ummg_dir" as part of issue-71 - ['kinesis_stream_name', lambda name: aws.kinesis_stream_exists(name), 'The kinesis stream does not exist.'], - ['staging_bucket_name', lambda name: aws.staging_bucket_exists(name), 'The staging bucket does not exist.'], - ['number', lambda number: 0 < number, 'The number of granules to process must be positive.'], + ['data_dir', lambda dir: os.path.exists(dir), + 'The data_dir does not exist.'], + ['local_output_dir', lambda dir: os.path.exists(dir), + 'The local_output_dir does not exist.'], + # ['ummg_dir', lambda dir: os.path.exists(dir), + # 'The ummg_dir does not exist.'], ## validate "local_output_dir/ummg_dir" as part of issue-71 + ['kinesis_stream_name', lambda name: configuration.dry_run or aws.kinesis_stream_exists(name), + 'The kinesis stream does not exist.'], + ['staging_bucket_name', lambda name: configuration.dry_run or aws.staging_bucket_exists(name), + 'The staging bucket does not exist.'], + ['number', lambda number: 0 < number, + 'The number of granules to process must be positive.'], ] errors = [msg for name, fn, msg in validations if not fn(getattr(configuration, name))] if len(errors) == 0: