diff --git a/.github/workflows/push.yml b/.github/workflows/build-test.yml similarity index 71% rename from .github/workflows/push.yml rename to .github/workflows/build-test.yml index 3014ff5..44ff703 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/build-test.yml @@ -1,10 +1,9 @@ +name: Build & test on: push: - branches: [main] - pull_request: jobs: - test: + build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -37,4 +36,16 @@ jobs: - run: poetry install --no-interaction - - run: poetry run pytest + - name: Check formatting + uses: astral-sh/ruff-action@v1 + with: + args: "format --check" + src: "./src" + + - name: Lint code + uses: astral-sh/ruff-action@v1 + with: + src: "./src" + + - name: Test + run: poetry run pytest diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..d147919 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,74 @@ +name: Publish release to PyPI + +on: + release: + types: [released] + +jobs: + build: + name: Build package + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: 3.12 + + - name: cache poetry install + uses: actions/cache@v4 + with: + path: ~/.local + key: poetry-1.8.3-0 + + - uses: snok/install-poetry@v1 + with: + version: 1.8.3 + virtualenvs-create: true + virtualenvs-in-project: true + + - name: cache deps + id: cache-deps + uses: actions/cache@v4 + with: + path: .venv + key: pydeps-${{ hashFiles('**/poetry.lock') }} + + - run: poetry install --no-interaction --no-root + if: steps.cache-deps.outputs.cache-hit != 'true' + + - run: poetry install --no-interaction + + - run: poetry build + + - uses: actions/upload-artifact@v4 + with: + name: release-dists + path: dist/ + + publish: + name: Publish to PyPI + runs-on: ubuntu-latest + needs: [build] + permissions: + id-token: write + attestations: write + contents: read + environment: + name: PyPI + url: https://pypi.org/p/nsidc-metgenc + steps: + - uses: actions/download-artifact@v4 + with: + name: release-dists + path: dist/ + + - name: Generate artifact attestation for sdist and wheel + uses: actions/attest-build-provenance@v1.4.3 + with: + subject-path: "dist/*" + + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + packages-dir: dist/ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..6fa1c41 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,20 @@ +MetGenC Changelog + +## UNRELEASED + +This is the Minimum Viable Product (MVP) release of MetGenC. The +features include: + + * Provides a prompt-driven means of configuring MetGenC to ingest + a new collection. + * Processing is driven by a configuration file for control of various + aspects of the ingest. + * Generates a UUID and submission time for each granule. + * Creates UMM-G compliant metadata for each source granule. + * The UMM-G includes required attributes, including temporal and + spatial bounds. + * Generates a Cumulus Notification Message (CNM) for each granule. + * Stages the science data files and their UMM-G metadata in + a configurable S3 bucket location. + * Submits the CNM message to a configurable Kinesis stream in + order to trigger a Cumulus workflow. 
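The Kinesis submission described in the final CHANGELOG bullet is, in practice, a single boto3 `put_record` call (the same pattern used by `src/nsidc/metgen/aws.py` later in this diff). A minimal sketch, assuming the caller supplies the stream name and a serialized CNM message:

    import boto3

    def submit_cnm(stream_name: str, cnm_message: str) -> str:
        """Post a CNM message to a Kinesis stream and return the receiving shard id."""
        client = boto3.client("kinesis")
        result = client.put_record(
            StreamName=stream_name,
            Data=cnm_message,
            PartitionKey="metgenc-duck",  # fixed partition key, as in aws.py
        )
        return result["ShardId"]
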
diff --git a/README.md b/README.md index ba24d6d..aaaf40f 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,9 @@ # MetGenC +![build & test workflow](https://github.com/nsidc/granule-metgen/actions/workflows/build-test.yml/badge.svg) +![workflow workflow](https://github.com/nsidc/granule-metgen/actions/workflows/publish.yml/badge.svg) + The `MetGenC` toolkit enables Operations staff and data producers to create metadata files conforming to NASA's Common Metadata Repository UMM-G specification and ingest data directly to NASA EOSDIS’s Cumulus archive. Cumulus is an @@ -31,14 +34,8 @@ or $ python3 --version -Next, you must also install [Poetry](https://python-poetry.org/) either by using the [official -installer](https://python-poetry.org/docs/#installing-with-the-official-installer) -if you’re comfortable following the instructions, or by using a package -manager (like Homebrew) if this is more familiar to you. When successfully -installed, you should be able to run: - - $ poetry --version - Poetry (version 1.8.3) +Next, install the AWS commandline interface (CLI) by [following the appropriate +instructions for your platform](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html). Lastly, you will need to create & setup AWS credentials for yourself. The ways in which this can be accomplished are detailed in the **AWS Credentials** section below. @@ -125,29 +122,13 @@ Notes: the value `Y`, rather than assuming the variable is named `x`. -## Installation of MetGenC from GitHub - -Make a local directory (i.e., on your computer), and then `cd` into that -directory. Clone the `granule-metgen` repository using ssh if you have [added -ssh keys to your GitHub -account](https://docs.github.com/en/authentication/connecting-to-github-with-ssh/adding-a-new-ssh-key-to-your-github-account) -or via https if you haven't: +## Installing MetGenC - $ mkdir -p ~/my-projects; cd ~/my-projects - # Install using ssh: - $ git clone git@github.com:nsidc/granule-metgen.git - OR - # Install using https: - $ git clone https://github.com/nsidc/granule-metgen.git +MetGenC can be installed from [PyPI](https://pypi.org/): -Enter the `granule-metgen` directory and run Poetry to have it install the `granule-metgen` dependencies. Then start a new shell in which you can run the tool: + $ pip install nsidc-metgenc - $ cd granule-metgen - $ poetry install - $ poetry shell - -With the Poetry shell running, start the metgenc tool JUST to verify that it’s working by requesting its usage options and having them -returned. There’s more to do (detailed in the **Usage** section below) before MetGenC can be run to successfully create ummg files, cnm messages, and stage data to an S3 bucket for ingest!):: +That's it! Now we're ready to run MetGenC and see what it can do: $ metgenc --help Usage: metgenc [OPTIONS] COMMAND [ARGS]... @@ -288,6 +269,15 @@ TBD * [Python](https://www.python.org/) v3.12+ * [Poetry](https://python-poetry.org/docs/#installing-with-the-official-installer) +You can install [Poetry](https://python-poetry.org/) either by using the [official +installer](https://python-poetry.org/docs/#installing-with-the-official-installer) +if you’re comfortable following the instructions, or by using a package +manager (like Homebrew) if this is more familiar to you. 
When successfully +installed, you should be able to run: + + $ poetry --version + Poetry (version 1.8.3) + ### Installing Dependencies * Use Poetry to create and activate a virtual environment @@ -298,7 +288,7 @@ TBD $ poetry install -### Run tests: +### Run tests $ poetry run pytest @@ -306,6 +296,69 @@ TBD $ poetry run ptw . --now --clear +### Running the linter for code style issues: + + $ poetry run ruff check + +[The `ruff` tool](https://docs.astral.sh/ruff/linter/) will check +the source code for conformity with various style rules. Some of +these can be fixed by `ruff` itself, and if so, the output will +describe how to automatically fix these issues. + +The CI/CD pipeline will run these checks whenever new commits are +pushed to GitHub, and the results will be available in the GitHub +Actions output. + +### Running the code formatter + + $ poetry run ruff format + +[The `ruff` tool](https://docs.astral.sh/ruff/formatter/) will check +the source code for conformity with source code formatting rules. It +will also fix any issues it finds and leave the changes uncommitted +so you can review the changes prior to adding them to the codebase. + +As with the linter, the CI/CD pipeline will run the formatter when +commits are pushed to GitHub. + +### Ruff integration with your editor + +Rather than running `ruff` manually from the commandline, it can be +integrated with the editor of your choice. See the +[ruff editor integration](https://docs.astral.sh/ruff/editors/) guide. + +### Releasing + +* Update the CHANGELOG to include details of the changes included in the new + release. The version should be the string literal 'UNRELEASED' (without + single-quotes). It will be replaced with the actual version number after + we bump the version below. + +* Show the current version and the possible next versions: + + $ bump-my-version show-bump + 0.3.0 ── bump ─┬─ major ─ 1.0.0 + ├─ minor ─ 0.4.0 + ╰─ patch ─ 0.3.1 + +* Bump the version to the desired number, for example: + + $ bump-my-version bump minor + + You will see the latest commit & tag by looking at `git log`. You can then + push these to GitHub (`git push --follow-tags`) to trigger the CI/CD + workflow. + +* On the [GitHub repository](https://github.com/nsidc/granule-metgen), click + 'Releases' and follow the steps documented on the + [GitHub Releases page](https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository#creating-a-release). + Draft a new Release using the version tag created above. After you have + published the release, the MetGenC Publish GHA workflow will be started. + Check that the workflow succeeds on the + [MetGenC Actions page](https://github.com/nsidc/granule-metgen/actions), + and verify that the + [new MetGenC release is available on PyPI](https://pypi.org/project/nsidc-metgenc/). + ## Credit This content was developed by the National Snow and Ice Data Center with funding from diff --git a/poetry.lock b/poetry.lock index 96e4a10..60f1b00 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "annotated-types" @@ -114,6 +114,43 @@ urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version > [package.extras] crt = ["awscrt (==0.22.0)"] +[[package]] +name = "bracex" +version = "2.5.post1" +description = "Bash style brace expander." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "bracex-2.5.post1-py3-none-any.whl", hash = "sha256:13e5732fec27828d6af308628285ad358047cec36801598368cb28bc631dbaf6"}, + {file = "bracex-2.5.post1.tar.gz", hash = "sha256:12c50952415bfa773d2d9ccb8e79651b8cdb1f31a42f6091b804f6ba2b4a66b6"}, +] + +[[package]] +name = "bump-my-version" +version = "0.28.1" +description = "Version bump your Python project" +optional = false +python-versions = ">=3.8" +files = [ + {file = "bump_my_version-0.28.1-py3-none-any.whl", hash = "sha256:df7fdb02a1b43c122a6714df6d1fe4efc7a1220b5638ca5a0eb3018813c1b222"}, + {file = "bump_my_version-0.28.1.tar.gz", hash = "sha256:e608def5191baf505b6cde88bd679a0a95fc4cfeace4247adb60ac0f8a7e57ee"}, +] + +[package.dependencies] +click = "*" +pydantic = ">=2.0.0" +pydantic-settings = "*" +questionary = "*" +rich = "*" +rich-click = "*" +tomlkit = "*" +wcmatch = ">=8.5.1" + +[package.extras] +dev = ["generate-changelog (>=0.7.6)", "git-fame (>=1.12.2)", "pip-tools", "pre-commit"] +docs = ["black", "markdown-customblocks", "mdx-truly-sane-lists", "mkdocs", "mkdocs-click", "mkdocs-drawio", "mkdocs-gen-files", "mkdocs-git-authors-plugin", "mkdocs-git-committers-plugin", "mkdocs-git-revision-date-localized-plugin (>=1.2.6)", "mkdocs-include-markdown-plugin", "mkdocs-literate-nav", "mkdocs-material", "mkdocstrings[python]", "python-frontmatter"] +test = ["coverage", "freezegun", "pre-commit", "pytest", "pytest-cov", "pytest-mock", "pytest-sugar"] + [[package]] name = "certifi" version = "2024.8.30" @@ -1310,6 +1347,20 @@ files = [ {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, ] +[[package]] +name = "prompt-toolkit" +version = "3.0.36" +description = "Library for building powerful interactive command lines in Python" +optional = false +python-versions = ">=3.6.2" +files = [ + {file = "prompt_toolkit-3.0.36-py3-none-any.whl", hash = "sha256:aa64ad242a462c5ff0363a7b9cfe696c20d55d9fc60c11fd8e632d064804d305"}, + {file = "prompt_toolkit-3.0.36.tar.gz", hash = "sha256:3e163f254bef5a03b146397d7c1963bd3e2812f0964bb9a24e6ec761fd28db63"}, +] + +[package.dependencies] +wcwidth = "*" + [[package]] name = "py-partiql-parser" version = "0.5.6" @@ -1459,6 +1510,26 @@ files = [ [package.dependencies] typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" +[[package]] +name = "pydantic-settings" +version = "2.6.1" +description = "Settings management using Pydantic" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic_settings-2.6.1-py3-none-any.whl", hash = "sha256:7fb0637c786a558d3103436278a7c4f1cfd29ba8973238a50c5bb9a55387da87"}, + {file = "pydantic_settings-2.6.1.tar.gz", hash = "sha256:e0f92546d8a9923cb8941689abf85d6601a8c19a23e97a34b2964a2e3f813ca0"}, +] + +[package.dependencies] +pydantic = ">=2.7.0" +python-dotenv = ">=0.21.0" + +[package.extras] +azure-key-vault = ["azure-identity (>=1.16.0)", "azure-keyvault-secrets (>=4.8.0)"] +toml = ["tomli (>=2.0.1)"] +yaml = ["pyyaml (>=6.0.1)"] + [[package]] name = "pyfiglet" version = "1.0.2" @@ -1583,6 +1654,20 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-dotenv" +version = "1.0.1" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, + {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = 
"sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "pytz" version = "2024.2" @@ -1683,6 +1768,20 @@ files = [ {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, ] +[[package]] +name = "questionary" +version = "2.0.1" +description = "Python library to build pretty command line user prompts ⭐️" +optional = false +python-versions = ">=3.8" +files = [ + {file = "questionary-2.0.1-py3-none-any.whl", hash = "sha256:8ab9a01d0b91b68444dff7f6652c1e754105533f083cbe27597c8110ecc230a2"}, + {file = "questionary-2.0.1.tar.gz", hash = "sha256:bcce898bf3dbb446ff62830c86c5c6fb9a22a54146f0f5597d3da43b10d8fc8b"}, +] + +[package.dependencies] +prompt_toolkit = ">=2.0,<=3.0.36" + [[package]] name = "referencing" version = "0.35.1" @@ -1990,6 +2089,26 @@ pygments = ">=2.13.0,<3.0.0" [package.extras] jupyter = ["ipywidgets (>=7.5.1,<9)"] +[[package]] +name = "rich-click" +version = "1.8.4" +description = "Format click help output nicely with rich" +optional = false +python-versions = ">=3.7" +files = [ + {file = "rich_click-1.8.4-py3-none-any.whl", hash = "sha256:2d2841b3cebe610d5682baa1194beaf78ab00c4fa31931533261b5eba2ee80b7"}, + {file = "rich_click-1.8.4.tar.gz", hash = "sha256:0f49471f04439269d0e66a6f43120f52d11d594869a2a0be600cfb12eb0616b9"}, +] + +[package.dependencies] +click = ">=7" +rich = ">=10.7" +typing-extensions = ">=4" + +[package.extras] +dev = ["mypy", "packaging", "pre-commit", "pytest", "pytest-cov", "rich-codex", "ruff", "types-setuptools"] +docs = ["markdown-include", "mkdocs", "mkdocs-glightbox", "mkdocs-material-extensions", "mkdocs-material[imaging] (>=9.5.18,<9.6.0)", "mkdocs-rss-plugin", "mkdocstrings[python]", "rich-codex"] + [[package]] name = "rpds-py" version = "0.21.0" @@ -2249,6 +2368,17 @@ mpmath = ">=1.1.0,<1.4" [package.extras] dev = ["hypothesis (>=6.70.0)", "pytest (>=7.1.0)"] +[[package]] +name = "tomlkit" +version = "0.13.2" +description = "Style preserving TOML library" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tomlkit-0.13.2-py3-none-any.whl", hash = "sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde"}, + {file = "tomlkit-0.13.2.tar.gz", hash = "sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79"}, +] + [[package]] name = "toolz" version = "1.0.0" @@ -2341,6 +2471,31 @@ files = [ [package.extras] watchmedo = ["PyYAML (>=3.10)"] +[[package]] +name = "wcmatch" +version = "10.0" +description = "Wildcard/glob file name matcher." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "wcmatch-10.0-py3-none-any.whl", hash = "sha256:0dd927072d03c0a6527a20d2e6ad5ba8d0380e60870c383bc533b71744df7b7a"}, + {file = "wcmatch-10.0.tar.gz", hash = "sha256:e72f0de09bba6a04e0de70937b0cf06e55f36f37b3deb422dfaf854b867b840a"}, +] + +[package.dependencies] +bracex = ">=2.1.1" + +[[package]] +name = "wcwidth" +version = "0.2.13" +description = "Measures the displayed width of unicode strings in a terminal" +optional = false +python-versions = "*" +files = [ + {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, + {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, +] + [[package]] name = "werkzeug" version = "3.1.3" @@ -2476,4 +2631,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "f23f8f81f98749abc04b7adaae503168707b49f305dc487e5755b24b4caacc54" +content-hash = "a3d1919a01b2ce7eb2072972a0d827ce6b467bf857f23a5e5045aa513304fd1e" diff --git a/pyproject.toml b/pyproject.toml index feaf465..482e648 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] -name = "nsidc-metgen" -version = "0.1.0" -description = "The nsidc-metgen package enables data producers as well as Operations staff managing the data ingest workflow to create metadata files conforming to NASA's Common Metadata Repository UMM-G specification." +name = "nsidc-metgenc" +version = "0.4.0" +description = "The nsidc-metgenc package enables data producers as well as Operations staff managing the data ingest workflow to create metadata files conforming to NASA's Common Metadata Repository UMM-G specification." authors = ["National Snow and Ice Data Center (NSIDC) "] readme = "README.md" package-mode = true @@ -30,11 +30,49 @@ pytest-watcher = "^0.4.3" [tool.poetry.group.dev.dependencies] ruff = "^0.5.5" mypy = "^1.11.1" +bump-my-version = "^0.28.1" + +[tool.bumpversion] +current_version = "0.4.0" +parse = "(?P\\d+)\\.(?P\\d+)\\.(?P\\d+)" +serialize = ["{major}.{minor}.{patch}"] +search = "{current_version}" +replace = "{new_version}" +regex = false +ignore_missing_version = false +ignore_missing_files = false +tag = true +sign_tags = false +tag_name = "v{new_version}" +tag_message = "Bump version: {current_version} → {new_version}" +allow_dirty = false +commit = true +message = "Bump version: {current_version} → {new_version}" +commit_args = "" +setup_hooks = [] +pre_commit_hooks = [] +post_commit_hooks = [] + +[[tool.bumpversion.files]] +filename = "src/nsidc/metgen/__init__.py" + +[[tool.bumpversion.files]] +filename = "pyproject.toml" +search = 'version = "{current_version}"' +replace = 'version = "{new_version}"' + +[[tool.bumpversion.files]] +filename = "CHANGELOG.md" +search = 'UNRELEASED' +replace = 'v{new_version}' [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" +[tool.ruff.lint] +select = ["E", "F", "I"] + [tool.mypy] files = ["src", "tests"] diff --git a/src/nsidc/metgen/__init__.py b/src/nsidc/metgen/__init__.py index e69de29..a3d9f01 100644 --- a/src/nsidc/metgen/__init__.py +++ b/src/nsidc/metgen/__init__.py @@ -0,0 +1,4 @@ +__version__ = "v0.4.0" + + +__all__ = ["__version__"] diff --git a/src/nsidc/metgen/aws.py b/src/nsidc/metgen/aws.py index 7d7b2e9..0b0012d 100644 --- a/src/nsidc/metgen/aws.py +++ b/src/nsidc/metgen/aws.py @@ -1,8 +1,8 @@ import boto3 - KINESIS_PARTITION_KEY = "metgenc-duck" + def 
kinesis_stream_exists(stream_name): """ Predicate which determines if a Kinesis stream with the given name exists @@ -10,23 +10,23 @@ def kinesis_stream_exists(stream_name): """ client = boto3.client("kinesis") try: - summary = client.describe_stream_summary(StreamName=stream_name) + client.describe_stream_summary(StreamName=stream_name) return True - except Exception as e: + except Exception: return False + def post_to_kinesis(stream_name, cnm_message): """ Posts a message to a Kinesis stream. """ client = boto3.client("kinesis") result = client.put_record( - StreamName=stream_name, - Data=cnm_message, - PartitionKey=KINESIS_PARTITION_KEY + StreamName=stream_name, Data=cnm_message, PartitionKey=KINESIS_PARTITION_KEY ) - return result['ShardId'] + return result["ShardId"] + def staging_bucket_exists(bucket_name): """ @@ -37,9 +37,10 @@ def staging_bucket_exists(bucket_name): try: client.head_bucket(Bucket=bucket_name) return True - except Exception as e: + except Exception: return False + def stage_file(s3_bucket_name, object_name, *, data=None, file=None): """ Stages data into an s3 bucket at a given path. diff --git a/src/nsidc/metgen/cli.py b/src/nsidc/metgen/cli.py index 9f672e6..2805ab8 100644 --- a/src/nsidc/metgen/cli.py +++ b/src/nsidc/metgen/cli.py @@ -2,10 +2,7 @@ import click -from nsidc.metgen import config -from nsidc.metgen import metgen -from nsidc.metgen import constants - +from nsidc.metgen import config, constants, metgen LOGGER = logging.getLogger(constants.ROOT_LOGGER) @@ -16,57 +13,122 @@ def cli(): Cumulus, and post CNM messages.""" pass + @cli.command() -@click.option('-c', '--config', help='Path to configuration file to create or replace') +@click.option("-c", "--config", help="Path to configuration file to create or replace") def init(config): """Populates a configuration file based on user input.""" click.echo(metgen.banner()) config = metgen.init_config(config) - click.echo(f'Initialized the metgen configuration file {config}') + click.echo(f"Initialized the metgen configuration file {config}") + @cli.command() -@click.option('-c', '--config', 'config_filename', help='Path to configuration file to display', required=True) +@click.option( + "-c", + "--config", + "config_filename", + help="Path to configuration file to display", + required=True, +) + def info(config_filename): """Summarizes the contents of a configuration file.""" click.echo(metgen.banner()) - configuration = config.configuration(config.config_parser_factory(config_filename), {}) + configuration = config.configuration( + config.config_parser_factory(config_filename), {} + ) metgen.init_logging(configuration) configuration.show() + @cli.command() -@click.option('-c', '--config', 'config_filename', help='Path to configuration file', required=True) -@click.option('-t', '--type', 'content_type', help='JSON content type (cnm or ummg)', default='cnm', show_default=True) +@click.option( + "-c", + "--config", + "config_filename", + help="Path to configuration file", + required=True, +) +@click.option( + "-t", + "--type", + "content_type", + help="JSON content type", + default="cnm", + show_default=True, +) + def validate(config_filename, content_type): """Validates the contents of local JSON files.""" click.echo(metgen.banner()) - configuration = config.configuration(config.config_parser_factory(config_filename), {}) + configuration = config.configuration( + config.config_parser_factory(config_filename), {} + ) metgen.init_logging(configuration) metgen.validate(configuration, content_type) + @cli.command() 
-@click.option('-c', '--config', 'config_filename', required=True, - help='Path to configuration file') -@click.option('-d', '--dry-run', is_flag=True, required=False, default=None, - help='Don\'t stage files on S3 or publish messages to Kinesis') -@click.option('-e', '--env', help='environment', - default=constants.DEFAULT_CUMULUS_ENVIRONMENT, show_default=True) -@click.option('-n', '--number', metavar='count', required=False, - default=constants.DEFAULT_NUMBER, help="Process at most 'count' granules.") -@click.option('-wc', '--write-cnm', is_flag=True, required=False, default=None, - help="Write CNM messages to files.") -@click.option('-o', '--overwrite', is_flag=True, required=False, default=None, - help="Overwrite existing UMM-G files.") +@click.option( + "-c", + "--config", + "config_filename", + help="Path to configuration file", + required=True, +) +@click.option( + "-d", + "--dry-run", + is_flag=True, + required=False, + default=None, + help="Don't stage files on S3 or publish messages to Kinesis", +) +@click.option( + "-e", + "--env", + help="environment", + default=constants.DEFAULT_CUMULUS_ENVIRONMENT, + show_default=True, +) +@click.option( + "-n", + "--number", + help="Process at most 'count' granules.", + metavar="count", + required=False, + default=constants.DEFAULT_NUMBER, +) +@click.option( + "-wc", + "--write-cnm", + is_flag=True, + required=False, + default=None, + help="Write CNM messages to files.", +) +@click.option( + "-o", + "--overwrite", + is_flag=True, + required=False, + default=None, + help="Overwrite existing UMM-G files.", +) def process(config_filename, dry_run, env, number, write_cnm, overwrite): """Processes science data files based on configuration file contents.""" click.echo(metgen.banner()) overrides = { - 'dry_run': dry_run, - 'number': number, - 'overwrite_ummg': overwrite, - 'write_cnm_file': write_cnm, + "dry_run": dry_run, + "number": number, + "overwrite_ummg": overwrite, + "write_cnm_file": write_cnm, } try: - configuration = config.configuration(config.config_parser_factory(config_filename), overrides, env) + configuration = config.configuration( + config.config_parser_factory(config_filename), overrides, env + ) metgen.init_logging(configuration) configuration.show() config.validate(configuration) @@ -81,7 +143,7 @@ def process(config_filename, dry_run, env, number, write_cnm, overwrite): logger = logging.getLogger(constants.ROOT_LOGGER) logger.error("\nUnable to process data: " + str(e)) exit(1) - click.echo(f'Processing complete') + click.echo("Processing complete") if __name__ == "__main__": diff --git a/src/nsidc/metgen/config.py b/src/nsidc/metgen/config.py index a87644a..dbb8399 100644 --- a/src/nsidc/metgen/config.py +++ b/src/nsidc/metgen/config.py @@ -1,13 +1,10 @@ import configparser import dataclasses -from datetime import datetime, timezone import logging import os.path from pathlib import Path -import uuid -from nsidc.metgen import aws -from nsidc.metgen import constants +from nsidc.metgen import aws, constants class ValidationError(Exception): @@ -16,6 +13,7 @@ class ValidationError(Exception): def __init__(self, errors): self.errors = errors + @dataclasses.dataclass class Config: environment: str @@ -34,23 +32,27 @@ class Config: dry_run: bool def show(self): - # TODO add section headings in the right spot (if we think we need them in the output) + # TODO: add section headings in the right spot + # (if we think we need them in the output) LOGGER = logging.getLogger(constants.ROOT_LOGGER) - LOGGER.info('') - LOGGER.info('Using 
configuration:') - for k,v in self.__dict__.items(): - LOGGER.info(f' + {k}: {v}') + LOGGER.info("") + LOGGER.info("Using configuration:") + for k, v in self.__dict__.items(): + LOGGER.info(f" + {k}: {v}") if self.dry_run: - LOGGER.info('') - LOGGER.info('Note: The dry-run option was included, so no files will be staged and no CNM messages published.') - LOGGER.info('') + LOGGER.info("") + LOGGER.info( + """Note: The dry-run option was included, so no files will be\ + staged and no CNM messages published.""" + ) + LOGGER.info("") def ummg_path(self): return Path(self.local_output_dir, self.ummg_dir) def cnm_path(self): - return Path(self.local_output_dir, 'cnm') + return Path(self.local_output_dir, "cnm") def config_parser_factory(configuration_file): @@ -58,20 +60,24 @@ def config_parser_factory(configuration_file): Returns a ConfigParser by reading the specified file. """ if configuration_file is None or not os.path.exists(configuration_file): - raise ValueError(f'Unable to find configuration file {configuration_file}') - cfg_parser = configparser.ConfigParser(interpolation=configparser.ExtendedInterpolation()) + raise ValueError(f"Unable to find configuration file {configuration_file}") + cfg_parser = configparser.ConfigParser( + interpolation=configparser.ExtendedInterpolation() + ) # If the config parser gets no value (empty string), interpret it as False - cfg_parser.BOOLEAN_STATES |= [('', False)] + cfg_parser.BOOLEAN_STATES |= [("", False)] cfg_parser.read(configuration_file) return cfg_parser -def _get_configuration_value(environment, section, name, value_type, config_parser, overrides): +def _get_configuration_value( + environment, section, name, value_type, config_parser, overrides +): """ Returns a value from the provided config parser; any value for the key that is provided in the 'overrides' dictionary will take precedence. """ - vars = { 'environment': environment } + vars = {"environment": environment} if overrides.get(name) is None: if value_type is bool: return config_parser.getboolean(section, name) @@ -83,62 +89,137 @@ def _get_configuration_value(environment, section, name, value_type, config_pars else: return overrides.get(name) -def configuration(config_parser, overrides, environment=constants.DEFAULT_CUMULUS_ENVIRONMENT): + +def configuration( + config_parser, overrides, environment=constants.DEFAULT_CUMULUS_ENVIRONMENT +): """ Returns a valid Config object that is populated from the provided config parser based on the 'environment', and with values overriden with anything provided in 'overrides'. 
""" - config_parser['DEFAULT'] = { - 'kinesis_stream_name': constants.DEFAULT_STAGING_KINESIS_STREAM, - 'staging_bucket_name': constants.DEFAULT_STAGING_BUCKET_NAME, - 'write_cnm_file': constants.DEFAULT_WRITE_CNM_FILE, - 'overwrite_ummg': constants.DEFAULT_OVERWRITE_UMMG, - 'checksum_type': constants.DEFAULT_CHECKSUM_TYPE, - 'number': constants.DEFAULT_NUMBER, - 'dry_run': constants.DEFAULT_DRY_RUN, + config_parser["DEFAULT"] = { + "kinesis_stream_name": constants.DEFAULT_STAGING_KINESIS_STREAM, + "staging_bucket_name": constants.DEFAULT_STAGING_BUCKET_NAME, + "write_cnm_file": constants.DEFAULT_WRITE_CNM_FILE, + "overwrite_ummg": constants.DEFAULT_OVERWRITE_UMMG, + "checksum_type": constants.DEFAULT_CHECKSUM_TYPE, + "number": constants.DEFAULT_NUMBER, + "dry_run": constants.DEFAULT_DRY_RUN, } try: return Config( environment, - _get_configuration_value(environment, 'Source', 'data_dir', str, config_parser, overrides), - _get_configuration_value(environment, 'Collection', 'auth_id', str, config_parser, overrides), - _get_configuration_value(environment, 'Collection', 'version', int, config_parser, overrides), - _get_configuration_value(environment, 'Collection', 'provider', str, config_parser, overrides), - _get_configuration_value(environment, 'Destination', 'local_output_dir', str, config_parser, overrides), - _get_configuration_value(environment, 'Destination', 'ummg_dir', str, config_parser, overrides), - _get_configuration_value(environment, 'Destination', 'kinesis_stream_name', str, config_parser, overrides), - _get_configuration_value(environment, 'Destination', 'staging_bucket_name', str, config_parser, overrides), - _get_configuration_value(environment, 'Destination', 'write_cnm_file', bool, config_parser, overrides), - _get_configuration_value(environment, 'Destination', 'overwrite_ummg', bool, config_parser, overrides), - _get_configuration_value(environment, 'Settings', 'checksum_type', str, config_parser, overrides), - _get_configuration_value(environment, 'Settings', 'number', int, config_parser, overrides), - _get_configuration_value(environment, 'Settings', 'dry_run', bool, config_parser, overrides), + _get_configuration_value( + environment, "Source", "data_dir", str, config_parser, overrides + ), + _get_configuration_value( + environment, "Collection", "auth_id", str, config_parser, overrides + ), + _get_configuration_value( + environment, "Collection", "version", int, config_parser, overrides + ), + _get_configuration_value( + environment, "Collection", "provider", str, config_parser, overrides + ), + _get_configuration_value( + environment, + "Destination", + "local_output_dir", + str, + config_parser, + overrides, + ), + _get_configuration_value( + environment, "Destination", "ummg_dir", str, config_parser, overrides + ), + _get_configuration_value( + environment, + "Destination", + "kinesis_stream_name", + str, + config_parser, + overrides, + ), + _get_configuration_value( + environment, + "Destination", + "staging_bucket_name", + str, + config_parser, + overrides, + ), + _get_configuration_value( + environment, + "Destination", + "write_cnm_file", + bool, + config_parser, + overrides, + ), + _get_configuration_value( + environment, + "Destination", + "overwrite_ummg", + bool, + config_parser, + overrides, + ), + _get_configuration_value( + environment, "Settings", "checksum_type", str, config_parser, overrides + ), + _get_configuration_value( + environment, "Settings", "number", int, config_parser, overrides + ), + _get_configuration_value( + environment, "Settings", 
"dry_run", bool, config_parser, overrides + ), ) except Exception as e: - raise Exception('Unable to read the configuration file', e) + raise Exception("Unable to read the configuration file", e) + def validate(configuration): """ Validates each value in the configuration. """ validations = [ - ['data_dir', lambda dir: os.path.exists(dir), - 'The data_dir does not exist.'], - ['local_output_dir', lambda dir: os.path.exists(dir), - 'The local_output_dir does not exist.'], - # ['ummg_dir', lambda dir: os.path.exists(dir), - # 'The ummg_dir does not exist.'], ## validate "local_output_dir/ummg_dir" as part of issue-71 - ['kinesis_stream_name', lambda name: configuration.dry_run or aws.kinesis_stream_exists(name), - 'The kinesis stream does not exist.'], - ['staging_bucket_name', lambda name: configuration.dry_run or aws.staging_bucket_exists(name), - 'The staging bucket does not exist.'], - ['number', lambda number: 0 < number, - 'The number of granules to process must be positive.'], + [ + "data_dir", + lambda dir: os.path.exists(dir), + "The data_dir does not exist.", + ], + [ + "local_output_dir", + lambda dir: os.path.exists(dir), + "The local_output_dir does not exist.", + ], + # TODO: validate "local_output_dir/ummg_dir" as part of issue-71 + # [ + # "ummg_dir", + # lambda dir: os.path.exists(dir), + # "The ummg_dir does not exist." + # ], + [ + "kinesis_stream_name", + lambda name: aws.kinesis_stream_exists(name), + "The kinesis stream does not exist.", + ], + [ + "staging_bucket_name", + lambda name: aws.staging_bucket_exists(name), + "The staging bucket does not exist.", + ], + [ + "number", + lambda number: 0 < number, + "The number of granules to process must be positive.", + ], + ] + errors = [ + msg for name, fn, msg in validations if not fn(getattr(configuration, name)) ] - errors = [msg for name, fn, msg in validations if not fn(getattr(configuration, name))] if len(errors) == 0: return True else: raise ValidationError(errors) - diff --git a/src/nsidc/metgen/constants.py b/src/nsidc/metgen/constants.py index dbf9ea2..0339184 100644 --- a/src/nsidc/metgen/constants.py +++ b/src/nsidc/metgen/constants.py @@ -1,10 +1,10 @@ # Default configuration values -DEFAULT_CUMULUS_ENVIRONMENT = 'uat' -DEFAULT_STAGING_KINESIS_STREAM = 'nsidc-cumulus-${environment}-external_notification' -DEFAULT_STAGING_BUCKET_NAME = 'nsidc-cumulus-${environment}-ingest-staging' +DEFAULT_CUMULUS_ENVIRONMENT = "uat" +DEFAULT_STAGING_KINESIS_STREAM = "nsidc-cumulus-${environment}-external_notification" +DEFAULT_STAGING_BUCKET_NAME = "nsidc-cumulus-${environment}-ingest-staging" DEFAULT_WRITE_CNM_FILE = False DEFAULT_OVERWRITE_UMMG = False -DEFAULT_CHECKSUM_TYPE = 'SHA256' +DEFAULT_CHECKSUM_TYPE = "SHA256" DEFAULT_NUMBER = 1000000 DEFAULT_DRY_RUN = False @@ -12,26 +12,41 @@ ROOT_LOGGER = 'metgenc' # JSON schema locations and versions -CNM_JSON_SCHEMA = 'src/nsidc/metgen/json-schema/cumulus_sns_schema.json' -CNM_JSON_SCHEMA_VERSION = '1.6.1' -UMMG_JSON_SCHEMA = 'src/nsidc/metgen/json-schema/umm-g-json-schema.json' +CNM_JSON_SCHEMA = ("nsidc.metgen.json-schema", "cumulus_sns_schema.json") +CNM_JSON_SCHEMA_VERSION = "1.6.1" +UMMG_JSON_SCHEMA = ("nsidc.metgen.json-schema", "umm-g-json-schema.json") UMMG_JSON_SCHEMA_VERSION = '1.6.6' # Configuration sections -SOURCE_SECTION_NAME = 'Source' -COLLECTION_SECTION_NAME = 'Collection' -DESTINATION_SECTION_NAME = 'Destination' -SETTINGS_SECTION_NAME = 'Settings' +SOURCE_SECTION_NAME = "Source" +COLLECTION_SECTION_NAME = "Collection" +DESTINATION_SECTION_NAME = "Destination" 
+SETTINGS_SECTION_NAME = "Settings" # Spatial coverage DEFAULT_SPATIAL_AXIS_SIZE = 6 # Templates -CNM_BODY_TEMPLATE = 'src/nsidc/metgen/templates/cnm_body_template.json' -CNM_FILES_TEMPLATE = 'src/nsidc/metgen/templates/cnm_files_template.json' -UMMG_BODY_TEMPLATE = 'src/nsidc/metgen/templates/ummg_body_template.json' -UMMG_TEMPORAL_SINGLE_TEMPLATE = 'src/nsidc/metgen/templates/ummg_temporal_single_template.json' -UMMG_TEMPORAL_RANGE_TEMPLATE = 'src/nsidc/metgen/templates/ummg_temporal_range_template.json' -UMMG_SPATIAL_GPOLYGON_TEMPLATE = 'src/nsidc/metgen/templates/ummg_horizontal_gpolygon_template.json' -UMMG_SPATIAL_POINT_TEMPLATE = 'src/nsidc/metgen/templates/ummg_horizontal_point_template.json' -UMMG_SPATIAL_RECTANGLE_TEMPLATE = 'src/nsidc/metgen/templates/ummg_horizontal_rectangle_template.json' +CNM_BODY_TEMPLATE = ("nsidc.metgen.templates", "cnm_body_template.json") +CNM_FILES_TEMPLATE = ("nsidc.metgen.templates", "cnm_files_template.json") +UMMG_BODY_TEMPLATE = ("nsidc.metgen.templates", "ummg_body_template.json") +UMMG_TEMPORAL_SINGLE_TEMPLATE = ( + "nsidc.metgen.templates", + "ummg_temporal_single_template.json", +) +UMMG_TEMPORAL_RANGE_TEMPLATE = ( + "nsidc.metgen.templates", + "ummg_temporal_range_template.json", +) +UMMG_SPATIAL_GPOLYGON_TEMPLATE = ( + "nsidc.metgen.templates", + "ummg_horizontal_gpolygon_template.json", +) +UMMG_SPATIAL_POINT_TEMPLATE = ( + "nsidc.metgen.templates", + "ummg_horizontal_point_template.json", +) +UMMG_SPATIAL_RECTANGLE_TEMPLATE = ( + "nsidc.metgen.templates", + "ummg_horizontal_rectangle_template.json", +) diff --git a/src/nsidc/metgen/metgen.py b/src/nsidc/metgen/metgen.py index 1d49288..1e35884 100644 --- a/src/nsidc/metgen/metgen.py +++ b/src/nsidc/metgen/metgen.py @@ -3,25 +3,22 @@ import datetime as dt import hashlib import json -import jsonschema import logging import os.path import sys -from typing import Callable +import uuid +from importlib.resources import open_text from pathlib import Path from string import Template -import uuid +from typing import Callable +import jsonschema from funcy import all, filter, partial, rcompose, take from pyfiglet import Figlet from returns.maybe import Maybe from rich.prompt import Confirm, Prompt -from nsidc.metgen import aws -from nsidc.metgen import config -from nsidc.metgen import constants -from nsidc.metgen import netcdf_reader - +from nsidc.metgen import aws, config, constants, netcdf_reader # ------------------------------------------------------------------- CONSOLE_FORMAT = "%(message)s" @@ -31,6 +28,7 @@ # Top-level functions which expose operations to the CLI # ------------------------------------------------------------------- + def init_logging(configuration: config.Config): """ Initialize the logger for metgenc. @@ -48,78 +46,135 @@ def init_logging(configuration: config.Config): logfile_handler.setFormatter(logging.Formatter(LOGFILE_FORMAT)) logger.addHandler(logfile_handler) + def banner(): """ Displays the name of this utility using incredible ASCII-art. """ - f = Figlet(font='slant') - return f.renderText('metgenc') + f = Figlet(font="slant") + return f.renderText("metgenc") + # TODO require a non-blank input for elements that have no default value def init_config(configuration_file): """ - Prompts the user for configuration values and then creates a valid configuration file. + Prompts the user for configuration values and then creates a valid + configuration file. 
""" - print("""This utility will create a granule metadata configuration file by prompting """ - """you for values for each of the configuration parameters.""") + print( + """This utility will create a granule metadata configuration file by prompting + you for values for each of the configuration parameters.""" + ) print() # prompt for config file name if it's not provided if not configuration_file: - configuration_file = Prompt.ask("configuration file name", default="example.ini") + configuration_file = Prompt.ask( + "configuration file name", default="example.ini" + ) # TODO check file name is safe else: - print(f'Creating configuration file {configuration_file}') + print(f"Creating configuration file {configuration_file}") print() - if (os.path.exists(configuration_file)): - print(f'WARNING: The {configuration_file} already exists.') + if os.path.exists(configuration_file): + print(f"WARNING: The {configuration_file} already exists.") overwrite = Confirm.ask("Overwrite?") if not overwrite: - print('Not overwriting existing file. Exiting.') + print("Not overwriting existing file. Exiting.") exit(1) cfg_parser = configparser.ConfigParser() print() - print(f'{constants.SOURCE_SECTION_NAME} Data Parameters') - print('--------------------------------------------------') + print(f"{constants.SOURCE_SECTION_NAME} Data Parameters") + print("--------------------------------------------------") cfg_parser.add_section(constants.SOURCE_SECTION_NAME) - cfg_parser.set(constants.SOURCE_SECTION_NAME, "data_dir", Prompt.ask("Data directory", default="data")) + cfg_parser.set( + constants.SOURCE_SECTION_NAME, + "data_dir", + Prompt.ask("Data directory", default="data"), + ) print() print() - print(f'{constants.COLLECTION_SECTION_NAME} Parameters') - print('--------------------------------------------------') + print(f"{constants.COLLECTION_SECTION_NAME} Parameters") + print("--------------------------------------------------") cfg_parser.add_section(constants.COLLECTION_SECTION_NAME) - cfg_parser.set(constants.COLLECTION_SECTION_NAME, "auth_id", Prompt.ask("Authoritative ID")) + cfg_parser.set( + constants.COLLECTION_SECTION_NAME, "auth_id", Prompt.ask("Authoritative ID") + ) cfg_parser.set(constants.COLLECTION_SECTION_NAME, "version", Prompt.ask("Version")) - cfg_parser.set(constants.COLLECTION_SECTION_NAME, "provider", Prompt.ask("Provider")) + cfg_parser.set( + constants.COLLECTION_SECTION_NAME, "provider", Prompt.ask("Provider") + ) print() print() - print(f'{constants.DESTINATION_SECTION_NAME} Parameters') - print('--------------------------------------------------') + print(f"{constants.DESTINATION_SECTION_NAME} Parameters") + print("--------------------------------------------------") cfg_parser.add_section(constants.DESTINATION_SECTION_NAME) - cfg_parser.set(constants.DESTINATION_SECTION_NAME, "local_output_dir", Prompt.ask("Local output directory", default="output")) - cfg_parser.set(constants.DESTINATION_SECTION_NAME, "ummg_dir", Prompt.ask("Local UMM-G output directory (relative to local output directory)", default="ummg")) - cfg_parser.set(constants.DESTINATION_SECTION_NAME, "kinesis_stream_name", Prompt.ask("Kinesis stream name", default=constants.DEFAULT_STAGING_KINESIS_STREAM)) - cfg_parser.set(constants.DESTINATION_SECTION_NAME, "staging_bucket_name", Prompt.ask("Cumulus s3 bucket name", default=constants.DEFAULT_STAGING_BUCKET_NAME)) - cfg_parser.set(constants.DESTINATION_SECTION_NAME, "write_cnm_file", Prompt.ask("Write CNM messages to files? 
(True/False)", default=constants.DEFAULT_WRITE_CNM_FILE)) - cfg_parser.set(constants.DESTINATION_SECTION_NAME, "overwrite_ummg", Prompt.ask("Overwrite existing UMM-G files? (True/False)", default=constants.DEFAULT_OVERWRITE_UMMG)) + cfg_parser.set( + constants.DESTINATION_SECTION_NAME, + "local_output_dir", + Prompt.ask("Local output directory", default="output"), + ) + cfg_parser.set( + constants.DESTINATION_SECTION_NAME, + "ummg_dir", + Prompt.ask( + "Local UMM-G output directory (relative to local output directory)", + default="ummg", + ), + ) + cfg_parser.set( + constants.DESTINATION_SECTION_NAME, + "kinesis_stream_name", + Prompt.ask( + "Kinesis stream name", default=constants.DEFAULT_STAGING_KINESIS_STREAM + ), + ) + cfg_parser.set( + constants.DESTINATION_SECTION_NAME, + "staging_bucket_name", + Prompt.ask( + "Cumulus s3 bucket name", default=constants.DEFAULT_STAGING_BUCKET_NAME + ), + ) + cfg_parser.set( + constants.DESTINATION_SECTION_NAME, + "write_cnm_file", + Prompt.ask( + "Write CNM messages to files? (True/False)", + default=constants.DEFAULT_WRITE_CNM_FILE, + ), + ) + cfg_parser.set( + constants.DESTINATION_SECTION_NAME, + "overwrite_ummg", + Prompt.ask( + "Overwrite existing UMM-G files? (True/False)", + default=constants.DEFAULT_OVERWRITE_UMMG, + ), + ) print() - print(f'{constants.SETTINGS_SECTION_NAME} Parameters') - print('--------------------------------------------------') + print(f"{constants.SETTINGS_SECTION_NAME} Parameters") + print("--------------------------------------------------") cfg_parser.add_section(constants.SETTINGS_SECTION_NAME) - cfg_parser.set(constants.SETTINGS_SECTION_NAME, "checksum_type", Prompt.ask("Checksum type", default=constants.DEFAULT_CHECKSUM_TYPE)) + cfg_parser.set( + constants.SETTINGS_SECTION_NAME, + "checksum_type", + Prompt.ask("Checksum type", default=constants.DEFAULT_CHECKSUM_TYPE), + ) print() - print(f'Saving new configuration: {configuration_file}') + print(f"Saving new configuration: {configuration_file}") with open(configuration_file, "tw") as file: cfg_parser.write(file) return configuration_file + def prepare_output_dirs(configuration): """ Generate paths to ummg and cnm output directories. 
@@ -127,35 +182,41 @@ def prepare_output_dirs(configuration): TODO: create local_output_dir, ummg_dir, and cnm subdir if they don't exist """ ummg_path = Path(configuration.local_output_dir, configuration.ummg_dir) - cnm_path = Path(configuration.local_output_dir, 'cnm') + cnm_path = Path(configuration.local_output_dir, "cnm") if configuration.overwrite_ummg: scrub_json_files(ummg_path) return (ummg_path, cnm_path) + def scrub_json_files(path): - print(f'Removing existing files in {path}') - for file_path in path.glob('*.json'): + print(f"Removing existing files in {path}") + for file_path in path.glob("*.json"): try: if os.path.isfile(file_path) or os.path.islink(file_path): os.unlink(file_path) except Exception as e: - print('Failed to delete %s: %s' % (file_path, e)) + print("Failed to delete %s: %s" % (file_path, e)) + # ------------------------------------------------------------------- # Data structures for processing Granules and recording results # ------------------------------------------------------------------- + @dataclasses.dataclass class Collection: """Collection info required to ingest a granule""" + auth_id: str version: int + @dataclasses.dataclass class Granule: """Granule to ingest""" + producer_granule_id: str collection: Maybe[Collection] = Maybe.empty data_filenames: list[str] = dataclasses.field(default_factory=list) @@ -164,26 +225,32 @@ class Granule: uuid: Maybe[str] = Maybe.empty cnm_message: Maybe[str] = Maybe.empty + @dataclasses.dataclass class Action: """An audit of a single action performed on a Granule""" + name: str successful: bool message: str startDatetime: Maybe[dt.datetime] = Maybe.empty endDatetime: Maybe[dt.datetime] = Maybe.empty + @dataclasses.dataclass class Ledger: """An audit of the Actions performed on a Granule""" + granule: Granule actions: list[Action] = dataclasses.field(default_factory=list) successful: bool = False startDatetime: Maybe[dt.datetime] = Maybe.empty endDatetime: Maybe[dt.datetime] = Maybe.empty + # ------------------------------------------------------------------- + def process(configuration: config.Config) -> None: """ Process all Granules and record the results and summary. @@ -192,15 +259,15 @@ def process(configuration: config.Config) -> None: # Ordered list of operations to perform on each granule operations = [ - granule_collection, - prepare_granule, - find_existing_ummg, - create_ummg, - stage_files if not configuration.dry_run else null_operation, - create_cnm, - write_cnm, - publish_cnm if not configuration.dry_run else null_operation, - ] + granule_collection, + prepare_granule, + find_existing_ummg, + create_ummg, + stage_files if not configuration.dry_run else null_operation, + create_cnm, + write_cnm, + publish_cnm if not configuration.dry_run else null_operation, + ] # Bind the configuration to each operation configured_operations = [partial(fn, configuration) for fn in operations] @@ -210,24 +277,23 @@ def process(configuration: config.Config) -> None: # The complete pipeline of actions initializes a Ledger, performs all the # operations, finalizes a Ledger, and logs the details of the Ledger. - pipeline = rcompose( - start_ledger, - *recorded_operations, - end_ledger, - log_ledger - ) + pipeline = rcompose(start_ledger, *recorded_operations, end_ledger, log_ledger) # Find all of the input granule files, limit the size of the list based # on the configuration, and execute the pipeline on each of the granules. 
- candidate_granules = [Granule(p.name, data_filenames=[str(p)]) - for p in Path(configuration.data_dir).glob('*.nc')] + candidate_granules = [ + Granule(p.name, data_filenames=[str(p)]) + for p in Path(configuration.data_dir).glob("*.nc") + ] granules = take(configuration.number, candidate_granules) results = [pipeline(g) for g in granules] summarize_results(results) + # ------------------------------------------------------------------- + def recorder(fn: Callable[[Granule], Granule], ledger: Ledger) -> Ledger: """ Higher-order function that, given a granule operation function and a @@ -236,7 +302,7 @@ def recorder(fn: Callable[[Granule], Granule], ledger: Ledger) -> Ledger: """ # Execute the operation and record the result successful = True - message = '' + message = "" start = dt.datetime.now() new_granule = None try: @@ -248,31 +314,30 @@ def recorder(fn: Callable[[Granule], Granule], ledger: Ledger) -> Ledger: # Store the result in the Ledger new_actions = ledger.actions.copy() - fn_name = fn.func.__name__ if hasattr(fn, 'func') else fn.__name__ + fn_name = fn.func.__name__ if hasattr(fn, "func") else fn.__name__ new_actions.append( Action( fn_name, successful=successful, message=message, startDatetime=start, - endDatetime=end + endDatetime=end, ) ) return dataclasses.replace( ledger, granule=new_granule if new_granule else ledger.granule, - actions=new_actions + actions=new_actions, ) + def start_ledger(granule: Granule) -> Ledger: """ Start a new Ledger of the operations on the given Granule. """ - return Ledger( - granule, - startDatetime=dt.datetime.now() - ) + return Ledger(granule, startDatetime=dt.datetime.now()) + def end_ledger(ledger: Ledger) -> Ledger: """ @@ -281,16 +346,19 @@ def end_ledger(ledger: Ledger) -> Ledger: return dataclasses.replace( ledger, endDatetime=dt.datetime.now(), - successful=all([a.successful for a in ledger.actions]) + successful=all([a.successful for a in ledger.actions]), ) + # ------------------------------------------------------------------- # Granule Operations # ------------------------------------------------------------------- + def null_operation(configuration: config.Config, granule: Granule) -> Granule: return granule + def granule_collection(configuration: config.Config, granule: Granule) -> Granule: """ Find the Granule's Collection and add it to the Granule. @@ -299,28 +367,32 @@ def granule_collection(configuration: config.Config, granule: Granule) -> Granul # collection information from CMR once, then associate it with each # granule. return dataclasses.replace( - granule, - collection=Collection(configuration.auth_id, configuration.version) + granule, collection=Collection(configuration.auth_id, configuration.version) ) + def prepare_granule(configuration: config.Config, granule: Granule) -> Granule: """ Prepare the Granule for creating metadata and submitting it. """ return dataclasses.replace( - granule, + granule, submission_time=dt.datetime.now(dt.timezone.utc).isoformat(), - uuid=str(uuid.uuid4()) + uuid=str(uuid.uuid4()), ) + def find_existing_ummg(configuration: config.Config, granule: Granule) -> Granule: - ummg_filename = configuration.ummg_path().joinpath(granule.producer_granule_id + '.json') + ummg_filename = configuration.ummg_path().joinpath( + granule.producer_granule_id + ".json" + ) if ummg_filename.exists(): return dataclasses.replace(granule, ummg_filename=ummg_filename) else: return granule + def create_ummg(configuration: config.Config, granule: Granule) -> Granule: """ Create the UMM-G file for the Granule. 
@@ -329,7 +401,9 @@ def create_ummg(configuration: config.Config, granule: Granule) -> Granule: if granule.ummg_filename != Maybe.empty and not configuration.overwrite_ummg: return granule - ummg_file_path = configuration.ummg_path().joinpath(granule.producer_granule_id + '.json') + ummg_file_path = configuration.ummg_path().joinpath( + granule.producer_granule_id + ".json" + ) # Populated metadata_details dict looks like: # { @@ -338,7 +412,8 @@ def create_ummg(configuration: config.Config, granule: Granule) -> Granule: # 'production_date_time' => iso datetime string, # 'temporal' => an array of one (data represent a single point in time) # or two (data cover a time range) datetime strings - # 'geometry' => { 'points': a string representation of one or more lat/lon pairs } + # 'geometry' => { 'points': a string representation of one or more + # lat/lon pairs } # } # } metadata_details = {} @@ -347,25 +422,21 @@ def create_ummg(configuration: config.Config, granule: Granule) -> Granule: # Collapse information about (possibly) multiple files into a granule summary. summary = metadata_summary(metadata_details) - summary['spatial_extent'] = populate_spatial(summary['geometry']) - summary['temporal_extent'] = populate_temporal(summary['temporal']) - summary['ummg_schema_version'] = constants.UMMG_JSON_SCHEMA_VERSION + summary["spatial_extent"] = populate_spatial(summary["geometry"]) + summary["temporal_extent"] = populate_temporal(summary["temporal"]) + summary["ummg_schema_version"] = constants.UMMG_JSON_SCHEMA_VERSION # Populate the body template body = ummg_body_template().safe_substitute( - dataclasses.asdict(granule) - | dataclasses.asdict(granule.collection) - | summary + dataclasses.asdict(granule) | dataclasses.asdict(granule.collection) | summary ) # Save it all in a file. with open(ummg_file_path, "tw") as f: print(body, file=f) - return dataclasses.replace( - granule, - ummg_filename=ummg_file_path - ) + return dataclasses.replace(granule, ummg_filename=ummg_file_path) + def stage_files(configuration: config.Config, granule: Granule) -> Granule: """ @@ -375,11 +446,12 @@ def stage_files(configuration: config.Config, granule: Granule) -> Granule: for fn in stuff: filename = os.path.basename(fn) bucket_path = s3_object_path(granule, filename) - with open(fn, 'rb') as f: + with open(fn, "rb") as f: aws.stage_file(configuration.staging_bucket_name, bucket_path, file=f) return granule + def create_cnm(configuration: config.Config, granule: Granule) -> Granule: """ Create a CNM submission message for the Granule. 
@@ -389,38 +461,48 @@ def create_cnm(configuration: config.Config, granule: Granule) -> Granule: populated_file_templates = [] granule_files = { - 'data': granule.data_filenames, - 'metadata': [granule.ummg_filename] + "data": granule.data_filenames, + "metadata": [granule.ummg_filename], } for type, files in granule_files.items(): for file in files: - populated_file_templates.append(json.loads(files_template.safe_substitute( - cnms_file_json_parts(configuration.staging_bucket_name, - granule, - file, - type)))) + populated_file_templates.append( + json.loads( + files_template.safe_substitute( + cnms_file_json_parts( + configuration.staging_bucket_name, granule, file, type + ) + ) + ) + ) return dataclasses.replace( granule, - cnm_message = body_template.safe_substitute( - dataclasses.asdict(granule) - | dataclasses.asdict(granule.collection) - | dataclasses.asdict(configuration) - | { 'file_content': json.dumps(populated_file_templates), - 'cnm_schema_version': constants.CNM_JSON_SCHEMA_VERSION } - ) + cnm_message=body_template.safe_substitute( + dataclasses.asdict(granule) + | dataclasses.asdict(granule.collection) + | dataclasses.asdict(configuration) + | { + "file_content": json.dumps(populated_file_templates), + "cnm_schema_version": constants.CNM_JSON_SCHEMA_VERSION, + } + ), ) + def write_cnm(configuration: config.Config, granule: Granule) -> Granule: """ Write a CNM message to a file. """ if configuration.write_cnm_file: - cnm_file = configuration.cnm_path().joinpath(granule.producer_granule_id + '.cnm.json') + cnm_file = configuration.cnm_path().joinpath( + granule.producer_granule_id + ".cnm.json" + ) with open(cnm_file, "tw") as f: print(granule.cnm_message, file=f) return granule + def publish_cnm(configuration: config.Config, granule: Granule) -> Granule: """ Publish a CNM message to a Kinesis stream. @@ -429,10 +511,12 @@ def publish_cnm(configuration: config.Config, granule: Granule) -> Granule: aws.post_to_kinesis(stream_name, granule.cnm_message) return granule + # ------------------------------------------------------------------- # Logging functions # ------------------------------------------------------------------- + def log_ledger(ledger: Ledger) -> Ledger: """Log a Ledger of the operations performed on a Granule.""" logger = logging.getLogger(constants.ROOT_LOGGER) @@ -442,7 +526,7 @@ def log_ledger(ledger: Ledger) -> Ledger: logger.info(f" * Start : {ledger.startDatetime}") logger.info(f" * End : {ledger.endDatetime}") logger.info(f" * Successful : {ledger.successful}") - logger.debug(f" * Actions:") + logger.debug(" * Actions:") for a in ledger.actions: logger.debug(f" + Name: {a.name}") logger.debug(f" Start : {a.startDatetime}") @@ -452,6 +536,7 @@ def log_ledger(ledger: Ledger) -> Ledger: logger.debug(f" Reason : {a.message}") return ledger + def summarize_results(ledgers: list[Ledger]) -> None: """ Log a summary of the operations performed on all Granules. 
@@ -474,55 +559,63 @@ def summarize_results(ledgers: list[Ledger]) -> None: logger.info(f"Successful: {successful_count}") logger.info(f"Failed : {failed_count}") + # ------------------------------------------------------------------- # Utility functions # ------------------------------------------------------------------- + def cnms_file_json_parts(staging_bucket_name, granule, file, file_type): file_mapping = dict() file_name = os.path.basename(file) - file_mapping['file_size'] = os.path.getsize(file) - file_mapping['file_type'] = file_type - file_mapping['checksum'] = checksum(file) - file_mapping['file_name'] = file_name - file_mapping['staging_uri'] = s3_url(staging_bucket_name, granule, file_name) + file_mapping["file_size"] = os.path.getsize(file) + file_mapping["file_type"] = file_type + file_mapping["checksum"] = checksum(file) + file_mapping["file_name"] = file_name + file_mapping["staging_uri"] = s3_url(staging_bucket_name, granule, file_name) return file_mapping + def s3_url(staging_bucket_name, granule, filename): """ Returns the full s3 URL for the given file name. """ object_path = s3_object_path(granule, filename) - return f's3://{staging_bucket_name}/{object_path}' + return f"s3://{staging_bucket_name}/{object_path}" + def s3_object_path(granule, filename): """ Returns the full s3 object path for the granule """ - prefix = Template('external/${auth_id}/${version}/${uuid}/').safe_substitute({ - 'auth_id': granule.collection.auth_id, - 'version': granule.collection.version, - 'uuid': granule.uuid - }) + prefix = Template("external/${auth_id}/${version}/${uuid}/").safe_substitute( + { + "auth_id": granule.collection.auth_id, + "version": granule.collection.version, + "uuid": granule.uuid, + } + ) return prefix + filename + # size is a sum of all associated data file sizes. # all other attributes use the values from the first data file entry. def metadata_summary(details): default = list(details.values())[0] return { - 'size_in_bytes': sum([x['size_in_bytes'] for x in details.values()]), - 'production_date_time': default['production_date_time'], - 'temporal': default['temporal'], - 'geometry': default['geometry'] + "size_in_bytes": sum([x["size_in_bytes"] for x in details.values()]), + "production_date_time": default["production_date_time"], + "temporal": default["temporal"], + "geometry": default["geometry"], } + def checksum(file): BUF_SIZE = 65536 sha256 = hashlib.sha256() - with open(file, 'rb') as f: + with open(file, "rb") as f: while True: data = f.read(BUF_SIZE) if not data: @@ -531,6 +624,7 @@ def checksum(file): return sha256.hexdigest() + # TODO: Use the GranuleSpatialRepresentation value in the collection metadata # to determine the expected spatial type. See Issue #15. For now, default to # a Gpolygon. 
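# Illustrative example (hypothetical values): s3_object_path() and s3_url()
# above combine the collection auth_id, version, and granule UUID into a fixed
# staging layout, for example:
#
#   s3_object_path(granule, "data.nc")
#   # => "external/MYAUTHID/2/abcd-1234/data.nc"
#
#   s3_url("my-staging-bucket", granule, "data.nc")
#   # => "s3://my-staging-bucket/external/MYAUTHID/2/abcd-1234/data.nc"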
@@ -539,70 +633,82 @@ def populate_spatial(spatial_values):
     # { 'points': string representation of an array of {lon: lat:} dicts }
     return ummg_spatial_gpolygon_template().safe_substitute(spatial_values)
 
+
 def populate_temporal(datetime_values):
     if len(datetime_values) > 1:
-        return ummg_temporal_range_template().safe_substitute({
-            'begin_date_time': datetime_values[0],
-            'end_date_time': datetime_values[1]})
+        return ummg_temporal_range_template().safe_substitute(
+            {"begin_date_time": datetime_values[0], "end_date_time": datetime_values[1]}
+        )
     else:
-        return ummg_temporal_single_template().safe_substitute({
-            'date_time': datetime_values[0]})
+        return ummg_temporal_single_template().safe_substitute(
+            {"date_time": datetime_values[0]}
+        )
+
 
 def ummg_body_template():
     return initialize_template(constants.UMMG_BODY_TEMPLATE)
 
+
 def ummg_temporal_single_template():
     return initialize_template(constants.UMMG_TEMPORAL_SINGLE_TEMPLATE)
 
+
 def ummg_temporal_range_template():
     return initialize_template(constants.UMMG_TEMPORAL_RANGE_TEMPLATE)
 
+
 def ummg_spatial_gpolygon_template():
     return initialize_template(constants.UMMG_SPATIAL_GPOLYGON_TEMPLATE)
 
+
 def cnms_body_template():
     return initialize_template(constants.CNM_BODY_TEMPLATE)
 
+
 def cnms_files_template():
     return initialize_template(constants.CNM_FILES_TEMPLATE)
 
-def initialize_template(file):
-    with open(file) as template_file:
+
+def initialize_template(resource_location):
+    with open_text(*resource_location) as template_file:
         template_str = template_file.read()
 
     return Template(template_str)
 
+
 def validate(configuration, content_type):
     """
     Validate local CNM or UMM-G (JSON) files
     """
     output_file_path = file_type_path(configuration, content_type)
-    schema_file, dummy_json = schema_file_path(content_type)
+    schema_resource_location, dummy_json = schema_file_path(content_type)
 
     logger = logging.getLogger(constants.ROOT_LOGGER)
-    logger.info('')
+    logger.info("")
    logger.info(f"Validating files in {output_file_path}...")
 
-    with open(schema_file) as sf:
+    with open_text(*schema_resource_location) as sf:
         schema = json.load(sf)
 
     # loop through all files and validate each one
-    for json_file in output_file_path.glob('*.json'):
+    for json_file in output_file_path.glob("*.json"):
         apply_schema(schema, json_file, dummy_json)
 
     logger.info("Validations complete.")
     return True
 
+
 def file_type_path(configuration, content_type):
     """
     Return directory containing JSON files to be validated.
     """
     match content_type:
-        case 'cnm':
+        case "cnm":
             return configuration.cnm_path()
         case 'ummg':
             return configuration.ummg_path()
         case _:
-            return ''
+            return ""
+
 
 def schema_file_path(content_type):
     """
@@ -626,13 +733,25 @@ def apply_schema(schema, json_file, dummy_json):
     Apply JSON schema to generated JSON content.
     """
     logger = logging.getLogger(constants.ROOT_LOGGER)
     with open(json_file) as jf:
         json_content = json.load(jf)
     try:
         jsonschema.validate(instance=json_content | dummy_json, schema=schema)
         logger.info(f"No validation errors: {json_file}")
     except jsonschema.exceptions.ValidationError as err:
-        logger.error(f'Validation failed for "{err.validator}" in {json_file}: {err.validator_value}')
+        logger.error(
+            f'Validation failed for "{err.validator}" '
+            f"in {json_file}: {err.validator_value}"
+        )
     return True
-
diff --git a/src/nsidc/metgen/netcdf_reader.py b/src/nsidc/metgen/netcdf_reader.py
index 61d7061..f32209b 100644
--- a/src/nsidc/metgen/netcdf_reader.py
+++ b/src/nsidc/metgen/netcdf_reader.py
@@ -1,14 +1,14 @@
 import json
 import os.path
-import xarray as xr
 from datetime import timezone
-from dateutil.parser import parse
-from pyproj import CRS
-from pyproj import Transformer
 
+import xarray as xr
+from dateutil.parser import parse
+from pyproj import CRS, Transformer
 from nsidc.metgen import constants
 
+
 def extract_metadata(netcdf_path):
     """
     Read the content at netcdf_path and return a structure with temporal coverage
@@ -18,21 +18,23 @@ def extract_metadata(netcdf_path):
     # TODO: handle errors if any needed attributes don't exist.
     netcdf = xr.open_dataset(netcdf_path, decode_coords="all")
 
-    return {
-        'size_in_bytes': os.path.getsize(netcdf_path),
-        'production_date_time': ensure_iso(netcdf.attrs['date_modified']),
-        'temporal': time_range(netcdf),
-        'geometry': {'points': json.dumps(spatial_values(netcdf))}
+    return {
+        "size_in_bytes": os.path.getsize(netcdf_path),
+        "production_date_time": ensure_iso(netcdf.attrs["date_modified"]),
+        "temporal": time_range(netcdf),
+        "geometry": {"points": json.dumps(spatial_values(netcdf))},
     }
 
+
 def time_range(netcdf):
     """Return an array of datetime strings"""
     datetimes = []
-    datetimes.append(ensure_iso(netcdf.attrs['time_coverage_start']))
-    datetimes.append(ensure_iso(netcdf.attrs['time_coverage_end']))
+    datetimes.append(ensure_iso(netcdf.attrs["time_coverage_start"]))
+    datetimes.append(ensure_iso(netcdf.attrs["time_coverage_end"]))
 
     return datetimes
 
+
 def spatial_values(netcdf):
     """
     Return an array of dicts, each dict representing one lat/lon pair like so:
@@ -49,15 +51,20 @@ def spatial_values(netcdf):
     crs_4326 = CRS.from_epsg(4326)
     xformer = Transformer.from_crs(data_crs, crs_4326, always_xy=True)
 
-    # Adding padding should give us values that match up to the netcdf.attrs.geospatial_bounds
-    pad = abs(float(netcdf.crs.GeoTransform.split()[1]))/2
+    # Adding padding should give us values that match up to the
+    # netcdf.attrs.geospatial_bounds
+    pad = abs(float(netcdf.crs.GeoTransform.split()[1])) / 2
     xdata = [x - pad if x < 0 else x + pad for x in netcdf.x.data]
     ydata = [y - pad if y < 0 else y + pad for y in netcdf.y.data]
 
     # Extract the perimeter points and transform to lon, lat
     perimeter = [xformer.transform(x, y) for (x, y) in thinned_perimeter(xdata, ydata)]
 
-    return [{'Longitude': round(lon, 8), 'Latitude': round(lat, 8)} for (lon, lat) in perimeter]
+    return [
+        {"Longitude": round(lon, 8), "Latitude": round(lat, 8)}
+        for (lon, lat) in perimeter
+    ]
+
 
 def thinned_perimeter(xdata, ydata):
     """
@@ -71,16 +78,25 @@ def thinned_perimeter(xdata, ydata):
 
     # Pull out just the perimeter of the grid, counter-clockwise direction,
     # starting at top left.
# xindex[0], yindex[0]..yindex[-2] - left = [(x,y) for x in xdata[:1] for i in yindices[:ylen-1] for y in [ydata[i]]] + left = [(x, y) for x in xdata[:1] for i in yindices[: ylen - 1] for y in [ydata[i]]] # xindex[0]..xindex[-2], yindex[-1] - bottom = [(x,y) for i in xindices[:xlen-1] for x in [xdata[i]] for y in ydata[-1:]] + bottom = [ + (x, y) for i in xindices[: xlen - 1] for x in [xdata[i]] for y in ydata[-1:] + ] # xindex[-1], yindex[-1]..yindex[1] - right = [(x,y) for x in xdata[-1:] for i in yindices[ylen-1:0:-1] for y in [ydata[i]]] + right = [ + (x, y) + for x in xdata[-1:] + for i in yindices[ylen - 1 : 0 : -1] + for y in [ydata[i]] + ] # xindex[-1]..xindex[0], yindex[0] - top = [(x,y) for i in xindices[xlen-1::-1] for x in [xdata[i]] for y in ydata[:1]] + top = [ + (x, y) for i in xindices[xlen - 1 :: -1] for x in [xdata[i]] for y in ydata[:1] + ] # The last point should already be the same as the first, given that top # uses all of the xindices, but just in case... @@ -88,7 +104,8 @@ def thinned_perimeter(xdata, ydata): top.append(left[0]) # concatenate the "sides" and return the perimeter points - return(left + bottom + right + top) + return left + bottom + right + top + def index_subset(original_length): """ @@ -96,16 +113,23 @@ def index_subset(original_length): somewhat arbitrary, and approximately evenly spaced, additional number of indices in between the beginning and end. """ - if(original_length > 6): - return [round(index*count*.2) for count in range(constants.DEFAULT_SPATIAL_AXIS_SIZE) for index in [original_length - 1]] + if original_length > 6: + return [ + round(index * count * 0.2) + for count in range(constants.DEFAULT_SPATIAL_AXIS_SIZE) + for index in [original_length - 1] + ] else: return list(range(original_length)) + def ensure_iso(datetime_str): """ Parse ISO-standard datetime strings without a timezone identifier. """ iso_obj = parse(datetime_str) - return iso_obj.replace(tzinfo=timezone.utc) \ - .isoformat(timespec='milliseconds') \ + return ( + iso_obj.replace(tzinfo=timezone.utc) + .isoformat(timespec="milliseconds") .replace("+00:00", "Z") + ) diff --git a/tests/test_aws.py b/tests/test_aws.py index fbb0633..9bf0014 100644 --- a/tests/test_aws.py +++ b/tests/test_aws.py @@ -1,15 +1,12 @@ import json import os from tempfile import TemporaryFile -from unittest.mock import mock_open, patch, Mock import boto3 -from moto import mock_aws import pytest - +from moto import mock_aws from nsidc.metgen import aws - # Unit tests for the 'aws' module functions. # # The test boundary is the aws module's interface with the AWS library's boto3 @@ -18,6 +15,7 @@ # correct parameters, correctly handle their return values, and handle any # exceptions they may throw. 
+ @pytest.fixture(scope="module") def aws_credentials(): """Mocked AWS Credentials for moto.""" @@ -27,26 +25,27 @@ def aws_credentials(): os.environ["AWS_SESSION_TOKEN"] = "testing" os.environ["AWS_DEFAULT_REGION"] = "us-west-2" + @pytest.fixture def kinesis(aws_credentials): """A mocked Kinesis client.""" with mock_aws(): yield boto3.client("kinesis", region_name="us-west-2") + @pytest.fixture def kinesis_stream_summary(kinesis): """Create a Kinesis stream and return its summary info.""" kinesis.create_stream(StreamName="duck-test-stream", ShardCount=1) summary = kinesis.describe_stream_summary(StreamName="duck-test-stream") - return summary['StreamDescriptionSummary'] + return summary["StreamDescriptionSummary"] + @pytest.fixture def test_message(): """Returns a JSON string for testing.""" - return json.dumps({ - 'foo': 333, - 'bar': 'xyzzy' - }) + return json.dumps({"foo": 333, "bar": "xyzzy"}) + @pytest.fixture def s3(aws_credentials): @@ -54,18 +53,18 @@ def s3(aws_credentials): with mock_aws(): yield boto3.client("s3") + @pytest.fixture def s3_bucket(s3): """Create an S3 buket and return the bucket name.""" bucket_name = "duck-test-bucket" - response = s3.create_bucket( - Bucket=bucket_name, - CreateBucketConfiguration={ - 'LocationConstraint': 'us-west-2' - }, + s3.create_bucket( + Bucket=bucket_name, + CreateBucketConfiguration={"LocationConstraint": "us-west-2"}, ) return bucket_name + @pytest.fixture def science_data(): return """ @@ -74,89 +73,116 @@ def science_data(): bar """ + def test_kinesis_stream_exists_for_valid_name(kinesis_stream_summary): stream_name = "duck-test-stream" assert aws.kinesis_stream_exists(stream_name) + def test_kinesis_stream_exists_for_invalid_name(kinesis_stream_summary): stream_name = "xyzzy" assert not aws.kinesis_stream_exists(stream_name) + def test_post_to_kinesis(kinesis_stream_summary, test_message): """Given a Kinesis stream name and a message, it should post successfully.""" - stream_name = kinesis_stream_summary['StreamName'] + stream_name = kinesis_stream_summary["StreamName"] success = aws.post_to_kinesis(stream_name, test_message) assert type(success) is str + def test_post_to_kinesis_returns_shard_id(kinesis_stream_summary, test_message): - """Given a Kinesis stream name and a test message, the response should include the shard id.""" - stream_name = kinesis_stream_summary['StreamName'] + """ + Given a Kinesis stream name and a test message, the response should include + the shard id. + """ + stream_name = kinesis_stream_summary["StreamName"] result = aws.post_to_kinesis(stream_name, test_message) assert "shardId" in result + def test_post_to_kinesis_with_invalid_stream_name(kinesis_stream_summary, test_message): - """Given an invalid Kinesis stream name and a message, it should raise an exception.""" + """ + Given an invalid Kinesis stream name and a message, it should raise an + exception. + """ invalid_stream_name = "abcd-1234-wxyz-0987" with pytest.raises(Exception): aws.post_to_kinesis(invalid_stream_name, test_message) + def test_post_to_kinesis_with_empty_message(kinesis_stream_summary): - """Given a Kinesis stream name, it should raise an exception when posting an empty message.""" - stream_name = kinesis_stream_summary['StreamName'] + """ + Given a Kinesis stream name, it should raise an exception when posting + an empty message. 
+ """ + stream_name = kinesis_stream_summary["StreamName"] with pytest.raises(Exception): aws.post_to_kinesis(stream_name, None) + def test_stage_data_to_s3(s3, s3_bucket, science_data): object_name = "/external/NSIDC-TEST666/3/abcd-1234-wxyz-0987/science-data.bin" aws.stage_file(s3_bucket, object_name, data=science_data) s3_object = s3.get_object( - Bucket=s3_bucket, - Key=object_name, + Bucket=s3_bucket, + Key=object_name, ) - object_lines = [line.decode(encoding="utf-8") for line in s3_object['Body'].readlines()] + object_lines = [ + line.decode(encoding="utf-8") for line in s3_object["Body"].readlines() + ] object_data = "".join(object_lines) assert object_data == science_data + def test_stage_data_to_s3_with_invalid_bucket_name(s3_bucket, science_data): bucket_name = "xyzzy" object_name = "/external/NSIDC-TEST666/3/abcd-1234-wxyz-0987/science-data.bin" with pytest.raises(Exception): aws.stage_file(bucket_name, object_name, data=science_data) + def test_stage_data_to_s3_with_missing_object_name(s3, s3_bucket, science_data): with pytest.raises(Exception): aws.stage_file(s3_bucket, None, data=science_data) + def test_stage_data_to_s3_with_no_data(s3, s3_bucket): object_name = "/external/NSIDC-TEST666/3/abcd-1234-wxyz-0987/science-data.bin" with pytest.raises(Exception): aws.stage_file(s3_bucket, object_name, data=None) + def test_stage_file_to_s3(s3, s3_bucket, science_data): with TemporaryFile() as source_file: - source_file.write(science_data.encode('UTF-8')) + source_file.write(science_data.encode("UTF-8")) source_file.seek(0) object_name = "/external/NSIDC-TEST666/3/abcd-1234-wxyz-0987/science-data.bin" aws.stage_file(s3_bucket, object_name, file=source_file) s3_object = s3.get_object( - Bucket=s3_bucket, - Key=object_name, + Bucket=s3_bucket, + Key=object_name, ) - object_lines = [line.decode(encoding="utf-8") for line in s3_object['Body'].readlines()] + object_lines = [ + line.decode(encoding="utf-8") for line in s3_object["Body"].readlines() + ] object_data = "".join(object_lines) assert object_data == science_data + def test_stage_file_requires_data_or_file(s3_bucket): with pytest.raises(Exception): - aws.stage_file(s3_bucket, 'foo') + aws.stage_file(s3_bucket, "foo") + def test_staging_bucket_exists_for_valid_name(s3_bucket): bucket_name = "duck-test-bucket" assert aws.staging_bucket_exists(bucket_name) + def test_staging_bucket_exists_for_invalid_name(s3_bucket): bucket_name = "xyzzy" assert not aws.staging_bucket_exists(bucket_name) diff --git a/tests/test_cli.py b/tests/test_cli.py index 7540de3..cf92b8a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,11 +1,9 @@ from unittest.mock import patch -from click.testing import CliRunner import pytest - +from click.testing import CliRunner from nsidc.metgen.cli import cli - # Unit tests for the 'cli' module functions. # # The test boundary is the cli module's interface with the metgen module, so in @@ -14,53 +12,64 @@ # parameters, correctly handle their return values, and handle any exceptions # they may throw. 
+ @pytest.fixture def cli_runner(): return CliRunner() + def test_without_subcommand(cli_runner): result = cli_runner.invoke(cli) assert result.exit_code == 0 - assert 'Usage' in result.output - assert 'Commands' in result.output - for subcommand in ['info', 'init', 'process']: + assert "Usage" in result.output + assert "Commands" in result.output + for subcommand in ["info", "init", "process"]: assert subcommand in result.output + def test_help(cli_runner): - result = cli_runner.invoke(cli, ['--help']) + result = cli_runner.invoke(cli, ["--help"]) assert result.exit_code == 0 + def test_info_requires_config(cli_runner): - result = cli_runner.invoke(cli, ['info']) + result = cli_runner.invoke(cli, ["info"]) assert result.exit_code != 0 + def test_info_with_config(cli_runner): - result = cli_runner.invoke(cli, ['info', '--config', './example/modscg.ini']) + result = cli_runner.invoke(cli, ["info", "--config", "./example/modscg.ini"]) assert result.exit_code == 0 -@patch('nsidc.metgen.config.Config.show') + +@patch("nsidc.metgen.config.Config.show") def test_info_with_config_summarizes(mock, cli_runner): - result = cli_runner.invoke(cli, ['info', '--config', './example/modscg.ini']) + result = cli_runner.invoke(cli, ["info", "--config", "./example/modscg.ini"]) assert mock.called assert result.exit_code == 0 -@patch('nsidc.metgen.metgen.process') + +@patch("nsidc.metgen.metgen.process") def test_process_requires_config(mock, cli_runner): - result = cli_runner.invoke(cli, ['process']) + result = cli_runner.invoke(cli, ["process"]) assert not mock.called assert result.exit_code != 0 -@patch('nsidc.metgen.config.validate') -@patch('nsidc.metgen.metgen.process') + +@patch("nsidc.metgen.config.validate") +@patch("nsidc.metgen.metgen.process") def test_process_with_config_calls_process(mock_validate, mock_process, cli_runner): - result = cli_runner.invoke(cli, ['process', '--config', './example/modscg.ini']) + cli_runner.invoke(cli, ["process", "--config", "./example/modscg.ini"]) assert mock_process.called -@patch('nsidc.metgen.config.validate') -@patch('nsidc.metgen.metgen.process') + +@patch("nsidc.metgen.config.validate") +@patch("nsidc.metgen.metgen.process") def test_process_with_granule_limit(mock_validate, mock_process, cli_runner): number_files = 2 - result = cli_runner.invoke(cli, ['process', '-n', str(number_files), '--config', './example/modscg.ini']) + result = cli_runner.invoke( + cli, ["process", "-n", str(number_files), "--config", "./example/modscg.ini"] + ) assert mock_process.called args = mock_process.call_args.args @@ -69,53 +78,70 @@ def test_process_with_granule_limit(mock_validate, mock_process, cli_runner): assert configuration.number == number_files assert result.exit_code == 0 -@patch('nsidc.metgen.config.configuration') -@patch('nsidc.metgen.metgen.process') -@patch('nsidc.metgen.config.validate') -def test_process_with_no_write_cnm(mock_validate, process_mock, configuration_mock, cli_runner): - result = cli_runner.invoke(cli, ['process', '--config', './example/modscg.ini']) + +@patch("nsidc.metgen.config.configuration") +@patch("nsidc.metgen.metgen.process") +@patch("nsidc.metgen.config.validate") +def test_process_with_no_write_cnm( + mock_validate, process_mock, configuration_mock, cli_runner +): + result = cli_runner.invoke(cli, ["process", "--config", "./example/modscg.ini"]) assert configuration_mock.called args = configuration_mock.call_args.args overrides = args[1] - assert overrides['write_cnm_file'] == None + assert overrides["write_cnm_file"] is None assert 
result.exit_code == 0 -@patch('nsidc.metgen.config.configuration') -@patch('nsidc.metgen.metgen.process') -@patch('nsidc.metgen.config.validate') -def test_process_with_write_cnm(mock_validate, process_mock, configuration_mock, cli_runner): - result = cli_runner.invoke(cli, ['process', '-wc', '--config', './example/modscg.ini']) + +@patch("nsidc.metgen.config.configuration") +@patch("nsidc.metgen.metgen.process") +@patch("nsidc.metgen.config.validate") +def test_process_with_write_cnm( + mock_validate, process_mock, configuration_mock, cli_runner +): + result = cli_runner.invoke( + cli, ["process", "-wc", "--config", "./example/modscg.ini"] + ) assert configuration_mock.called args = configuration_mock.call_args.args overrides = args[1] - assert overrides['write_cnm_file'] == True + assert overrides["write_cnm_file"] assert result.exit_code == 0 -@patch('nsidc.metgen.config.configuration') -@patch('nsidc.metgen.metgen.process') -@patch('nsidc.metgen.config.validate') -def test_process_with_no_overwrite(mock_validate, process_mock, configuration_mock, cli_runner): - result = cli_runner.invoke(cli, ['process', '--config', './example/modscg.ini']) + +@patch("nsidc.metgen.config.configuration") +@patch("nsidc.metgen.metgen.process") +@patch("nsidc.metgen.config.validate") +def test_process_with_no_overwrite( + mock_validate, process_mock, configuration_mock, cli_runner +): + result = cli_runner.invoke(cli, ["process", "--config", "./example/modscg.ini"]) assert configuration_mock.called args = configuration_mock.call_args.args overrides = args[1] - assert overrides['overwrite_ummg'] == None + assert overrides["overwrite_ummg"] is None assert result.exit_code == 0 -@patch('nsidc.metgen.config.configuration') -@patch('nsidc.metgen.metgen.process') -@patch('nsidc.metgen.config.validate') -def test_process_with_overwrite(mock_validate, process_mock, configuration_mock, cli_runner): - result = cli_runner.invoke(cli, ['process', '-o', '--config', './example/modscg.ini']) + +@patch("nsidc.metgen.config.configuration") +@patch("nsidc.metgen.metgen.process") +@patch("nsidc.metgen.config.validate") +def test_process_with_overwrite( + mock_validate, process_mock, configuration_mock, cli_runner +): + result = cli_runner.invoke( + cli, ["process", "-o", "--config", "./example/modscg.ini"] + ) assert configuration_mock.called args = configuration_mock.call_args.args overrides = args[1] - assert overrides['overwrite_ummg'] == True + assert overrides["overwrite_ummg"] assert result.exit_code == 0 + # TODO: When process raises an exception, cli handles it and displays a message # and has non-zero exit code diff --git a/tests/test_config.py b/tests/test_config.py index f07951f..f9815b1 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,12 +1,9 @@ -from configparser import ConfigParser, ExtendedInterpolation import dataclasses +from configparser import ConfigParser, ExtendedInterpolation from unittest.mock import patch import pytest - -from nsidc.metgen import config -from nsidc.metgen import constants - +from nsidc.metgen import config, constants # Unit tests for the 'config' module functions. # @@ -16,45 +13,42 @@ # call them with the correct parameters, correctly handle their return values, # and handle any exceptions they may throw. 
+ @pytest.fixture def expected_keys(): - return set(['environment', - 'data_dir', - 'auth_id', - 'version', - 'provider', - 'local_output_dir', - 'ummg_dir', - 'kinesis_stream_name', - 'staging_bucket_name', - 'write_cnm_file', - 'overwrite_ummg', - 'checksum_type', - 'number', - 'dry_run',]) + return set( + [ + "environment", + "data_dir", + "auth_id", + "version", + "provider", + "local_output_dir", + "ummg_dir", + "kinesis_stream_name", + "staging_bucket_name", + "write_cnm_file", + "overwrite_ummg", + "checksum_type", + "number", + "dry_run", + ] + ) + @pytest.fixture def cfg_parser(): cp = ConfigParser(interpolation=ExtendedInterpolation()) - cp['Source'] = { - 'data_dir': '/data/example' - } - cp['Collection'] = { - 'auth_id': 'DATA-0001', - 'version': 42, - 'provider': 'FOO' - } - cp['Destination'] = { - 'local_output_dir': '/output/here', - 'ummg_dir': 'ummg', - 'kinesis_stream_name': "xyzzy-${environment}-stream", - 'staging_bucket_name': "xyzzy-${environment}-bucket", - 'write_cnm_file': False - } - cp['Settings'] = { - 'checksum_type': 'SHA256', - 'number': 1 + cp["Source"] = {"data_dir": "/data/example"} + cp["Collection"] = {"auth_id": "DATA-0001", "version": 42, "provider": "FOO"} + cp["Destination"] = { + "local_output_dir": "/output/here", + "ummg_dir": "ummg", + "kinesis_stream_name": "xyzzy-${environment}-stream", + "staging_bucket_name": "xyzzy-${environment}-bucket", + "write_cnm_file": False, } + cp["Settings"] = {"checksum_type": "SHA256", "number": 1} return cp @@ -62,47 +56,51 @@ def test_config_parser_without_filename(): with pytest.raises(ValueError): config.config_parser_factory(None) -@patch('nsidc.metgen.metgen.os.path.exists', return_value = True) + +@patch("nsidc.metgen.metgen.os.path.exists", return_value=True) def test_config_parser_return_type(mock): - result = config.config_parser_factory('foo.ini') + result = config.config_parser_factory("foo.ini") assert isinstance(result, ConfigParser) -@patch('nsidc.metgen.metgen.os.path.exists', return_value = True) + +@patch("nsidc.metgen.metgen.os.path.exists", return_value=True) def test_config_parser_handles_empty_strings_for_booleans(mock): - cp = config.config_parser_factory('foo.ini') - cp['foo'] = { - 'success': '' - } - assert cp.getboolean('foo', 'success') == False + cp = config.config_parser_factory("foo.ini") + cp["foo"] = {"success": ""} + assert not cp.getboolean("foo", "success") + def test_config_from_config_parser(cfg_parser): cfg = config.configuration(cfg_parser, {}, constants.DEFAULT_CUMULUS_ENVIRONMENT) assert isinstance(cfg, config.Config) + def test_config_with_no_write_cnm(cfg_parser, expected_keys): cfg = config.configuration(cfg_parser, {}, constants.DEFAULT_CUMULUS_ENVIRONMENT) config_keys = set(cfg.__dict__) assert len(config_keys - expected_keys) == 0 - assert cfg.environment == 'uat' - assert cfg.data_dir == '/data/example' - assert cfg.auth_id == 'DATA-0001' - assert cfg.kinesis_stream_name == 'xyzzy-uat-stream' + assert cfg.environment == "uat" + assert cfg.data_dir == "/data/example" + assert cfg.auth_id == "DATA-0001" + assert cfg.kinesis_stream_name == "xyzzy-uat-stream" assert not cfg.write_cnm_file + def test_config_with_write_cnm(cfg_parser, expected_keys): - cfg_parser.set("Destination", "write_cnm_file", 'True') + cfg_parser.set("Destination", "write_cnm_file", "True") cfg = config.configuration(cfg_parser, {}) config_keys = set(cfg.__dict__) assert len(config_keys - expected_keys) == 0 - assert cfg.data_dir == '/data/example' - assert cfg.auth_id == 'DATA-0001' - assert 
cfg.kinesis_stream_name == 'xyzzy-uat-stream' - assert cfg.environment == 'uat' - assert cfg.write_cnm_file == True + assert cfg.data_dir == "/data/example" + assert cfg.auth_id == "DATA-0001" + assert cfg.kinesis_stream_name == "xyzzy-uat-stream" + assert cfg.environment == "uat" + assert cfg.write_cnm_file + def test_config_with_no_overwrite_ummg(cfg_parser, expected_keys): cfg = config.configuration(cfg_parser, {}, constants.DEFAULT_CUMULUS_ENVIRONMENT) @@ -111,37 +109,59 @@ def test_config_with_no_overwrite_ummg(cfg_parser, expected_keys): assert len(config_keys - expected_keys) == 0 assert not cfg.overwrite_ummg + def test_config_with_overwrite_ummg(cfg_parser, expected_keys): - cfg_parser.set("Destination", "overwrite_ummg", 'True') + cfg_parser.set("Destination", "overwrite_ummg", "True") cfg = config.configuration(cfg_parser, {}) config_keys = set(cfg.__dict__) assert len(config_keys - expected_keys) == 0 - assert cfg.overwrite_ummg == True + assert cfg.overwrite_ummg + def test_get_configuration_value(cfg_parser): environment = constants.DEFAULT_CUMULUS_ENVIRONMENT - result = config._get_configuration_value(environment, "Source", "data_dir", str, cfg_parser, {}) + result = config._get_configuration_value( + environment, "Source", "data_dir", str, cfg_parser, {} + ) assert result == cfg_parser.get("Source", "data_dir") + def test_get_configuration_value_with_override(cfg_parser): environment = constants.DEFAULT_CUMULUS_ENVIRONMENT - overrides = { 'data_dir': 'foobar' } - result = config._get_configuration_value(environment, "Source", "data_dir", str, cfg_parser, overrides) - assert result == overrides['data_dir'] + overrides = {"data_dir": "foobar"} + result = config._get_configuration_value( + environment, "Source", "data_dir", str, cfg_parser, overrides + ) + assert result == overrides["data_dir"] + def test_get_configuration_value_interpolates_the_environment(cfg_parser): environment = constants.DEFAULT_CUMULUS_ENVIRONMENT - result = config._get_configuration_value(environment, "Destination", "kinesis_stream_name", str, cfg_parser, {}) + result = config._get_configuration_value( + environment, "Destination", "kinesis_stream_name", str, cfg_parser, {} + ) assert result == "xyzzy-uat-stream" -@pytest.mark.parametrize("section,option,expected", [ - ("Destination", "kinesis_stream_name", f"nsidc-cumulus-{constants.DEFAULT_CUMULUS_ENVIRONMENT}-external_notification"), - ("Destination", "staging_bucket_name", f"nsidc-cumulus-{constants.DEFAULT_CUMULUS_ENVIRONMENT}-ingest-staging"), + +@pytest.mark.parametrize( + "section,option,expected", + [ + ( + "Destination", + "kinesis_stream_name", + f"nsidc-cumulus-{constants.DEFAULT_CUMULUS_ENVIRONMENT}-external_notification", + ), + ( + "Destination", + "staging_bucket_name", + f"nsidc-cumulus-{constants.DEFAULT_CUMULUS_ENVIRONMENT}-ingest-staging", + ), ("Destination", "write_cnm_file", constants.DEFAULT_WRITE_CNM_FILE), ("Settings", "checksum_type", constants.DEFAULT_CHECKSUM_TYPE), ("Settings", "number", constants.DEFAULT_NUMBER), - ]) + ], +) def test_configuration_has_good_defaults(cfg_parser, section, option, expected): cfg_parser.remove_option(section, option) result = config.configuration(cfg_parser, {}, constants.DEFAULT_CUMULUS_ENVIRONMENT) @@ -149,17 +169,18 @@ def test_configuration_has_good_defaults(cfg_parser, section, option, expected): assert result_dict[option] == expected -@patch('nsidc.metgen.metgen.os.path.exists', return_value = True) -@patch('nsidc.metgen.metgen.aws.kinesis_stream_exists', return_value = True) 
-@patch('nsidc.metgen.metgen.aws.staging_bucket_exists', return_value = True) +@patch("nsidc.metgen.metgen.os.path.exists", return_value=True) +@patch("nsidc.metgen.metgen.aws.kinesis_stream_exists", return_value=True) +@patch("nsidc.metgen.metgen.aws.staging_bucket_exists", return_value=True) def test_validate_with_valid_checks(m1, m2, m3, cfg_parser): cfg = config.configuration(cfg_parser, {}) valid = config.validate(cfg) - assert valid == True + assert valid + -@patch('nsidc.metgen.metgen.os.path.exists', return_value = False) -@patch('nsidc.metgen.metgen.aws.kinesis_stream_exists', return_value = False) -@patch('nsidc.metgen.metgen.aws.staging_bucket_exists', return_value = False) +@patch("nsidc.metgen.metgen.os.path.exists", return_value=False) +@patch("nsidc.metgen.metgen.aws.kinesis_stream_exists", return_value=False) +@patch("nsidc.metgen.metgen.aws.staging_bucket_exists", return_value=False) def test_validate_with_invalid_checks(m1, m2, m3, cfg_parser): cfg = config.configuration(cfg_parser, {}) with pytest.raises(config.ValidationError) as exc_info: diff --git a/tests/test_metgen.py b/tests/test_metgen.py index 8edf88e..143e85f 100644 --- a/tests/test_metgen.py +++ b/tests/test_metgen.py @@ -1,12 +1,9 @@ -from configparser import ConfigParser import datetime as dt from unittest.mock import patch -from funcy import identity, partial import pytest - -from nsidc.metgen import config -from nsidc.metgen import metgen +from funcy import identity, partial +from nsidc.metgen import config, metgen # Unit tests for the 'metgen' module functions. # @@ -16,133 +13,155 @@ # metgen functions call them with the correct parameters, correctly handle # their return values, and handle any exceptions they may throw. + @pytest.fixture def granule_metadata_list(): return { - 'first_id': { - 'size_in_bytes': 100, - 'production_date_time': 'then', - 'temporal': 'now', - 'geometry': 'big' + "first_id": { + "size_in_bytes": 100, + "production_date_time": "then", + "temporal": "now", + "geometry": "big", + }, + "second_id": { + "size_in_bytes": 200, + "production_date_time": "before", + "temporal": "after", + "geometry": "small", }, - 'second_id': { - 'size_in_bytes': 200, - 'production_date_time': 'before', - 'temporal': 'after', - 'geometry': 'small' - } } + @pytest.fixture def one_granule_metadata(): return { - 'first_id': { - 'size_in_bytes': 150, - 'production_date_time': 'then', - 'temporal': 'now', - 'geometry': 'big' + "first_id": { + "size_in_bytes": 150, + "production_date_time": "then", + "temporal": "now", + "geometry": "big", } } + @pytest.fixture def fake_config(): - return config.Config('uat', 'data', 'auth_id', 'version', - 'foobar', 'output', 'ummg', 'stream', - 'bucket', True, True, 'sha', 3) + return config.Config( + "uat", + "data", + "auth_id", + "version", + "foobar", + "output", + "ummg", + "stream", + "bucket", + True, + True, + "sha", + 3, + ) + def test_banner(): assert len(metgen.banner()) > 0 + def test_gets_single_file_size(one_granule_metadata): summary = metgen.metadata_summary(one_granule_metadata) - assert summary['size_in_bytes'] == 150 + assert summary["size_in_bytes"] == 150 + def test_sums_multiple_file_sizes(granule_metadata_list): summary = metgen.metadata_summary(granule_metadata_list) - assert summary['size_in_bytes'] == 300 + assert summary["size_in_bytes"] == 300 + def test_uses_first_file_as_default(granule_metadata_list): summary = metgen.metadata_summary(granule_metadata_list) - assert summary['production_date_time'] == 'then' - assert summary['temporal'] == 
'now' - assert summary['geometry'] == 'big' + assert summary["production_date_time"] == "then" + assert summary["temporal"] == "now" + assert summary["geometry"] == "big" + def test_returns_only_gpolygon(): - result = metgen.populate_spatial({'points': 'some list of points'}) + result = metgen.populate_spatial({"points": "some list of points"}) assert "GPolygons" in result + def test_returns_single_datetime(): result = metgen.populate_temporal([123]) assert '"SingleDateTime": "123"' in result + def test_returns_datetime_range(): result = metgen.populate_temporal([123, 456]) - assert 'RangeDateTime' in result + assert "RangeDateTime" in result assert '"BeginningDateTime": "123"' in result assert '"EndingDateTime": "456"' in result + def test_s3_object_path_has_no_leading_slash(): - granule = metgen.Granule( - 'foo', - metgen.Collection('ABCD', 2), - uuid='abcd-1234' - ) - expected = 'external/ABCD/2/abcd-1234/xyzzy.bin' - assert metgen.s3_object_path(granule, 'xyzzy.bin') == expected + granule = metgen.Granule("foo", metgen.Collection("ABCD", 2), uuid="abcd-1234") + expected = "external/ABCD/2/abcd-1234/xyzzy.bin" + assert metgen.s3_object_path(granule, "xyzzy.bin") == expected + def test_s3_url_simple_case(): - staging_bucket_name = 'xyzzy-bucket' - granule = metgen.Granule( - 'foo', - metgen.Collection('ABCD', 2), - uuid='abcd-1234' - ) - expected = 's3://xyzzy-bucket/external/ABCD/2/abcd-1234/xyzzy.bin' - assert metgen.s3_url(staging_bucket_name, granule, 'xyzzy.bin') == expected + staging_bucket_name = "xyzzy-bucket" + granule = metgen.Granule("foo", metgen.Collection("ABCD", 2), uuid="abcd-1234") + expected = "s3://xyzzy-bucket/external/ABCD/2/abcd-1234/xyzzy.bin" + assert metgen.s3_url(staging_bucket_name, granule, "xyzzy.bin") == expected + @patch("nsidc.metgen.metgen.dt.datetime") def test_start_ledger(mock_datetime): now = dt.datetime(2099, 7, 4, 10, 11, 12) mock_datetime.now.return_value = now - granule = metgen.Granule('abcd-1234') + granule = metgen.Granule("abcd-1234") actual = metgen.start_ledger(granule) assert actual.granule == granule assert actual.startDatetime == now + @patch("nsidc.metgen.metgen.dt.datetime") def test_end_ledger(mock_datetime): now = dt.datetime(2099, 7, 4, 10, 11, 12) mock_datetime.now.return_value = now - granule = metgen.Granule('abcd-1234') - ledger = metgen.Ledger(granule, [metgen.Action('foo', True, '')], startDatetime = now) + granule = metgen.Granule("abcd-1234") + ledger = metgen.Ledger(granule, [metgen.Action("foo", True, "")], startDatetime=now) actual = metgen.end_ledger(ledger) assert actual.granule == granule - assert actual.successful == True + assert actual.successful assert actual.startDatetime == now assert actual.endDatetime == now + @patch("nsidc.metgen.metgen.dt.datetime") def test_end_ledger_with_unsuccessful_actions(mock_datetime): now = dt.datetime(2099, 7, 4, 10, 11, 12) mock_datetime.now.return_value = now - granule = metgen.Granule('abcd-1234') - ledger = metgen.Ledger(granule, - [metgen.Action('foo', False, ''), metgen.Action('bar', False, 'Oops')], - startDatetime = now) + granule = metgen.Granule("abcd-1234") + ledger = metgen.Ledger( + granule, + [metgen.Action("foo", False, ""), metgen.Action("bar", False, "Oops")], + startDatetime=now, + ) actual = metgen.end_ledger(ledger) assert actual.granule == granule - assert actual.successful == False + assert not actual.successful assert actual.startDatetime == now assert actual.endDatetime == now + def test_recorder(): - granule = metgen.Granule('abcd-1234') + granule = 
metgen.Granule("abcd-1234")
     ledger = metgen.start_ledger(granule)
 
     new_ledger = partial(metgen.recorder, identity)(ledger)
@@ -150,9 +169,11 @@
     assert new_ledger.granule == ledger.granule
     assert len(new_ledger.actions) == 1
 
+
 def test_recorder_with_failing_operation():
-    granule = metgen.Granule('abcd-1234')
+    granule = metgen.Granule("abcd-1234")
     ledger = metgen.start_ledger(granule)
+
     def failing_op():
         raise Exception()
 
@@ -161,6 +182,7 @@ def failing_op():
     assert new_ledger.granule == ledger.granule
     assert len(new_ledger.actions) == 1
-    assert new_ledger.actions[0].successful == False
+    assert not new_ledger.actions[0].successful
 
 def test_no_dummy_json_for_cnm():
     schema_path, dummy_json = metgen.schema_file_path('cnm')
@@ -182,6 +204,7 @@ def test_dummy_json_used(mock_validate, mock_open):
     fake_json = {"key": [{"foo": "bar"}]}
     fake_dummy_json = {"missing_key": "missing_foo"}
 
-    with patch('nsidc.metgen.metgen.json.load', return_value = fake_json):
-        metgen.apply_schema('schema file', 'json_file', fake_dummy_json)
-        mock_validate.assert_called_once_with(instance = fake_json | fake_dummy_json, schema='schema file')
+    with patch("nsidc.metgen.metgen.json.load", return_value=fake_json):
+        metgen.apply_schema("schema file", "json_file", fake_dummy_json)
+        mock_validate.assert_called_once_with(instance=fake_json | fake_dummy_json, schema="schema file")
+
diff --git a/tests/test_netcdf_reader.py b/tests/test_netcdf_reader.py
index 91ba63a..b6d7e9c 100644
--- a/tests/test_netcdf_reader.py
+++ b/tests/test_netcdf_reader.py
@@ -1,9 +1,5 @@
-from unittest.mock import patch
-
 import pytest
-
-from nsidc.metgen import constants
-from nsidc.metgen import netcdf_reader
+from nsidc.metgen import constants, netcdf_reader
 
 # Unit tests for the 'netcdf_reader' module functions.
 #
@@ -13,49 +9,87 @@
 # call them with the correct parameters, correctly handle their return values,
 # and handle any exceptions they may throw.
+ @pytest.fixture def xdata(): return list(range(0, 6, 2)) + @pytest.fixture def ydata(): return list(range(0, 25, 5)) + @pytest.fixture def big_xdata(): return list(range(0, 20, 2)) + @pytest.fixture def big_ydata(): return list(range(0, 50, 5)) + def test_large_grids_are_thinned(big_xdata, big_ydata): result = netcdf_reader.thinned_perimeter(big_xdata, big_ydata) assert len(result) == (constants.DEFAULT_SPATIAL_AXIS_SIZE * 4) - 3 + def test_small_grids_are_not_thinned(xdata, ydata): result = netcdf_reader.thinned_perimeter(xdata, ydata) assert len(result) == (len(xdata) * 2) + (len(ydata) * 2) - 3 + def test_perimeter_is_closed_polygon(xdata, ydata): result = netcdf_reader.thinned_perimeter(xdata, ydata) assert result[0] == result[-1] + def test_no_other_duplicate_values(big_xdata, big_ydata): result = netcdf_reader.thinned_perimeter(big_xdata, big_ydata) result_set = set(result) assert len(result_set) == len(result) - 1 -@pytest.mark.parametrize("input,expected", [ - pytest.param('2001-01-01', '2001-01-01T00:00:00.000Z', id="Date and no time"), - pytest.param('2001-01-01 18:59:59', '2001-01-01T18:59:59.000Z', id="Date with time"), - pytest.param('2001-01-01 18:59.5', '2001-01-01T18:59:30.000Z', id="Datetime and fractional minutes"), - pytest.param('2001-01-01 18:59.500', '2001-01-01T18:59:30.000Z', id="Datetime and zero padded fractional minutes"), - pytest.param('2001-01-01 18:59.34', '2001-01-01T18:59:20.000Z', id="Datetime and other fractional minutes value"), - pytest.param('2001-01-01 18:59.999', '2001-01-01T18:59:59.000Z', id="Datetime and other fractional minutes value"), - pytest.param('2001-01-01 18:59:20.666', '2001-01-01T18:59:20.666Z', id="Datetime and fractional seconds"), - pytest.param('2001-01-01 18:59', '2001-01-01T18:59:00.000Z', id="Datetime and hours/minutes"), -]) + +@pytest.mark.parametrize( + "input,expected", + [ + pytest.param("2001-01-01", "2001-01-01T00:00:00.000Z", id="Date and no time"), + pytest.param( + "2001-01-01 18:59:59", "2001-01-01T18:59:59.000Z", id="Date with time" + ), + pytest.param( + "2001-01-01 18:59.5", + "2001-01-01T18:59:30.000Z", + id="Datetime and fractional minutes", + ), + pytest.param( + "2001-01-01 18:59.500", + "2001-01-01T18:59:30.000Z", + id="Datetime and zero padded fractional minutes", + ), + pytest.param( + "2001-01-01 18:59.34", + "2001-01-01T18:59:20.000Z", + id="Datetime and other fractional minutes value", + ), + pytest.param( + "2001-01-01 18:59.999", + "2001-01-01T18:59:59.000Z", + id="Datetime and other fractional minutes value", + ), + pytest.param( + "2001-01-01 18:59:20.666", + "2001-01-01T18:59:20.666Z", + id="Datetime and fractional seconds", + ), + pytest.param( + "2001-01-01 18:59", + "2001-01-01T18:59:00.000Z", + id="Datetime and hours/minutes", + ), + ], +) def test_correctly_reads_date_time_strings(input, expected): result = netcdf_reader.ensure_iso(input) assert result == expected
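# Illustrative example: the parametrized cases above exercise
# netcdf_reader.ensure_iso(), which normalizes partial datetime strings to
# UTC ISO 8601 with millisecond precision, for example:
#
#   >>> from nsidc.metgen import netcdf_reader
#   >>> netcdf_reader.ensure_iso("2001-01-01 18:59")
#   '2001-01-01T18:59:00.000Z'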