diff --git a/.github/workflows/push.yml b/.github/workflows/build-test.yml similarity index 94% rename from .github/workflows/push.yml rename to .github/workflows/build-test.yml index 3014ff5..0bdd3b1 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/build-test.yml @@ -1,10 +1,9 @@ +name: Build & test on: push: - branches: [main] - pull_request: jobs: - test: + build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..d147919 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,74 @@ +name: Publish release to PyPI + +on: + release: + types: [released] + +jobs: + build: + name: Build package + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: 3.12 + + - name: cache poetry install + uses: actions/cache@v4 + with: + path: ~/.local + key: poetry-1.8.3-0 + + - uses: snok/install-poetry@v1 + with: + version: 1.8.3 + virtualenvs-create: true + virtualenvs-in-project: true + + - name: cache deps + id: cache-deps + uses: actions/cache@v4 + with: + path: .venv + key: pydeps-${{ hashFiles('**/poetry.lock') }} + + - run: poetry install --no-interaction --no-root + if: steps.cache-deps.outputs.cache-hit != 'true' + + - run: poetry install --no-interaction + + - run: poetry build + + - uses: actions/upload-artifact@v4 + with: + name: release-dists + path: dist/ + + publish: + name: Publish to PyPI + runs-on: ubuntu-latest + needs: [build] + permissions: + id-token: write + attestations: write + contents: read + environment: + name: PyPI + url: https://pypi.org/p/nsidc-metgenc + steps: + - uses: actions/download-artifact@v4 + with: + name: release-dists + path: dist/ + + - name: Generate artifact attestation for sdist and wheel + uses: actions/attest-build-provenance@v1.4.3 + with: + subject-path: "dist/*" + + - name: Publish package distributions to PyPI + uses: 
pypa/gh-action-pypi-publish@release/v1 + with: + packages-dir: dist/ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..6fa1c41 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,20 @@ +MetGenC Changelog + +## UNRELEASED + +This is the Minimum Viable Product (MVP) release of MetGenC. The +features include: + + * Provides a prompt-driven means of configuring MetGenC to ingest + a new collection. + * Processing is driven by a configuration file for control of various + aspects of the ingest. + * Generates a UUID and submission time for each granule. + * Creates UMM-G compliant metadata for each source granule. + * The UMM-G includes required attributes, including temporal and + spatial bounds. + * Generates a Cumulus Notification Message (CNM) for each granule. + * Stages the science data files and their UMM-G metadata in + a configurable S3 bucket location. + * Submits the CNM message to a configurable Kinesis stream in + order to trigger a Cumulus workflow. diff --git a/README.md b/README.md index cb4bde9..aefcf8b 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,9 @@ # MetGenC +![build & test workflow](https://github.com/nsidc/granule-metgen/actions/workflows/build-test.yml/badge.svg) +![publish workflow](https://github.com/nsidc/granule-metgen/actions/workflows/publish.yml/badge.svg) + The `MetGenC` toolkit enables Operations staff and data producers to create metadata files conforming to NASA's Common Metadata Repository UMM-G specification and ingest data directly to NASA EOSDIS’s Cumulus archive. Cumulus is an @@ -31,14 +34,8 @@ or $ python3 --version -Next, you must also install [Poetry](https://python-poetry.org/) either by using the [official -installer](https://python-poetry.org/docs/#installing-with-the-official-installer) -if you’re comfortable following the instructions, or by using a package -manager (like Homebrew) if this is more familiar to you. 
When successfully -installed, you should be able to run: - - $ poetry --version - Poetry (version 1.8.3) +Next, install the AWS Command Line Interface (CLI) by [following the appropriate +instructions for your platform](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html). Lastly, you will need to create & setup AWS credentials for yourself. The ways in which this can be accomplished are detailed in the **AWS Credentials** section below. @@ -58,29 +55,13 @@ this can be accomplished are detailed in the **AWS Credentials** section below. can be used to determine the padding added to x and y values. - Date/time strings can be parsed using `datetime.fromisoformat` -## Installation of MetGenC from GitHub - -Make a local directory (i.e., on your computer), and then `cd` into that -directory. Clone the `granule-metgen` repository using ssh if you have [added -ssh keys to your GitHub -account](https://docs.github.com/en/authentication/connecting-to-github-with-ssh/adding-a-new-ssh-key-to-your-github-account) -or via https if you haven't: - - $ mkdir -p ~/my-projects; cd ~/my-projects - # Install using ssh: - $ git clone git@github.com:nsidc/granule-metgen.git - OR - # Install using https: - $ git clone https://github.com/nsidc/granule-metgen.git +## Installing MetGenC -Enter the `granule-metgen` directory and run Poetry to have it install the `granule-metgen` dependencies. Then start a new shell in which you can run the tool: +MetGenC can be installed from [PyPI](https://pypi.org/): - $ cd granule-metgen - $ poetry install - $ poetry shell + $ pip install nsidc-metgenc -With the Poetry shell running, start the metgenc tool JUST to verify that it’s working by requesting its usage options and having them -returned. There’s more to do (detailed in the **Usage** section below) before MetGenC can be run to successfully create ummg files, cnm messages, and stage data to an S3 bucket for ingest!):: +That's it! 
Now we're ready to run MetGenC and see what it can do: $ metgenc --help Usage: metgenc [OPTIONS] COMMAND [ARGS]... @@ -221,6 +202,15 @@ TBD * [Python](https://www.python.org/) v3.12+ * [Poetry](https://python-poetry.org/docs/#installing-with-the-official-installer) +You can install [Poetry](https://python-poetry.org/) either by using the [official +installer](https://python-poetry.org/docs/#installing-with-the-official-installer) +if you’re comfortable following the instructions, or by using a package +manager (like Homebrew) if this is more familiar to you. When successfully +installed, you should be able to run: + + $ poetry --version + Poetry (version 1.8.3) + ### Installing Dependencies * Use Poetry to create and activate a virtual environment @@ -231,7 +221,7 @@ TBD $ poetry install -### Run tests: +### Run tests $ poetry run pytest @@ -239,6 +229,38 @@ TBD $ poetry run ptw . --now --clear +### Releasing + +* Update the CHANGELOG to include details of the changes included in the new + release. The version should be the string literal 'UNRELEASED' (without + single-quotes). It will be replaced with the actual version number after + we bump the version below. + +* Show the current version and the possible next versions: + + $ bump-my-version show-bump + 0.3.0 ── bump ─┬─ major ─ 1.0.0 + ├─ minor ─ 0.4.0 + ╰─ patch ─ 0.3.1 + +* Bump the version to the desired number, for example: + + $ bump-my-version bump minor + + You will see the latest commit & tag by looking at `git log`. You can then + push these to GitHub (`git push --follow-tags`) to trigger the CI/CD + workflow. + +* On the [GitHub repository](https://github.com/nsidc/granule-metgen), click + 'Releases' and follow the steps documented on the + [GitHub Releases page](https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository#creating-a-release). + Draft a new Release using the version tag created above. 
After you have + published the release, the MetGenC Publish GHA workflow will be started. + Check that the workflow succeeds on the + [MetGenC Actions page](https://github.com/nsidc/granule-metgen/actions), + and verify that the + [new MetGenC release is available on PyPI](https://pypi.org/project/nsidc-metgenc/). + ## Credit This content was developed by the National Snow and Ice Data Center with funding from diff --git a/poetry.lock b/poetry.lock index 96e4a10..60f1b00 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "annotated-types" @@ -114,6 +114,43 @@ urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version > [package.extras] crt = ["awscrt (==0.22.0)"] +[[package]] +name = "bracex" +version = "2.5.post1" +description = "Bash style brace expander." +optional = false +python-versions = ">=3.8" +files = [ + {file = "bracex-2.5.post1-py3-none-any.whl", hash = "sha256:13e5732fec27828d6af308628285ad358047cec36801598368cb28bc631dbaf6"}, + {file = "bracex-2.5.post1.tar.gz", hash = "sha256:12c50952415bfa773d2d9ccb8e79651b8cdb1f31a42f6091b804f6ba2b4a66b6"}, +] + +[[package]] +name = "bump-my-version" +version = "0.28.1" +description = "Version bump your Python project" +optional = false +python-versions = ">=3.8" +files = [ + {file = "bump_my_version-0.28.1-py3-none-any.whl", hash = "sha256:df7fdb02a1b43c122a6714df6d1fe4efc7a1220b5638ca5a0eb3018813c1b222"}, + {file = "bump_my_version-0.28.1.tar.gz", hash = "sha256:e608def5191baf505b6cde88bd679a0a95fc4cfeace4247adb60ac0f8a7e57ee"}, +] + +[package.dependencies] +click = "*" +pydantic = ">=2.0.0" +pydantic-settings = "*" +questionary = "*" +rich = "*" +rich-click = "*" +tomlkit = "*" +wcmatch = ">=8.5.1" + +[package.extras] +dev = ["generate-changelog (>=0.7.6)", "git-fame (>=1.12.2)", 
"pip-tools", "pre-commit"] +docs = ["black", "markdown-customblocks", "mdx-truly-sane-lists", "mkdocs", "mkdocs-click", "mkdocs-drawio", "mkdocs-gen-files", "mkdocs-git-authors-plugin", "mkdocs-git-committers-plugin", "mkdocs-git-revision-date-localized-plugin (>=1.2.6)", "mkdocs-include-markdown-plugin", "mkdocs-literate-nav", "mkdocs-material", "mkdocstrings[python]", "python-frontmatter"] +test = ["coverage", "freezegun", "pre-commit", "pytest", "pytest-cov", "pytest-mock", "pytest-sugar"] + [[package]] name = "certifi" version = "2024.8.30" @@ -1310,6 +1347,20 @@ files = [ {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, ] +[[package]] +name = "prompt-toolkit" +version = "3.0.36" +description = "Library for building powerful interactive command lines in Python" +optional = false +python-versions = ">=3.6.2" +files = [ + {file = "prompt_toolkit-3.0.36-py3-none-any.whl", hash = "sha256:aa64ad242a462c5ff0363a7b9cfe696c20d55d9fc60c11fd8e632d064804d305"}, + {file = "prompt_toolkit-3.0.36.tar.gz", hash = "sha256:3e163f254bef5a03b146397d7c1963bd3e2812f0964bb9a24e6ec761fd28db63"}, +] + +[package.dependencies] +wcwidth = "*" + [[package]] name = "py-partiql-parser" version = "0.5.6" @@ -1459,6 +1510,26 @@ files = [ [package.dependencies] typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" +[[package]] +name = "pydantic-settings" +version = "2.6.1" +description = "Settings management using Pydantic" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic_settings-2.6.1-py3-none-any.whl", hash = "sha256:7fb0637c786a558d3103436278a7c4f1cfd29ba8973238a50c5bb9a55387da87"}, + {file = "pydantic_settings-2.6.1.tar.gz", hash = "sha256:e0f92546d8a9923cb8941689abf85d6601a8c19a23e97a34b2964a2e3f813ca0"}, +] + +[package.dependencies] +pydantic = ">=2.7.0" +python-dotenv = ">=0.21.0" + +[package.extras] +azure-key-vault = ["azure-identity (>=1.16.0)", "azure-keyvault-secrets (>=4.8.0)"] +toml = ["tomli 
(>=2.0.1)"] +yaml = ["pyyaml (>=6.0.1)"] + [[package]] name = "pyfiglet" version = "1.0.2" @@ -1583,6 +1654,20 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-dotenv" +version = "1.0.1" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, + {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "pytz" version = "2024.2" @@ -1683,6 +1768,20 @@ files = [ {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, ] +[[package]] +name = "questionary" +version = "2.0.1" +description = "Python library to build pretty command line user prompts ⭐️" +optional = false +python-versions = ">=3.8" +files = [ + {file = "questionary-2.0.1-py3-none-any.whl", hash = "sha256:8ab9a01d0b91b68444dff7f6652c1e754105533f083cbe27597c8110ecc230a2"}, + {file = "questionary-2.0.1.tar.gz", hash = "sha256:bcce898bf3dbb446ff62830c86c5c6fb9a22a54146f0f5597d3da43b10d8fc8b"}, +] + +[package.dependencies] +prompt_toolkit = ">=2.0,<=3.0.36" + [[package]] name = "referencing" version = "0.35.1" @@ -1990,6 +2089,26 @@ pygments = ">=2.13.0,<3.0.0" [package.extras] jupyter = ["ipywidgets (>=7.5.1,<9)"] +[[package]] +name = "rich-click" +version = "1.8.4" +description = "Format click help output nicely with rich" +optional = false +python-versions = ">=3.7" +files = [ + {file = "rich_click-1.8.4-py3-none-any.whl", hash = "sha256:2d2841b3cebe610d5682baa1194beaf78ab00c4fa31931533261b5eba2ee80b7"}, + {file = "rich_click-1.8.4.tar.gz", hash = "sha256:0f49471f04439269d0e66a6f43120f52d11d594869a2a0be600cfb12eb0616b9"}, +] + +[package.dependencies] +click = ">=7" +rich = 
">=10.7" +typing-extensions = ">=4" + +[package.extras] +dev = ["mypy", "packaging", "pre-commit", "pytest", "pytest-cov", "rich-codex", "ruff", "types-setuptools"] +docs = ["markdown-include", "mkdocs", "mkdocs-glightbox", "mkdocs-material-extensions", "mkdocs-material[imaging] (>=9.5.18,<9.6.0)", "mkdocs-rss-plugin", "mkdocstrings[python]", "rich-codex"] + [[package]] name = "rpds-py" version = "0.21.0" @@ -2249,6 +2368,17 @@ mpmath = ">=1.1.0,<1.4" [package.extras] dev = ["hypothesis (>=6.70.0)", "pytest (>=7.1.0)"] +[[package]] +name = "tomlkit" +version = "0.13.2" +description = "Style preserving TOML library" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tomlkit-0.13.2-py3-none-any.whl", hash = "sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde"}, + {file = "tomlkit-0.13.2.tar.gz", hash = "sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79"}, +] + [[package]] name = "toolz" version = "1.0.0" @@ -2341,6 +2471,31 @@ files = [ [package.extras] watchmedo = ["PyYAML (>=3.10)"] +[[package]] +name = "wcmatch" +version = "10.0" +description = "Wildcard/glob file name matcher." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "wcmatch-10.0-py3-none-any.whl", hash = "sha256:0dd927072d03c0a6527a20d2e6ad5ba8d0380e60870c383bc533b71744df7b7a"}, + {file = "wcmatch-10.0.tar.gz", hash = "sha256:e72f0de09bba6a04e0de70937b0cf06e55f36f37b3deb422dfaf854b867b840a"}, +] + +[package.dependencies] +bracex = ">=2.1.1" + +[[package]] +name = "wcwidth" +version = "0.2.13" +description = "Measures the displayed width of unicode strings in a terminal" +optional = false +python-versions = "*" +files = [ + {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, + {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, +] + [[package]] name = "werkzeug" version = "3.1.3" @@ -2476,4 +2631,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "f23f8f81f98749abc04b7adaae503168707b49f305dc487e5755b24b4caacc54" +content-hash = "a3d1919a01b2ce7eb2072972a0d827ce6b467bf857f23a5e5045aa513304fd1e" diff --git a/pyproject.toml b/pyproject.toml index feaf465..a30262b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] -name = "nsidc-metgen" -version = "0.1.0" -description = "The nsidc-metgen package enables data producers as well as Operations staff managing the data ingest workflow to create metadata files conforming to NASA's Common Metadata Repository UMM-G specification." +name = "nsidc-metgenc" +version = "0.4.0" +description = "The nsidc-metgenc package enables data producers as well as Operations staff managing the data ingest workflow to create metadata files conforming to NASA's Common Metadata Repository UMM-G specification." 
authors = ["National Snow and Ice Data Center (NSIDC) "] readme = "README.md" package-mode = true @@ -30,6 +30,41 @@ pytest-watcher = "^0.4.3" [tool.poetry.group.dev.dependencies] ruff = "^0.5.5" mypy = "^1.11.1" +bump-my-version = "^0.28.1" + +[tool.bumpversion] +current_version = "0.4.0" +parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)" +serialize = ["{major}.{minor}.{patch}"] +search = "{current_version}" +replace = "{new_version}" +regex = false +ignore_missing_version = false +ignore_missing_files = false +tag = true +sign_tags = false +tag_name = "v{new_version}" +tag_message = "Bump version: {current_version} → {new_version}" +allow_dirty = false +commit = true +message = "Bump version: {current_version} → {new_version}" +commit_args = "" +setup_hooks = [] +pre_commit_hooks = [] +post_commit_hooks = [] + +[[tool.bumpversion.files]] +filename = "src/nsidc/metgen/__init__.py" + +[[tool.bumpversion.files]] +filename = "pyproject.toml" +search = 'version = "{current_version}"' +replace = 'version = "{new_version}"' + +[[tool.bumpversion.files]] +filename = "CHANGELOG.md" +search = 'UNRELEASED' +replace = 'v{new_version}' [build-system] requires = ["poetry-core"] diff --git a/src/nsidc/metgen/__init__.py b/src/nsidc/metgen/__init__.py index e69de29..a3d9f01 100644 --- a/src/nsidc/metgen/__init__.py +++ b/src/nsidc/metgen/__init__.py @@ -0,0 +1,4 @@ +__version__ = "v0.4.0" + + +__all__ = ["__version__"] diff --git a/src/nsidc/metgen/constants.py b/src/nsidc/metgen/constants.py index 4bcfe70..ac4ab45 100644 --- a/src/nsidc/metgen/constants.py +++ b/src/nsidc/metgen/constants.py @@ -9,7 +9,7 @@ DEFAULT_DRY_RUN = False # JSON schema locations and versions -CNM_JSON_SCHEMA = 'src/nsidc/metgen/json-schema/cumulus_sns_schema.json' +CNM_JSON_SCHEMA = ('nsidc.metgen.json-schema', 'cumulus_sns_schema.json') CNM_JSON_SCHEMA_VERSION = '1.6.1' # Configuration sections @@ -22,11 +22,11 @@ DEFAULT_SPATIAL_AXIS_SIZE = 6 # Templates -CNM_BODY_TEMPLATE = 
'src/nsidc/metgen/templates/cnm_body_template.json' -CNM_FILES_TEMPLATE = 'src/nsidc/metgen/templates/cnm_files_template.json' -UMMG_BODY_TEMPLATE = 'src/nsidc/metgen/templates/ummg_body_template.json' -UMMG_TEMPORAL_SINGLE_TEMPLATE = 'src/nsidc/metgen/templates/ummg_temporal_single_template.json' -UMMG_TEMPORAL_RANGE_TEMPLATE = 'src/nsidc/metgen/templates/ummg_temporal_range_template.json' -UMMG_SPATIAL_GPOLYGON_TEMPLATE = 'src/nsidc/metgen/templates/ummg_horizontal_gpolygon_template.json' -UMMG_SPATIAL_POINT_TEMPLATE = 'src/nsidc/metgen/templates/ummg_horizontal_point_template.json' -UMMG_SPATIAL_RECTANGLE_TEMPLATE = 'src/nsidc/metgen/templates/ummg_horizontal_rectangle_template.json' +CNM_BODY_TEMPLATE = ('nsidc.metgen.templates', 'cnm_body_template.json') +CNM_FILES_TEMPLATE = ('nsidc.metgen.templates', 'cnm_files_template.json') +UMMG_BODY_TEMPLATE = ('nsidc.metgen.templates', 'ummg_body_template.json') +UMMG_TEMPORAL_SINGLE_TEMPLATE = ('nsidc.metgen.templates', 'ummg_temporal_single_template.json') +UMMG_TEMPORAL_RANGE_TEMPLATE = ('nsidc.metgen.templates', 'ummg_temporal_range_template.json') +UMMG_SPATIAL_GPOLYGON_TEMPLATE = ('nsidc.metgen.templates', 'ummg_horizontal_gpolygon_template.json') +UMMG_SPATIAL_POINT_TEMPLATE = ('nsidc.metgen.templates', 'ummg_horizontal_point_template.json') +UMMG_SPATIAL_RECTANGLE_TEMPLATE = ('nsidc.metgen.templates', 'ummg_horizontal_rectangle_template.json') diff --git a/src/nsidc/metgen/metgen.py b/src/nsidc/metgen/metgen.py index 42631ce..6ab454d 100644 --- a/src/nsidc/metgen/metgen.py +++ b/src/nsidc/metgen/metgen.py @@ -2,6 +2,7 @@ import dataclasses import datetime as dt import hashlib +from importlib.resources import open_text import json import jsonschema import logging @@ -565,8 +566,8 @@ def cnms_body_template(): def cnms_files_template(): return initialize_template(constants.CNM_FILES_TEMPLATE) -def initialize_template(file): - with open(file) as template_file: +def initialize_template(resource_location): + with 
open_text(*resource_location) as template_file: template_str = template_file.read() return Template(template_str) @@ -576,12 +577,12 @@ def validate(configuration, content_type): Validate local JSON files """ output_file_path = file_type_path(configuration, content_type) - schema_file = schema_file_path(content_type) + schema_resource_location = schema_file_path(content_type) logger = logging.getLogger('metgenc') logger.info('') logger.info(f"Validating files in {output_file_path}...") - with open(schema_file) as sf: + with open_text(*schema_resource_location) as sf: schema = json.load(sf) # loop through all files and validate each one