Skip to content

Commit

Permalink
add collection_id to items (#12)
Browse files Browse the repository at this point in the history
* optionally use `collection` when creating an item

* update ruff
  • Loading branch information
hrodmn authored Aug 16, 2024
1 parent 7a86619 commit 6fcb2fd
Show file tree
Hide file tree
Showing 12 changed files with 64 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -142,4 +142,4 @@ cython_debug/
scratch
.Trash-0
.virtual_documents

.envrc
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,6 @@ repos:
- click != 8.1.0
- stactools
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.0.288
rev: v0.6.0
hooks:
- id: ruff
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ number as needed.

## [Unreleased]

- Nothing.
- Add `collection_id` to items

[Unreleased]: <https://github.com/stactools-packages/noaa-hrrr/tree/main/>

Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ dev = [
"pre-commit~=3.4",
"pytest-cov~=4.1",
"pytest~=7.4",
"ruff==0.0.288",
"ruff==0.6.0",
]
docs = ["pystac~=1.8", "ipykernel~=6.25", "jinja2~=3.1"]

Expand All @@ -60,7 +60,7 @@ strict = true
mypy_path = "src"

[tool.ruff]
select = ["E", "F", "I"]
lint.select = ["E", "F", "I"]

[tool.setuptools.package-data]
"stactools.noaa_hrrr.data" = ["*.csv.gz"]
3 changes: 2 additions & 1 deletion scripts/update-examples
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ import shutil
from datetime import datetime
from pathlib import Path

import stactools.noaa_hrrr.stac
from pystac import CatalogType

import stactools.noaa_hrrr.stac
from stactools.noaa_hrrr.metadata import (
CloudProvider,
Product,
Expand Down
7 changes: 2 additions & 5 deletions src/stactools/noaa_hrrr/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,13 @@

import click
from click import Command, Group

from stactools.noaa_hrrr import stac
from stactools.noaa_hrrr.constants import (
COLLECTION_ID_FORMAT,
EXTENDED_FORECAST_MAX_HOUR,
)
from stactools.noaa_hrrr.metadata import (
CloudProvider,
Product,
Region,
)
from stactools.noaa_hrrr.metadata import CloudProvider, Product, Region

logger = logging.getLogger(__name__)

Expand Down
8 changes: 5 additions & 3 deletions src/stactools/noaa_hrrr/inventory.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
"""Each .grib file in the HRRR dataset contains dozens or hundreds of distinct variables
that represent data along several dimensions. The inventory files published by NOAA are
useful for the human-readable descriptions, but more reliable inventory dataframes can
useful for the human-readable descriptions, but more reliable inventory dataframes can
be generated by reading the sidecar .grib2.idx files.
The functions in this module generate the metadata required to define the coordinates
along the forecast_valid x level dimensions on which specific variables have data. These
dataframes are used to populate the datacube extension metadata for each collection.
The dimensions of interest are:
1. forecast_valid: either the average, minimum, maximum, or accumulated value for a
1. forecast_valid: either the average, minimum, maximum, or accumulated value for a
specific time range, e.g. 3-4 hours, 0-1 day, etc.
For forecast hour 0, the level is "analysis"
2. level: the models generate predictions of many of the variables for various levels
2. level: the models generate predictions of many of the variables for various levels
in the atmosphere, e.g. 0-9000 ft, cloud surface, top of atmosphere, etc.
"""

import logging
import multiprocessing as mp
from datetime import datetime, timedelta
Expand All @@ -23,6 +24,7 @@

import httpx
import pandas as pd

from stactools.noaa_hrrr.constants import (
BYTE_SIZE,
DESCRIPTION,
Expand Down
1 change: 1 addition & 0 deletions src/stactools/noaa_hrrr/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from parse import Result, parse
from rasterio.crs import CRS
from rasterio.warp import transform_bounds

from stactools.noaa_hrrr.constants import (
EXTENDED_FORECAST_MAX_HOUR,
STANDARD_FORECAST_MAX_HOUR,
Expand Down
33 changes: 23 additions & 10 deletions src/stactools/noaa_hrrr/stac.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,11 @@
import logging
import multiprocessing as mp
from datetime import datetime, timedelta
from typing import Union
from typing import Optional, Union

import pandas as pd
import pystac
from pystac import (
Collection,
Extent,
Item,
SpatialExtent,
TemporalExtent,
)
from pystac import Collection, Extent, Item, SpatialExtent, TemporalExtent
from pystac.catalog import CatalogType
from pystac.extensions.datacube import (
DatacubeExtension,
Expand All @@ -23,6 +17,7 @@
from pystac.extensions.item_assets import AssetDefinition, ItemAssetsExtension
from pystac.item_collection import ItemCollection
from pystac.provider import Provider, ProviderRole

from stactools.noaa_hrrr.constants import (
BYTE_SIZE,
COLLECTION_ID_FORMAT,
Expand Down Expand Up @@ -438,6 +433,7 @@ def create_item(
cloud_provider: CloudProvider,
reference_datetime: datetime,
forecast_hour: int,
collection: Optional[Collection] = None,
) -> Item:
"""Creates a STAC item for a region x product x cloud provider x reference_datetime
(cycle run hour) combination.
Expand Down Expand Up @@ -487,6 +483,7 @@ def create_item(
cloud_provider=cloud_provider,
reference_datetime=reference_datetime,
forecast_hour=forecast_hour,
collection=collection,
)


Expand All @@ -497,6 +494,7 @@ def create_item_from_idx_df(
cloud_provider: CloudProvider,
reference_datetime: datetime,
forecast_hour: int,
collection: Optional[Collection] = None,
) -> Item:
"""Creates a STAC item for a region x product x cloud provider x reference_datetime
(cycle run hour) combination and a provided idx dataframe.
Expand Down Expand Up @@ -550,6 +548,7 @@ def create_item_from_idx_df(
geometry=region_config.geometry_4326,
bbox=region_config.bbox_4326,
datetime=forecast_datetime,
collection=collection,
properties={
"forecast:reference_time": reference_datetime.strftime(
"%Y-%m-%dT%H:%M:%SZ"
Expand Down Expand Up @@ -613,11 +612,17 @@ def create_item_safe(
cloud_provider: CloudProvider,
reference_datetime: datetime,
forecast_hour: int,
collection: Optional[Collection],
) -> Union[Item, None]:
"""Try to create an item; log a warning if a NotFoundError is raised"""
try:
return create_item(
region, product, cloud_provider, reference_datetime, forecast_hour
region,
product,
cloud_provider,
reference_datetime,
forecast_hour,
collection,
)
except NotFoundError as e:
logging.warning(e)
Expand All @@ -630,6 +635,7 @@ def create_item_collection(
cloud_provider: CloudProvider,
start_date: datetime,
end_date: datetime,
collection: Optional[Collection] = None,
) -> pystac.ItemCollection:
"""Create an item collection containing all items for a date range"""

Expand All @@ -644,7 +650,14 @@ def create_item_collection(
forecast_cycle_type = ForecastCycleType.from_timestamp(reference_datetime)
for forecast_hour in forecast_cycle_type.generate_forecast_hours():
tasks.append(
(region, product, cloud_provider, reference_datetime, forecast_hour)
(
region,
product,
cloud_provider,
reference_datetime,
forecast_hour,
collection,
)
)

reference_date += one_day
Expand Down
1 change: 1 addition & 0 deletions tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from click import Group
from click.testing import CliRunner
from pystac import Collection, Item

from stactools.noaa_hrrr.commands import create_noaahrrr_command
from stactools.noaa_hrrr.metadata import (
CloudProvider,
Expand Down
1 change: 1 addition & 0 deletions tests/test_inventory.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pandas as pd
import pytest

from stactools.noaa_hrrr.inventory import (
DESCRIPTION_COLS,
INVENTORY_COLS,
Expand Down
24 changes: 24 additions & 0 deletions tests/test_stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from datetime import datetime, timedelta

import pytest

from stactools.noaa_hrrr import stac
from stactools.noaa_hrrr.constants import (
COLLECTION_ID_FORMAT,
Expand Down Expand Up @@ -81,6 +82,29 @@ def test_create_item(
_ = json.dumps(item.to_dict())


def test_create_item_with_collection() -> None:
    """An item created with a collection reports that collection's id."""
    # The same region/product/cloud provider triple drives the collection,
    # the item, and the expected collection id.
    combo = {
        "region": Region.conus,
        "product": Product.sfc,
        "cloud_provider": CloudProvider.aws,
    }

    parent = stac.create_collection(**combo)
    item = stac.create_item(
        reference_datetime=datetime(year=2024, month=1, day=1, hour=6),
        forecast_hour=12,
        collection=parent,
        **combo,
    )

    expected_id = COLLECTION_ID_FORMAT.format(
        **{key: member.value for key, member in combo.items()}
    )
    assert item.collection_id == expected_id


def test_create_item_collection() -> None:
start_date = datetime(year=2024, month=5, day=1)
item_collection = stac.create_item_collection(
Expand Down

0 comments on commit 6fcb2fd

Please sign in to comment.