Skip to content

Commit

Permalink
Make default asset name agnostic to raster vs vector in discovery
Browse files: browse the repository at this point in the history
  • Loading branch information
moradology committed Aug 31, 2023
1 parent 2fec970 commit a4a253f
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 4 deletions.
8 changes: 5 additions & 3 deletions dags/veda_data_pipeline/utils/s3_discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def construct_single_asset_items(discovered_files: List[str]) -> dict:
item = {
"item_id": filename,
"assets": {
"cog_default": {
"default": {
"title": "Default COG Layer",
"description": "Cloud optimized default layer to display on map",
"href": f"{prefix}/{filename}",
Expand Down Expand Up @@ -216,8 +216,6 @@ def s3_discovery_handler(event, chunk_size=2800, role_arn=None, bucket_output=No
item["item_id"] = id_template.format(item["item_id"])

if dry_run:
print(f"-DRYRUN- Discovered {len(items_with_assets)} items")
for idx in range(0, min(10, len(items_with_assets))):
print("-DRYRUN- Example item")
print(json.dumps(items_with_assets[idx]))

Check failure on line 220 in dags/veda_data_pipeline/utils/s3_discovery.py

View workflow job for this annotation

GitHub Actions / Flake8

dags/veda_data_pipeline/utils/s3_discovery.py#L220

Undefined name 'idx' (F821)

Expand All @@ -240,6 +238,10 @@ def s3_discovery_handler(event, chunk_size=2800, role_arn=None, bucket_output=No
**date_fields,
}

if dry_run and item_count < 10:
print("-DRYRUN- Example item")
print(json.dumps(file_obj))

payload["objects"].append(file_obj)
if records == chunk_size:
out_keys.append(generate_payload(s3_prefix_key=key, payload=payload))
Expand Down
4 changes: 4 additions & 0 deletions docker_tasks/build_stac/utils/stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ def generate_stac(event: events.RegexEvent) -> pystac.Item:
for asset_name, asset_definition in event.assets.items():
with rasterio.open(asset_definition["href"]) as src:
media_type = stac.get_media_type(src)
# Discovery emits the format-agnostic asset name "default", but the default asset
# name for COGs is "cog_default", so intercept "default" here and rename it.
if asset_name == "default":
asset_name = "cog_default"
assets[asset_name] = pystac.Asset(
title=asset_definition["title"],
description=asset_definition["description"],
Expand Down
2 changes: 1 addition & 1 deletion docker_tasks/vector_ingest/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ def handler(event, context):
s3_objects = event_received["objects"]
status = list()
for s3_object in s3_objects:
href = s3_object["s3_filename"]
href = s3_object["assets"]["default"]["href"]
collection = s3_object["collection"]
downloaded_filepath = download_file(href)
print(f"[ DOWNLOAD FILEPATH ]: {downloaded_filepath}")
Expand Down

0 comments on commit a4a253f

Please sign in to comment.