Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Write to Parquet with GeoParquet 1.1 metadata #40

Merged
merged 10 commits into from
Apr 19, 2024
Merged
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ module = [
"pandas.*",
"pyarrow.*",
"pypgstac.*",
"pyproj.*",
"rich.*",
"shapely.*",
"tqdm.*",
Expand All @@ -78,4 +79,4 @@ ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "stac_geoparquet.*"
disallow_untyped_defs = true
disallow_untyped_defs = true
46 changes: 46 additions & 0 deletions stac_geoparquet/to_parquet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import json
from typing import Any

import pyarrow as pa
import pyarrow.parquet as pq
from pyproj import CRS

# JSON-dict description of EPSG:4326, built once at import time via pyproj and
# embedded as the "crs" entry of the geometry column metadata in to_parquet().
WGS84_CRS_JSON = CRS.from_epsg(4326).to_json_dict()


def _has_bbox_struct(schema: pa.Schema) -> bool:
    """Return True if ``schema`` has a top-level ``bbox`` struct usable as a covering."""
    # get_field_index returns -1 when the name is absent (or ambiguous).
    bbox_index = schema.get_field_index("bbox")
    if bbox_index == -1:
        return False

    bbox_type = schema.field(bbox_index).type
    if not pa.types.is_struct(bbox_type):
        return False

    return all(
        bbox_type.get_field_index(corner) != -1
        for corner in ("xmin", "ymin", "xmax", "ymax")
    )


def to_parquet(table: pa.Table, where: Any, **kwargs: Any) -> None:
    """Write an Arrow table with STAC data to GeoParquet

    This writes metadata compliant with GeoParquet 1.1. The ``geometry``
    column is declared as the primary, WKB-encoded geometry column in WGS84.
    A ``bbox`` covering is advertised only when the table actually contains a
    top-level ``bbox`` struct column with ``xmin``/``ymin``/``xmax``/``ymax``
    fields; otherwise the covering is omitted so the metadata never points at
    a column that does not exist.

    Any schema metadata already present on ``table`` is preserved; only the
    ``geo`` key is added (or overwritten).

    Args:
        table: The table to write to Parquet
        where: The destination for saving.
        **kwargs: Passed through unchanged to ``pyarrow.parquet.write_table``.
    """
    # TODO: include bbox of geometries
    column_meta = {
        "encoding": "WKB",
        # TODO: specify known geometry types
        "geometry_types": [],
        "crs": WGS84_CRS_JSON,
        "edges": "planar",
    }

    # Only describe the covering when the referenced column really exists.
    if _has_bbox_struct(table.schema):
        column_meta["covering"] = {
            "bbox": {
                "xmin": ["bbox", "xmin"],
                "ymin": ["bbox", "ymin"],
                "xmax": ["bbox", "xmax"],
                "ymax": ["bbox", "ymax"],
            }
        }

    geo_meta = {
        # NOTE(review): "-dev" is a pre-release tag; confirm whether this
        # should be bumped to the final GeoParquet 1.1 version string.
        "version": "1.1.0-dev",
        "columns": {"geometry": column_meta},
        "primary_column": "geometry",
    }

    # Build a fresh mapping so existing schema metadata is kept but never
    # mutated in place.
    metadata = dict(table.schema.metadata or {})
    metadata[b"geo"] = json.dumps(geo_meta).encode("utf-8")
    table = table.replace_schema_metadata(metadata)

    pq.write_table(table, where, **kwargs)
Loading