Skip to content

Commit

Permalink
Avoid JSON in _items_to_arrow
Browse files Browse the repository at this point in the history
  • Loading branch information
Tom Augspurger committed Oct 6, 2023
1 parent 37b21f3 commit bc40430
Showing 1 changed file with 13 additions and 7 deletions.
20 changes: 13 additions & 7 deletions stac_geoparquet/to_arrow.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Convert STAC data into Arrow tables
"""

from collections import defaultdict
import json
from tempfile import NamedTemporaryFile
from typing import IO, Any, Sequence, Union
Expand Down Expand Up @@ -55,13 +56,18 @@ def _stac_items_to_arrow(items: Sequence[dict[str, Any]]) -> pa.Table:
Returns:
A pyarrow Table with one column per top-level key found in the items.
"""
# TODO:!! Can just call pa.array() on the list of python dicts!!
with NamedTemporaryFile("w+b", suffix=".json") as f:
for item in items:
f.write(json.dumps(item, separators=(",", ":")).encode("utf-8"))
f.write("\n".encode("utf-8"))

return _stac_ndjson_to_arrow(f)
# TODO: Handle STAC items with different schemas.
# As written, pa.table() will fail if any item is missing a field, because
# that field's array will be shorter than the others.
d = defaultdict(list)

for item in items:
for k, v in item.items():
d[k].append(v)

arrays = {k: pa.array(v) for k, v in d.items()}
t = pa.table(arrays)
return t


def _bring_properties_to_top_level(table: pa.Table) -> pa.Table:
Expand Down

0 comments on commit bc40430

Please sign in to comment.