Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an option for a browser link column #23

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 30 additions & 3 deletions stac_geoparquet/stac_geoparquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
STAC_ITEM_TYPES = ["application/json", "application/geo+json"]

SELF_LINK_COLUMN = "self_link"
BROWSER_LINK_COLUMN = "browser_link"


def _fix_array(v):
Expand All @@ -31,7 +32,9 @@ def _fix_array(v):


def to_geodataframe(
items: Sequence[dict[str, Any]], add_self_link: bool = False
items: Sequence[dict[str, Any]],
add_self_link: bool = False,
add_browser_link: bool = False,
) -> geopandas.GeoDataFrame:
"""
Convert a sequence of STAC items to a :class:`geopandas.GeoDataFrame`.
Expand All @@ -43,6 +46,8 @@ def to_geodataframe(
----------
items: A sequence of STAC items.
add_self_link: Add the absolute link (if available) to the source STAC Item as a separate column named "self_link"
add_browser_link: Add an absolute link to an alternate HTML representation of the source STAC Item (if available)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that this should explicitly document the conditions for this link being extracted:

  • The relation type must be alternate
  • The media type must be text/html
  • The href must be absolute (or whatever we're checking for; a fully qualified domain name?)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done :-)

as a separate column named "browser_link"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think sphinx wants subsequent lines indented a bit. Maybe 3 spaces.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added 3 spaces (but I'm not sure it has the intended effect?!).


Returns
-------
Expand All @@ -55,6 +60,7 @@ def to_geodataframe(
if k in item2:
raise ValueError("k", k)
item2[k] = v

if add_self_link:
self_href = None
for link in item["links"]:
Expand All @@ -66,6 +72,19 @@ def to_geodataframe(
self_href = link["href"]
break
item2[SELF_LINK_COLUMN] = self_href

if add_browser_link:
browser_href = None
for link in item["links"]:
if (
link["rel"] == "alternate"
and link["type"] == "text/html"
and urlparse(link["href"]).netloc
):
browser_href = link["href"]
break
item2[BROWSER_LINK_COLUMN] = browser_href

items2.append(item2)

# Filter out missing geoms in MultiPolygons
Expand Down Expand Up @@ -111,7 +130,15 @@ def to_geodataframe(
columns.remove(col)

gdf = pd.concat([gdf[columns], gdf.drop(columns=columns)], axis="columns")
for k in ["type", "stac_version", "id", "collection", SELF_LINK_COLUMN]:
string_columns = [
"type",
"stac_version",
"id",
"collection",
SELF_LINK_COLUMN,
BROWSER_LINK_COLUMN,
]
for k in string_columns:
if k in gdf:
gdf[k] = gdf[k].astype("string")

Expand Down Expand Up @@ -142,7 +169,7 @@ def to_dict(record: dict) -> dict:
for k, v in record.items():
v = _fix_array(v)

if k == SELF_LINK_COLUMN:
if k == SELF_LINK_COLUMN or k == BROWSER_LINK_COLUMN:
continue
elif k in top_level_keys:
item[k] = v
Expand Down