Skip to content

Commit

Permalink
Merge pull request #80 from mggg/1.1.0
Browse files Browse the repository at this point in the history
Maintenance for Shapely 2.x - now matches up with gerrychain.
  • Loading branch information
drdeford authored Oct 16, 2023
2 parents 615afed + c9679a7 commit f458f94
Show file tree
Hide file tree
Showing 8 changed files with 103 additions and 58 deletions.
18 changes: 9 additions & 9 deletions maup/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import geopandas
from .adjacencies import adjacencies
from .assign import assign
from .indexed_geometries import IndexedGeometries
from .intersections import intersections, prorate
from .repair import close_gaps, resolve_overlaps, make_valid, autorepair, snap_to_grid, crop_to, expand_to, doctor
from .repair import close_gaps, autorepair, snap_to_grid, crop_to, doctor, resolve_overlaps
from .normalize import normalize
from .progress_bar import progress

import geopandas

# warn about https://github.com/geopandas/geopandas/issues/2199
if geopandas.options.use_pygeos:
Expand All @@ -18,17 +18,17 @@

__version__ = "1.0.8"
__all__ = [
"adjacencies",
"assign",
"IndexedGeometries",
"intersections",
"prorate",
"adjacencies",
"close_gaps",
"autorepair",
"resolve_overlaps",
"snap_to_grid",
"IndexedGeometries",
"crop_to",
"doctor",
"normalize",
"progress",
"make_valid",
"autorepair",
"doctor"
]
"progress"
]
42 changes: 33 additions & 9 deletions maup/adjacencies.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import warnings

from geopandas import GeoSeries
from geopandas import GeoSeries, GeoDataFrame

from .indexed_geometries import IndexedGeometries
from shapely import make_valid

from .indexed_geometries import IndexedGeometries, get_geometries
from .progress_bar import progress


Expand All @@ -24,24 +26,42 @@ def iter_adjacencies(geometries):
for j, inter in inters.items():
yield (i, j), inter


def adjacencies(
geometries, adjacency_type="rook", *, warn_for_overlaps=True, warn_for_islands=True
geometries,
adjacency_type="rook",
output_type="geoseries", *, warn_for_overlaps=True, warn_for_islands=True
):
"""Returns adjacencies between geometries. The return type is a
"""Returns adjacencies between geometries.
The default return type is a
`GeoSeries` with a `MultiIndex`, whose (i, j)th entry is the pairwise
intersection between geometry `i` and geometry `j`. We ensure that
`i < j` always holds, so that any adjacency is represented just once
in the series.
If output_type == "geodataframe", the return type is a range-indexed GeoDataFrame
with a "neighbors" column containing the pair (i,j) for the geometry consisting
of the intersection between geometry `i` and geometry `j`.
"""
if adjacency_type not in ["rook", "queen"]:
raise ValueError('adjacency_type must be "rook" or "queen"')

index, geoms = zip(*iter_adjacencies(geometries))
inters = GeoSeries(geoms, index=index, crs=geometries.crs)
orig_crs = geometries.crs
geometries = get_geometries(geometries)
geometries = make_valid(geometries)

adjs = list(iter_adjacencies(geometries))
if adjs:
index, geoms = zip(*adjs)
else:
index, geoms = [[],[]]

if output_type == "geodataframe":
inters = GeoDataFrame({"neighbors" : index, "geometry" : geoms})
else:
inters = GeoSeries(geoms, index=index)

if adjacency_type == "rook":
inters = inters[inters.length > 0]
inters = inters[inters.length > 0].copy()

if warn_for_overlaps:
overlaps = inters[inters.area > 0]
Expand All @@ -54,11 +74,15 @@ def adjacencies(
)

if warn_for_islands:
islands = set(geometries.index) - set(i for pair in inters.index for i in pair)
if output_type == "geodataframe":
islands = set(geometries.index) - set(i for pair in inters["neighbors"] for i in pair)
else:
islands = set(geometries.index) - set(i for pair in inters.index for i in pair)
if len(islands) > 0:
warnings.warn(
"Found islands.\n" "Indices of islands: {}".format(islands),
IslandWarning,
)

inters.crs = orig_crs
return inters
3 changes: 2 additions & 1 deletion maup/assign.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ def assign(sources, targets):
dtype="float"
)
assignment.update(assignments_by_area)


# TODO: add a warning here if there are still unassigned source geometries.
return assignment.astype(targets.index.dtype, errors="ignore")


Expand Down
18 changes: 13 additions & 5 deletions maup/indexed_geometries.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,33 @@
import pandas
import geopandas
import numpy

from shapely.prepared import prep
from shapely.strtree import STRtree
from .progress_bar import progress


def get_geometries(geometries):
return getattr(geometries, "geometry", geometries)
if isinstance(geometries, geopandas.GeoDataFrame):
return getattr(geometries, "geometry", geometries)
return geometries


class IndexedGeometries:
def __init__(self, geometries):
self.geometries = get_geometries(geometries)
self.spatial_index = STRtree(self.geometries)
self.index = self.geometries.index


def query(self, geometry):
relevant_indices = [index for index in self.spatial_index.query(geometry)]
def query(self, geometry):
relevant_index_array = self.spatial_index.query(geometry)
relevant_indices = [*set(numpy.ndarray.flatten(relevant_index_array))]
relevant_geometries = self.geometries.iloc[relevant_indices]
return relevant_geometries

def intersections(self, geometry):
relevant_geometries = self.query(geometry)
relevant_geometries = self.query(geometry)
intersections = relevant_geometries.intersection(geometry)
return intersections[-(intersections.is_empty | intersections.isna())]

Expand All @@ -43,10 +50,11 @@ def assign(self, targets):
)
]
if groups:
return pandas.concat(groups).reindex(self.geometries.index)
return pandas.concat(groups).reindex(self.index)
else:
return geopandas.GeoSeries()


def enumerate_intersections(self, targets):
target_geometries = get_geometries(targets)
for i, target in progress(target_geometries.items(), len(target_geometries)):
Expand Down
22 changes: 16 additions & 6 deletions maup/intersections.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,15 @@


@require_same_crs
def intersections(sources, targets, area_cutoff=None):
"""Computes all of the nonempty intersections between two sets of geometries.
The returned `~geopandas.GeoSeries` will have a MultiIndex, where the geometry
at index *(i, j)* is the intersection of ``sources[i]`` and ``targets[j]``
(if it is not empty).

def intersections(sources, targets, output_type= "geoseries", area_cutoff=None):
"""Computes all of the nonempty intersections between two sets of geometries.
By default, the returned `~geopandas.GeoSeries` will have a MultiIndex, where the
geometry at index *(i, j)* is the intersection of ``sources[i]`` and ``targets[j]``
(if it is not empty).
If output_type == "geodataframe", the return type is a range-indexed GeoDataFrame
with "source" and "target" columns containing the indices i,j, respectively, for the
intersection of ``sources[i]`` and ``targets[j]``
:param sources: geometries
:type sources: :class:`~geopandas.GeoSeries` or :class:`~geopandas.GeoDataFrame`
:param targets: geometries
Expand All @@ -22,8 +25,10 @@ def intersections(sources, targets, area_cutoff=None):
area greater than ``area_cutoff``
:type area_cutoff: Number or None
"""

reindexed_sources = get_geometries_with_range_index(sources)
reindexed_targets = get_geometries_with_range_index(targets)

spatially_indexed_sources = IndexedGeometries(reindexed_sources)

records = [
Expand All @@ -33,15 +38,20 @@ def intersections(sources, targets, area_cutoff=None):
reindexed_targets
)
]

df = GeoDataFrame.from_records(records, columns=["source", "target", "geometry"])
df = df.sort_values(by=["source", "target"]).reset_index(drop=True)
df.crs = sources.crs

geometries = df.set_index(["source", "target"]).geometry
geometries.sort_index(inplace=True)
geometries.crs = sources.crs

if area_cutoff is not None:
df = df[df.area > area_cutoff].reset_index(drop=True)
geometries = geometries[geometries.area > area_cutoff]

return geometries
return df if output_type == "geodataframe" else geometries


def prorate(relationship, data, weights, aggregate_by="sum"):
Expand Down
47 changes: 24 additions & 23 deletions maup/repair.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import math
import pandas
import functools
import warnings

from geopandas import GeoSeries, GeoDataFrame
from geopandas import GeoSeries
from shapely.geometry import MultiPolygon, Polygon
from shapely.ops import unary_union
# from shapely.validation import make_valid # currently in alpha
from shapely import make_valid

from .adjacencies import adjacencies
from .assign import assign_to_max
Expand All @@ -30,7 +29,7 @@ def holes_of_union(geometries):
if not all(
isinstance(geometry, (Polygon, MultiPolygon)) for geometry in geometries
):
raise TypeError(f"Must be a Polygon or MultiPolygon (got types {set([type(x) for x in geometries])})!")
raise TypeError(f"Must be a Polygon or MultiPolygon (got types {set([x.geom_type for x in geometries])})!")

union = unary_union(geometries)
series = holes(union)
Expand All @@ -53,6 +52,8 @@ def holes(geometry):
raise TypeError("geometry must be a Polygon or MultiPolygon to have holes")




def close_gaps(geometries, relative_threshold=0.1):
"""Closes gaps between geometries by assigning the hole to the polygon
that shares the most perimeter with the hole.
Expand Down Expand Up @@ -86,7 +87,7 @@ def resolve_overlaps(geometries, relative_threshold=0.1):
"""
geometries = get_geometries(geometries)
inters = adjacencies(geometries, warn_for_islands=False, warn_for_overlaps=False)
overlaps = inters[inters.area > 0].buffer(0)
overlaps = make_valid(inters[inters.area > 0])

if relative_threshold is not None:
left_areas, right_areas = split_by_level(geometries.area, overlaps.index)
Expand Down Expand Up @@ -116,39 +117,38 @@ def autorepair(geometries, relative_threshold=0.1):
basis. Set `relative_threshold=None` to attempt to resolve all overlaps. See
`resolve_overlaps()` and `close_gaps()` for more.
"""
shp = geometries.copy()
geometries = get_geometries(geometries)

shp["geometry"] = remove_repeated_vertices(shp)
shp["geometry"] = make_valid(shp)
shp["geometry"] = resolve_overlaps(shp, relative_threshold=relative_threshold)
shp["geometry"] = make_valid(shp)
shp["geometry"] = close_gaps(shp, relative_threshold=relative_threshold)
shp["geometry"] = make_valid(shp)
geometries = remove_repeated_vertices(geometries)
geometries = make_valid(geometries)
geometries = resolve_overlaps(geometries, relative_threshold=relative_threshold)
geometries = make_valid(geometries)
geometries = close_gaps(geometries, relative_threshold=relative_threshold)
geometries = make_valid(geometries)

return shp["geometry"]
return geometries

def make_valid(geometries):
"""
Applies the shapely .buffer(0) and make_valid (once released) trick to all
geometries. Should help resolve various topological issues in shapefiles.
"""
return geometries["geometry"].simplify(0).buffer(0)
# return geometries["geometry"].buffer(0).apply(lambda x: make_valid(x))

return get_geometries(geometries).simplify(0).buffer(0)

def remove_repeated_vertices(geometries):
"""
Removes repeated vertices. Vertices are considered to be repeated if they
appear consecutively, excluding the start and end points.
"""
return geometries["geometry"].apply(lambda x: apply_func_to_polygon_parts(x, dedup_vertices))
return get_geometries(geometries).apply(lambda x: apply_func_to_polygon_parts(x, dedup_vertices))

def snap_to_grid(geometries, n=-7):
"""
Snap the geometries to a grid by rounding to the nearest 10^n. Helps to
resolve floating point precision issues in shapefiles.
"""
func = functools.partial(snap_polygon_to_grid, n=n)
return geometries["geometry"].apply(lambda x: apply_func_to_polygon_parts(x, func))
return get_geometries(geometries).apply(lambda x: apply_func_to_polygon_parts(x, func))

@require_same_crs
def crop_to(source, target):
Expand All @@ -173,19 +173,20 @@ def expand_to(source, target):
"""
Expands the source geometries to the target geometries.
"""
geometries = get_geometries(source).simplify(0).buffer(0)
geometries = make_valid(get_geometries(source))

source_union = unary_union(geometries)

leftover_geometries = get_geometries(target).apply(lambda x: x - source_union)
leftover_geometries = leftover_geometries[~leftover_geometries.is_empty].explode()
leftover_geometries = leftover_geometries[~leftover_geometries.is_empty].explode(index_parts=False)

geometries = absorb_by_shared_perimeter(
leftover_geometries, get_geometries(source), relative_threshold=None
)

return geometries


def doctor(source, target=None):
"""
Detects quality issues in a given set of source and target geometries. Quality
Expand Down Expand Up @@ -221,9 +222,10 @@ def count_overlaps(shp):
"""
Counts overlaps. Code is taken directly from the resolve_overlaps function in maup.
"""
inters = adjacencies(shp["geometry"], warn_for_islands=False, warn_for_overlaps=False)
overlaps = inters[inters.area > 0].buffer(0)
inters = adjacencies(shp.geometry, warn_for_islands=False, warn_for_overlaps=False)
overlaps = (inters[inters.area > 0]).buffer(0)
return len(overlaps)


def apply_func_to_polygon_parts(shape, func):
if isinstance(shape, Polygon):
Expand Down Expand Up @@ -252,7 +254,6 @@ def split_by_level(series, multiindex):
for i in range(multiindex.nlevels)
)


@require_same_crs
def absorb_by_shared_perimeter(sources, targets, relative_threshold=None):
if len(sources) == 0:
Expand Down
4 changes: 2 additions & 2 deletions tests/test_crs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

def test_require_same_crs(square, four_square_grid):
square_gdf = gpd.GeoDataFrame([{"geometry": square}])
square_gdf.crs = "foo"
four_square_grid.crs = "bar"
square_gdf.crs = "epsg:4269"
four_square_grid.crs = "epsg:4326"

@require_same_crs
def f(sources, targets):
Expand Down
Loading

0 comments on commit f458f94

Please sign in to comment.