Skip to content

Commit

Permalink
[SEDONA-658] Add ST_Simplify (#1606)
Browse files Browse the repository at this point in the history
* feat: Add ST_Simplify

* fix: snowflake tests

* docs: fix formatting
  • Loading branch information
furqaankhan authored Sep 27, 2024
1 parent 603f821 commit 0a8b82d
Show file tree
Hide file tree
Showing 21 changed files with 214 additions and 0 deletions.
5 changes: 5 additions & 0 deletions common/src/main/java/org/apache/sedona/common/Functions.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
import org.locationtech.jts.operation.valid.TopologyValidationError;
import org.locationtech.jts.precision.GeometryPrecisionReducer;
import org.locationtech.jts.precision.MinimumClearance;
import org.locationtech.jts.simplify.DouglasPeuckerSimplifier;
import org.locationtech.jts.simplify.PolygonHullSimplifier;
import org.locationtech.jts.simplify.TopologyPreservingSimplifier;
import org.locationtech.jts.simplify.VWSimplifier;
Expand Down Expand Up @@ -1543,6 +1544,10 @@ public static Geometry[] h3ToGeom(long[] cells) {
return polygons.toArray(new Polygon[0]);
}

/**
 * Simplifies a geometry with the JTS {@link DouglasPeuckerSimplifier}.
 *
 * @param geom the geometry to simplify
 * @param distanceTolerance the distance tolerance handed to the simplifier
 * @return the geometry produced by the simplifier
 */
public static Geometry simplify(Geometry geom, double distanceTolerance) {
  // Equivalent to the static DouglasPeuckerSimplifier.simplify convenience method.
  DouglasPeuckerSimplifier simplifier = new DouglasPeuckerSimplifier(geom);
  simplifier.setDistanceTolerance(distanceTolerance);
  return simplifier.getResultGeometry();
}

// Simplifies the geometry with the JTS TopologyPreservingSimplifier.
public static Geometry simplifyPreserveTopology(Geometry geometry, double distanceTolerance) {
return TopologyPreservingSimplifier.simplify(geometry, distanceTolerance);
Expand Down
23 changes: 23 additions & 0 deletions common/src/test/java/org/apache/sedona/common/FunctionsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1790,6 +1790,29 @@ public void removeRepeatedPointsGeometryCollection() throws ParseException {
assertEquals(6000, actualSRID);
}

/**
 * Checks the vertex count of a buffered point after simplification at increasing tolerances,
 * and that the SRID of the input is carried over to the simplified result.
 */
@Test
public void simplify() throws ParseException {
  Geometry center = Constructors.geomFromWKT("POINT (1 2)", 1111);
  Geometry buffered = Functions.buffer(center, 10, false, "quad_segs=12");

  // Larger tolerances are expected to leave fewer vertices.
  int pointsAtTenth = Functions.nPoints(Functions.simplify(buffered, 0.1));
  assertEquals(33, pointsAtTenth);

  int pointsAtHalf = Functions.nPoints(Functions.simplify(buffered, 0.5));
  assertEquals(17, pointsAtHalf);

  int pointsAtOne = Functions.nPoints(Functions.simplify(buffered, 1));
  assertEquals(9, pointsAtOne);

  Geometry simplifiedAtTen = Functions.simplify(buffered, 10);
  int pointsAtTen = Functions.nPoints(simplifiedAtTen);
  assertEquals(4, pointsAtTen);
  assertEquals(1111, simplifiedAtTen.getSRID());
}

@Test
public void simplifyVW() throws ParseException {
Geometry geom = Constructors.geomFromEWKT("LINESTRING(5 2, 3 8, 6 20, 7 25, 10 10)");
Expand Down
23 changes: 23 additions & 0 deletions docs/api/flink/Function.md
Original file line number Diff line number Diff line change
Expand Up @@ -3513,6 +3513,29 @@ Output:
3021
```

## ST_Simplify

Introduction: This function simplifies the input geometry by applying the Douglas-Peucker algorithm.

!!!Note
The simplification may not preserve topology, potentially producing invalid geometries. Use [ST_SimplifyPreserveTopology](#st_simplifypreservetopology) to retain valid topology after simplification.

Format: `ST_Simplify(geom: Geometry, tolerance: Double)`

Since: `v1.7.0`

SQL Example:

```sql
SELECT ST_Simplify(ST_Buffer(ST_GeomFromWKT('POINT (0 2)'), 10), 1)
```

Output:

```
POLYGON ((10 2, 7.0710678118654755 -5.071067811865475, 0.0000000000000006 -8, -7.071067811865475 -5.0710678118654755, -10 1.9999999999999987, -7.071067811865477 9.071067811865476, -0.0000000000000018 12, 7.071067811865474 9.071067811865477, 10 2))
```

## ST_SimplifyPolygonHull

Introduction: This function computes a topology-preserving simplified hull, either outer or inner, for a polygonal geometry input. An outer hull fully encloses the original geometry, while an inner hull lies entirely within. The result maintains the same structure as the input, including handling of MultiPolygons and holes, represented as a polygonal geometry formed from a subset of vertices.
Expand Down
21 changes: 21 additions & 0 deletions docs/api/snowflake/vector-data/Function.md
Original file line number Diff line number Diff line change
Expand Up @@ -2698,6 +2698,27 @@ Output:
LINESTRING(177 10, 179 10, 181 10, 183 10)
```

## ST_Simplify

Introduction: This function simplifies the input geometry by applying the Douglas-Peucker algorithm.

!!!Note
The simplification may not preserve topology, potentially producing invalid geometries. Use [ST_SimplifyPreserveTopology](#st_simplifypreservetopology) to retain valid topology after simplification.

Format: `ST_Simplify(geom: Geometry, tolerance: Double)`

SQL Example:

```sql
SELECT ST_Simplify(ST_Buffer(ST_GeomFromWKT('POINT (0 2)'), 10), 1)
```

Output:

```
POLYGON ((10 2, 7.0710678118654755 -5.071067811865475, 0.0000000000000006 -8, -7.071067811865475 -5.0710678118654755, -10 1.9999999999999987, -7.071067811865477 9.071067811865476, -0.0000000000000018 12, 7.071067811865474 9.071067811865477, 10 2))
```

## ST_SimplifyPolygonHull

Introduction: This function computes a topology-preserving simplified hull, either outer or inner, for a polygonal geometry input. An outer hull fully encloses the original geometry, while an inner hull lies entirely within. The result maintains the same structure as the input, including handling of MultiPolygons and holes, represented as a polygonal geometry formed from a subset of vertices.
Expand Down
23 changes: 23 additions & 0 deletions docs/api/sql/Function.md
Original file line number Diff line number Diff line change
Expand Up @@ -3573,6 +3573,29 @@ Output:
LINESTRING(177 10, 179 10, 181 10, 183 10)
```

## ST_Simplify

Introduction: This function simplifies the input geometry by applying the Douglas-Peucker algorithm.

!!!Note
The simplification may not preserve topology, potentially producing invalid geometries. Use [ST_SimplifyPreserveTopology](#st_simplifypreservetopology) to retain valid topology after simplification.

Format: `ST_Simplify(geom: Geometry, tolerance: Double)`

Since: `v1.7.0`

SQL Example:

```sql
SELECT ST_Simplify(ST_Buffer(ST_GeomFromWKT('POINT (0 2)'), 10), 1)
```

Output:

```
POLYGON ((10 2, 7.0710678118654755 -5.071067811865475, 0.0000000000000006 -8, -7.071067811865475 -5.0710678118654755, -10 1.9999999999999987, -7.071067811865477 9.071067811865476, -0.0000000000000018 12, 7.071067811865474 9.071067811865477, 10 2))
```

## ST_SimplifyPolygonHull

Introduction: This function computes a topology-preserving simplified hull, either outer or inner, for a polygonal geometry input. An outer hull fully encloses the original geometry, while an inner hull lies entirely within. The result maintains the same structure as the input, including handling of MultiPolygons and holes, represented as a polygonal geometry formed from a subset of vertices.
Expand Down
1 change: 1 addition & 0 deletions flink/src/main/java/org/apache/sedona/flink/Catalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ public static UserDefinedFunction[] getFuncs() {
new Functions.ST_Multi(),
new Functions.ST_StartPoint(),
new Functions.ST_ShiftLongitude(),
new Functions.ST_Simplify(),
new Functions.ST_SimplifyPreserveTopology(),
new Functions.ST_SimplifyVW(),
new Functions.ST_SimplifyPolygonHull(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1394,6 +1394,16 @@ public Geometry[] eval(@DataTypeHint(value = "ARRAY<BIGINT>") Long[] cells) {
}
}

/**
 * Flink scalar function for {@code ST_Simplify}; forwards to
 * {@code org.apache.sedona.common.Functions.simplify}.
 */
public static class ST_Simplify extends ScalarFunction {
  @DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class)
  public Geometry eval(
      @DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) Object o,
      @DataTypeHint("Double") Double distanceTolerance) {
    // The RAW-typed argument arrives as Object and is cast to Geometry before delegating.
    return org.apache.sedona.common.Functions.simplify((Geometry) o, distanceTolerance);
  }
}

public static class ST_SimplifyPreserveTopology extends ScalarFunction {
@DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class)
public Geometry eval(
Expand Down
11 changes: 11 additions & 0 deletions flink/src/test/java/org/apache/sedona/flink/FunctionTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1739,6 +1739,17 @@ public void testStartPoint() {
assertEquals("POINT (0 0)", result.toString());
}

/** Simplifying a buffered point with tolerance 1 through the Table API should leave 9 points. */
@Test
public void testSimplify() {
  Table buffered = tableEnv.sqlQuery("SELECT ST_Buffer(ST_GeomFromWKT('POINT (0 2)'), 10) AS geom");
  Table simplified =
      buffered.select(call(Functions.ST_Simplify.class.getSimpleName(), $("geom"), 1));
  Object firstField = first(simplified).getField(0);
  Geometry actualGeometry = (Geometry) firstField;
  int numPoints = actualGeometry.getNumPoints();
  assertEquals(9, numPoints);
}

@Test
public void testSimplifyPreserveTopology() {
Table table =
Expand Down
16 changes: 16 additions & 0 deletions python/sedona/sql/st_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1666,6 +1666,22 @@ def ST_SubDivideExplode(
return _call_st_function("ST_SubDivideExplode", (geometry, max_vertices))


@validate_argument_types
def ST_Simplify(
    geometry: ColumnOrName, distance_tolerance: ColumnOrNameOrNumber
) -> Column:
    """Simplify a geometry using the Douglas-Peucker algorithm within a specified tolerance.

    The simplification may not preserve topology and can produce invalid geometries;
    use ST_SimplifyPreserveTopology when the result must remain topologically valid.

    :param geometry: Geometry column to simplify.
    :type geometry: ColumnOrName
    :param distance_tolerance: Tolerance for merging points together to simplify the geometry as either a number or numeric column.
    :type distance_tolerance: ColumnOrNameOrNumber
    :return: Simplified geometry as a geometry column.
    :rtype: Column
    """
    return _call_st_function("ST_Simplify", (geometry, distance_tolerance))


@validate_argument_types
def ST_SimplifyPreserveTopology(
geometry: ColumnOrName, distance_tolerance: ColumnOrNameOrNumber
Expand Down
9 changes: 9 additions & 0 deletions python/tests/sql/test_dataframe_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,13 @@
"",
"POLYGON ((0 0, 1 0, 1 1, 0 0))",
),
(
stf.ST_Simplify,
("geom", 0.1),
"0.9_poly",
"",
"POLYGON ((0 0, 1 0, 1 1, 0 0))",
),
(
stf.ST_SimplifyPreserveTopology,
("geom", 0.2),
Expand Down Expand Up @@ -1330,6 +1337,8 @@
(stf.ST_SetSRID, ("", None)),
(stf.ST_SetSRID, ("", 3021.0)),
(stf.ST_ShiftLongitude, (None,)),
(stf.ST_Simplify, (None, 2)),
(stf.ST_Simplify, ("", None)),
(stf.ST_SimplifyPreserveTopology, (None, 0.2)),
(stf.ST_SimplifyPreserveTopology, ("", None)),
(stf.ST_SimplifyVW, (None, 2)),
Expand Down
8 changes: 8 additions & 0 deletions python/tests/sql/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -1442,6 +1442,14 @@ def test_isPolygonCW(self):
).take(1)[0][0]
assert actual

def test_st_simplify(self):
    # A buffered point simplified with tolerance 1 is expected to keep 9 points.
    buffered_df = self.spark.sql(
        "SELECT ST_Buffer(ST_GeomFromWKT('POINT (0 2)'), 10) AS geom"
    )
    first_row = buffered_df.selectExpr("ST_NPoints(ST_Simplify(geom, 1))").first()
    assert 9 == first_row[0]

def test_st_simplify_vw(self):
basedf = self.spark.sql(
"SELECT ST_GeomFromWKT('LINESTRING(5 2, 3 8, 6 20, 7 25, 10 10)') as geom"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -877,6 +877,16 @@ public void test_ST_SetSRID() {
"SRID=4326;POINT (1 2)");
}

/** Simplifying a buffered point with tolerance 1 via the registered UDFs should yield 9 points. */
@Test
public void test_ST_Simplify() {
  // Every UDF used by the query must be registered before it runs.
  registerUDF("ST_Simplify", byte[].class, double.class);
  registerUDF("ST_Buffer", byte[].class, double.class);
  registerUDF("ST_NPoints", byte[].class);
  final String query =
      "SELECT sedona.ST_NPoints(sedona.ST_Simplify(sedona.ST_Buffer(sedona.ST_GeomFromWKT('POINT (0 2)'), 10), 1))";
  verifySqlSingleRes(query, 9);
}

@Test
public void test_ST_SimplifyPreserveTopology() {
registerUDF("ST_SimplifyPreserveTopology", byte[].class, double.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,16 @@ public void test_ST_SetSRID() {
"SRID=0;POINT(1 2)");
}

/** Simplifying a buffered point with tolerance 1 via the V2 UDFs should yield 9 points. */
@Test
public void test_ST_Simplify() {
  // Every V2 UDF used by the query must be registered before it runs.
  registerUDFV2("ST_Simplify", String.class, double.class);
  registerUDFV2("ST_Buffer", String.class, double.class);
  registerUDFV2("ST_NPoints", String.class);
  final String query =
      "SELECT sedona.ST_NPoints(sedona.ST_Simplify(sedona.ST_Buffer(ST_GeomFromWKT('POINT (0 2)'), 10), 1))";
  verifySqlSingleRes(query, 9);
}

@Test
public void test_ST_SimplifyPreserveTopology() {
registerUDFV2("ST_SimplifyPreserveTopology", String.class, double.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -967,6 +967,12 @@ public static byte[] ST_SetSRID(byte[] geometry, int srid) {
return GeometrySerde.serialize(Functions.setSRID(GeometrySerde.deserialize(geometry), srid));
}

/**
 * UDF entry point for {@code ST_Simplify}: deserializes the geometry, simplifies it with the
 * given tolerance, and serializes the result.
 */
@UDFAnnotations.ParamMeta(argNames = {"geometry", "distanceTolerance"})
public static byte[] ST_Simplify(byte[] geometry, double distanceTolerance) {
  final byte[] simplified =
      GeometrySerde.serialize(
          Functions.simplify(GeometrySerde.deserialize(geometry), distanceTolerance));
  return simplified;
}

@UDFAnnotations.ParamMeta(argNames = {"geometry", "distanceTolerance"})
public static byte[] ST_SimplifyPreserveTopology(byte[] geometry, double distanceTolerance) {
return GeometrySerde.serialize(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1090,6 +1090,15 @@ public static String ST_SetSRID(String geometry, int srid) {
return GeometrySerde.serGeoJson(Functions.setSRID(GeometrySerde.deserGeoJson(geometry), srid));
}

/**
 * V2 UDF entry point for {@code ST_Simplify}: reads the geometry from its GeoJSON string form,
 * simplifies it with the given tolerance, and writes the result back as GeoJSON.
 */
@UDFAnnotations.ParamMeta(
    argNames = {"geometry", "distanceTolerance"},
    argTypes = {"Geometry", "double"},
    returnTypes = "Geometry")
public static String ST_Simplify(String geometry, double distanceTolerance) {
  final String simplifiedGeoJson =
      GeometrySerde.serGeoJson(
          Functions.simplify(GeometrySerde.deserGeoJson(geometry), distanceTolerance));
  return simplifiedGeoJson;
}

@UDFAnnotations.ParamMeta(
argNames = {"geometry", "distanceTolerance"},
argTypes = {"Geometry", "double"},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ object Catalog {
function[ST_AsHEXEWKB](),
function[ST_AsGML](),
function[ST_AsKML](),
function[ST_Simplify](),
function[ST_SimplifyVW](),
function[ST_SimplifyPolygonHull](),
function[ST_SRID](),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,14 @@ case class ST_ReducePrecision(inputExpressions: Seq[Expression])
}
}

/**
 * Expression for `ST_Simplify`, with its evaluation inferred from `Functions.simplify`.
 *
 * @param inputExpressions the child expressions supplying the function arguments
 */
case class ST_Simplify(inputExpressions: Seq[Expression])
    extends InferredExpression(Functions.simplify _) {

  // Rebuild this node around the replacement children.
  protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) =
    copy(inputExpressions = newChildren)
}

case class ST_SimplifyVW(inputExpressions: Seq[Expression])
extends InferredExpression(Functions.simplifyVW _) {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,11 @@ object st_functions extends DataFrameAPI {
def ST_Transform(geometry: Column, targetCRS: Column): Column =
wrapExpression[ST_Transform](geometry, targetCRS)

/** Builds an `ST_Simplify` expression column from a geometry column and a tolerance column. */
def ST_Simplify(geometry: Column, distanceTolerance: Column): Column =
wrapExpression[ST_Simplify](geometry, distanceTolerance)
/**
 * Overload of `ST_Simplify` taking the geometry as a String and the tolerance as a Double.
 * NOTE(review): the String is presumably a column name — confirm against `wrapExpression`.
 */
def ST_Simplify(geometry: String, distanceTolerance: Double): Column =
wrapExpression[ST_Simplify](geometry, distanceTolerance)

def ST_SimplifyVW(geometry: Column, distanceTolerance: Column): Column =
wrapExpression[ST_SimplifyVW](geometry, distanceTolerance)
def ST_SimplifyVW(geometry: String, distanceTolerance: Double): Column =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class PreserveSRIDSuite extends TestBaseScala with TableDrivenPropertyChecks {
("ST_Intersection(geom1, ST_Point(0, 1))", 1000),
("ST_MakeValid(geom1)", 1000),
("ST_ReducePrecision(geom1, 6)", 1000),
("ST_Simplify(geom1, 0.1)", 1000),
("ST_SimplifyVW(geom1, 0.1)", 1000),
("ST_SimplifyPolygonHull(geom1, 0.5)", 1000),
("ST_SetSRID(geom1, 2000)", 2000),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,13 @@ class dataFrameAPITestScala extends TestBaseScala {
assert(actualResult == expectedResult)
}

it("Passed ST_Simplify") {
  // A buffered point simplified with tolerance 1 is expected to keep 9 points.
  val bufferedDf = sparkSession.sql("SELECT ST_Buffer(ST_GeomFromWKT('POINT (0 2)'), 10) AS geom")
  val pointCountDf = bufferedDf.select(ST_NPoints(ST_Simplify("geom", 1)))
  val simplifiedPointCount = pointCountDf.first().get(0)
  val expectedPointCount = 9
  assertEquals(expectedPointCount, simplifiedPointCount)
}

it("Passed ST_SimplifyVW") {
val baseDf = sparkSession.sql(
"SELECT ST_GeomFromWKT('LINESTRING(5 2, 3 8, 6 20, 7 25, 10 10)') AS geom")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -884,6 +884,13 @@ class functionTestScala
assert(Hex.encodeHexString(df.first().get(0).asInstanceOf[Array[Byte]]) == s)
}

it("Passed ST_Simplify") {
  // A buffered point simplified with tolerance 1 is expected to keep 9 points.
  val bufferedDf = sparkSession.sql("SELECT ST_Buffer(ST_GeomFromWKT('POINT (0 2)'), 10) AS geom")
  val pointCountDf = bufferedDf.selectExpr("ST_NPoints(ST_Simplify(geom, 1))")
  val simplifiedPointCount = pointCountDf.first().get(0)
  val expectedPointCount = 9
  assertEquals(expectedPointCount, simplifiedPointCount)
}

it("Passed ST_SimplifyVW") {
val baseDf = sparkSession.sql(
"SELECT ST_GeomFromWKT('LINESTRING(5 2, 3 8, 6 20, 7 25, 10 10)') AS geom")
Expand Down

0 comments on commit 0a8b82d

Please sign in to comment.