Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implemented turf-clusters-dbscan with spatial data structure (fix #2492) #2497

Merged
merged 9 commits into from
Nov 25, 2023
153 changes: 123 additions & 30 deletions packages/turf-clusters-dbscan/index.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,24 @@
import { GeoJsonProperties, FeatureCollection, Point } from "geojson";
import clone from "@turf/clone";
import distance from "@turf/distance";
import { coordAll } from "@turf/meta";
import { convertLength, Units } from "@turf/helpers";
import clustering from "density-clustering";
import { degreesToRadians, lengthToDegrees, Units } from "@turf/helpers";
import RBush from "rbush";

export type Dbscan = "core" | "edge" | "noise";
export type DbscanProps = GeoJsonProperties & {
dbscan?: Dbscan;
cluster?: number;
};

// Structure of a point in the spatial index
type IndexedPoint = {
minX: number;
minY: number;
maxX: number;
maxY: number;
index: number;
};

/**
* Takes a set of {@link Point|points} and partition them into clusters according to {@link DBSCAN's|https://en.wikipedia.org/wiki/DBSCAN} data clustering algorithm.
*
Expand Down Expand Up @@ -53,37 +61,122 @@ function clustersDbscan(
if (options.mutate !== true) points = clone(points);

// Defaults
options.minPoints = options.minPoints || 3;

// create clustered ids
var dbscan = new clustering.DBSCAN();
var clusteredIds = dbscan.run(
coordAll(points),
convertLength(maxDistance, options.units),
options.minPoints,
distance
const minPoints = options.minPoints || 3;

// Calculate the distance in degrees for region queries
const latDistanceInDegrees = lengthToDegrees(maxDistance, options.units);

// Create a spatial index
var tree = new RBush(points.features.length);

// Keeps track of whether a point has been visited or not.
var visited = points.features.map((_) => false);

// Keeps track of whether a point is assigned to a cluster or not.
var assigned = points.features.map((_) => false);

// Keeps track of whether a point is noise|edge or not.
var isnoise = points.features.map((_) => false);

// Keeps track of the clusterId for each point
var clusterIds: number[] = points.features.map((_) => -1);

// Index each point for spatial queries
tree.load(
points.features.map((point, index) => {
var [x, y] = point.geometry.coordinates;
return {
minX: x,
minY: y,
maxX: x,
maxY: y,
index: index,
} as IndexedPoint;
})
);

// Tag points to Clusters ID
var clusterId = -1;
clusteredIds.forEach(function (clusterIds) {
clusterId++;
// assign cluster ids to input points
clusterIds.forEach(function (idx) {
var clusterPoint = points.features[idx];
if (!clusterPoint.properties) clusterPoint.properties = {};
clusterPoint.properties.cluster = clusterId;
clusterPoint.properties.dbscan = "core";
});
// Function to find neighbors of a point within a given distance
const regionQuery = (index: number): IndexedPoint[] => {
const point = points.features[index];
const [x, y] = point.geometry.coordinates;

const minY = Math.max(y - latDistanceInDegrees, -90.0);
const maxY = Math.min(y + latDistanceInDegrees, 90.0);

const lonDistanceInDegrees = (function () {
// Handle the case where the bounding box crosses the poles
if (minY < 0 && maxY > 0) {
return latDistanceInDegrees;
}
if (Math.abs(minY) < Math.abs(maxY)) {
return latDistanceInDegrees / Math.cos(degreesToRadians(maxY));
} else {
return latDistanceInDegrees / Math.cos(degreesToRadians(minY));
}
})();

const minX = Math.max(x - lonDistanceInDegrees, -360.0);
const maxX = Math.min(x + lonDistanceInDegrees, 360.0);

// Calculate the bounding box for the region query
const bbox = { minX, minY, maxX, maxY };
return tree.search(bbox).filter((neighbor) => {
const neighborIndex = (neighbor as IndexedPoint).index;
const neighborPoint = points.features[neighborIndex];
const distanceInKm = distance(point, neighborPoint, {
units: "kilometers",
});
return distanceInKm <= maxDistance;
}) as IndexedPoint[];
};

// Function to expand a cluster
const expandCluster = (clusteredId: number, neighbors: IndexedPoint[]) => {
for (var i = 0; i < neighbors.length; i++) {
var neighbor = neighbors[i];
const neighborIndex = neighbor.index;
if (!visited[neighborIndex]) {
visited[neighborIndex] = true;
const nextNeighbors = regionQuery(neighborIndex);
if (nextNeighbors.length >= minPoints) {
neighbors.push(...nextNeighbors);
}
}
if (!assigned[neighborIndex]) {
assigned[neighborIndex] = true;
clusterIds[neighborIndex] = clusteredId;
}
}
};

// Main DBSCAN clustering algorithm
var nextClusteredId = 0;
points.features.forEach((_, index) => {
if (visited[index]) return;
const neighbors = regionQuery(index);
if (neighbors.length >= minPoints) {
const clusteredId = nextClusteredId;
nextClusteredId++;
visited[index] = true;
expandCluster(clusteredId, neighbors);
} else {
isnoise[index] = true;
}
});

// handle noise points, if any
// edges points are tagged by DBSCAN as both 'noise' and 'cluster' as they can "reach" less than 'minPoints' number of points
dbscan.noise.forEach(function (noiseId) {
var noisePoint = points.features[noiseId];
if (!noisePoint.properties) noisePoint.properties = {};
if (noisePoint.properties.cluster) noisePoint.properties.dbscan = "edge";
else noisePoint.properties.dbscan = "noise";
// Assign DBSCAN properties to each point
points.features.forEach((_, index) => {
var clusterPoint = points.features[index];
if (!clusterPoint.properties) {
clusterPoint.properties = {};
}

if (clusterIds[index] >= 0) {
clusterPoint.properties.dbscan = isnoise[index] ? "edge" : "core";
clusterPoint.properties.cluster = clusterIds[index];
} else {
clusterPoint.properties.dbscan = "noise";
}
});

return points as FeatureCollection<Point, DbscanProps>;
Expand Down
3 changes: 1 addition & 2 deletions packages/turf-clusters-dbscan/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@
"devDependencies": {
"@turf/centroid": "^7.0.0-alpha.2",
"@turf/clusters": "^7.0.0-alpha.2",
"@types/density-clustering": "^1.3.3",
"@types/tape": "^4.2.32",
"benchmark": "^2.1.4",
"chromatism": "^3.0.0",
Expand All @@ -75,7 +74,7 @@
"@turf/distance": "^7.0.0-alpha.2",
"@turf/helpers": "^7.0.0-alpha.2",
"@turf/meta": "^7.0.0-alpha.2",
"density-clustering": "1.3.0",
"rbush": "^3.0.1",
"tslib": "^2.6.2"
}
}