Skip to content

Commit

Permalink
Implement classifications to continuous data (wildlife-dynamics#151)
Browse files Browse the repository at this point in the history
  • Loading branch information
Yun-Wu authored May 17, 2024
1 parent 29f6fda commit cc4fb51
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 0 deletions.
2 changes: 2 additions & 0 deletions ecoscope/analysis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from ecoscope.analysis import UD, astronomy, seasons
from ecoscope.analysis.classifier import apply_classification
from ecoscope.analysis.ecograph import Ecograph, get_feature_gdf
from ecoscope.analysis.percentile import get_percentile_area
from ecoscope.analysis.speed import SpeedDataFrame
Expand All @@ -11,4 +12,5 @@
"get_feature_gdf",
"get_percentile_area",
"seasons",
"apply_classification",
]
39 changes: 39 additions & 0 deletions ecoscope/analysis/classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import mapclassify

# Dispatch table: maps each supported `scheme` name accepted by
# apply_classification to the mapclassify classifier class that implements it.
classification_methods = {
"equal_interval": mapclassify.EqualInterval,
"natural_breaks": mapclassify.NaturalBreaks,
"quantile": mapclassify.Quantiles,
"std_mean": mapclassify.StdMean,
"max_breaks": mapclassify.MaximumBreaks,
"fisher_jenks": mapclassify.FisherJenks,
}


# pass in a series and output the series
def apply_classification(x, labels=None, scheme="natural_breaks", **kwargs):
    """
    Classify continuous data into bins and return one label per input value.

    Args:
        x: An array-like containing the data to classify.
        labels (list, optional): One label per bin, in bin order. When None, the
            bin edges reported by the classifier (``classifier.bins``) are used.
        scheme (str): Classification scheme to use [equal_interval, natural_breaks, quantile, std_mean,
            max_breaks, fisher_jenks]
        **kwargs: Additional keyword arguments forwarded to the classifier for the chosen scheme.

    Returns:
        list: The label of the bin assigned to each input value, in input order.

    Raises:
        ValueError: If ``scheme`` is not one of the supported schemes.
        AssertionError: If ``labels`` is given and its length differs from the number of bins.
    """
    classifier_class = classification_methods.get(scheme)
    if classifier_class is None:
        raise ValueError(f"Invalid classification scheme. Choose from: {list(classification_methods.keys())}")

    classifier = classifier_class(x, **kwargs)
    bins = classifier.bins

    if labels is None:
        labels = bins
    elif len(labels) != len(bins):
        # Raised explicitly rather than via `assert` so the check still runs under
        # `python -O`; AssertionError is kept so existing callers keep working.
        raise AssertionError(f"Expected {len(bins)} labels (one per bin), got {len(labels)}")

    # classifier.yb holds the bin index assigned to each observation.
    return [labels[i] for i in classifier.yb]
41 changes: 41 additions & 0 deletions tests/test_classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import pytest

import ecoscope


@pytest.mark.parametrize(
    "scheme,kwargs,expected",
    [
        ("equal_interval", {"k": 2}, [3, 3, 3, 5, 5]),
        ("quantile", {"k": 2}, [3, 3, 3, 5, 5]),
        (
            "std_mean",
            {"multiples": [-2, -1, 1, 2]},
            [1.4188611699158102, 4.58113883008419, 4.58113883008419, 4.58113883008419, 6.16227766016838],
        ),
        ("max_breaks", {"k": 4}, [2.5, 2.5, 3.5, 4.5, 5.0]),
        ("fisher_jenks", {"k": 5}, [1.0, 2.0, 3.0, 4.0, 5.0]),
    ],
)
def test_classify_data(scheme, kwargs, expected):
    """Each scheme labels the values 1..5 with the expected bin edges."""
    y = [1, 2, 3, 4, 5]
    result = ecoscope.analysis.apply_classification(y, scheme=scheme, **kwargs)
    # Bin edges are computed floats (e.g. mean +/- k * std), so compare with a
    # tolerance instead of exact equality to avoid platform-dependent failures.
    assert result == pytest.approx(expected), f"Failed on scheme {scheme}"


def test_classify_with_labels():
    """Caller-supplied labels are returned in place of the bin edges."""
    classified = ecoscope.analysis.apply_classification(
        [1, 2, 3, 4, 5],
        labels=["1", "2"],
        scheme="equal_interval",
        k=2,
    )
    expected_labels = ["1", "1", "1", "2", "2"]
    assert classified == expected_labels


def test_classify_with_invalid_labels():
    """Passing a single label to the std_mean scheme raises AssertionError."""
    values = [1, 2, 3, 4, 5]
    single_label = [0]
    with pytest.raises(AssertionError):
        ecoscope.analysis.apply_classification(
            values,
            labels=single_label,
            scheme="std_mean",
        )


def test_classify_with_invalid_scheme():
    """An unrecognised scheme name raises ValueError."""
    values = [1, 2, 3, 4, 5]
    unknown_scheme = "InvalidScheme"
    with pytest.raises(ValueError):
        ecoscope.analysis.apply_classification(values, scheme=unknown_scheme)

0 comments on commit cc4fb51

Please sign in to comment.