diff --git a/ecoscope/analysis/__init__.py b/ecoscope/analysis/__init__.py
index 46ca082e..21ba0244 100644
--- a/ecoscope/analysis/__init__.py
+++ b/ecoscope/analysis/__init__.py
@@ -1,4 +1,5 @@
 from ecoscope.analysis import UD, astronomy, seasons
+from ecoscope.analysis.classifier import apply_classification
 from ecoscope.analysis.ecograph import Ecograph, get_feature_gdf
 from ecoscope.analysis.percentile import get_percentile_area
 from ecoscope.analysis.speed import SpeedDataFrame
@@ -11,4 +12,5 @@
     "get_feature_gdf",
     "get_percentile_area",
     "seasons",
+    "apply_classification",
 ]
diff --git a/ecoscope/analysis/classifier.py b/ecoscope/analysis/classifier.py
new file mode 100644
index 00000000..51cf84ff
--- /dev/null
+++ b/ecoscope/analysis/classifier.py
@@ -0,0 +1,46 @@
+import mapclassify
+
+# Maps each supported scheme name to the mapclassify classifier class that implements it.
+classification_methods = {
+    "equal_interval": mapclassify.EqualInterval,
+    "natural_breaks": mapclassify.NaturalBreaks,
+    "quantile": mapclassify.Quantiles,
+    "std_mean": mapclassify.StdMean,
+    "max_breaks": mapclassify.MaximumBreaks,
+    "fisher_jenks": mapclassify.FisherJenks,
+}
+
+
+def apply_classification(x, labels=None, scheme="natural_breaks", **kwargs):
+    """
+    Classifies the input data with the specified scheme and returns one label per input value.
+
+    Args:
+    x : An array-like containing the data to classify.
+    labels (list): One label per bin, in bin order. The classifier's bin edges are used if labels is None.
+    scheme (str): Classification scheme to use [equal_interval, natural_breaks, quantile, std_mean, max_breaks,
+    fisher_jenks]
+    **kwargs: Additional keyword arguments passed to the classifier of the chosen scheme (e.g. k).
+
+    Returns:
+    result: a list holding, for each input value in order, the label of the bin it was assigned to.
+
+    Raises:
+    ValueError: if scheme is not a key of classification_methods.
+    AssertionError: if the number of labels differs from the number of bins.
+    """
+
+    classifier_class = classification_methods.get(scheme)
+    if classifier_class is None:
+        raise ValueError(f"Invalid classification scheme. Choose from: {list(classification_methods.keys())}")
+
+    classifier = classifier_class(x, **kwargs)
+    if labels is None:
+        labels = classifier.bins
+    elif len(labels) != len(classifier.bins):
+        # Raised explicitly instead of via ``assert`` so the check still runs under ``python -O``.
+        # AssertionError is kept so existing callers that catch it keep working.
+        raise AssertionError(f"Expected {len(classifier.bins)} labels (one per bin), got {len(labels)}.")
+
+    # classifier.yb holds the bin index assigned to each input value.
+    return [labels[i] for i in classifier.yb]
diff --git a/tests/test_classifier.py b/tests/test_classifier.py
new file mode 100644
index 00000000..44b01d48
--- /dev/null
+++ b/tests/test_classifier.py
@@ -0,0 +1,41 @@
+import pytest
+
+import ecoscope
+
+
+@pytest.mark.parametrize(
+    "scheme,kwargs,expected",
+    [
+        ("equal_interval", {"k": 2}, [3, 3, 3, 5, 5]),
+        ("quantile", {"k": 2}, [3, 3, 3, 5, 5]),
+        (
+            "std_mean",
+            {"multiples": [-2, -1, 1, 2]},
+            [1.4188611699158102, 4.58113883008419, 4.58113883008419, 4.58113883008419, 6.16227766016838],
+        ),
+        ("max_breaks", {"k": 4}, [2.5, 2.5, 3.5, 4.5, 5.0]),
+        ("fisher_jenks", {"k": 5}, [1.0, 2.0, 3.0, 4.0, 5.0]),
+    ],
+)
+def test_classify_data(scheme, kwargs, expected):
+    y = [1, 2, 3, 4, 5]
+    result = ecoscope.analysis.apply_classification(y, scheme=scheme, **kwargs)
+    assert result == expected, f"Failed on scheme {scheme}"
+
+
+def test_classify_with_labels():
+    y = [1, 2, 3, 4, 5]
+    result = ecoscope.analysis.apply_classification(y, labels=["1", "2"], scheme="equal_interval", k=2)
+    assert result == ["1", "1", "1", "2", "2"]
+
+
+def test_classify_with_invalid_labels():
+    y = [1, 2, 3, 4, 5]
+    with pytest.raises(AssertionError):
+        ecoscope.analysis.apply_classification(y, labels=[0], scheme="std_mean")
+
+
+def test_classify_with_invalid_scheme():
+    y = [1, 2, 3, 4, 5]
+    with pytest.raises(ValueError):
+        ecoscope.analysis.apply_classification(y, scheme="InvalidScheme")