Merge branch 'master' into 108/test_coverage

atmorling · May 20, 2024 · 53fee41 · 53fee41
2 parents b7eca72 + cc4fb51
commit 53fee41
Show file tree

Hide file tree

Showing 11 changed files with 193 additions and 50 deletions.
diff --git a/ecoscope/analysis/__init__.py b/ecoscope/analysis/__init__.py
@@ -1,4 +1,5 @@
 from ecoscope.analysis import UD, astronomy, seasons
+from ecoscope.analysis.classifier import apply_classification
 from ecoscope.analysis.ecograph import Ecograph, get_feature_gdf
 from ecoscope.analysis.percentile import get_percentile_area
 from ecoscope.analysis.speed import SpeedDataFrame
@@ -11,4 +12,5 @@
     "get_feature_gdf",
     "get_percentile_area",
     "seasons",
+    "apply_classification",
 ]
diff --git a/ecoscope/analysis/classifier.py b/ecoscope/analysis/classifier.py
@@ -0,0 +1,39 @@
+import mapclassify
+
+classification_methods = {
+    "equal_interval": mapclassify.EqualInterval,
+    "natural_breaks": mapclassify.NaturalBreaks,
+    "quantile": mapclassify.Quantiles,
+    "std_mean": mapclassify.StdMean,
+    "max_breaks": mapclassify.MaximumBreaks,
+    "fisher_jenks": mapclassify.FisherJenks,
+}
+
+
+# Takes array-like data and returns a list holding one class label per input value.
+def apply_classification(x, labels=None, scheme="natural_breaks", **kwargs):
+    """
+    Classifies the data in a GeoDataFrame column using specified classification scheme.
+
+    Args:
+    x : An array containing the data to classify.
+    labels (list): one label per bin; the bin edges are used as labels if labels is None.
+    scheme (str): Classification scheme to use [equal_interval, natural_breaks, quantile, std_mean, max_breaks,
+    fisher_jenks]
+
+    **kwargs: Additional keyword arguments specific to the classification scheme.
+
+    Returns:
+    result: a list with one entry per input value, giving the label of the bin that value falls into.
+    """
+
+    classifier_class = classification_methods.get(scheme)
+
+    if not classifier_class:
+        raise ValueError(f"Invalid classification scheme. Choose from: {list(classification_methods.keys())}")
+
+    classifier = classifier_class(x, **kwargs)
+    if labels is None:
+        labels = classifier.bins
+    assert len(labels) == len(classifier.bins)
+    return [labels[i] for i in classifier.yb]
diff --git a/ecoscope/analysis/seasons.py b/ecoscope/analysis/seasons.py
@@ -19,11 +19,11 @@ def _min_max_scaler(x):
     return x_std
 
 
-def std_ndvi_vals(aoi=None, img_coll=None, band=None, img_scale=1, start=None, end=None):
+def std_ndvi_vals(aoi=None, img_coll=None, nir_band=None, red_band=None, img_scale=1, start=None, end=None):
 
     coll = (
         ee.ImageCollection(img_coll)
-        .select(band)
+        .select([nir_band, red_band])
         .filterDate(start, end)
         .map(lambda x: x.multiply(ee.Image(img_scale)).set("system:time_start", x.get("system:time_start")))
     )
@@ -33,11 +33,14 @@ def std_ndvi_vals(aoi=None, img_coll=None, band=None, img_scale=1, start=None, e
     else:
         geo = None
 
+    img_dates = pandas.to_datetime(coll.aggregate_array("system:time_start").getInfo(), unit="ms", utc=True)
+
+    coll = coll.map(lambda x: x.normalizedDifference([nir_band, red_band]))
     ndvi_vals = coll.toBands().reduceRegion("mean", geo, bestEffort=True).values().getInfo()
 
     df = pandas.DataFrame(
         {
-            "img_date": pandas.to_datetime(coll.aggregate_array("system:time_start").getInfo(), unit="ms", utc=True),
+            "img_date": img_dates,
             "NDVI": ndvi_vals,
         }
     ).dropna(axis=0)
@@ -110,21 +113,21 @@ def seasonal_windows(ndvi_vals, cuts, season_labels):
 
 
 def add_seasonal_index(
-    df, index_name, start_date, end_date, aoi_geom_filter=None, seasons=2, season_labels=["dry", "wet"]
+    df, index_name, start_date, end_date, time_col, aoi_geom_filter=None, seasons=2, season_labels=["dry", "wet"]
 ):
 
     aoi_ = None
     try:
         aoi_ = aoi_geom_filter.dissolve().iloc[0]["geometry"]
-    except:
+    except AttributeError:
         aoi_ = aoi_geom_filter
 
     if len(season_labels) != seasons:
         raise Exception(
             f"Parameter value 'seasons' ({seasons}) must match the number of 'season_labels' elements ({season_labels})"
         )
     # extract the standardized NDVI ndvi_vals within the AOI
-    ndvi_vals = std_ndvi_vals(aoi_, start=since_filter.isoformat(), end=until_filter.isoformat())
+    ndvi_vals = std_ndvi_vals(aoi_, start=start_date.isoformat(), end=end_date.isoformat())
 
     # calculate the seasonal transition point
     cuts = val_cuts(ndvi_vals, seasons)

diff --git a/ecoscope/io/earthranger.py b/ecoscope/io/earthranger.py
@@ -2,10 +2,13 @@
 import pytz
 import json
 import typing
+import math
 
+import numpy as np
 import geopandas as gpd
 import pandas as pd
 import requests
+from dateutil import parser
 from erclient.client import ERClient, ERClientException, ERClientNotFound
 from tqdm.auto import tqdm
 
@@ -122,13 +125,14 @@ def get_subjects(
         self,
         include_inactive=None,
         bbox=None,
-        subject_group=None,
+        subject_group_id=None,
         name=None,
         updated_since=None,
         tracks=None,
         id=None,
         updated_until=None,
-        group_name=None,
+        subject_group_name=None,
+        max_ids_per_request=50,
         **addl_kwargs,
     ):
         """
@@ -137,13 +141,15 @@ def get_subjects(
         include_inactive: Include inactive subjects in list.
         bbox: Include subjects having track data within this bounding box defined by a 4-tuple of coordinates marking
             west, south, east, north.
-        subject_group: Indicate a subject group for which Subjects should be listed.
+        subject_group_id: Indicate a subject group id for which Subjects should be listed.
+            This is translated to the subject_group parameter in the ER backend
         name : Find subjects with the given name
         updated_since: Return Subject that have been updated since the given timestamp.
         tracks: Indicate whether to render each subject's recent tracks.
         id: A comma-delimited list of Subject IDs.
         updated_until
-        group_name
+        subject_group_name: A subject group name for which Subjects should be listed.
+            This is translated to the group_name parameter in the ER backend
         Returns
         -------
         subjects : pd.DataFrame
@@ -153,13 +159,13 @@ def get_subjects(
             addl_kwargs,
             include_inactive=include_inactive,
             bbox=bbox,
-            subject_group=subject_group,
+            subject_group=subject_group_id,
             name=name,
             updated_since=updated_since,
             tracks=tracks,
             id=id,
             updated_until=updated_until,
-            group_name=group_name,
+            group_name=subject_group_name,
         )
 
         assert params.get("subject_group") is None or params.get("group_name") is None
@@ -178,11 +184,32 @@ def get_subjects(
             except IndexError:
                 raise KeyError("`group_name` not found")
 
-        df = pd.DataFrame(
-            self.get_objects_multithreaded(
-                object="subjects/", threads=self.tcp_limit, page_size=self.sub_page_size, **params
+        if params.get("id") is not None:
+            params["id"] = params.get("id").split(",")
+
+            def partial_subjects(subjects):
+                params["id"] = ",".join(subjects)
+                return pd.DataFrame(
+                    self.get_objects_multithreaded(
+                        object="subjects/", threads=self.tcp_limit, page_size=self.sub_page_size, **params
+                    )
+                )
+
+            df = pd.concat(
+                [
+                    partial_subjects(s)
+                    for s in np.array_split(params["id"], math.ceil(len(params["id"]) / max_ids_per_request))
+                ],
+                ignore_index=True,
             )
-        )
+
+        else:
+            df = pd.DataFrame(
+                self.get_objects_multithreaded(
+                    object="subjects/", threads=self.tcp_limit, page_size=self.sub_page_size, **params
+                )
+            )
+
         assert not df.empty
 
         df["hex"] = df["additional"].str["rgb"].map(to_hex) if "additional" in df else "#ff0000"
@@ -350,8 +377,8 @@ def get_subject_observations(
         Get observations for each listed subject and create a `Relocations` object.
         Parameters
         ----------
-        subject_ids : str or list[str]
-            List of subject UUIDs
+        subject_ids : str or list[str] or pd.DataFrame
+            List of subject UUIDs, or a DataFrame of subjects
         include_source_details : bool, optional
             Whether to merge source info into dataframe
         include_subject_details : bool, optional
@@ -369,6 +396,10 @@ def get_subject_observations(
 
         if isinstance(subject_ids, str):
             subject_ids = [subject_ids]
+        elif isinstance(subject_ids, pd.DataFrame):
+            subject_ids = subject_ids.id.tolist()
+        elif not isinstance(subject_ids, list):
+            raise ValueError(f"subject_ids must be either a str or list[str] or pd.DataFrame, not {type(subject_ids)}")
 
         observations = self._get_observations(subject_ids=subject_ids, **kwargs)
 
@@ -382,11 +413,18 @@ def get_subject_observations(
                 right_on="source__id",
             )
         if include_subject_details:
-            observations = observations.merge(
-                self.get_subjects(id=",".join(subject_ids), include_inactive=True).add_prefix("subject__"),
-                left_on="subject_id",
-                right_on="subject__id",
-            )
+            if isinstance(subject_ids, pd.DataFrame):
+                observations = observations.merge(
+                    subject_ids.add_prefix("subject__"),
+                    left_on="subject_id",
+                    right_on="subject__id",
+                )
+            else:
+                observations = observations.merge(
+                    self.get_subjects(id=",".join(subject_ids), include_inactive=True).add_prefix("subject__"),
+                    left_on="subject_id",
+                    right_on="subject__id",
+                )
 
         if include_subjectsource_details:
             observations = observations.merge(
@@ -456,13 +494,15 @@ def get_subjectsource_observations(
         else:
             return observations
 
-    def get_subjectgroup_observations(self, subject_group=None, group_name=None, include_inactive=True, **kwargs):
+    def get_subjectgroup_observations(
+        self, subject_group_id=None, subject_group_name=None, include_inactive=True, **kwargs
+    ):
         """
         Parameters
         ----------
-        subject_group : str
+        subject_group_id : str
             UUID of subject group to filter by
-        group_name : str
+        subject_group_name : str
             Common name of subject group to filter by
         include_inactive : bool, optional
             Whether to get observations for Subjects marked inactive by EarthRanger
@@ -475,14 +515,14 @@ def get_subjectgroup_observations(self, subject_group=None, group_name=None, inc
             Observations in `Relocations` format
         """
 
-        assert (subject_group is None) != (group_name is None)
+        assert (subject_group_id is None) != (subject_group_name is None)
 
-        if subject_group:
-            subject_ids = self.get_subjects(subject_group=subject_group, include_inactive=include_inactive).id.tolist()
+        if subject_group_id:
+            subjects = self.get_subjects(subject_group_id=subject_group_id, include_inactive=include_inactive)
         else:
-            subject_ids = self.get_subjects(group_name=group_name, include_inactive=include_inactive).id.tolist()
+            subjects = self.get_subjects(subject_group_name=subject_group_name, include_inactive=include_inactive)
 
-        return self.get_subject_observations(subject_ids, **kwargs)
+        return self.get_subject_observations(subjects, **kwargs)
 
     def get_event_types(self, include_inactive=False, **addl_kwargs):
         params = self._clean_kwargs(addl_kwargs, include_inactive=include_inactive)
@@ -593,7 +633,8 @@ def get_events(
         )
 
         assert not df.empty
-        df["time"] = pd.to_datetime(df["time"])
+
+        df["time"] = df["time"].apply(lambda x: pd.to_datetime(parser.parse(x)))
 
         gdf = gpd.GeoDataFrame(df)
         if gdf.loc[0, "location"] is not None:

diff --git a/ecoscope/version.py b/ecoscope/version.py
@@ -1 +1 @@
-__version__ = "1.7.0"
+__version__ = "1.7.2"
diff --git a/notebooks/01. IO/EarthRanger_IO.ipynb b/notebooks/01. IO/EarthRanger_IO.ipynb
@@ -423,7 +423,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### By `SubjectGroup` `group_name`"
+    "### By `SubjectGroup` `subject_group_name`"
    ]
   },
   {
@@ -432,7 +432,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "er_io.get_subjects(group_name=\"Elephants\")"
+    "er_io.get_subjects(subject_group_name=\"Elephants\")"
    ]
   },
   {
@@ -459,7 +459,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### By `SubjectGroup` `group_name`:"
+    "### By `SubjectGroup` `subject_group_name`:"
    ]
   },
   {
@@ -479,7 +479,7 @@
    "outputs": [],
    "source": [
     "relocs = er_io.get_subjectgroup_observations(\n",
-    "    group_name=\"Elephants\",\n",
+    "    subject_group_name=\"Elephants\",\n",
     "    filter=0,\n",
     "    since=since,\n",
     "    until=until,\n",
@@ -704,7 +704,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "elephants = er_io.get_subjectgroup_observations(group_name=\"Elephants\", since=pd.Timestamp(\"2022-01-01\").isoformat())\n",
+    "elephants = er_io.get_subjectgroup_observations(\n",
+    "    subject_group_name=\"Elephants\", since=pd.Timestamp(\"2022-01-01\").isoformat()\n",
+    ")\n",
     "\n",
     "if not elephants.empty:\n",
     "    for i, value in elephants.iterrows():\n",
@@ -834,7 +836,7 @@
    "outputs": [],
    "source": [
     "relocs = er_io.get_subjectgroup_observations(\n",
-    "    group_name=\"Elephants\",\n",
+    "    subject_group_name=\"Elephants\",\n",
     "    filter=0,\n",
     "    since=since,\n",
     "    until=until,\n",

diff --git a/notebooks/05. Environmental Analyses/Seasonal Calculation.ipynb b/notebooks/05. Environmental Analyses/Seasonal Calculation.ipynb
@@ -155,15 +155,16 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "img_coll = \"MODIS/061/MOD13A1\"\n",
+    "img_coll = \"MODIS/061/MCD43A4\"\n",
     "band = \"NDVI\"\n",
-    "img_scale = 0.0001\n",
-    "since_filter = \"2010-01-01\"\n",
+    "img_scale = 1\n",
+    "since_filter = \"2020-01-01\"\n",
     "until_filter = \"2022-06-18\"\n",
     "\n",
     "ndvi_vals = seasons.std_ndvi_vals(\n",
     "    img_coll=img_coll,\n",
-    "    band=band,\n",
+    "    nir_band=\"Nadir_Reflectance_Band2\",\n",
+    "    red_band=\"Nadir_Reflectance_Band1\",\n",
     "    img_scale=img_scale,\n",
     "    aoi=aoi,\n",
     "    start=since_filter,\n",