Skip to content

Commit

Permalink
Merge branch 'master' into 108/test_coverage
Browse files Browse the repository at this point in the history
  • Loading branch information
atmorling authored May 20, 2024
2 parents b7eca72 + cc4fb51 commit 53fee41
Show file tree
Hide file tree
Showing 11 changed files with 193 additions and 50 deletions.
2 changes: 2 additions & 0 deletions ecoscope/analysis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from ecoscope.analysis import UD, astronomy, seasons
from ecoscope.analysis.classifier import apply_classification
from ecoscope.analysis.ecograph import Ecograph, get_feature_gdf
from ecoscope.analysis.percentile import get_percentile_area
from ecoscope.analysis.speed import SpeedDataFrame
Expand All @@ -11,4 +12,5 @@
"get_feature_gdf",
"get_percentile_area",
"seasons",
"apply_classification",
]
39 changes: 39 additions & 0 deletions ecoscope/analysis/classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import mapclassify

# Registry mapping the scheme names accepted by apply_classification() to the
# mapclassify classifier classes that implement them.
classification_methods = {
"equal_interval": mapclassify.EqualInterval,
"natural_breaks": mapclassify.NaturalBreaks,
"quantile": mapclassify.Quantiles,
"std_mean": mapclassify.StdMean,
"max_breaks": mapclassify.MaximumBreaks,
"fisher_jenks": mapclassify.FisherJenks,
}


# Pass in a series (or any array-like) and get back a list of per-value labels.
def apply_classification(x, labels=None, scheme="natural_breaks", **kwargs):
    """
    Classify the values in ``x`` using the specified classification scheme.

    Args:
        x: An array-like containing the data to classify.
        labels (list): One label per bin, in bin order. If None, the classifier's
            bin edges (``classifier.bins``) are used as labels.
        scheme (str): Classification scheme to use [equal_interval, natural_breaks, quantile, std_mean, max_breaks,
            fisher_jenks]
        **kwargs: Additional keyword arguments passed to the classifier constructor.
    Returns:
        result: a list holding, for each input value, the label of the bin it was assigned to.
    Raises:
        ValueError: If ``scheme`` is not a key of ``classification_methods``, or if the
            number of ``labels`` does not match the number of bins produced.
    """

    classifier_class = classification_methods.get(scheme)

    if classifier_class is None:
        raise ValueError(f"Invalid classification scheme. Choose from: {list(classification_methods.keys())}")

    classifier = classifier_class(x, **kwargs)
    if labels is None:
        labels = classifier.bins
    elif len(labels) != len(classifier.bins):
        # Raise explicitly rather than assert: asserts are stripped under `python -O`,
        # which would let a mismatched label list through to the lookup below.
        raise ValueError(
            f"Expected {len(classifier.bins)} labels (one per bin), but {len(labels)} were provided."
        )

    # classifier.yb holds the bin index assigned to each input value.
    return [labels[i] for i in classifier.yb]
15 changes: 9 additions & 6 deletions ecoscope/analysis/seasons.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ def _min_max_scaler(x):
return x_std


def std_ndvi_vals(aoi=None, img_coll=None, band=None, img_scale=1, start=None, end=None):
def std_ndvi_vals(aoi=None, img_coll=None, nir_band=None, red_band=None, img_scale=1, start=None, end=None):

coll = (
ee.ImageCollection(img_coll)
.select(band)
.select([nir_band, red_band])
.filterDate(start, end)
.map(lambda x: x.multiply(ee.Image(img_scale)).set("system:time_start", x.get("system:time_start")))
)
Expand All @@ -33,11 +33,14 @@ def std_ndvi_vals(aoi=None, img_coll=None, band=None, img_scale=1, start=None, e
else:
geo = None

img_dates = pandas.to_datetime(coll.aggregate_array("system:time_start").getInfo(), unit="ms", utc=True)

coll = coll.map(lambda x: x.normalizedDifference([nir_band, red_band]))
ndvi_vals = coll.toBands().reduceRegion("mean", geo, bestEffort=True).values().getInfo()

df = pandas.DataFrame(
{
"img_date": pandas.to_datetime(coll.aggregate_array("system:time_start").getInfo(), unit="ms", utc=True),
"img_date": img_dates,
"NDVI": ndvi_vals,
}
).dropna(axis=0)
Expand Down Expand Up @@ -110,21 +113,21 @@ def seasonal_windows(ndvi_vals, cuts, season_labels):


def add_seasonal_index(
df, index_name, start_date, end_date, aoi_geom_filter=None, seasons=2, season_labels=["dry", "wet"]
df, index_name, start_date, end_date, time_col, aoi_geom_filter=None, seasons=2, season_labels=["dry", "wet"]
):

aoi_ = None
try:
aoi_ = aoi_geom_filter.dissolve().iloc[0]["geometry"]
except:
except AttributeError:
aoi_ = aoi_geom_filter

if len(season_labels) != seasons:
raise Exception(
f"Parameter value 'seasons' ({seasons}) must match the number of 'season_labels' elements ({season_labels})"
)
# extract the standardized NDVI ndvi_vals within the AOI
ndvi_vals = std_ndvi_vals(aoi_, start=since_filter.isoformat(), end=until_filter.isoformat())
ndvi_vals = std_ndvi_vals(aoi_, start=start_date.isoformat(), end=end_date.isoformat())

# calculate the seasonal transition point
cuts = val_cuts(ndvi_vals, seasons)
Expand Down
93 changes: 67 additions & 26 deletions ecoscope/io/earthranger.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
import pytz
import json
import typing
import math

import numpy as np
import geopandas as gpd
import pandas as pd
import requests
from dateutil import parser
from erclient.client import ERClient, ERClientException, ERClientNotFound
from tqdm.auto import tqdm

Expand Down Expand Up @@ -122,13 +125,14 @@ def get_subjects(
self,
include_inactive=None,
bbox=None,
subject_group=None,
subject_group_id=None,
name=None,
updated_since=None,
tracks=None,
id=None,
updated_until=None,
group_name=None,
subject_group_name=None,
max_ids_per_request=50,
**addl_kwargs,
):
"""
Expand All @@ -137,13 +141,15 @@ def get_subjects(
include_inactive: Include inactive subjects in list.
bbox: Include subjects having track data within this bounding box defined by a 4-tuple of coordinates marking
west, south, east, north.
subject_group: Indicate a subject group for which Subjects should be listed.
subject_group_id: Indicate a subject group id for which Subjects should be listed.
This is translated to the subject_group parameter in the ER backend
name : Find subjects with the given name
updated_since: Return Subject that have been updated since the given timestamp.
tracks: Indicate whether to render each subject's recent tracks.
id: A comma-delimited list of Subject IDs.
updated_until
group_name
subject_group_name: A subject group name for which Subjects should be listed.
This is translated to the group_name parameter in the ER backend
Returns
-------
subjects : pd.DataFrame
Expand All @@ -153,13 +159,13 @@ def get_subjects(
addl_kwargs,
include_inactive=include_inactive,
bbox=bbox,
subject_group=subject_group,
subject_group=subject_group_id,
name=name,
updated_since=updated_since,
tracks=tracks,
id=id,
updated_until=updated_until,
group_name=group_name,
group_name=subject_group_name,
)

assert params.get("subject_group") is None or params.get("group_name") is None
Expand All @@ -178,11 +184,32 @@ def get_subjects(
except IndexError:
raise KeyError("`group_name` not found")

df = pd.DataFrame(
self.get_objects_multithreaded(
object="subjects/", threads=self.tcp_limit, page_size=self.sub_page_size, **params
if params.get("id") is not None:
params["id"] = params.get("id").split(",")

def partial_subjects(subjects):
params["id"] = ",".join(subjects)
return pd.DataFrame(
self.get_objects_multithreaded(
object="subjects/", threads=self.tcp_limit, page_size=self.sub_page_size, **params
)
)

df = pd.concat(
[
partial_subjects(s)
for s in np.array_split(params["id"], math.ceil(len(params["id"]) / max_ids_per_request))
],
ignore_index=True,
)
)

else:
df = pd.DataFrame(
self.get_objects_multithreaded(
object="subjects/", threads=self.tcp_limit, page_size=self.sub_page_size, **params
)
)

assert not df.empty

df["hex"] = df["additional"].str["rgb"].map(to_hex) if "additional" in df else "#ff0000"
Expand Down Expand Up @@ -350,8 +377,8 @@ def get_subject_observations(
Get observations for each listed subject and create a `Relocations` object.
Parameters
----------
subject_ids : str or list[str]
List of subject UUIDs
subject_ids : str or list[str] or pd.DataFrame
List of subject UUIDs, or a DataFrame of subjects
include_source_details : bool, optional
Whether to merge source info into dataframe
include_subject_details : bool, optional
Expand All @@ -369,6 +396,10 @@ def get_subject_observations(

if isinstance(subject_ids, str):
subject_ids = [subject_ids]
elif isinstance(subject_ids, pd.DataFrame):
subject_ids = subject_ids.id.tolist()
elif not isinstance(subject_ids, list):
raise ValueError(f"subject_ids must be either a str or list[str] or pd.DataFrame, not {type(subject_ids)}")

observations = self._get_observations(subject_ids=subject_ids, **kwargs)

Expand All @@ -382,11 +413,18 @@ def get_subject_observations(
right_on="source__id",
)
if include_subject_details:
observations = observations.merge(
self.get_subjects(id=",".join(subject_ids), include_inactive=True).add_prefix("subject__"),
left_on="subject_id",
right_on="subject__id",
)
if isinstance(subject_ids, pd.DataFrame):
observations = observations.merge(
subject_ids.add_prefix("subject__"),
left_on="subject_id",
right_on="subject__id",
)
else:
observations = observations.merge(
self.get_subjects(id=",".join(subject_ids), include_inactive=True).add_prefix("subject__"),
left_on="subject_id",
right_on="subject__id",
)

if include_subjectsource_details:
observations = observations.merge(
Expand Down Expand Up @@ -456,13 +494,15 @@ def get_subjectsource_observations(
else:
return observations

def get_subjectgroup_observations(self, subject_group=None, group_name=None, include_inactive=True, **kwargs):
def get_subjectgroup_observations(
self, subject_group_id=None, subject_group_name=None, include_inactive=True, **kwargs
):
"""
Parameters
----------
subject_group : str
subject_group_id : str
UUID of subject group to filter by
group_name : str
subject_group_name : str
Common name of subject group to filter by
include_inactive : bool, optional
Whether to get observations for Subjects marked inactive by EarthRanger
Expand All @@ -475,14 +515,14 @@ def get_subjectgroup_observations(self, subject_group=None, group_name=None, inc
Observations in `Relocations` format
"""

assert (subject_group is None) != (group_name is None)
assert (subject_group_id is None) != (subject_group_name is None)

if subject_group:
subject_ids = self.get_subjects(subject_group=subject_group, include_inactive=include_inactive).id.tolist()
if subject_group_id:
subjects = self.get_subjects(subject_group_id=subject_group_id, include_inactive=include_inactive)
else:
subject_ids = self.get_subjects(group_name=group_name, include_inactive=include_inactive).id.tolist()
subjects = self.get_subjects(subject_group_name=subject_group_name, include_inactive=include_inactive)

return self.get_subject_observations(subject_ids, **kwargs)
return self.get_subject_observations(subjects, **kwargs)

def get_event_types(self, include_inactive=False, **addl_kwargs):
params = self._clean_kwargs(addl_kwargs, include_inactive=include_inactive)
Expand Down Expand Up @@ -593,7 +633,8 @@ def get_events(
)

assert not df.empty
df["time"] = pd.to_datetime(df["time"])

df["time"] = df["time"].apply(lambda x: pd.to_datetime(parser.parse(x)))

gdf = gpd.GeoDataFrame(df)
if gdf.loc[0, "location"] is not None:
Expand Down
2 changes: 1 addition & 1 deletion ecoscope/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.7.0"
__version__ = "1.7.2"
14 changes: 8 additions & 6 deletions notebooks/01. IO/EarthRanger_IO.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### By `SubjectGroup` `group_name`"
"### By `SubjectGroup` `subject_group_name`"
]
},
{
Expand All @@ -432,7 +432,7 @@
"metadata": {},
"outputs": [],
"source": [
"er_io.get_subjects(group_name=\"Elephants\")"
"er_io.get_subjects(subject_group_name=\"Elephants\")"
]
},
{
Expand All @@ -459,7 +459,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### By `SubjectGroup` `group_name`:"
"### By `SubjectGroup` `subject_group_name`:"
]
},
{
Expand All @@ -479,7 +479,7 @@
"outputs": [],
"source": [
"relocs = er_io.get_subjectgroup_observations(\n",
" group_name=\"Elephants\",\n",
" subject_group_name=\"Elephants\",\n",
" filter=0,\n",
" since=since,\n",
" until=until,\n",
Expand Down Expand Up @@ -704,7 +704,9 @@
"metadata": {},
"outputs": [],
"source": [
"elephants = er_io.get_subjectgroup_observations(group_name=\"Elephants\", since=pd.Timestamp(\"2022-01-01\").isoformat())\n",
"elephants = er_io.get_subjectgroup_observations(\n",
" subject_group_name=\"Elephants\", since=pd.Timestamp(\"2022-01-01\").isoformat()\n",
")\n",
"\n",
"if not elephants.empty:\n",
" for i, value in elephants.iterrows():\n",
Expand Down Expand Up @@ -834,7 +836,7 @@
"outputs": [],
"source": [
"relocs = er_io.get_subjectgroup_observations(\n",
" group_name=\"Elephants\",\n",
" subject_group_name=\"Elephants\",\n",
" filter=0,\n",
" since=since,\n",
" until=until,\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,15 +155,16 @@
"metadata": {},
"outputs": [],
"source": [
"img_coll = \"MODIS/061/MOD13A1\"\n",
"img_coll = \"MODIS/061/MCD43A4\"\n",
"band = \"NDVI\"\n",
"img_scale = 0.0001\n",
"since_filter = \"2010-01-01\"\n",
"img_scale = 1\n",
"since_filter = \"2020-01-01\"\n",
"until_filter = \"2022-06-18\"\n",
"\n",
"ndvi_vals = seasons.std_ndvi_vals(\n",
" img_coll=img_coll,\n",
" band=band,\n",
" nir_band=\"Nadir_Reflectance_Band2\",\n",
" red_band=\"Nadir_Reflectance_Band1\",\n",
" img_scale=img_scale,\n",
" aoi=aoi,\n",
" start=since_filter,\n",
Expand Down
Loading

0 comments on commit 53fee41

Please sign in to comment.