diff --git a/docs/usage/index.rst b/docs/usage/index.rst index 5707c4f2..2a9df85d 100644 --- a/docs/usage/index.rst +++ b/docs/usage/index.rst @@ -42,4 +42,5 @@ Then you can open a PR with the new file and it will be reviewed and merged. template export plot/index - asset \ No newline at end of file + asset + profile \ No newline at end of file diff --git a/docs/usage/profile.ipynb b/docs/usage/profile.ipynb new file mode 100644 index 00000000..c8ce0c09 --- /dev/null +++ b/docs/usage/profile.ipynb @@ -0,0 +1,395 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Profile Earth Engine computation\n", + "\n", + "The Earth Engine API provides tools for profiling the performance of your computations but they are not always the easiest to use to get the number you are looking for. The `geetools` library supercharges the original profiler to make the evaluation of any computation as easy as possible." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![github](https://img.shields.io/badge/-see%20sources-white?logo=github&labelColor=555)](https://github.com/gee-community/geetools/blob/main/docs/usage/profile.ipynb)\n", + "[![colab](https://img.shields.io/badge/-open%20in%20colab-blue?logo=googlecolab&labelColor=555)](https://colab.research.google.com/github/gee-community/geetools/blob/main/docs/usage/profile.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "tags": [ + "remove-input" + ] + }, + "outputs": [], + "source": [ + "import ee, pytest_gee, os\n", + "\n", + "if \"EARTHENGINE_SERVICE_ACCOUNT\" in os.environ:\n", + " pytest_gee.init_ee_from_service_account()\n", + "elif \"EARTHENGINE_PROJECT\" in os.environ:\n", + " pytest_gee.init_ee_from_token()\n", + "else:\n", + " raise ValueError(\"Cannot authenticate with Earth Engine.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set up environment\n", + "\n", + "Install all the required libs if 
necessary and perform the import statements upstream." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# uncomment if installation of libs is necessary\n", + "# !pip install earthengine-api geetools" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import ee\n", + "import geetools\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# uncomment if authentication to GEE is needed\n", + "# ee.Authenticate()\n", + "# ee.Initialize(project=\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example data\n", + "\n", + "The following examples rely on a `ee.FeatureCollection` composed of three ecoregion features that define regions by which to reduce image data. The Image data are PRISM climate normals, where bands describe climate variables per month; e.g., July precipitation or January mean temperature.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "ecoregions = (\n", + " ee.FeatureCollection(\"projects/google/charts_feature_example\")\n", + " .select([\"label\", \"value\",\"warm\"])\n", + ")\n", + "\n", + "normClim = ee.ImageCollection('OREGONSTATE/PRISM/Norm91m').toBands()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Default profiler\n", + "\n", + "The default profiler from Earth Engine can be called as a context manager; it prints the extensive description of your computation at the end of the cell. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " EECUĀ·s PeakMem Count Description\n", + " 0.245 652k 87 Loading assets: projects/google/charts_feature_example\n", + " 0.229 59k 6 Algorithm Image.reduceRegions\n", + " 0.127 345k 831 (plumbing)\n", + " 0.023 607k 86 no description available\n", + " 0.010 111k 11 Loading assets: OREGONSTATE/PRISM/Norm91m/04@1662730567169297\n", + " 0.010 210k 11 Loading assets: OREGONSTATE/PRISM/Norm91m/03@1662731338127317\n", + " 0.009 198k 11 Loading assets: OREGONSTATE/PRISM/Norm91m/12@1662731114457874\n", + " 0.009 111k 11 Loading assets: OREGONSTATE/PRISM/Norm91m/09@1662730604988590\n", + " 0.009 209k 11 Loading assets: OREGONSTATE/PRISM/Norm91m/06@1662731651724226\n", + " 0.009 198k 11 Loading assets: OREGONSTATE/PRISM/Norm91m/01@1662731626359925\n", + " 0.009 200k 11 Loading assets: OREGONSTATE/PRISM/Norm91m/08@1662731245955723\n", + " 0.008 109k 11 Loading assets: OREGONSTATE/PRISM/Norm91m/02@1662731486284455\n", + " 0.008 211k 11 Loading assets: OREGONSTATE/PRISM/Norm91m/05@1662731334196830\n", + " 0.008 109k 11 Loading assets: OREGONSTATE/PRISM/Norm91m/10@1662731228874571\n", + " 0.008 202k 11 Loading assets: OREGONSTATE/PRISM/Norm91m/07@1662732032798195\n", + " 0.008 199k 11 Loading assets: OREGONSTATE/PRISM/Norm91m/11@1662731554688435\n", + " 0.006 352 72 Reprojecting pixels from GEOGCS[\"GCS_North_American_1983\",DATUM[\"North_American_Datum_1983\",SPHEROID[...] 
to GEOGCS[\"GCS_North_American_1983\",DATUM[\"North_American_Datum_1983\",SPHEROID[...]\n", + " 0.002 3.2k 13 Algorithm Collection.reduceColumns with reducer Reducer.toList\n", + " 0.001 9.8k 15 Algorithm ImageCollection.toBands\n", + " 0.001 4.0k 15 Algorithm Image.select\n", + " 0.001 5.8k 15 Algorithm Image.rename\n", + " 0.001 3.1k 14 Algorithm ReduceRegions.AggregationContainer\n", + " 0.000 61k 51 Loading assets: OREGONSTATE/PRISM/Norm91m\n", + " 0.000 448 3 Listing collection\n", + " 0.000 112k 3 Computing image mask from geometry\n", + " - 89k 26 Algorithm Collection.reduceColumns\n", + " - 45k 19 Algorithm List.map\n", + " - 30k 5 Loading assets: OREGONSTATE/PRISM/Norm91m/12\n", + " - 30k 5 Loading assets: OREGONSTATE/PRISM/Norm91m/11\n", + " - 30k 5 Loading assets: OREGONSTATE/PRISM/Norm91m/10\n", + " - 30k 5 Loading assets: OREGONSTATE/PRISM/Norm91m/09\n", + " - 30k 5 Loading assets: OREGONSTATE/PRISM/Norm91m/08\n", + " - 30k 5 Loading assets: OREGONSTATE/PRISM/Norm91m/07\n", + " - 30k 5 Loading assets: OREGONSTATE/PRISM/Norm91m/06\n", + " - 30k 5 Loading assets: OREGONSTATE/PRISM/Norm91m/05\n", + " - 30k 5 Loading assets: OREGONSTATE/PRISM/Norm91m/04\n", + " - 30k 5 Loading assets: OREGONSTATE/PRISM/Norm91m/03\n", + " - 30k 5 Loading assets: OREGONSTATE/PRISM/Norm91m/02\n", + " - 30k 5 Loading assets: OREGONSTATE/PRISM/Norm91m/01\n", + " - 27k 14 Algorithm Dictionary.fromLists\n", + " - 23k 40 Algorithm AggregateFeatureCollection.array\n", + " - 11k 15 Algorithm Collection.loadTable\n", + " - 10k 15 Algorithm ImageCollection.load\n", + " - 8.6k 4 Algorithm Projection\n", + " - 8.5k 1 Algorithm (user-defined function)\n", + " - 6.9k 4 Algorithm ReduceRegions.ReduceRegionsEnumerator\n", + " - 5.2k 15 Algorithm Collection.map\n", + " - 3.3k 14 Algorithm Feature.select\n", + " - 3.3k 10 Algorithm If\n", + " - 3.2k 4 Algorithm Reducer.forEach\n", + " - 3.2k 10 Algorithm Number.eq\n", + " - 3.1k 4 Algorithm String.compareTo\n", + " - 3.0k 37 Algorithm 
String\n", + " - 3.0k 10 Algorithm ObjectType\n", + " - 2.9k 20 Loading assets: OREGONSTATE/PRISM\n", + " - 2.8k 20 Loading assets: projects/google\n", + " - 1.8k 1 Algorithm Number.format\n", + " - 600 5 Algorithm Reducer.mean\n", + " - 432 7 Expression evaluation\n", + " - 288 72 Algorithm Image.load computing pixels\n" + ] + } + ], + "source": [ + "with ee.profilePrinting():\n", + " normClim.geetools.byBands(\n", + " regions = ecoregions,\n", + " reducer = \"mean\",\n", + " scale = 500,\n", + " regionId = \"label\",\n", + " bands = [f\"{i:02d}_tmean\" for i in range(1,13)],\n", + " ).getInfo()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This result is extremely useful but cannot be further explored in the notebook." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## geetools profiler\n", + "\n", + "The `geetools` profiler is a context manager object that fills a dictionary member (`profile`) with the content of the string profile. This dictionary can be transformed into a table easily." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'EECU-s': [0.001, None],\n", + " 'PeakMem': [4720, 3200],\n", + " 'Count': [3, 3],\n", + " 'Description': ['(plumbing)', 'Algorithm Number.add']}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# example with a simple function\n", + "with ee.geetools.Profiler() as p:\n", + " ee.Number(3.14).add(0.00159).getInfo()\n", + "p.profile" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With a bigger computation we can load the results into a pandas DataFrame and extract key information." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EECU-sPeakMemCountDescription
00.463590006Algorithm Image.reduceRegions
10.26765200087Loading assets: projects/google/charts_feature...
20.151362000831(plumbing)
30.03059600086no description available
40.01020800011Loading assets: OREGONSTATE/PRISM/Norm91m/06@1...
\n", + "
" + ], + "text/plain": [ + " EECU-s PeakMem Count Description\n", + "0 0.463 59000 6 Algorithm Image.reduceRegions\n", + "1 0.267 652000 87 Loading assets: projects/google/charts_feature...\n", + "2 0.151 362000 831 (plumbing)\n", + "3 0.030 596000 86 no description available\n", + "4 0.010 208000 11 Loading assets: OREGONSTATE/PRISM/Norm91m/06@1..." + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with ee.geetools.Profiler() as p:\n", + " normClim.geetools.byBands(\n", + " regions = ecoregions,\n", + " reducer = \"mean\",\n", + " scale = 500,\n", + " regionId = \"label\",\n", + " bands = [f\"{i:02d}_tmean\" for i in range(1,13)],\n", + " ).getInfo()\n", + "df = pd.DataFrame(p.profile)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.0400000000000003" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# total EECU cost of the computation\n", + "float(df[\"EECU-s\"].sum())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "geetools", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/geetools/__init__.py b/geetools/__init__.py index a8de3e05..9a77e1a3 100644 --- a/geetools/__init__.py +++ b/geetools/__init__.py @@ -1,6 +1,9 @@ """A package to use with Google Earth Engine Python API.""" import ee +# import the accessor namespace +from .accessors import geetools + # it needs to be imported first as it's the mother class 
from . import ComputedObject
@@ -29,7 +32,7 @@
 from .Array import ArrayAccessor
 from .DateRange import DateRangeAccessor
 from .Export import ExportAccessor
-
+from .ee_profiler import Profiler
 
 __title__ = "geetools"
 __summary__ = "A set of useful tools to use with Google Earth Engine Python" "API"
diff --git a/geetools/accessors.py b/geetools/accessors.py
index de2b459d..31a2cce9 100644
--- a/geetools/accessors.py
+++ b/geetools/accessors.py
@@ -3,6 +3,8 @@
 
 from typing import Callable
 
+import ee
+
 
 def register_class_accessor(klass: type, name: str) -> Callable:
     """Create an accessor through the provided namespace to a given class.
@@ -65,3 +67,15 @@ def decorator(accessor: Callable) -> object:
 def _register_extention(obj: object) -> Callable:
     """Add the function to any object."""
     return lambda f: (setattr(obj, f.__name__, f) or f)  # type: ignore
+
+
+# create a geetools namespace that can be used directly on the ee module
+
+
+@_register_extention(ee)
+class geetools:
+    """Namespace class for the geetools library."""
+
+    def __init__(self):
+        """The geetools namespace cannot be instantiated."""
+        raise AttributeError("Cannot instantiate geetools")
diff --git a/geetools/ee_profiler.py b/geetools/ee_profiler.py
new file mode 100644
index 00000000..2016f3a8
--- /dev/null
+++ b/geetools/ee_profiler.py
@@ -0,0 +1,127 @@
+"""A profiler context manager for Earth Engine Python API."""
+from __future__ import annotations
+
+import io
+import re
+import warnings
+
+import ee
+from anyascii import anyascii
+
+from .accessors import _register_extention
+
+
+@_register_extention(ee.geetools)
+class Profiler:
+    """A profiler context manager for Earth Engine Python API.
+
+    The text report of the Earth Engine profiler is captured and parsed into
+    the ``profile`` member, a dictionary of columns that can be handed to pandas.
+
+    Examples:
+        .. code-block:: python
+
+            import ee, geetools
+
+            ee.Initialize()
+
+            with ee.geetools.Profiler() as p:
+                ee.Number(3.14).add(0.00159).getInfo()
+            res = p.profile
+    """
+
+    _MEMORY_EXPONENTS = {"": 0, "k": 3, "M": 6, "G": 9, "T": 12}
+    "The power of ten applied for each unit suffix (no suffix leaves the number unscaled)."
+
+    _output_capture: io.StringIO | None = None
+    "The output of the profiler."
+
+    _profile_context: ee.profilePrinting | None = None
+    "The raw profile context."
+
+    profile: dict | None = None
+    "The profile data as a dictionary."
+
+    def __enter__(self):
+        """Enter the context manager and start capturing the profile report.
+
+        Returns:
+            The profiler itself, so that ``profile`` can be read once the block is over.
+        """
+        self._output_capture = io.StringIO()
+        self._profile_context = ee.profilePrinting(destination=self._output_capture)
+        self._profile_context.__enter__()
+        return self
+
+    def __exit__(self, *args):
+        """Exit the context manager and parse the captured report into ``profile``.
+
+        Args:
+            *args: The exception information, forwarded untouched to the Earth Engine context.
+        """
+        try:
+            self._profile_context.__exit__(*args)
+            profile_output = self._output_capture.getvalue()
+        finally:
+            # the buffer is released even when the wrapped context raises
+            self._output_capture.close()
+
+        # a blank report cannot be parsed: there is not even a header line in it
+        if profile_output.strip():
+            self.profile = self._to_dict(profile_output)
+        else:
+            self.profile = None
+            warnings.warn("No profile output was captured.", stacklevel=2)
+
+    def _memory(self, mem_str: str) -> int:
+        """Transform a memory string to an integer.
+
+        Args:
+            mem_str: A number optionally followed by a unit suffix, e.g. ``"352"`` or ``"3.2k"``.
+
+        Returns:
+            The number scaled by its suffix, e.g. ``352`` and ``3200`` for the examples above.
+
+        Raises:
+            ValueError: If the string does not start with a number.
+        """
+        match = re.match(r"([\d.]+)([kMGT]?)", mem_str)
+        if match is None:
+            raise ValueError(f"Invalid memory string: {mem_str}")
+
+        number, multiplier = float(match.group(1)), match.group(2)
+
+        # round instead of truncating so that float noise cannot shave a unit off
+        return round(number * 10 ** self._MEMORY_EXPONENTS[multiplier])
+
+    def _to_dict(self, input: str) -> dict:
+        """Transform the output of a Earthengine profiler into a dictionary compatible with pandas DataFrame.
+
+        Args:
+            input: The text report, a header line followed by one line per profiled operation.
+
+        Returns:
+            A dictionary using the ASCII version of each header as key and the column values as list.
+        """
+        lines = input.strip().splitlines()
+
+        # First line contains column headers
+        # Initialize a dictionary to hold lists for each column
+        headers = [anyascii(h.strip()) for h in lines[0].split()]
+        result: dict = {header: [] for header in headers}
+
+        # Process each line of data after the header
+        for line in lines[1:]:
+            # Split the line by spaces, considering multiple spaces as a separator
+            parts = line.split()
+            if not parts:
+                continue  # a blank line carries no data
+
+            # Populate the dictionary with values for each column
+            # Missing EECU values are denoted by "-"
+            result[headers[0]].append(float(parts[0]) if parts[0] != "-" else None)  # EECU
+            result[headers[1]].append(self._memory(parts[1]))  # Mem is a string to convert
+            result[headers[2]].append(int(parts[2]))  # Count is an integer
+            result[headers[3]].append(" ".join(parts[3:]))  # Description can have multiple words
+
+        return result
diff --git a/tests/test_Profiler.py b/tests/test_Profiler.py
new file mode 100644
index 00000000..54d1db6a
--- /dev/null
+++ b/tests/test_Profiler.py
@@ -0,0 +1,22 @@
+"""Test the ee_profiler module."""
+import ee
+
+import geetools  # noqa: F401
+
+
+class TestProfiler:
+    """Test the Profiler class."""
+
+    def test_profiler(self):
+        """Test the Profiler class."""
+
+        with ee.geetools.Profiler() as p:
+            ee.Number(3.14).add(0.00159).getInfo()
+        assert list(p.profile) == ["EECU-s", "PeakMem", "Count", "Description"]
+
+    def test_memory(self):
+        """Test the conversion of the memory strings."""
+        profiler = ee.geetools.Profiler()
+        assert profiler._memory("352") == 352
+        assert profiler._memory("3.2k") == 3200
+        assert profiler._memory("1.5M") == 1500000