From f05e82b751d54433099c89c9ce7d68c629c592dc Mon Sep 17 00:00:00 2001 From: Xee authors Date: Mon, 8 Jan 2024 20:57:42 -0800 Subject: [PATCH] Efficient way to convert a structured numpy array into a 3d array. This CL uses a more efficient way to convert the numpy structured array, removing the need to copy the array into memory as a list. Using `.view()` doesn't seem to change the data buffer and should be more memory efficient. Also adds checks for requested vs. returned data types from EE: the code tries to use the requested type, and if that conversion is not possible it warns the user and falls back to the type returned by EE. Closes #9 PiperOrigin-RevId: 596786971 --- docs/conf.py | 2 +- xee/ext.py | 29 +++++++++++++++++++++++++---- xee/ext_test.py | 2 -- xee/types.py | 2 +- 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index e8b1df8..990cb51 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,8 +16,8 @@ # Print Python environment info for easier debugging on ReadTheDocs -import sys import subprocess +import sys import xee # verify this works print('python exec:', sys.executable) diff --git a/xee/ext.py b/xee/ext.py index eaab42b..36a5316 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -25,7 +25,7 @@ import math import os import sys -from typing import Any, Dict, List, Iterable, Literal, Optional, Tuple, Union +from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, Union from urllib import parse import warnings @@ -464,9 +464,30 @@ def image_to_array( pixels_getter, params, catch=ee.ee_exception.EEException ) - # TODO(#9): Find a way to make this more efficient. This is needed because - # `raw` is a structured array of all the same dtype (i.e. number of images). - arr = np.array(raw.tolist(), dtype=dtype) + # Extract out the shape information from EE response. 
+ y_size, x_size = raw.shape + n_bands = len(raw.dtype) + + # Get a view (no copy) of the data as the returned type from EE + # then reshape to the correct shape based on the request. + # This is needed because `raw` is a structured array of all the same dtype + # (i.e. number of images) and this converts it to an ndarray. + arr = raw.view(raw.dtype[0]).reshape( + y_size, + x_size, + n_bands, + ) + + # try converting the data to desired dtype in place without copying + # if conversion is not allowed then just use the EE returned dtype + try: + arr = arr.astype(dtype, copy=False) + except ValueError: + warnings.warn( + f'Could convert EE results to requested dtype {dtype} ' + f'falling back to returned dtype from EE {np.dtype(raw.dtype[0])}' + ) + data = arr.T current_mask_value = np.array(self.mask_value, dtype=data.dtype) # Sets EE nodata masked value to NaNs. diff --git a/xee/ext_test.py b/xee/ext_test.py index 74b47f4..ae732c8 100644 --- a/xee/ext_test.py +++ b/xee/ext_test.py @@ -2,10 +2,8 @@ from absl.testing import absltest from absl.testing import parameterized - import numpy as np import xee - from xee import ext diff --git a/xee/types.py b/xee/types.py index a86e0f6..34ad79a 100644 --- a/xee/types.py +++ b/xee/types.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================== """Type definitions for Earth Engine concepts (and others).""" -from typing import Dict, List, Tuple, Union, TypedDict +from typing import Dict, List, Tuple, TypedDict, Union TileIndex = Tuple[int, int, int] # x_min, y_min, x_max, y_max