Skip to content

Commit

Permalink
Efficient way to convert a structured numpy array into a 3d array.
Browse files Browse the repository at this point in the history
This CL converts the NumPy structured array in a more efficient way, removing the need to copy the array into memory as a list. Using `.view()` does not appear to change the underlying data buffer, so it should be more memory efficient.

Also adds checks for the requested vs. returned data types from EE. The code first tries to use the requested type; if that conversion is not possible, it warns the user and falls back to the type returned by EE.

Closes #9

PiperOrigin-RevId: 587594128
  • Loading branch information
Xee authors committed Jan 9, 2024
1 parent cdd0365 commit 4ec7f29
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 8 deletions.
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

# Print Python environment info for easier debugging on ReadTheDocs

import sys
import subprocess
import sys
import xee # verify this works

print('python exec:', sys.executable)
Expand Down
29 changes: 25 additions & 4 deletions xee/ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import math
import os
import sys
from typing import Any, Dict, List, Iterable, Literal, Optional, Tuple, Union
from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, Union
from urllib import parse
import warnings

Expand Down Expand Up @@ -464,9 +464,30 @@ def image_to_array(
pixels_getter, params, catch=ee.ee_exception.EEException
)

# TODO(#9): Find a way to make this more efficient. This is needed because
# `raw` is a structured array of all the same dtype (i.e. number of images).
arr = np.array(raw.tolist(), dtype=dtype)
# Extract out the shape information from EE response.
y_size, x_size = raw.shape
n_bands = len(raw.dtype)

# Get a view (no copy) of the data as the returned type from EE
# then reshape to the correct shape based on the request.
# This is needed because `raw` is a structured array of all the same dtype
# (i.e. number of images) and this converts it to an ndarray.
arr = raw.view(raw.dtype[0]).reshape(
y_size,
x_size,
n_bands,
)

# try converting the data to desired dtype in place without copying
# if conversion is not allowed then just use the EE returned dtype
try:
arr = arr.astype(dtype, copy=False)
except ValueError:
warnings.warn(
f'Could convert EE results to requested dtype {dtype} '
f'falling back to returned dtype from EE {np.dtype(raw.dtype[0])}'
)

data = arr.T
current_mask_value = np.array(self.mask_value, dtype=data.dtype)
# Sets EE nodata masked value to NaNs.
Expand Down
2 changes: 0 additions & 2 deletions xee/ext_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@

from absl.testing import absltest
from absl.testing import parameterized

import numpy as np
import xee

from xee import ext


Expand Down
2 changes: 1 addition & 1 deletion xee/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================
"""Type definitions for Earth Engine concepts (and others)."""
from typing import Dict, List, Tuple, Union, TypedDict
from typing import Dict, List, Tuple, TypedDict, Union

TileIndex = Tuple[int, int, int]
# x_min, y_min, x_max, y_max
Expand Down

0 comments on commit 4ec7f29

Please sign in to comment.