From 80bd022bac1692e8e4cefb7d496c2920e88903e3 Mon Sep 17 00:00:00 2001 From: Arun Sathiya Date: Sat, 20 Jan 2024 09:49:44 -0800 Subject: [PATCH 01/51] ci: Use GITHUB_OUTPUT envvar instead of set-output command `save-state` and `set-output` commands used in GitHub Actions are deprecated and [GitHub recommends using environment files](https://github.blog/changelog/2023-07-24-github-actions-update-on-save-state-and-set-output-commands/). This PR updates the usage of `::set-output` to `"$GITHUB_OUTPUT"` Instructions for envvar usage from GitHub docs: https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-an-output-parameter Signed-off-by: Arun --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index b3c95f6..07e13a9 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -46,7 +46,7 @@ jobs: id: pip-cache run: | python -m pip install --upgrade pip wheel - echo "::set-output name=dir::$(pip cache dir)" + echo "dir=$(pip cache dir)" >> "$GITHUB_OUTPUT" - name: pip cache uses: actions/cache@v2 with: From 0b78180b3077028a38937ce32853de98eec57530 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Tue, 13 Feb 2024 02:57:42 +0000 Subject: [PATCH 02/51] add option for using double precision coords --- xee/ext.py | 14 ++++++++-- xee/ext_integration_test.py | 54 +++++++++++++++++++++++++++++++------ 2 files changed, 58 insertions(+), 10 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 3929b54..260ccfe 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -146,6 +146,7 @@ def open( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, + use_coords_double_precision: bool = False ) -> 'EarthEngineStore': if mode != 'r': raise ValueError( @@ -166,6 +167,7 @@ def open( request_byte_limit=request_byte_limit, ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, + use_coords_double_precision=use_coords_double_precision ) def __init__( @@ -183,6 +185,7 @@ def __init__( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, + use_coords_double_precision: bool = False ): self.ee_init_kwargs = ee_init_kwargs self.ee_init_if_necessary = ee_init_if_necessary @@ -195,6 +198,7 @@ def __init__( self.geometry = geometry self.primary_dim_name = primary_dim_name or 'time' self.primary_dim_property = primary_dim_property or 'system:time_start' + self.use_coords_double_precision = use_coords_double_precision self.n_images = self.get_info['size'] self._props = self.get_info['props'] @@ -581,8 +585,9 @@ def _get_tile_from_ee( else (0, tile_coords_start, 1, tile_coords_end) ) target_image = ee.Image.pixelCoordinates(ee.Projection(self.crs_arg)) + dtype = np.float64 if self.use_coords_double_precision else np.float32 return tile_index, self.image_to_array( - target_image, grid=bbox, dtype=np.float32, bandIds=[band_id] + target_image, grid=bbox, dtype=dtype, bandIds=[band_id] ) def _process_coordinate_data( @@ -689,7 +694,7 @@ def _parse_dtype(data_type: types.DataType): def _ee_bounds_to_bounds(bounds: ee.Bounds) -> types.Bounds: - coords = np.array(bounds['coordinates'], dtype=np.float32)[0] + coords = np.array(bounds['coordinates'], dtype=np.float64)[0] x_min, y_min, x_max, y_max = ( min(coords[:, 0]), min(coords[:, 1]), @@ -951,6 +956,7 @@ def open_dataset( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_if_necessary: bool = False, ee_init_kwargs: Optional[Dict[str, Any]] = None, + use_coords_double_precision: bool = False, ) -> xarray.Dataset: # type: ignore """Open an Earth Engine ImageCollection as an Xarray Dataset. @@ -1020,6 +1026,9 @@ def open_dataset( frameworks. ee_init_kwargs: keywords to pass to Earth Engine Initialize when attempting to auto init for remote workers. + use_coords_double_precision: Whether to use double precision for coordinates + and bounds from provided geometry. False by default, but True may be + helpful when hoping to match a transform of an existing dataset. Returns: An xarray.Dataset that streams in remote data from Earth Engine. @@ -1049,6 +1058,7 @@ def open_dataset( request_byte_limit=request_byte_limit, ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, + use_coords_double_precision=use_coords_double_precision ) store_entrypoint = backends_store.StoreBackendEntrypoint() diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index d1eafc0..9eff73b 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -19,7 +19,7 @@ import tempfile from absl.testing import absltest -from google.auth import identity_pool +import google.auth import numpy as np import xarray as xr from xarray.core import indexing @@ -41,17 +41,14 @@ ] -def _read_identity_pool_creds() -> identity_pool.Credentials: - credentials_path = os.environ[_CREDENTIALS_PATH_KEY] - with open(credentials_path) as file: - json_file = json.load(file) - credentials = identity_pool.Credentials.from_info(json_file) - return credentials.with_scopes(_SCOPES) +def _read_default_creds(): + credentials, _ = google.auth.default(scopes=_SCOPES) + return credentials def init_ee_for_tests(): ee.Initialize( - credentials=_read_identity_pool_creds(), + credentials=_read_default_creds(), opt_url=ee.data.HIGH_VOLUME_API_BASE_URL, ) @@ -358,6 +355,47 @@ def test_honors_projection(self): self.assertEqual(ds.dims, {'time': 4248, 'lon': 3600, 'lat': 1800}) self.assertNotEqual(ds.dims, standard_ds.dims) + @absltest.skipIf(_SKIP_RASTERIO_TESTS, 'rioxarray module not loaded') + def test_honors_transform_precisely(self): + data = np.empty((162, 120), dtype=np.float32) + # An example of a double precision bbox + bbox = ( + -53.94158617595226, + -12.078281822698678, + -53.67209159071253, + -11.714464132625046, + ) + x_res = (bbox[2] - bbox[0]) / data.shape[1] + y_res = (bbox[3] - bbox[1]) / data.shape[0] + raster = xr.DataArray( + data, + coords={ + 'y': np.linspace(bbox[3], bbox[1] + x_res, data.shape[0]), + 'x': np.linspace(bbox[0], bbox[2] - y_res, data.shape[1]), + }, + dims=('y', 'x'), + ) + + geo = ee.Geometry.Rectangle(*raster.rio.bounds()) + ic = ( + ee.ImageCollection('UCSB-CHG/CHIRPS/DAILY') + .filterDate(ee.DateRange('2014-01-01', '2014-01-02')) + .select('precipitation') + ) + xee_dataset = xr.open_dataset( + ee.ImageCollection(ic), + engine='ee', + geometry=geo, + scale=raster.rio.resolution()[0], + crs='EPSG:4326', + use_coords_double_precision=True, + ).rename({'lon': 'x', 'lat': 'y'}) + np.testing.assert_almost_equal( + np.array(xee_dataset.rio.transform()), + np.array(raster.rio.transform()), + decimal=13, + ) + def test_parses_ee_url(self): ds = self.entry.open_dataset( 'ee://LANDSAT/LC08/C01/T1', From 6983737a56092ab68b8db4ed65e60ad68dda65c4 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Tue, 13 Feb 2024 03:53:20 +0000 Subject: [PATCH 03/51] add match xarray support --- xee/ext.py | 34 ++++++++++++++++++++++++++----- xee/ext_integration_test.py | 40 +++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 5 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 260ccfe..b8bc0b5 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -40,6 +40,7 @@ from xarray.backends import store as backends_store from xarray.core import indexing from xarray.core import utils +import xarray as xr from xee import types import ee @@ -146,7 +147,9 @@ def open( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, - use_coords_double_precision: bool = False + use_coords_double_precision: bool = False, + match_xarray: xarray.DataArray | xarray.Dataset | None = None + ) -> 'EarthEngineStore': if mode != 'r': raise ValueError( @@ -167,7 +170,8 @@ def open( request_byte_limit=request_byte_limit, ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, - use_coords_double_precision=use_coords_double_precision + use_coords_double_precision=use_coords_double_precision, + match_xarray=match_xarray ) def __init__( @@ -185,7 +189,8 @@ def __init__( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, - use_coords_double_precision: bool = False + use_coords_double_precision: bool = False, + match_xarray: xr.DataArray | xr.Dataset | None = None ): self.ee_init_kwargs = ee_init_kwargs self.ee_init_if_necessary = ee_init_if_necessary @@ -209,12 +214,20 @@ def __init__( self.crs_arg = crs or proj.get('crs', proj.get('wkt', 'EPSG:4326')) self.crs = CRS(self.crs_arg) + if match_xarray is not None: + if match_xarray.rio.crs is None: + raise ValueError("If matching to xarray, we require `.rio.crs` is set.") + self.crs = CRS(match_xarray.rio.crs) + if match_xarray[match_xarray.rio.x_dim].dtype == np.float64: + self.use_coords_double_precision = True # Gets the unit i.e. meter, degree etc. self.scale_units = self.crs.axis_info[0].unit_name # Get the dimensions name based on the CRS (scale units). self.dimension_names = self.DIMENSION_NAMES.get( self.scale_units, ('X', 'Y') ) + if match_xarray is not None: + self.dimension_names = (match_xarray.rio.x_dim, match_xarray.rio.y_dim) x_dim_name, y_dim_name = self.dimension_names self._props.update( coordinates=f'{self.primary_dim_name} {x_dim_name} {y_dim_name}', @@ -227,11 +240,14 @@ def __init__( if scale is None: scale = default_scale default_transform = affine.Affine.scale(scale, -1 * scale) - transform = affine.Affine(*proj.get('transform', default_transform)[:6]) self.scale_x, self.scale_y = transform.a, transform.e self.scale = np.sqrt(np.abs(transform.determinant)) + if match_xarray is not None: + self.scale_x, self.scale_y = match_xarray.rio.resolution() + self.scale = np.sqrt(np.abs(self.scale_x * self.scale_y)) + # Parse the dataset bounds from the native projection (either from the CRS # or the image geometry) and translate it to the representation that will be # used for all internal `computePixels()` calls. @@ -252,6 +268,8 @@ def __init__( x_min, y_min = self.transform(x_min_0, y_min_0) x_max, y_max = self.transform(x_max_0, y_max_0) self.bounds = x_min, y_min, x_max, y_max + if match_xarray is not None: + self.bounds = match_xarray.rio.bounds() max_dtype = self._max_itemsize() @@ -957,6 +975,7 @@ def open_dataset( ee_init_if_necessary: bool = False, ee_init_kwargs: Optional[Dict[str, Any]] = None, use_coords_double_precision: bool = False, + match_xarray: xarray.DataArray | xarray.Dataset | None = None, ) -> xarray.Dataset: # type: ignore """Open an Earth Engine ImageCollection as an Xarray Dataset. @@ -1029,6 +1048,10 @@ def open_dataset( use_coords_double_precision: Whether to use double precision for coordinates and bounds from provided geometry. False by default, but True may be helpful when hoping to match a transform of an existing dataset. + match_xarray: An xarray.DataArray or xarray.Dataset to use as a template + with rioxarray-based schema to extract the crs and transform to specify + the spatial extent and crs of the output dataset. Using this arg requires + that rioxarray is installed. Returns: An xarray.Dataset that streams in remote data from Earth Engine. @@ -1058,7 +1081,8 @@ def open_dataset( request_byte_limit=request_byte_limit, ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, - use_coords_double_precision=use_coords_double_precision + use_coords_double_precision=use_coords_double_precision, + match_xarray=match_xarray ) store_entrypoint = backends_store.StoreBackendEntrypoint() diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 9eff73b..41fed4b 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -390,11 +390,51 @@ def test_honors_transform_precisely(self): crs='EPSG:4326', use_coords_double_precision=True, ).rename({'lon': 'x', 'lat': 'y'}) + # This is off slightly due to bounds determined by geometry e.g. .getInfo() + # seems to cause a super slight shift in the bounds. Thhe coords change before + # and after the call to .getInfo()! np.testing.assert_almost_equal( np.array(xee_dataset.rio.transform()), np.array(raster.rio.transform()), decimal=13, ) + + @absltest.skipIf(_SKIP_RASTERIO_TESTS, 'rioxarray module not loaded') + def test_match_xarray(self): + data = np.empty((162, 120), dtype=np.float32) + # An example of a double precision bbox + bbox = ( + -53.94158617595226, + -12.078281822698678, + -53.67209159071253, + -11.714464132625046, + ) + x_res = (bbox[2] - bbox[0]) / data.shape[1] + y_res = (bbox[3] - bbox[1]) / data.shape[0] + raster = xr.DataArray( + data, + coords={ + 'y': np.linspace(bbox[3], bbox[1] + x_res, data.shape[0]), + 'x': np.linspace(bbox[0], bbox[2] - y_res, data.shape[1]), + }, + dims=('y', 'x'), + ) + raster.rio.write_crs('EPSG:4326', inplace=True) + ic = ( + ee.ImageCollection('UCSB-CHG/CHIRPS/DAILY') + .filterDate(ee.DateRange('2014-01-01', '2014-01-02')) + .select('precipitation') + ) + xee_dataset = xr.open_dataset( + ee.ImageCollection(ic), + engine='ee', + scale=raster.rio.resolution()[0], + match_xarray=raster, + ) + np.testing.assert_equal( + np.array(xee_dataset.rio.transform()), + np.array(raster.rio.transform()), + ) def test_parses_ee_url(self): ds = self.entry.open_dataset( From 7efe99233f52d916e75f3fe7a0c990aeb039106c Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Tue, 13 Feb 2024 04:05:43 +0000 Subject: [PATCH 04/51] run pyint --- xee/ext.py | 13 ++++++------- xee/ext_integration_test.py | 4 ++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index b8bc0b5..3afe2c7 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -148,8 +148,7 @@ def open( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, use_coords_double_precision: bool = False, - match_xarray: xarray.DataArray | xarray.Dataset | None = None - + match_xarray: xarray.DataArray | xarray.Dataset | None = None, ) -> 'EarthEngineStore': if mode != 'r': raise ValueError( @@ -171,7 +170,7 @@ def open( ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, use_coords_double_precision=use_coords_double_precision, - match_xarray=match_xarray + match_xarray=match_xarray, ) def __init__( @@ -190,7 +189,7 @@ def __init__( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, use_coords_double_precision: bool = False, - match_xarray: xr.DataArray | xr.Dataset | None = None + match_xarray: xr.DataArray | xr.Dataset | None = None, ): self.ee_init_kwargs = ee_init_kwargs self.ee_init_if_necessary = ee_init_if_necessary @@ -216,7 +215,7 @@ def __init__( self.crs = CRS(self.crs_arg) if match_xarray is not None: if match_xarray.rio.crs is None: - raise ValueError("If matching to xarray, we require `.rio.crs` is set.") + raise ValueError('If matching to xarray, we require `.rio.crs` is set.') self.crs = CRS(match_xarray.rio.crs) if match_xarray[match_xarray.rio.x_dim].dtype == np.float64: self.use_coords_double_precision = True @@ -1046,7 +1045,7 @@ def open_dataset( ee_init_kwargs: keywords to pass to Earth Engine Initialize when attempting to auto init for remote workers. use_coords_double_precision: Whether to use double precision for coordinates - and bounds from provided geometry. False by default, but True may be + and bounds from provided geometry. False by default, but True may be helpful when hoping to match a transform of an existing dataset. match_xarray: An xarray.DataArray or xarray.Dataset to use as a template with rioxarray-based schema to extract the crs and transform to specify @@ -1082,7 +1081,7 @@ def open_dataset( ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, use_coords_double_precision=use_coords_double_precision, - match_xarray=match_xarray + match_xarray=match_xarray, ) store_entrypoint = backends_store.StoreBackendEntrypoint() diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 41fed4b..a24bb61 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -390,7 +390,7 @@ def test_honors_transform_precisely(self): crs='EPSG:4326', use_coords_double_precision=True, ).rename({'lon': 'x', 'lat': 'y'}) - # This is off slightly due to bounds determined by geometry e.g. .getInfo() + # This is off slightly due to bounds determined by geometry e.g. .getInfo() # seems to cause a super slight shift in the bounds. Thhe coords change before # and after the call to .getInfo()! np.testing.assert_almost_equal( @@ -398,7 +398,7 @@ def test_honors_transform_precisely(self): np.array(raster.rio.transform()), decimal=13, ) - + @absltest.skipIf(_SKIP_RASTERIO_TESTS, 'rioxarray module not loaded') def test_match_xarray(self): data = np.empty((162, 120), dtype=np.float32) From 43d306f005fd0011d8b71ddaa824ef9036934025 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Tue, 13 Feb 2024 12:00:05 +0000 Subject: [PATCH 05/51] Convert value outside of ee.Image boundary to NaN. --- xee/ext.py | 2 +- xee/ext_integration_test.py | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 3929b54..a87f5f2 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -463,7 +463,7 @@ def image_to_array( Returns: An numpy array containing the pixels computed based on the given image. """ - image = image.unmask(self.mask_value) + image = ee.Image(self.mask_value).rename(image.bandNames().getInfo()[0]).blend(image) params = { 'expression': image, 'fileFormat': 'NUMPY_NDARRAY', diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index d1eafc0..d751f04 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -99,8 +99,8 @@ def test_can_create_object(self): def test_basic_indexing(self): arr = xee.EarthEngineBackendArray('B4', self.store) - self.assertEqual(arr[indexing.BasicIndexer((0, 0, 0))], 0) - self.assertEqual(arr[indexing.BasicIndexer((-1, -1, -1))], np.array([0])) + self.assertEqual(np.isnan(arr[indexing.BasicIndexer((0, 0, 0))]), np.isnan(np.array(np.NaN))) + self.assertEqual(np.isnan(arr[indexing.BasicIndexer((-1, -1, -1))]), np.isnan(np.array([np.NaN]))) def test_basic_indexing__nonzero(self): arr = xee.EarthEngineBackendArray('longitude', self.lnglat_store) @@ -114,22 +114,22 @@ def test_basic_indexing__nonzero(self): def test_basic_indexing_multiple_images(self): arr = xee.EarthEngineBackendArray('B4', self.store) first_two = arr[indexing.BasicIndexer((slice(0, 2), 0, 0))] - self.assertTrue(np.allclose(first_two, np.array([0, 0]))) + self.assertTrue(np.allclose(np.isnan(first_two), np.isnan(np.full(2, np.nan)))) first_three = arr[indexing.BasicIndexer((slice(0, 3), 0, 0))] - self.assertTrue(np.allclose(first_three, np.array([0, 0, 0]))) + self.assertTrue(np.allclose(np.isnan(first_three), np.isnan(np.full(3, np.nan)))) last_two = arr[indexing.BasicIndexer((slice(-3, -1), 0, 0))] - self.assertTrue(np.allclose(last_two, np.array([0, 0]))) + self.assertTrue(np.allclose(np.isnan(last_two), np.isnan(np.full(2, np.nan)))) last_three = arr[indexing.BasicIndexer((slice(-4, -1), 0, 0))] - self.assertTrue(np.allclose(last_three, np.array([0, 0, 0]))) + self.assertTrue(np.allclose(np.isnan(last_three), np.isnan(np.full(3, np.nan)))) def test_slice_indexing(self): arr = xee.EarthEngineBackendArray('B5', self.store) first_10 = indexing.BasicIndexer((0, slice(0, 10), slice(0, 10))) - self.assertTrue(np.allclose(arr[first_10], np.zeros((10, 10)))) + self.assertTrue(np.allclose(np.isnan(arr[first_10]), np.isnan(np.full((10, 10), np.nan)))) last_5 = indexing.BasicIndexer((0, slice(-5, -1), slice(-5, -1))) - expected_last_5 = np.zeros((4, 4)) + expected_last_5 = np.full((4, 4), np.nan) self.assertTrue( - np.allclose(expected_last_5, arr[last_5]), f'Actual:\n{arr[last_5]}' + np.allclose(np.isnan(expected_last_5), np.isnan(arr[last_5])), f'Actual:\n{arr[last_5]}' ) def test_slice_indexing__non_global(self): @@ -189,13 +189,13 @@ def test_keys_to_slices(self): def test_slice_indexing_multiple_images(self): arr = xee.EarthEngineBackendArray('B5', self.store) first_10 = indexing.BasicIndexer((slice(0, 2), slice(0, 10), slice(0, 10))) - self.assertTrue(np.allclose(arr[first_10], np.zeros((2, 10, 10)))) + self.assertTrue(np.allclose(np.isnan(arr[first_10]), np.isnan(np.full((2, 10, 10), np.nan)))) last_5 = indexing.BasicIndexer( (slice(-3, -1), slice(-5, -1), slice(-5, -1)) ) - expected_last_5 = np.zeros((2, 4, 4)) + expected_last_5 = np.full((2, 4, 4), np.nan) self.assertTrue( - np.allclose(expected_last_5, arr[last_5]), f'Actual:\n{arr[last_5]}' + np.allclose(np.isnan(expected_last_5), np.isnan(arr[last_5])), f'Actual:\n{arr[last_5]}' ) def test_slice_indexing__medium(self): From acb0c836942ac98acb0117899905fbf255a57281 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Tue, 13 Feb 2024 12:11:13 +0000 Subject: [PATCH 06/51] linting fixed. --- xee/ext.py | 6 ++++- xee/ext_integration_test.py | 44 ++++++++++++++++++++++++++++--------- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index a87f5f2..fff719e 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -463,7 +463,11 @@ def image_to_array( Returns: An numpy array containing the pixels computed based on the given image. """ - image = ee.Image(self.mask_value).rename(image.bandNames().getInfo()[0]).blend(image) + image = ( + ee.Image(self.mask_value) + .rename(image.bandNames().getInfo()[0]) + .blend(image) + ) params = { 'expression': image, 'fileFormat': 'NUMPY_NDARRAY', diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index d751f04..cf11661 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -99,8 +99,14 @@ def test_can_create_object(self): def test_basic_indexing(self): arr = xee.EarthEngineBackendArray('B4', self.store) - self.assertEqual(np.isnan(arr[indexing.BasicIndexer((0, 0, 0))]), np.isnan(np.array(np.NaN))) - self.assertEqual(np.isnan(arr[indexing.BasicIndexer((-1, -1, -1))]), np.isnan(np.array([np.NaN]))) + self.assertEqual( + np.isnan(arr[indexing.BasicIndexer((0, 0, 0))]), + np.isnan(np.array(np.NaN)), + ) + self.assertEqual( + np.isnan(arr[indexing.BasicIndexer((-1, -1, -1))]), + np.isnan(np.array([np.NaN])), + ) def test_basic_indexing__nonzero(self): arr = xee.EarthEngineBackendArray('longitude', self.lnglat_store) @@ -114,22 +120,35 @@ def test_basic_indexing__nonzero(self): def test_basic_indexing_multiple_images(self): arr = xee.EarthEngineBackendArray('B4', self.store) first_two = arr[indexing.BasicIndexer((slice(0, 2), 0, 0))] - self.assertTrue(np.allclose(np.isnan(first_two), np.isnan(np.full(2, np.nan)))) + self.assertTrue( + np.allclose(np.isnan(first_two), np.isnan(np.full(2, np.nan))) + ) first_three = arr[indexing.BasicIndexer((slice(0, 3), 0, 0))] - self.assertTrue(np.allclose(np.isnan(first_three), np.isnan(np.full(3, np.nan)))) + self.assertTrue( + np.allclose(np.isnan(first_three), np.isnan(np.full(3, np.nan))) + ) last_two = arr[indexing.BasicIndexer((slice(-3, -1), 0, 0))] - self.assertTrue(np.allclose(np.isnan(last_two), np.isnan(np.full(2, np.nan)))) + self.assertTrue( + np.allclose(np.isnan(last_two), np.isnan(np.full(2, np.nan))) + ) last_three = arr[indexing.BasicIndexer((slice(-4, -1), 0, 0))] - self.assertTrue(np.allclose(np.isnan(last_three), np.isnan(np.full(3, np.nan)))) + self.assertTrue( + np.allclose(np.isnan(last_three), np.isnan(np.full(3, np.nan))) + ) def test_slice_indexing(self): arr = xee.EarthEngineBackendArray('B5', self.store) first_10 = indexing.BasicIndexer((0, slice(0, 10), slice(0, 10))) - self.assertTrue(np.allclose(np.isnan(arr[first_10]), np.isnan(np.full((10, 10), np.nan)))) + self.assertTrue( + np.allclose( + np.isnan(arr[first_10]), np.isnan(np.full((10, 10), np.nan)) + ) + ) last_5 = indexing.BasicIndexer((0, slice(-5, -1), slice(-5, -1))) expected_last_5 = np.full((4, 4), np.nan) self.assertTrue( - np.allclose(np.isnan(expected_last_5), np.isnan(arr[last_5])), f'Actual:\n{arr[last_5]}' + np.allclose(np.isnan(expected_last_5), np.isnan(arr[last_5])), + f'Actual:\n{arr[last_5]}', ) def test_slice_indexing__non_global(self): @@ -189,13 +208,18 @@ def test_keys_to_slices(self): def test_slice_indexing_multiple_images(self): arr = xee.EarthEngineBackendArray('B5', self.store) first_10 = indexing.BasicIndexer((slice(0, 2), slice(0, 10), slice(0, 10))) - self.assertTrue(np.allclose(np.isnan(arr[first_10]), np.isnan(np.full((2, 10, 10), np.nan)))) + self.assertTrue( + np.allclose( + np.isnan(arr[first_10]), np.isnan(np.full((2, 10, 10), np.nan)) + ) + ) last_5 = indexing.BasicIndexer( (slice(-3, -1), slice(-5, -1), slice(-5, -1)) ) expected_last_5 = np.full((2, 4, 4), np.nan) self.assertTrue( - np.allclose(np.isnan(expected_last_5), np.isnan(arr[last_5])), f'Actual:\n{arr[last_5]}' + np.allclose(np.isnan(expected_last_5), np.isnan(arr[last_5])), + f'Actual:\n{arr[last_5]}', ) def test_slice_indexing__medium(self): From 306b01f37044f95d6d2bc6c156d09761f91e1688 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Wed, 14 Feb 2024 09:31:48 +0000 Subject: [PATCH 07/51] use equal_nan = True instead of np.isnan(). --- xee/ext_integration_test.py | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index cf11661..8f91c46 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -120,34 +120,26 @@ def test_basic_indexing__nonzero(self): def test_basic_indexing_multiple_images(self): arr = xee.EarthEngineBackendArray('B4', self.store) first_two = arr[indexing.BasicIndexer((slice(0, 2), 0, 0))] - self.assertTrue( - np.allclose(np.isnan(first_two), np.isnan(np.full(2, np.nan))) - ) + self.assertTrue(np.allclose(first_two, np.full(2, np.nan), equal_nan=True)) first_three = arr[indexing.BasicIndexer((slice(0, 3), 0, 0))] self.assertTrue( - np.allclose(np.isnan(first_three), np.isnan(np.full(3, np.nan))) + np.allclose(first_three, np.full(3, np.nan), equal_nan=True) ) last_two = arr[indexing.BasicIndexer((slice(-3, -1), 0, 0))] - self.assertTrue( - np.allclose(np.isnan(last_two), np.isnan(np.full(2, np.nan))) - ) + self.assertTrue(np.allclose(last_two, np.full(2, np.nan), equal_nan=True)) last_three = arr[indexing.BasicIndexer((slice(-4, -1), 0, 0))] - self.assertTrue( - np.allclose(np.isnan(last_three), np.isnan(np.full(3, np.nan))) - ) + self.assertTrue(np.allclose(last_three, np.full(3, np.nan), equal_nan=True)) def test_slice_indexing(self): arr = xee.EarthEngineBackendArray('B5', self.store) first_10 = indexing.BasicIndexer((0, slice(0, 10), slice(0, 10))) self.assertTrue( - np.allclose( - np.isnan(arr[first_10]), np.isnan(np.full((10, 10), np.nan)) - ) + np.allclose(arr[first_10], np.full((10, 10), np.nan), equal_nan=True) ) last_5 = indexing.BasicIndexer((0, slice(-5, -1), slice(-5, -1))) expected_last_5 = np.full((4, 4), np.nan) self.assertTrue( - np.allclose(np.isnan(expected_last_5), np.isnan(arr[last_5])), + np.allclose(expected_last_5, arr[last_5], equal_nan=True), f'Actual:\n{arr[last_5]}', ) @@ -209,16 +201,14 @@ def test_slice_indexing_multiple_images(self): arr = xee.EarthEngineBackendArray('B5', self.store) first_10 = indexing.BasicIndexer((slice(0, 2), slice(0, 10), slice(0, 10))) self.assertTrue( - np.allclose( - np.isnan(arr[first_10]), np.isnan(np.full((2, 10, 10), np.nan)) - ) + np.allclose(arr[first_10], np.full((2, 10, 10), np.nan), equal_nan=True) ) last_5 = indexing.BasicIndexer( (slice(-3, -1), slice(-5, -1), slice(-5, -1)) ) expected_last_5 = np.full((2, 4, 4), np.nan) self.assertTrue( - np.allclose(np.isnan(expected_last_5), np.isnan(arr[last_5])), + np.allclose(expected_last_5, arr[last_5], equal_nan=True), f'Actual:\n{arr[last_5]}', ) From 7f744923ae3cb2b7acb6cc4099aee761900ed33d Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Wed, 14 Feb 2024 14:10:06 +0000 Subject: [PATCH 08/51] Instead of geometry.bounds() use geometry only. --- xee/ext.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index fff719e..f4d6118 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -279,9 +279,9 @@ def get_info(self) -> Dict[str, Any]: rpcs.append(('projection', self.projection)) if isinstance(self.geometry, ee.Geometry): - rpcs.append(('bounds', self.geometry.bounds())) + rpcs.append(('bounds', self.geometry)) else: - rpcs.append(('bounds', self.image_collection.first().geometry().bounds())) + rpcs.append(('bounds', self.image_collection.first().geometry())) # TODO(#29, #30): This RPC call takes the longest time to compute. This # requires a full scan of the images in the collection, which happens on the From 3cc7e4625989a4d94a82e2c1ec5a9b0dadaa1938 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Wed, 14 Feb 2024 15:02:33 +0000 Subject: [PATCH 09/51] integration test is updated with the ee.geometry. --- xee/ext_integration_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 8f91c46..616b92a 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -352,7 +352,7 @@ def test_honors_geometry(self): engine=xee.EarthEngineBackendEntrypoint, ) - self.assertEqual(ds.dims, {'time': 4248, 'lon': 40, 'lat': 35}) + self.assertEqual(ds.dims, {'time': 4248, 'lon': 40, 'lat': 33}) self.assertNotEqual(ds.dims, standard_ds.dims) def test_honors_projection(self): From f85f7c2841466830cb8fad9839f0d94846a2057e Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Wed, 14 Feb 2024 17:39:43 +0000 Subject: [PATCH 10/51] getInfo() removed from the rename call. --- xee/ext.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xee/ext.py b/xee/ext.py index f4d6118..3ef71d9 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -465,7 +465,7 @@ def image_to_array( """ image = ( ee.Image(self.mask_value) - .rename(image.bandNames().getInfo()[0]) + .rename([image.bandNames().get(0)]) .blend(image) ) params = { From e891f0aefb984f4dac2bc84d4dc141fe6da4da34 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Wed, 14 Feb 2024 18:51:14 +0000 Subject: [PATCH 11/51] revert any credential changes --- xee/ext_integration_test.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index a24bb61..fdb10f7 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -19,7 +19,7 @@ import tempfile from absl.testing import absltest -import google.auth +from google.auth import identity_pool import numpy as np import xarray as xr from xarray.core import indexing @@ -41,16 +41,19 @@ ] -def _read_default_creds(): - credentials, _ = google.auth.default(scopes=_SCOPES) - return credentials +def _read_identity_pool_creds() -> identity_pool.Credentials: + credentials_path = os.environ[_CREDENTIALS_PATH_KEY] + with open(credentials_path) as file: + json_file = json.load(file) + credentials = identity_pool.Credentials.from_info(json_file) + return credentials.with_scopes(_SCOPES) def init_ee_for_tests(): - ee.Initialize( - credentials=_read_default_creds(), - opt_url=ee.data.HIGH_VOLUME_API_BASE_URL, - ) + ee.Initialize( + credentials=_read_identity_pool_creds(), + opt_url=ee.data.HIGH_VOLUME_API_BASE_URL, + ) class EEBackendArrayTest(absltest.TestCase): From 5e19d99298634d9a590d62021c7687f62ec7a60c Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Wed, 14 Feb 2024 18:52:29 +0000 Subject: [PATCH 12/51] Revert "run pyint" This reverts commit 7efe99233f52d916e75f3fe7a0c990aeb039106c. --- xee/ext.py | 13 +++++++------ xee/ext_integration_test.py | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 3afe2c7..b8bc0b5 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -148,7 +148,8 @@ def open( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, use_coords_double_precision: bool = False, - match_xarray: xarray.DataArray | xarray.Dataset | None = None, + match_xarray: xarray.DataArray | xarray.Dataset | None = None + ) -> 'EarthEngineStore': if mode != 'r': raise ValueError( @@ -170,7 +171,7 @@ def open( ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, use_coords_double_precision=use_coords_double_precision, - match_xarray=match_xarray, + match_xarray=match_xarray ) def __init__( @@ -189,7 +190,7 @@ def __init__( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, use_coords_double_precision: bool = False, - match_xarray: xr.DataArray | xr.Dataset | None = None, + match_xarray: xr.DataArray | xr.Dataset | None = None ): self.ee_init_kwargs = ee_init_kwargs self.ee_init_if_necessary = ee_init_if_necessary @@ -215,7 +216,7 @@ def __init__( self.crs = CRS(self.crs_arg) if match_xarray is not None: if match_xarray.rio.crs is None: - raise ValueError('If matching to xarray, we require `.rio.crs` is set.') + raise ValueError("If matching to xarray, we require `.rio.crs` is set.") self.crs = CRS(match_xarray.rio.crs) if match_xarray[match_xarray.rio.x_dim].dtype == np.float64: self.use_coords_double_precision = True @@ -1045,7 +1046,7 @@ def open_dataset( ee_init_kwargs: keywords to pass to Earth Engine Initialize when attempting to auto init for remote workers. use_coords_double_precision: Whether to use double precision for coordinates - and bounds from provided geometry. False by default, but True may be + and bounds from provided geometry. False by default, but True may be helpful when hoping to match a transform of an existing dataset. match_xarray: An xarray.DataArray or xarray.Dataset to use as a template with rioxarray-based schema to extract the crs and transform to specify @@ -1081,7 +1082,7 @@ def open_dataset( ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, use_coords_double_precision=use_coords_double_precision, - match_xarray=match_xarray, + match_xarray=match_xarray ) store_entrypoint = backends_store.StoreBackendEntrypoint() diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index fdb10f7..12bb9bf 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -393,7 +393,7 @@ def test_honors_transform_precisely(self): crs='EPSG:4326', use_coords_double_precision=True, ).rename({'lon': 'x', 'lat': 'y'}) - # This is off slightly due to bounds determined by geometry e.g. .getInfo() + # This is off slightly due to bounds determined by geometry e.g. .getInfo() # seems to cause a super slight shift in the bounds. Thhe coords change before # and after the call to .getInfo()! np.testing.assert_almost_equal( @@ -401,7 +401,7 @@ def test_honors_transform_precisely(self): np.array(raster.rio.transform()), decimal=13, ) - + @absltest.skipIf(_SKIP_RASTERIO_TESTS, 'rioxarray module not loaded') def test_match_xarray(self): data = np.empty((162, 120), dtype=np.float32) From f83c749ed4f33ba7f3b3133db3d8019ecc3f9bfb Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Tue, 13 Feb 2024 04:05:43 +0000 Subject: [PATCH 13/51] run pyint --- xee/ext.py | 13 ++++++------- xee/ext_integration_test.py | 4 ++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index b8bc0b5..3afe2c7 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -148,8 +148,7 @@ def open( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, use_coords_double_precision: bool = False, - match_xarray: xarray.DataArray | xarray.Dataset | None = None - + match_xarray: xarray.DataArray | xarray.Dataset | None = None, ) -> 'EarthEngineStore': if mode != 'r': raise ValueError( @@ -171,7 +170,7 @@ def open( ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, use_coords_double_precision=use_coords_double_precision, - match_xarray=match_xarray + match_xarray=match_xarray, ) def __init__( @@ -190,7 +189,7 @@ def __init__( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, use_coords_double_precision: bool = False, - match_xarray: xr.DataArray | xr.Dataset | None = None + match_xarray: xr.DataArray | xr.Dataset | None = None, ): self.ee_init_kwargs = ee_init_kwargs self.ee_init_if_necessary = ee_init_if_necessary @@ -216,7 +215,7 @@ def __init__( self.crs = CRS(self.crs_arg) if match_xarray is not None: if match_xarray.rio.crs is None: - raise ValueError("If matching to xarray, we require `.rio.crs` is set.") + raise ValueError('If matching to xarray, we require `.rio.crs` is set.') self.crs = CRS(match_xarray.rio.crs) if match_xarray[match_xarray.rio.x_dim].dtype == np.float64: self.use_coords_double_precision = True @@ -1046,7 +1045,7 @@ def open_dataset( ee_init_kwargs: keywords to pass to Earth Engine Initialize when attempting to auto init for remote workers. use_coords_double_precision: Whether to use double precision for coordinates - and bounds from provided geometry. False by default, but True may be + and bounds from provided geometry. False by default, but True may be helpful when hoping to match a transform of an existing dataset. match_xarray: An xarray.DataArray or xarray.Dataset to use as a template with rioxarray-based schema to extract the crs and transform to specify @@ -1082,7 +1081,7 @@ def open_dataset( ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, use_coords_double_precision=use_coords_double_precision, - match_xarray=match_xarray + match_xarray=match_xarray, ) store_entrypoint = backends_store.StoreBackendEntrypoint() diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 12bb9bf..fdb10f7 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -393,7 +393,7 @@ def test_honors_transform_precisely(self): crs='EPSG:4326', use_coords_double_precision=True, ).rename({'lon': 'x', 'lat': 'y'}) - # This is off slightly due to bounds determined by geometry e.g. .getInfo() + # This is off slightly due to bounds determined by geometry e.g. .getInfo() # seems to cause a super slight shift in the bounds. Thhe coords change before # and after the call to .getInfo()! np.testing.assert_almost_equal( @@ -401,7 +401,7 @@ def test_honors_transform_precisely(self): np.array(raster.rio.transform()), decimal=13, ) - + @absltest.skipIf(_SKIP_RASTERIO_TESTS, 'rioxarray module not loaded') def test_match_xarray(self): data = np.empty((162, 120), dtype=np.float32) From af977501a4e6e75b2ec7a1f4398053e537d1d7c0 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Wed, 14 Feb 2024 19:01:11 +0000 Subject: [PATCH 14/51] formatted change --- xee/ext_integration_test.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index fdb10f7..d6a5fd7 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -42,18 +42,18 @@ def _read_identity_pool_creds() -> identity_pool.Credentials: - credentials_path = os.environ[_CREDENTIALS_PATH_KEY] - with open(credentials_path) as file: - json_file = json.load(file) - credentials = identity_pool.Credentials.from_info(json_file) - return credentials.with_scopes(_SCOPES) + credentials_path = os.environ[_CREDENTIALS_PATH_KEY] + with open(credentials_path) as file: + json_file = json.load(file) + credentials = identity_pool.Credentials.from_info(json_file) + return credentials.with_scopes(_SCOPES) def init_ee_for_tests(): - ee.Initialize( - credentials=_read_identity_pool_creds(), - opt_url=ee.data.HIGH_VOLUME_API_BASE_URL, - ) + ee.Initialize( + credentials=_read_identity_pool_creds(), + opt_url=ee.data.HIGH_VOLUME_API_BASE_URL, + ) class EEBackendArrayTest(absltest.TestCase): From 2c48180e6ea227582813c5d6fed868a29f8a62ac Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Thu, 15 Feb 2024 05:03:15 +0000 Subject: [PATCH 15/51] Geometry.bounds() added instead of the geometry(). --- xee/ext.py | 4 ++-- xee/ext_integration_test.py | 12 +++--------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 3ef71d9..a30c3b6 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -279,9 +279,9 @@ def get_info(self) -> Dict[str, Any]: rpcs.append(('projection', self.projection)) if isinstance(self.geometry, ee.Geometry): - rpcs.append(('bounds', self.geometry)) + rpcs.append(('bounds', self.geometry.bounds())) else: - rpcs.append(('bounds', self.image_collection.first().geometry())) + rpcs.append(('bounds', self.image_collection.first().geometry().bounds())) # TODO(#29, #30): This RPC call takes the longest time to compute. This # requires a full scan of the images in the collection, which happens on the diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 616b92a..4ed8b56 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -99,14 +99,8 @@ def test_can_create_object(self): def test_basic_indexing(self): arr = xee.EarthEngineBackendArray('B4', self.store) - self.assertEqual( - np.isnan(arr[indexing.BasicIndexer((0, 0, 0))]), - np.isnan(np.array(np.NaN)), - ) - self.assertEqual( - np.isnan(arr[indexing.BasicIndexer((-1, -1, -1))]), - np.isnan(np.array([np.NaN])), - ) + self.assertEqual(np.isnan(arr[indexing.BasicIndexer((0, 0, 0))]),True) + self.assertEqual(np.isnan(arr[indexing.BasicIndexer((-1, -1, -1))]), True) def test_basic_indexing__nonzero(self): arr = xee.EarthEngineBackendArray('longitude', self.lnglat_store) @@ -352,7 +346,7 @@ def test_honors_geometry(self): engine=xee.EarthEngineBackendEntrypoint, ) - self.assertEqual(ds.dims, {'time': 4248, 'lon': 40, 'lat': 33}) + self.assertEqual(ds.dims, {'time': 4248, 'lon': 40, 'lat': 35}) self.assertNotEqual(ds.dims, standard_ds.dims) def test_honors_projection(self): From 192989105f268c9a49bddd95449dc5bbbcc93a81 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Thu, 15 Feb 2024 05:08:09 +0000 Subject: [PATCH 16/51] linting fixed. --- xee/ext_integration_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 4ed8b56..17a7682 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -99,7 +99,7 @@ def test_can_create_object(self): def test_basic_indexing(self): arr = xee.EarthEngineBackendArray('B4', self.store) - self.assertEqual(np.isnan(arr[indexing.BasicIndexer((0, 0, 0))]),True) + self.assertEqual(np.isnan(arr[indexing.BasicIndexer((0, 0, 0))]), True) self.assertEqual(np.isnan(arr[indexing.BasicIndexer((-1, -1, -1))]), True) def test_basic_indexing__nonzero(self): From a0e3d56ce09cc43a63f8df959927f278fa770933 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Thu, 15 Feb 2024 15:09:16 +0000 Subject: [PATCH 17/51] Test case updated with np.testing.assert_equal. --- xee/ext_integration_test.py | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 17a7682..971bec3 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -114,28 +114,21 @@ def test_basic_indexing__nonzero(self): def test_basic_indexing_multiple_images(self): arr = xee.EarthEngineBackendArray('B4', self.store) first_two = arr[indexing.BasicIndexer((slice(0, 2), 0, 0))] - self.assertTrue(np.allclose(first_two, np.full(2, np.nan), equal_nan=True)) + np.testing.assert_equal(first_two, np.full(2, np.nan)) first_three = arr[indexing.BasicIndexer((slice(0, 3), 0, 0))] - self.assertTrue( - np.allclose(first_three, np.full(3, np.nan), equal_nan=True) - ) + np.testing.assert_equal(first_three, np.full(3, np.nan)) last_two = arr[indexing.BasicIndexer((slice(-3, -1), 0, 0))] - self.assertTrue(np.allclose(last_two, np.full(2, np.nan), equal_nan=True)) + np.testing.assert_equal(last_two, np.full(2, np.nan)) last_three = arr[indexing.BasicIndexer((slice(-4, -1), 0, 0))] - self.assertTrue(np.allclose(last_three, np.full(3, np.nan), equal_nan=True)) + np.testing.assert_equal(last_three, np.full(3, np.nan)) def test_slice_indexing(self): arr = xee.EarthEngineBackendArray('B5', self.store) first_10 = indexing.BasicIndexer((0, slice(0, 10), slice(0, 10))) - self.assertTrue( - np.allclose(arr[first_10], np.full((10, 10), np.nan), equal_nan=True) - ) + np.testing.assert_equal(arr[first_10], np.full((10, 10), np.nan)) last_5 = indexing.BasicIndexer((0, slice(-5, -1), slice(-5, -1))) expected_last_5 = np.full((4, 4), np.nan) - self.assertTrue( - np.allclose(expected_last_5, arr[last_5], equal_nan=True), - f'Actual:\n{arr[last_5]}', - ) + np.testing.assert_equal(expected_last_5, arr[last_5]) def test_slice_indexing__non_global(self): arr = xee.EarthEngineBackendArray('spi2y', self.conus_store) @@ -194,17 +187,12 @@ def test_keys_to_slices(self): def test_slice_indexing_multiple_images(self): arr = xee.EarthEngineBackendArray('B5', self.store) first_10 = indexing.BasicIndexer((slice(0, 2), slice(0, 10), slice(0, 10))) - self.assertTrue( - np.allclose(arr[first_10], np.full((2, 10, 10), np.nan), equal_nan=True) - ) + np.testing.assert_equal(arr[first_10], np.full((2, 10, 10), np.nan)) last_5 = indexing.BasicIndexer( (slice(-3, -1), slice(-5, -1), slice(-5, -1)) ) expected_last_5 = np.full((2, 4, 4), np.nan) - self.assertTrue( - np.allclose(expected_last_5, arr[last_5], equal_nan=True), - f'Actual:\n{arr[last_5]}', - ) + np.testing.assert_equal(expected_last_5, arr[last_5]) def test_slice_indexing__medium(self): try: From 83731718fb52b415c8a2429f24e7b8d9f6e1b670 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Sat, 17 Feb 2024 15:17:07 +0000 Subject: [PATCH 18/51] checkout `ext.py` from main + create failing integration test --- xee/ext.py | 39 +++----------------------------- xee/ext_integration_test.py | 44 +------------------------------------ 2 files changed, 4 insertions(+), 79 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 3afe2c7..3929b54 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -40,7 +40,6 @@ from xarray.backends import store as backends_store from xarray.core import indexing from xarray.core import utils -import xarray as xr from xee import types import ee @@ -147,8 +146,6 @@ def open( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, - use_coords_double_precision: bool = False, - match_xarray: xarray.DataArray | xarray.Dataset | None = None, ) -> 'EarthEngineStore': if mode != 'r': raise ValueError( @@ -169,8 +166,6 @@ def open( request_byte_limit=request_byte_limit, ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, - use_coords_double_precision=use_coords_double_precision, - match_xarray=match_xarray, ) def __init__( @@ -188,8 +183,6 @@ def __init__( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, - use_coords_double_precision: bool = False, - match_xarray: xr.DataArray | xr.Dataset | None = None, ): self.ee_init_kwargs = ee_init_kwargs self.ee_init_if_necessary = ee_init_if_necessary @@ -202,7 +195,6 @@ def __init__( self.geometry = geometry self.primary_dim_name = primary_dim_name or 'time' self.primary_dim_property = primary_dim_property or 'system:time_start' - self.use_coords_double_precision = use_coords_double_precision self.n_images = self.get_info['size'] self._props = self.get_info['props'] @@ -213,20 +205,12 @@ def __init__( self.crs_arg = crs or proj.get('crs', proj.get('wkt', 'EPSG:4326')) self.crs = CRS(self.crs_arg) - if match_xarray is not None: - if match_xarray.rio.crs is None: - raise ValueError('If matching to xarray, we require `.rio.crs` is set.') - self.crs = CRS(match_xarray.rio.crs) - if match_xarray[match_xarray.rio.x_dim].dtype == np.float64: - self.use_coords_double_precision = True # Gets the unit i.e. meter, degree etc. self.scale_units = self.crs.axis_info[0].unit_name # Get the dimensions name based on the CRS (scale units). self.dimension_names = self.DIMENSION_NAMES.get( self.scale_units, ('X', 'Y') ) - if match_xarray is not None: - self.dimension_names = (match_xarray.rio.x_dim, match_xarray.rio.y_dim) x_dim_name, y_dim_name = self.dimension_names self._props.update( coordinates=f'{self.primary_dim_name} {x_dim_name} {y_dim_name}', @@ -239,14 +223,11 @@ def __init__( if scale is None: scale = default_scale default_transform = affine.Affine.scale(scale, -1 * scale) + transform = affine.Affine(*proj.get('transform', default_transform)[:6]) self.scale_x, self.scale_y = transform.a, transform.e self.scale = np.sqrt(np.abs(transform.determinant)) - if match_xarray is not None: - self.scale_x, self.scale_y = match_xarray.rio.resolution() - self.scale = np.sqrt(np.abs(self.scale_x * self.scale_y)) - # Parse the dataset bounds from the native projection (either from the CRS # or the image geometry) and translate it to the representation that will be # used for all internal `computePixels()` calls. @@ -267,8 +248,6 @@ def __init__( x_min, y_min = self.transform(x_min_0, y_min_0) x_max, y_max = self.transform(x_max_0, y_max_0) self.bounds = x_min, y_min, x_max, y_max - if match_xarray is not None: - self.bounds = match_xarray.rio.bounds() max_dtype = self._max_itemsize() @@ -602,9 +581,8 @@ def _get_tile_from_ee( else (0, tile_coords_start, 1, tile_coords_end) ) target_image = ee.Image.pixelCoordinates(ee.Projection(self.crs_arg)) - dtype = np.float64 if self.use_coords_double_precision else np.float32 return tile_index, self.image_to_array( - target_image, grid=bbox, dtype=dtype, bandIds=[band_id] + target_image, grid=bbox, dtype=np.float32, bandIds=[band_id] ) def _process_coordinate_data( @@ -711,7 +689,7 @@ def _parse_dtype(data_type: types.DataType): def _ee_bounds_to_bounds(bounds: ee.Bounds) -> types.Bounds: - coords = np.array(bounds['coordinates'], dtype=np.float64)[0] + coords = np.array(bounds['coordinates'], dtype=np.float32)[0] x_min, y_min, x_max, y_max = ( min(coords[:, 0]), min(coords[:, 1]), @@ -973,8 +951,6 @@ def open_dataset( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_if_necessary: bool = False, ee_init_kwargs: Optional[Dict[str, Any]] = None, - use_coords_double_precision: bool = False, - match_xarray: xarray.DataArray | xarray.Dataset | None = None, ) -> xarray.Dataset: # type: ignore """Open an Earth Engine ImageCollection as an Xarray Dataset. @@ -1044,13 +1020,6 @@ def open_dataset( frameworks. ee_init_kwargs: keywords to pass to Earth Engine Initialize when attempting to auto init for remote workers. - use_coords_double_precision: Whether to use double precision for coordinates - and bounds from provided geometry. False by default, but True may be - helpful when hoping to match a transform of an existing dataset. - match_xarray: An xarray.DataArray or xarray.Dataset to use as a template - with rioxarray-based schema to extract the crs and transform to specify - the spatial extent and crs of the output dataset. Using this arg requires - that rioxarray is installed. Returns: An xarray.Dataset that streams in remote data from Earth Engine. @@ -1080,8 +1049,6 @@ def open_dataset( request_byte_limit=request_byte_limit, ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, - use_coords_double_precision=use_coords_double_precision, - match_xarray=match_xarray, ) store_entrypoint = backends_store.StoreBackendEntrypoint() diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index d6a5fd7..1306171 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -359,7 +359,7 @@ def test_honors_projection(self): self.assertNotEqual(ds.dims, standard_ds.dims) @absltest.skipIf(_SKIP_RASTERIO_TESTS, 'rioxarray module not loaded') - def test_honors_transform_precisely(self): + def test_expected_double_precision_transform(self): data = np.empty((162, 120), dtype=np.float32) # An example of a double precision bbox bbox = ( @@ -391,49 +391,7 @@ def test_honors_transform_precisely(self): geometry=geo, scale=raster.rio.resolution()[0], crs='EPSG:4326', - use_coords_double_precision=True, ).rename({'lon': 'x', 'lat': 'y'}) - # This is off slightly due to bounds determined by geometry e.g. .getInfo() - # seems to cause a super slight shift in the bounds. Thhe coords change before - # and after the call to .getInfo()! - np.testing.assert_almost_equal( - np.array(xee_dataset.rio.transform()), - np.array(raster.rio.transform()), - decimal=13, - ) - - @absltest.skipIf(_SKIP_RASTERIO_TESTS, 'rioxarray module not loaded') - def test_match_xarray(self): - data = np.empty((162, 120), dtype=np.float32) - # An example of a double precision bbox - bbox = ( - -53.94158617595226, - -12.078281822698678, - -53.67209159071253, - -11.714464132625046, - ) - x_res = (bbox[2] - bbox[0]) / data.shape[1] - y_res = (bbox[3] - bbox[1]) / data.shape[0] - raster = xr.DataArray( - data, - coords={ - 'y': np.linspace(bbox[3], bbox[1] + x_res, data.shape[0]), - 'x': np.linspace(bbox[0], bbox[2] - y_res, data.shape[1]), - }, - dims=('y', 'x'), - ) - raster.rio.write_crs('EPSG:4326', inplace=True) - ic = ( - ee.ImageCollection('UCSB-CHG/CHIRPS/DAILY') - .filterDate(ee.DateRange('2014-01-01', '2014-01-02')) - .select('precipitation') - ) - xee_dataset = xr.open_dataset( - ee.ImageCollection(ic), - engine='ee', - scale=raster.rio.resolution()[0], - match_xarray=raster, - ) np.testing.assert_equal( np.array(xee_dataset.rio.transform()), np.array(raster.rio.transform()), From f6048f6540cbdaca7eacbce4e7dbba4b9e8d3802 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Sat, 17 Feb 2024 15:57:40 +0000 Subject: [PATCH 19/51] add support for bbox in geometry + demo passing integration test --- xee/ext.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 3929b54..aebe2c1 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -139,7 +139,7 @@ def open( crs: Optional[str] = None, scale: Optional[float] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[ee.Geometry] = None, + geometry: Optional[Union[ee.Geometry, types.types.BBox]] = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, mask_value: Optional[float] = None, @@ -176,7 +176,7 @@ def __init__( crs: Optional[str] = None, scale: Union[float, int, None] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[ee.Geometry] = None, + geometry: Optional[Union[ee.Geometry, types.BBox]] = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, mask_value: Optional[float] = None, @@ -231,19 +231,31 @@ def __init__( # Parse the dataset bounds from the native projection (either from the CRS # or the image geometry) and translate it to the representation that will be # used for all internal `computePixels()` calls. - try: - if isinstance(geometry, ee.Geometry): + if geometry is None: + try: + x_min_0, y_min_0, x_max_0, y_max_0 = self.crs.area_of_use.bounds + except AttributeError: + # `area_of_use` is probable `None`. Parse the geometry from the first + # image instead (calculated in self.get_info()) x_min_0, y_min_0, x_max_0, y_max_0 = _ee_bounds_to_bounds( self.get_info['bounds'] ) - else: - x_min_0, y_min_0, x_max_0, y_max_0 = self.crs.area_of_use.bounds - except AttributeError: - # `area_of_use` is probable `None`. Parse the geometry from the first - # image instead (calculated in self.get_info()) + elif isinstance(geometry, ee.Geometry): x_min_0, y_min_0, x_max_0, y_max_0 = _ee_bounds_to_bounds( self.get_info['bounds'] ) + elif isinstance(geometry, Union[List, Tuple, np.ndarray]): + if len(geometry) != 4: + raise ValueError( + 'geometry must be a 4-tuple of floats or a ee.Geometry, ' + f'but got {geometry!r}' + ) + x_min_0, y_min_0, x_max_0, y_max_0 = geometry + else: + raise ValueError( + 'geometry must be a 4-tuple of floats or a ee.Geometry or None, ' + f'but got {type(geometry)}' + ) x_min, y_min = self.transform(x_min_0, y_min_0) x_max, y_max = self.transform(x_max_0, y_max_0) @@ -689,7 +701,7 @@ def _parse_dtype(data_type: types.DataType): def _ee_bounds_to_bounds(bounds: ee.Bounds) -> types.Bounds: - coords = np.array(bounds['coordinates'], dtype=np.float32)[0] + coords = np.array(bounds['coordinates'], dtype=np.float64)[0] x_min, y_min, x_max, y_max = ( min(coords[:, 0]), min(coords[:, 1]), @@ -1003,7 +1015,8 @@ def open_dataset( coalesce all variables upon opening. By default, the scale and reference system is set by the the `crs` and `scale` arguments. geometry (optional): Specify an `ee.Geometry` to define the regional - bounds when opening the data. When not set, the bounds are defined by + bounds when opening the data or a bbox specifying [x_min, y_min, x_max, + y_max] in EPSG:4326. When not set, the bounds are defined by the CRS's 'area_of_use` boundaries. If those aren't present, the bounds are derived from the geometry of the first image of the collection. primary_dim_name (optional): Override the name of the primary dimension of From 00b34e790fe0cebf15b1c1712de252be6ac89278 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Sat, 17 Feb 2024 16:00:49 +0000 Subject: [PATCH 20/51] update integation test to pass bounds --- xee/ext_integration_test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 1306171..08e8155 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -378,8 +378,6 @@ def test_expected_double_precision_transform(self): }, dims=('y', 'x'), ) - - geo = ee.Geometry.Rectangle(*raster.rio.bounds()) ic = ( ee.ImageCollection('UCSB-CHG/CHIRPS/DAILY') .filterDate(ee.DateRange('2014-01-01', '2014-01-02')) @@ -388,7 +386,7 @@ def test_expected_double_precision_transform(self): xee_dataset = xr.open_dataset( ee.ImageCollection(ic), engine='ee', - geometry=geo, + geometry=tuple(raster.rio.bounds()), scale=raster.rio.resolution()[0], crs='EPSG:4326', ).rename({'lon': 'x', 'lat': 'y'}) From a2b3c55723a26f15c4156911773ab8b50aaad5b4 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Sun, 18 Feb 2024 17:11:34 +0000 Subject: [PATCH 21/51] demonstrate xy scale control through projection in test --- xee/ext.py | 2 +- xee/ext_integration_test.py | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index aebe2c1..b3b4fcd 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -594,7 +594,7 @@ def _get_tile_from_ee( ) target_image = ee.Image.pixelCoordinates(ee.Projection(self.crs_arg)) return tile_index, self.image_to_array( - target_image, grid=bbox, dtype=np.float32, bandIds=[band_id] + target_image, grid=bbox, dtype=np.float64, bandIds=[band_id] ) def _process_coordinate_data( diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 08e8155..9c6022c 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -359,9 +359,8 @@ def test_honors_projection(self): self.assertNotEqual(ds.dims, standard_ds.dims) @absltest.skipIf(_SKIP_RASTERIO_TESTS, 'rioxarray module not loaded') - def test_expected_double_precision_transform(self): - data = np.empty((162, 120), dtype=np.float32) - # An example of a double precision bbox + def test_expected_precise_transform(self): + data = np.empty((162, 121), dtype=np.float32) bbox = ( -53.94158617595226, -12.078281822698678, @@ -378,6 +377,7 @@ def test_expected_double_precision_transform(self): }, dims=('y', 'x'), ) + raster.rio.write_crs('EPSG:4326', inplace=True) ic = ( ee.ImageCollection('UCSB-CHG/CHIRPS/DAILY') .filterDate(ee.DateRange('2014-01-01', '2014-01-02')) @@ -387,9 +387,11 @@ def test_expected_double_precision_transform(self): ee.ImageCollection(ic), engine='ee', geometry=tuple(raster.rio.bounds()), - scale=raster.rio.resolution()[0], - crs='EPSG:4326', + projection=ee.Projection( + crs=str(raster.rio.crs), transform=raster.rio.transform()[:6] + ), ).rename({'lon': 'x', 'lat': 'y'}) + self.assertNotEqual(abs(x_res), abs(y_res)) np.testing.assert_equal( np.array(xee_dataset.rio.transform()), np.array(raster.rio.transform()), From 4db088745ff99b32840738814436ec5efcad656c Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Sun, 18 Feb 2024 17:14:44 +0000 Subject: [PATCH 22/51] nits --- xee/ext.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index b3b4fcd..bc4f947 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -25,7 +25,7 @@ import math import os import sys -from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, Union +from typing import Any, Dict, Iterable, List, Literal, Optional, Sequence, Tuple, Union from urllib import parse import warnings @@ -139,7 +139,7 @@ def open( crs: Optional[str] = None, scale: Optional[float] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[Union[ee.Geometry, types.types.BBox]] = None, + geometry: Optional[Union[ee.Geometry, types.BBox]] = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, mask_value: Optional[float] = None, @@ -244,7 +244,7 @@ def __init__( x_min_0, y_min_0, x_max_0, y_max_0 = _ee_bounds_to_bounds( self.get_info['bounds'] ) - elif isinstance(geometry, Union[List, Tuple, np.ndarray]): + elif isinstance(geometry, Union[List, Tuple, np.ndarray, Sequence]): if len(geometry) != 4: raise ValueError( 'geometry must be a 4-tuple of floats or a ee.Geometry, ' From 4c0dd6b5bce2aacc3fbe44f7e6d8377ebf9a3971 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Tue, 20 Feb 2024 15:43:27 +0000 Subject: [PATCH 23/51] narrow supported types --- xee/ext.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index bc4f947..f4aa7d4 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -139,7 +139,7 @@ def open( crs: Optional[str] = None, scale: Optional[float] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[Union[ee.Geometry, types.BBox]] = None, + geometry: Optional[Union[ee.Geometry, Tuple[float, float, float, float]]] = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, mask_value: Optional[float] = None, @@ -176,7 +176,7 @@ def __init__( crs: Optional[str] = None, scale: Union[float, int, None] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[Union[ee.Geometry, types.BBox]] = None, + geometry: Optional[Union[ee.Geometry, Tuple[float, float, float, float]]] = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, mask_value: Optional[float] = None, @@ -244,16 +244,16 @@ def __init__( x_min_0, y_min_0, x_max_0, y_max_0 = _ee_bounds_to_bounds( self.get_info['bounds'] ) - elif isinstance(geometry, Union[List, Tuple, np.ndarray, Sequence]): + elif isinstance(geometry, Union[List, Tuple]): if len(geometry) != 4: raise ValueError( - 'geometry must be a 4-tuple of floats or a ee.Geometry, ' + 'geometry must be a tuple or list of length 4, or a ee.Geometry, ' f'but got {geometry!r}' ) x_min_0, y_min_0, x_max_0, y_max_0 = geometry else: raise ValueError( - 'geometry must be a 4-tuple of floats or a ee.Geometry or None, ' + f'geometry must be a tuple or list of length 4, a ee.Geometry, or None ' f'but got {type(geometry)}' ) @@ -956,7 +956,7 @@ def open_dataset( crs: Optional[str] = None, scale: Union[float, int, None] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[ee.Geometry] = None, + geometry: Optional[Union[ee.Geometry, Tuple[float, float, float, float]]] = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, ee_mask_value: Optional[float] = None, From a766527f1f779ae75c274ff8c869ecbfc88269e2 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Tue, 20 Feb 2024 15:43:39 +0000 Subject: [PATCH 24/51] add readme example --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index 4aef1cd..ad42410 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,20 @@ i = ee.ImageCollection(ee.Image("LANDSAT/LC08/C02/T1_TOA/LC08_044034_20140318")) ds = xarray.open_dataset(i, engine='ee') ``` +Open any Earth Engine ImageCollection to match an existing transform: + +```python +raster = rioxarray.open_rasterio(...) # assume crs + transform is set +ds = xr.open_dataset( + 'ee://ECMWF/ERA5_LAND/HOURLY', + engine='ee', + geometry=tuple(raster.rio.bounds()), # must be in EPSG:4326 + projection=ee.Projection( + crs=str(raster.rio.crs), transform=raster.rio.transform()[:6] + ), +) +``` + See [examples](examples/) or [docs](docs/) for more uses and integrations. ## License From 23ea0c9b402f775c18874e47a8a7283929b5b070 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Wed, 21 Feb 2024 14:22:06 +0000 Subject: [PATCH 25/51] typing nit --- xee/ext.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 58cbd0d..c1a9055 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -139,7 +139,9 @@ def open( crs: Optional[str] = None, scale: Optional[float] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[Union[ee.Geometry, Tuple[float, float, float, float]]] = None, + geometry: Union[ + ee.Geometry, Tuple[float, float, float, float], None + ] = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, mask_value: Optional[float] = None, @@ -176,7 +178,9 @@ def __init__( crs: Optional[str] = None, scale: Union[float, int, None] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[Union[ee.Geometry, Tuple[float, float, float, float]]] = None, + geometry: Optional[ + Union[ee.Geometry, Tuple[float, float, float, float]] + ] = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, mask_value: Optional[float] = None, @@ -253,8 +257,8 @@ def __init__( x_min_0, y_min_0, x_max_0, y_max_0 = geometry else: raise ValueError( - f'geometry must be a tuple or list of length 4, a ee.Geometry, or None ' - f'but got {type(geometry)}' + 'geometry must be a tuple or list of length 4, a ee.Geometry, or' + f' None but got {type(geometry)}' ) x_min, y_min = self.transform(x_min_0, y_min_0) @@ -960,7 +964,9 @@ def open_dataset( crs: Optional[str] = None, scale: Union[float, int, None] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[Union[ee.Geometry, Tuple[float, float, float, float]]] = None, + geometry: Optional[ + Union[ee.Geometry, Tuple[float, float, float, float]] + ] = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, ee_mask_value: Optional[float] = None, From b4e5a92665f6c617194170387dda679a6066b7e6 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Tue, 5 Mar 2024 06:50:56 +0000 Subject: [PATCH 26/51] Allow users to configure the internal thread pool. --- xee/ext.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index a30c3b6..c169652 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -146,6 +146,7 @@ def open( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, + executor_kwargs: Optional[dict] = None, ) -> 'EarthEngineStore': if mode != 'r': raise ValueError( @@ -166,6 +167,7 @@ def open( request_byte_limit=request_byte_limit, ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, + executor_kwargs=executor_kwargs ) def __init__( @@ -183,10 +185,16 @@ def __init__( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, + executor_kwargs: Optional[dict] = None, ): self.ee_init_kwargs = ee_init_kwargs self.ee_init_if_necessary = ee_init_if_necessary + # Initialize executor_kwargs + if executor_kwargs is None: + executor_kwargs = {} + self.executor_kwargs = executor_kwargs + self.image_collection = image_collection if n_images != -1: self.image_collection = image_collection.limit(n_images) @@ -602,7 +610,7 @@ def _process_coordinate_data( for i in range(tile_count) ] tiles = [None] * tile_count - with concurrent.futures.ThreadPoolExecutor() as pool: + with concurrent.futures.ThreadPoolExecutor(**self.executor_kwargs) as pool: for i, arr in pool.map( self._get_tile_from_ee, list(zip(data, itertools.cycle([coordinate_type]))), @@ -862,8 +870,7 @@ def _raw_indexing_method( for _ in range(shape[0]) ] - # TODO(#11): Allow users to configure this via kwargs. - with concurrent.futures.ThreadPoolExecutor() as pool: + with concurrent.futures.ThreadPoolExecutor(**self.store.executor_kwargs) as pool: for (i, j, k), arr in pool.map( self._make_tile, self._tile_indexes(key[0], bbox) ): @@ -955,6 +962,7 @@ def open_dataset( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_if_necessary: bool = False, ee_init_kwargs: Optional[Dict[str, Any]] = None, + executor_kwargs: Optional[dict] = None, ) -> xarray.Dataset: # type: ignore """Open an Earth Engine ImageCollection as an Xarray Dataset. @@ -1024,6 +1032,8 @@ def open_dataset( frameworks. ee_init_kwargs: keywords to pass to Earth Engine Initialize when attempting to auto init for remote workers. + executor_kwargs (optional): A dictionary of keyword arguments to pass to + the ThreadPoolExecutor that handles the parallel computation of pixels. Returns: An xarray.Dataset that streams in remote data from Earth Engine. @@ -1053,6 +1063,7 @@ def open_dataset( request_byte_limit=request_byte_limit, ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, + executor_kwargs=executor_kwargs, ) store_entrypoint = backends_store.StoreBackendEntrypoint() From d79b87438cff4fc7fad724142b3c815e35b896c5 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Tue, 5 Mar 2024 06:52:32 +0000 Subject: [PATCH 27/51] lint error fixed. --- xee/ext.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index c169652..cfe2513 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -167,7 +167,7 @@ def open( request_byte_limit=request_byte_limit, ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, - executor_kwargs=executor_kwargs + executor_kwargs=executor_kwargs, ) def __init__( @@ -192,7 +192,7 @@ def __init__( # Initialize executor_kwargs if executor_kwargs is None: - executor_kwargs = {} + executor_kwargs = {} self.executor_kwargs = executor_kwargs self.image_collection = image_collection @@ -870,7 +870,9 @@ def _raw_indexing_method( for _ in range(shape[0]) ] - with concurrent.futures.ThreadPoolExecutor(**self.store.executor_kwargs) as pool: + with concurrent.futures.ThreadPoolExecutor( + **self.store.executor_kwargs + ) as pool: for (i, j, k), arr in pool.map( self._make_tile, self._tile_indexes(key[0], bbox) ): From d3d4d5e025c7365b731cc953229e45a59ea57626 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Tue, 5 Mar 2024 12:09:26 +0000 Subject: [PATCH 28/51] nit changes done. --- xee/ext.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xee/ext.py b/xee/ext.py index cfe2513..cddfa84 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -1035,7 +1035,8 @@ def open_dataset( ee_init_kwargs: keywords to pass to Earth Engine Initialize when attempting to auto init for remote workers. executor_kwargs (optional): A dictionary of keyword arguments to pass to - the ThreadPoolExecutor that handles the parallel computation of pixels. + the ThreadPoolExecutor that handles the parallel computation of pixels + i.e. {'max_workers': 2}. Returns: An xarray.Dataset that streams in remote data from Earth Engine. From bb6e680472002ed5462a481824af891154a617ac Mon Sep 17 00:00:00 2001 From: Nathaniel Schmitz Date: Tue, 5 Mar 2024 16:45:43 +0000 Subject: [PATCH 29/51] Fix executor_kwargs type annotation --- xee/ext.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index cddfa84..6b8b203 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -146,7 +146,7 @@ def open( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, - executor_kwargs: Optional[dict] = None, + executor_kwargs: Optional[Dict[str, Any]] = None, ) -> 'EarthEngineStore': if mode != 'r': raise ValueError( @@ -185,7 +185,7 @@ def __init__( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, - executor_kwargs: Optional[dict] = None, + executor_kwargs: Optional[Dict[str, Any]] = None, ): self.ee_init_kwargs = ee_init_kwargs self.ee_init_if_necessary = ee_init_if_necessary @@ -964,7 +964,7 @@ def open_dataset( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_if_necessary: bool = False, ee_init_kwargs: Optional[Dict[str, Any]] = None, - executor_kwargs: Optional[dict] = None, + executor_kwargs: Optional[Dict[str, Any]] = None, ) -> xarray.Dataset: # type: ignore """Open an Earth Engine ImageCollection as an Xarray Dataset. From f5d68b4758de564e09b2521edbc87a6b69dbb0ae Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Wed, 6 Mar 2024 11:24:02 +0000 Subject: [PATCH 30/51] Changes done for the ee int images. --- xee/ext.py | 1 + xee/ext_integration_test.py | 1 + 2 files changed, 2 insertions(+) diff --git a/xee/ext.py b/xee/ext.py index 6b8b203..4083da4 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -471,6 +471,7 @@ def image_to_array( Returns: An numpy array containing the pixels computed based on the given image. """ + image = image.toFloat() image = ( ee.Image(self.mask_value) .rename([image.bandNames().get(0)]) diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index ea821df..e4c04a1 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -69,6 +69,7 @@ def setUp(self): '2017-01-01', '2017-01-03' ), n_images=64, + mask_value=-99999, ) self.lnglat_store = xee.EarthEngineStore( ee.ImageCollection.fromImages([ee.Image.pixelLonLat()]), From 7a4525e6514343e3d885872fa7b3e19d4f5d69a1 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Wed, 6 Mar 2024 13:17:37 +0000 Subject: [PATCH 31/51] Returns the data always in the np.float32 format. --- xee/ext.py | 22 ++++++---------------- xee/ext_integration_test.py | 3 --- 2 files changed, 6 insertions(+), 19 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 4083da4..ec9bb1d 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -452,7 +452,6 @@ def image_to_array( self, image: ee.Image, pixels_getter=_GetComputedPixels(), - dtype=np.float32, **kwargs, ) -> np.ndarray: """Gets the pixels for a given image as a numpy array. @@ -464,12 +463,11 @@ def image_to_array( image: An EE image. pixels_getter: An object whose `__getitem__()` method calls `computePixels()`. - dtype: a np.dtype. The returned array will be in this dtype. **kwargs: Additional settings for `params` in `computePixels(params)`. For example, a `grid` dictionary. Returns: - An numpy array containing the pixels computed based on the given image. + A numpy array of float data value containing the pixels computed based on the given image. """ image = image.toFloat() image = ( @@ -500,16 +498,6 @@ def image_to_array( n_bands, ) - # try converting the data to desired dtype in place without copying - # if conversion is not allowed then just use the EE returned dtype - try: - arr = arr.astype(dtype, copy=False) - except ValueError: - warnings.warn( - f'Could convert EE results to requested dtype {dtype} ' - f'falling back to returned dtype from EE {np.dtype(raw.dtype[0])}' - ) - data = arr.T current_mask_value = np.array(self.mask_value, dtype=data.dtype) # Sets EE nodata masked value to NaNs. @@ -595,7 +583,7 @@ def _get_tile_from_ee( ) target_image = ee.Image.pixelCoordinates(ee.Projection(self.crs_arg)) return tile_index, self.image_to_array( - target_image, grid=bbox, dtype=np.float32, bandIds=[band_id] + target_image, grid=bbox, bandIds=[band_id] ) def _process_coordinate_data( @@ -838,7 +826,8 @@ def _raw_indexing_method( if self.store.chunks == -1: target_image = self._slice_collection(key[0]) out = self.store.image_to_array( - target_image, grid=self.store.project(bbox), dtype=self.dtype + target_image, + grid=self.store.project(bbox), ) if squeeze_axes: @@ -893,7 +882,8 @@ def _make_tile( tile_idx, (istart, iend, *bbox) = tile_index target_image = self._slice_collection(slice(istart, iend)) return tile_idx, self.store.image_to_array( - target_image, grid=self.store.project(tuple(bbox)), dtype=self.dtype + target_image, + grid=self.store.project(tuple(bbox)), ) def _tile_indexes( diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index e4c04a1..2f07325 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -69,7 +69,6 @@ def setUp(self): '2017-01-01', '2017-01-03' ), n_images=64, - mask_value=-99999, ) self.lnglat_store = xee.EarthEngineStore( ee.ImageCollection.fromImages([ee.Image.pixelLonLat()]), @@ -244,14 +243,12 @@ def __getitem__(self, params): raise ee.ee_exception.EEException('Too many requests!') return ee.data.computePixels(params) - arr = xee.EarthEngineBackendArray('B5', self.store) grid = self.store.project((0, 10, 0, 10)) getter = ErroneousPixelsGetter() self.store.image_to_array( self.store.image_collection.first(), pixels_getter=getter, grid=grid, - dtype=arr.dtype, ) self.assertEqual(getter.count, 3) From 16edfa0350e5ea1f81b75abd09806382a9e53ceb Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Thu, 7 Mar 2024 12:15:16 +0000 Subject: [PATCH 32/51] Test cases are added. --- xee/ext.py | 2 +- xee/ext_integration_test.py | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index ec9bb1d..0cf5872 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -719,7 +719,7 @@ def __init__(self, variable_name: str, ee_store: EarthEngineStore): # It looks like different bands have different dimensions & transforms! # Can we get this into consistent dimensions? self._info = ee_store._band_attrs(variable_name) - self.dtype = _parse_dtype(self._info['data_type']) + self.dtype = np.dtype(np.float32) x_min, y_min, x_max, y_max = self.bounds diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 2f07325..1d22bfb 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -70,6 +70,13 @@ def setUp(self): ), n_images=64, ) + self.store_with_neg_mask_value = xee.EarthEngineStore( + ee.ImageCollection('LANDSAT/LC08/C01/T1').filterDate( + '2017-01-01', '2017-01-03' + ), + n_images=64, + mask_value=-9999 + ) self.lnglat_store = xee.EarthEngineStore( ee.ImageCollection.fromImages([ee.Image.pixelLonLat()]), chunks={'index': 256, 'width': 512, 'height': 512}, @@ -97,7 +104,7 @@ def test_can_create_object(self): self.assertIsNotNone(arr) self.assertEqual((64, 360, 180), arr.shape) - self.assertEqual(np.int32, arr.dtype) + self.assertEqual(np.float32, arr.dtype) self.assertEqual('B4', arr.variable_name) def test_basic_indexing(self): @@ -105,6 +112,11 @@ def test_basic_indexing(self): self.assertEqual(np.isnan(arr[indexing.BasicIndexer((0, 0, 0))]), True) self.assertEqual(np.isnan(arr[indexing.BasicIndexer((-1, -1, -1))]), True) + def test_basic_indexing_on_int_ee_image(self): + arr = xee.EarthEngineBackendArray('B4', self.store_with_neg_mask_value) + self.assertEqual(np.isnan(arr[indexing.BasicIndexer((0, 0, 0))]), True) + self.assertEqual(np.isnan(arr[indexing.BasicIndexer((-1, -1, -1))]), True) + def test_basic_indexing__nonzero(self): arr = xee.EarthEngineBackendArray('longitude', self.lnglat_store) From 33ed4c60b71265e7b1ebd3967b9e175530c6b6a6 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Thu, 7 Mar 2024 12:19:54 +0000 Subject: [PATCH 33/51] lint error fixed. --- xee/ext_integration_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 1d22bfb..53a66fd 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -75,7 +75,7 @@ def setUp(self): '2017-01-01', '2017-01-03' ), n_images=64, - mask_value=-9999 + mask_value=-9999, ) self.lnglat_store = xee.EarthEngineStore( ee.ImageCollection.fromImages([ee.Image.pixelLonLat()]), From 71322bcb54bcb7989acb5787e5f25594a9437fda Mon Sep 17 00:00:00 2001 From: Nathaniel Schmitz Date: Fri, 8 Mar 2024 21:24:26 +0000 Subject: [PATCH 34/51] Use ee.Image.unmask instead of ee.Image.blend --- xee/ext.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 0cf5872..1e54136 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -469,17 +469,8 @@ def image_to_array( Returns: A numpy array of float data value containing the pixels computed based on the given image. """ - image = image.toFloat() - image = ( - ee.Image(self.mask_value) - .rename([image.bandNames().get(0)]) - .blend(image) - ) - params = { - 'expression': image, - 'fileFormat': 'NUMPY_NDARRAY', - **kwargs, - } + image = image.unmask(self.mask_value, False) + params = {'expression': image, 'fileFormat': 'NUMPY_NDARRAY', **kwargs} raw = common.robust_getitem( pixels_getter, params, catch=ee.ee_exception.EEException ) From 41de120c36095e975ea88bcbc52a97b5d06935ab Mon Sep 17 00:00:00 2001 From: Nathaniel Schmitz Date: Fri, 8 Mar 2024 21:30:56 +0000 Subject: [PATCH 35/51] Add dtype parameter to image_to_array --- xee/ext.py | 30 ++++++++++++++++++++++-------- xee/ext_integration_test.py | 2 +- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 1e54136..2e94a73 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -452,6 +452,7 @@ def image_to_array( self, image: ee.Image, pixels_getter=_GetComputedPixels(), + dtype=np.float32, **kwargs, ) -> np.ndarray: """Gets the pixels for a given image as a numpy array. @@ -463,14 +464,19 @@ def image_to_array( image: An EE image. pixels_getter: An object whose `__getitem__()` method calls `computePixels()`. + dtype: a np.dtype. The returned array will be in this dtype. **kwargs: Additional settings for `params` in `computePixels(params)`. For example, a `grid` dictionary. Returns: - A numpy array of float data value containing the pixels computed based on the given image. + A numpy array containing the pixels computed based on the given image. """ image = image.unmask(self.mask_value, False) - params = {'expression': image, 'fileFormat': 'NUMPY_NDARRAY', **kwargs} + params = { + 'expression': image, + 'fileFormat': 'NUMPY_NDARRAY', + **kwargs, + } raw = common.robust_getitem( pixels_getter, params, catch=ee.ee_exception.EEException ) @@ -489,6 +495,16 @@ def image_to_array( n_bands, ) + # try converting the data to desired dtype in place without copying + # if conversion is not allowed then just use the EE returned dtype + try: + arr = arr.astype(dtype, copy=False) + except ValueError: + warnings.warn( + f'Could convert EE results to requested dtype {dtype} ' + f'falling back to returned dtype from EE {np.dtype(raw.dtype[0])}' + ) + data = arr.T current_mask_value = np.array(self.mask_value, dtype=data.dtype) # Sets EE nodata masked value to NaNs. @@ -574,7 +590,7 @@ def _get_tile_from_ee( ) target_image = ee.Image.pixelCoordinates(ee.Projection(self.crs_arg)) return tile_index, self.image_to_array( - target_image, grid=bbox, bandIds=[band_id] + target_image, grid=bbox, dtype=np.float32, bandIds=[band_id] ) def _process_coordinate_data( @@ -710,7 +726,7 @@ def __init__(self, variable_name: str, ee_store: EarthEngineStore): # It looks like different bands have different dimensions & transforms! # Can we get this into consistent dimensions? self._info = ee_store._band_attrs(variable_name) - self.dtype = np.dtype(np.float32) + self.dtype = _parse_dtype(self._info['data_type']) x_min, y_min, x_max, y_max = self.bounds @@ -817,8 +833,7 @@ def _raw_indexing_method( if self.store.chunks == -1: target_image = self._slice_collection(key[0]) out = self.store.image_to_array( - target_image, - grid=self.store.project(bbox), + target_image, grid=self.store.project(bbox), dtype=self.dtype ) if squeeze_axes: @@ -873,8 +888,7 @@ def _make_tile( tile_idx, (istart, iend, *bbox) = tile_index target_image = self._slice_collection(slice(istart, iend)) return tile_idx, self.store.image_to_array( - target_image, - grid=self.store.project(tuple(bbox)), + target_image, grid=self.store.project(tuple(bbox)), dtype=self.dtype ) def _tile_indexes( diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 53a66fd..ee4ec7f 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -104,7 +104,7 @@ def test_can_create_object(self): self.assertIsNotNone(arr) self.assertEqual((64, 360, 180), arr.shape) - self.assertEqual(np.float32, arr.dtype) + self.assertEqual(np.int32, arr.dtype) self.assertEqual('B4', arr.variable_name) def test_basic_indexing(self): From 3b678d47e3c75fea5847ea3c049cd07a90bb5025 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Mon, 11 Mar 2024 07:17:16 +0000 Subject: [PATCH 36/51] test case updated. --- xee/ext_integration_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index ee4ec7f..32ec804 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -148,11 +148,11 @@ def test_slice_indexing(self): def test_slice_indexing__non_global(self): arr = xee.EarthEngineBackendArray('spi2y', self.conus_store) first_10 = indexing.BasicIndexer((0, slice(0, 10), slice(0, 10))) - self.assertTrue(np.allclose(arr[first_10], np.zeros((10, 10)))) + np.testing.assert_equal(arr[first_10], np.full((10, 10), np.nan)) last_5 = indexing.BasicIndexer((0, slice(-5, -1), slice(-5, -1))) - expected_last_5 = np.zeros((4, 4)) - self.assertTrue( - np.allclose(expected_last_5, arr[last_5]), f'Actual:\n{arr[last_5]}' + expected_last_5 = np.full((4, 4), np.nan) + np.testing.assert_equal( + expected_last_5, arr[last_5], f'Actual:\n{arr[last_5]}' ) # TODO(alxr): Add more tests here to check for off-by-one errors... From c03a039805df90ebc1d967d66b982f9e5f639ee9 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Mon, 11 Mar 2024 07:24:15 +0000 Subject: [PATCH 37/51] nit chnages in tescase. --- xee/ext_integration_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 32ec804..5075620 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -255,12 +255,14 @@ def __getitem__(self, params): raise ee.ee_exception.EEException('Too many requests!') return ee.data.computePixels(params) + arr = xee.EarthEngineBackendArray('B5', self.store) grid = self.store.project((0, 10, 0, 10)) getter = ErroneousPixelsGetter() self.store.image_to_array( self.store.image_collection.first(), pixels_getter=getter, grid=grid, + dtype=arr.dtype, ) self.assertEqual(getter.count, 3) From a679053cec6dbada793bc63d057c957832737f86 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Mon, 11 Mar 2024 13:04:49 +0000 Subject: [PATCH 38/51] Always return the float datatype in xee. --- xee/ext.py | 3 +-- xee/ext_integration_test.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 2e94a73..eb68d4c 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -726,8 +726,7 @@ def __init__(self, variable_name: str, ee_store: EarthEngineStore): # It looks like different bands have different dimensions & transforms! # Can we get this into consistent dimensions? self._info = ee_store._band_attrs(variable_name) - self.dtype = _parse_dtype(self._info['data_type']) - + self.dtype = np.dtype(np.float32) x_min, y_min, x_max, y_max = self.bounds # Make sure the size is at least 1x1. diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 5075620..35cb340 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -104,7 +104,7 @@ def test_can_create_object(self): self.assertIsNotNone(arr) self.assertEqual((64, 360, 180), arr.shape) - self.assertEqual(np.int32, arr.dtype) + self.assertEqual(np.float32, arr.dtype) self.assertEqual('B4', arr.variable_name) def test_basic_indexing(self): From 26adfbd286d100a045ba3977eed1742d33d35ac6 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Tue, 12 Mar 2024 06:00:42 +0000 Subject: [PATCH 39/51] Retry logic for the ee.data.computePixels update. --- xee/ext.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/xee/ext.py b/xee/ext.py index 6b8b203..f5d4a97 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -147,6 +147,8 @@ def open( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, executor_kwargs: Optional[Dict[str, Any]] = None, + compute_pixels_max_retries: Optional[int] = None, + compute_pixels_initial_delay: Optional[int] = None, ) -> 'EarthEngineStore': if mode != 'r': raise ValueError( @@ -168,6 +170,8 @@ def open( ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, executor_kwargs=executor_kwargs, + compute_pixels_max_retries=compute_pixels_max_retries, + compute_pixels_initial_delay=compute_pixels_initial_delay, ) def __init__( @@ -186,6 +190,8 @@ def __init__( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, executor_kwargs: Optional[Dict[str, Any]] = None, + compute_pixels_max_retries: Optional[int] = None, + compute_pixels_initial_delay: Optional[int] = None, ): self.ee_init_kwargs = ee_init_kwargs self.ee_init_if_necessary = ee_init_if_necessary @@ -195,6 +201,17 @@ def __init__( executor_kwargs = {} self.executor_kwargs = executor_kwargs + # Here 6 & 500 is default value. + # (https://github.com/pydata/xarray/blob/main/xarray/backends/common.py#L181). + self.compute_pixels_max_retries = ( + 6 if compute_pixels_max_retries is None else compute_pixels_max_retries + ) + self.compute_pixels_initial_delay = ( + 500 + if compute_pixels_initial_delay is None + else compute_pixels_initial_delay + ) + self.image_collection = image_collection if n_images != -1: self.image_collection = image_collection.limit(n_images) @@ -482,7 +499,11 @@ def image_to_array( **kwargs, } raw = common.robust_getitem( - pixels_getter, params, catch=ee.ee_exception.EEException + pixels_getter, + params, + catch=ee.ee_exception.EEException, + max_retries=self.compute_pixels_max_retries, + initial_delay=self.compute_pixels_initial_delay, ) # Extract out the shape information from EE response. @@ -965,6 +986,8 @@ def open_dataset( ee_init_if_necessary: bool = False, ee_init_kwargs: Optional[Dict[str, Any]] = None, executor_kwargs: Optional[Dict[str, Any]] = None, + compute_pixels_max_retries: Optional[int] = None, + compute_pixels_initial_delay: Optional[int] = None, ) -> xarray.Dataset: # type: ignore """Open an Earth Engine ImageCollection as an Xarray Dataset. @@ -1037,6 +1060,10 @@ def open_dataset( executor_kwargs (optional): A dictionary of keyword arguments to pass to the ThreadPoolExecutor that handles the parallel computation of pixels i.e. {'max_workers': 2}. + compute_pixels_max_retries (optional): The maximum number of retry + attempts for calling ee.data.computePixels(). + compute_pixels_initial_delay (optional): The initial delay in milliseconds + before the first retry of calling ee.data.computePixels(). Returns: An xarray.Dataset that streams in remote data from Earth Engine. @@ -1067,6 +1094,8 @@ def open_dataset( ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, executor_kwargs=executor_kwargs, + compute_pixels_max_retries=compute_pixels_max_retries, + compute_pixels_initial_delay=compute_pixels_initial_delay, ) store_entrypoint = backends_store.StoreBackendEntrypoint() From 2853dc7204cece9cec51a6011f84e4f81c9b239a Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Tue, 12 Mar 2024 06:13:11 +0000 Subject: [PATCH 40/51] Lint error fixed. --- xee/ext.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xee/ext.py b/xee/ext.py index f5d4a97..c4906b2 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -1060,7 +1060,7 @@ def open_dataset( executor_kwargs (optional): A dictionary of keyword arguments to pass to the ThreadPoolExecutor that handles the parallel computation of pixels i.e. {'max_workers': 2}. - compute_pixels_max_retries (optional): The maximum number of retry + compute_pixels_max_retries (optional): The maximum number of retry attempts for calling ee.data.computePixels(). compute_pixels_initial_delay (optional): The initial delay in milliseconds before the first retry of calling ee.data.computePixels(). From 4f814e78d3eeee83d5c41a85d502596d2fc69893 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Tue, 12 Mar 2024 08:24:18 +0000 Subject: [PATCH 41/51] Declare retries as a default arguments. --- xee/ext.py | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index c4906b2..888fe20 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -147,8 +147,8 @@ def open( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, executor_kwargs: Optional[Dict[str, Any]] = None, - compute_pixels_max_retries: Optional[int] = None, - compute_pixels_initial_delay: Optional[int] = None, + compute_pixels_max_retries: int = 6, + compute_pixels_initial_delay: int = 500, ) -> 'EarthEngineStore': if mode != 'r': raise ValueError( @@ -190,8 +190,8 @@ def __init__( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, executor_kwargs: Optional[Dict[str, Any]] = None, - compute_pixels_max_retries: Optional[int] = None, - compute_pixels_initial_delay: Optional[int] = None, + compute_pixels_max_retries: int = 6, + compute_pixels_initial_delay: int = 500, ): self.ee_init_kwargs = ee_init_kwargs self.ee_init_if_necessary = ee_init_if_necessary @@ -201,16 +201,8 @@ def __init__( executor_kwargs = {} self.executor_kwargs = executor_kwargs - # Here 6 & 500 is default value. - # (https://github.com/pydata/xarray/blob/main/xarray/backends/common.py#L181). - self.compute_pixels_max_retries = ( - 6 if compute_pixels_max_retries is None else compute_pixels_max_retries - ) - self.compute_pixels_initial_delay = ( - 500 - if compute_pixels_initial_delay is None - else compute_pixels_initial_delay - ) + self.compute_pixels_max_retries = compute_pixels_max_retries + self.compute_pixels_initial_delay = compute_pixels_initial_delay self.image_collection = image_collection if n_images != -1: @@ -986,8 +978,8 @@ def open_dataset( ee_init_if_necessary: bool = False, ee_init_kwargs: Optional[Dict[str, Any]] = None, executor_kwargs: Optional[Dict[str, Any]] = None, - compute_pixels_max_retries: Optional[int] = None, - compute_pixels_initial_delay: Optional[int] = None, + compute_pixels_max_retries: int = 6, + compute_pixels_initial_delay: int = 500, ) -> xarray.Dataset: # type: ignore """Open an Earth Engine ImageCollection as an Xarray Dataset. @@ -1060,10 +1052,11 @@ def open_dataset( executor_kwargs (optional): A dictionary of keyword arguments to pass to the ThreadPoolExecutor that handles the parallel computation of pixels i.e. {'max_workers': 2}. - compute_pixels_max_retries (optional): The maximum number of retry - attempts for calling ee.data.computePixels(). - compute_pixels_initial_delay (optional): The initial delay in milliseconds - before the first retry of calling ee.data.computePixels(). + compute_pixels_max_retries (int): The maximum number of retry + attempts for calling ee.data.computePixels(). defaults to 6. + # (https://github.com/pydata/xarray/blob/main/xarray/backends/common.py#L181). + compute_pixels_initial_delay (int): The initial delay in milliseconds + before the first retry of calling ee.data.computePixels(). defaults to 500. Returns: An xarray.Dataset that streams in remote data from Earth Engine. From 9154d15667a2ed75d29a84df402ec744157c0b30 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Tue, 12 Mar 2024 09:00:52 +0000 Subject: [PATCH 42/51] nit changes done. --- xee/ext.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 888fe20..152769d 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -1053,10 +1053,10 @@ def open_dataset( the ThreadPoolExecutor that handles the parallel computation of pixels i.e. {'max_workers': 2}. compute_pixels_max_retries (int): The maximum number of retry - attempts for calling ee.data.computePixels(). defaults to 6. + attempts for calling ee.data.computePixels(). By default, it is 6. # (https://github.com/pydata/xarray/blob/main/xarray/backends/common.py#L181). compute_pixels_initial_delay (int): The initial delay in milliseconds - before the first retry of calling ee.data.computePixels(). defaults to 500. + before the first retry of calling ee.data.computePixels(). By default, it is 500. Returns: An xarray.Dataset that streams in remote data from Earth Engine. From 4d6385c3d7b61340a8cd305aac181b5b8ebfa6dc Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Thu, 14 Mar 2024 06:31:11 +0000 Subject: [PATCH 43/51] Tile_fetch_kwargs added. --- xee/ext.py | 46 +++++++++++++++++++++---------------- xee/ext_integration_test.py | 10 ++++++++ 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 152769d..aead28b 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -147,8 +147,10 @@ def open( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, executor_kwargs: Optional[Dict[str, Any]] = None, - compute_pixels_max_retries: int = 6, - compute_pixels_initial_delay: int = 500, + tile_fetch_kwargs: Dict[str, int] = { + 'max_retries': 6, + 'initial_delay': 500, + }, ) -> 'EarthEngineStore': if mode != 'r': raise ValueError( @@ -170,8 +172,7 @@ def open( ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, executor_kwargs=executor_kwargs, - compute_pixels_max_retries=compute_pixels_max_retries, - compute_pixels_initial_delay=compute_pixels_initial_delay, + tile_fetch_kwargs=tile_fetch_kwargs, ) def __init__( @@ -190,8 +191,10 @@ def __init__( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, executor_kwargs: Optional[Dict[str, Any]] = None, - compute_pixels_max_retries: int = 6, - compute_pixels_initial_delay: int = 500, + tile_fetch_kwargs: Dict[str, int] = { + 'max_retries': 6, + 'initial_delay': 500, + }, ): self.ee_init_kwargs = ee_init_kwargs self.ee_init_if_necessary = ee_init_if_necessary @@ -201,8 +204,8 @@ def __init__( executor_kwargs = {} self.executor_kwargs = executor_kwargs - self.compute_pixels_max_retries = compute_pixels_max_retries - self.compute_pixels_initial_delay = compute_pixels_initial_delay + self.tile_fetch_max_retries = tile_fetch_kwargs['max_retries'] + self.tile_fetch_initial_delay = tile_fetch_kwargs['initial_delay'] self.image_collection = image_collection if n_images != -1: @@ -494,8 +497,8 @@ def image_to_array( pixels_getter, params, catch=ee.ee_exception.EEException, - max_retries=self.compute_pixels_max_retries, - initial_delay=self.compute_pixels_initial_delay, + max_retries=self.tile_fetch_max_retries, + initial_delay=self.tile_fetch_initial_delay, ) # Extract out the shape information from EE response. @@ -978,8 +981,10 @@ def open_dataset( ee_init_if_necessary: bool = False, ee_init_kwargs: Optional[Dict[str, Any]] = None, executor_kwargs: Optional[Dict[str, Any]] = None, - compute_pixels_max_retries: int = 6, - compute_pixels_initial_delay: int = 500, + tile_fetch_kwargs: Dict[str, int] = { + 'max_retries': 6, + 'initial_delay': 500, + }, ) -> xarray.Dataset: # type: ignore """Open an Earth Engine ImageCollection as an Xarray Dataset. @@ -1052,12 +1057,14 @@ def open_dataset( executor_kwargs (optional): A dictionary of keyword arguments to pass to the ThreadPoolExecutor that handles the parallel computation of pixels i.e. {'max_workers': 2}. - compute_pixels_max_retries (int): The maximum number of retry - attempts for calling ee.data.computePixels(). By default, it is 6. - # (https://github.com/pydata/xarray/blob/main/xarray/backends/common.py#L181). - compute_pixels_initial_delay (int): The initial delay in milliseconds - before the first retry of calling ee.data.computePixels(). By default, it is 500. - + tile_fetch_kwargs (Dict): The necessary kwargs like `max_retries`, + `initial_delay` which helps while fetching data through calling + ee.data.computePixels(). i.e. {'max_retries' : 6, 'initial_delay': 500}. + - max_retries is maximum number of retry attempts for calling + ee.data.computePixels().By default, it is 6. + - initial_delay is the initial delay in milliseconds before the first + retry of calling ee.data.computePixels(). By default, it is 500. + (https://github.com/pydata/xarray/blob/main/xarray/backends/common.py#L181). Returns: An xarray.Dataset that streams in remote data from Earth Engine. """ @@ -1087,8 +1094,7 @@ def open_dataset( ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, executor_kwargs=executor_kwargs, - compute_pixels_max_retries=compute_pixels_max_retries, - compute_pixels_initial_delay=compute_pixels_initial_delay, + tile_fetch_kwargs=tile_fetch_kwargs, ) store_entrypoint = backends_store.StoreBackendEntrypoint() diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index ea821df..3ac2bd2 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -69,6 +69,7 @@ def setUp(self): '2017-01-01', '2017-01-03' ), n_images=64, + tile_fetch_kwargs={'max_retries': 10, 'initial_delay': 1500}, ) self.lnglat_store = xee.EarthEngineStore( ee.ImageCollection.fromImages([ee.Image.pixelLonLat()]), @@ -255,6 +256,15 @@ def __getitem__(self, params): self.assertEqual(getter.count, 3) + def test_tile_fetch_kwargs(self): + arr = xee.EarthEngineBackendArray('B4', self.store) + self.assertEqual(arr.store.tile_fetch_initial_delay, 1500) + self.assertEqual(arr.store.tile_fetch_max_retries, 10) + + arr1 = xee.EarthEngineBackendArray('longitude', self.lnglat_store) + self.assertEqual(arr1.store.tile_fetch_initial_delay, 500) + self.assertEqual(arr1.store.tile_fetch_max_retries, 6) + class EEBackendEntrypointTest(absltest.TestCase): From d91a9e48c5bc8e7eacb6954aa5503b3227bd7215 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Thu, 14 Mar 2024 07:14:20 +0000 Subject: [PATCH 44/51] Pop added for working on single value, too. --- xee/ext.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index aead28b..af4d429 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -204,8 +204,9 @@ def __init__( executor_kwargs = {} self.executor_kwargs = executor_kwargs - self.tile_fetch_max_retries = tile_fetch_kwargs['max_retries'] - self.tile_fetch_initial_delay = tile_fetch_kwargs['initial_delay'] + # Default value: (https://github.com/pydata/xarray/blob/main/xarray/backends/common.py#L181). + self.tile_fetch_max_retries = tile_fetch_kwargs.pop('max_retries', 6) + self.tile_fetch_initial_delay = tile_fetch_kwargs.pop('initial_delay', 500) self.image_collection = image_collection if n_images != -1: From 41b12e2a0de12d45df3440bd2485469b9d3348d6 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Thu, 14 Mar 2024 10:31:53 +0000 Subject: [PATCH 45/51] code updated after suggestions. --- xee/ext.py | 27 ++++++++++++++------------- xee/ext_integration_test.py | 12 ++++++++---- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index af4d429..51f36a6 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -102,6 +102,11 @@ class EarthEngineStore(common.AbstractDataStore): 'height': 256, } + TILE_FETCH_KWARGS: Dict[str, int] = { + 'max_retries': 6, + 'initial_delay': 500, + } + SCALE_UNITS: Dict[str, int] = { 'degree': 1, 'metre': 10_000, @@ -147,10 +152,7 @@ def open( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, executor_kwargs: Optional[Dict[str, Any]] = None, - tile_fetch_kwargs: Dict[str, int] = { - 'max_retries': 6, - 'initial_delay': 500, - }, + tile_fetch_kwargs: Dict[str, int] = TILE_FETCH_KWARGS, ) -> 'EarthEngineStore': if mode != 'r': raise ValueError( @@ -191,10 +193,7 @@ def __init__( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, executor_kwargs: Optional[Dict[str, Any]] = None, - tile_fetch_kwargs: Dict[str, int] = { - 'max_retries': 6, - 'initial_delay': 500, - }, + tile_fetch_kwargs: Dict[str, int] = TILE_FETCH_KWARGS, ): self.ee_init_kwargs = ee_init_kwargs self.ee_init_if_necessary = ee_init_if_necessary @@ -204,9 +203,7 @@ def __init__( executor_kwargs = {} self.executor_kwargs = executor_kwargs - # Default value: (https://github.com/pydata/xarray/blob/main/xarray/backends/common.py#L181). - self.tile_fetch_max_retries = tile_fetch_kwargs.pop('max_retries', 6) - self.tile_fetch_initial_delay = tile_fetch_kwargs.pop('initial_delay', 500) + self.tile_fetch_kwargs = tile_fetch_kwargs self.image_collection = image_collection if n_images != -1: @@ -498,8 +495,12 @@ def image_to_array( pixels_getter, params, catch=ee.ee_exception.EEException, - max_retries=self.tile_fetch_max_retries, - initial_delay=self.tile_fetch_initial_delay, + max_retries=self.tile_fetch_kwargs.get( + 'max_retries', self.TILE_FETCH_KWARGS.get('max_retries') + ), + initial_delay=self.tile_fetch_kwargs.get( + 'initial_delay', self.TILE_FETCH_KWARGS.get('initial_delay') + ), ) # Extract out the shape information from EE response. diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 3ac2bd2..5ad9378 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -81,6 +81,7 @@ def setUp(self): '2020-03-30', '2020-04-01' ), n_images=64, + tile_fetch_kwargs={'max_retries': 9}, ) self.all_img_store = xee.EarthEngineStore( ee.ImageCollection('LANDSAT/LC08/C01/T1').filterDate( @@ -258,13 +259,16 @@ def __getitem__(self, params): def test_tile_fetch_kwargs(self): arr = xee.EarthEngineBackendArray('B4', self.store) - self.assertEqual(arr.store.tile_fetch_initial_delay, 1500) - self.assertEqual(arr.store.tile_fetch_max_retries, 10) + self.assertEqual(arr.store.tile_fetch_kwargs['initial_delay'], 1500) + self.assertEqual(arr.store.tile_fetch_kwargs['max_retries'], 10) arr1 = xee.EarthEngineBackendArray('longitude', self.lnglat_store) - self.assertEqual(arr1.store.tile_fetch_initial_delay, 500) - self.assertEqual(arr1.store.tile_fetch_max_retries, 6) + self.assertEqual(arr1.store.tile_fetch_kwargs['initial_delay'], 500) + self.assertEqual(arr1.store.tile_fetch_kwargs['max_retries'], 6) + arr1 = xee.EarthEngineBackendArray('spi2y', self.conus_store) + self.assertNotIn('initial_delay', arr1.store.tile_fetch_kwargs) + self.assertEqual(arr1.store.tile_fetch_kwargs['max_retries'], 9) class EEBackendEntrypointTest(absltest.TestCase): From 4b5447ae786723845032373f909ce2478741b1db Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Thu, 14 Mar 2024 10:34:42 +0000 Subject: [PATCH 46/51] Lint changes done. --- xee/ext_integration_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 5ad9378..a94f524 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -270,6 +270,7 @@ def test_tile_fetch_kwargs(self): self.assertNotIn('initial_delay', arr1.store.tile_fetch_kwargs) self.assertEqual(arr1.store.tile_fetch_kwargs['max_retries'], 9) + class EEBackendEntrypointTest(absltest.TestCase): def setUp(self): From d9aa0c0403c10bb091cd9c7edd590185e40044b2 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Thu, 14 Mar 2024 11:54:12 +0000 Subject: [PATCH 47/51] keyword arguments changed to optional. --- xee/ext.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 51f36a6..9df545e 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -152,7 +152,7 @@ def open( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, executor_kwargs: Optional[Dict[str, Any]] = None, - tile_fetch_kwargs: Dict[str, int] = TILE_FETCH_KWARGS, + tile_fetch_kwargs: Optional[Dict[str, int]] = None, ) -> 'EarthEngineStore': if mode != 'r': raise ValueError( @@ -193,7 +193,7 @@ def __init__( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, executor_kwargs: Optional[Dict[str, Any]] = None, - tile_fetch_kwargs: Dict[str, int] = TILE_FETCH_KWARGS, + tile_fetch_kwargs: Optional[Dict[str, int]] = None, ): self.ee_init_kwargs = ee_init_kwargs self.ee_init_if_necessary = ee_init_if_necessary @@ -203,7 +203,11 @@ def __init__( executor_kwargs = {} self.executor_kwargs = executor_kwargs - self.tile_fetch_kwargs = tile_fetch_kwargs + self.tile_fetch_kwargs = ( + self.TILE_FETCH_KWARGS + if tile_fetch_kwargs is None + else tile_fetch_kwargs + ) self.image_collection = image_collection if n_images != -1: @@ -983,10 +987,7 @@ def open_dataset( ee_init_if_necessary: bool = False, ee_init_kwargs: Optional[Dict[str, Any]] = None, executor_kwargs: Optional[Dict[str, Any]] = None, - tile_fetch_kwargs: Dict[str, int] = { - 'max_retries': 6, - 'initial_delay': 500, - }, + tile_fetch_kwargs: Optional[Dict[str, int]] = None, ) -> xarray.Dataset: # type: ignore """Open an Earth Engine ImageCollection as an Xarray Dataset. @@ -1059,7 +1060,7 @@ def open_dataset( executor_kwargs (optional): A dictionary of keyword arguments to pass to the ThreadPoolExecutor that handles the parallel computation of pixels i.e. {'max_workers': 2}. - tile_fetch_kwargs (Dict): The necessary kwargs like `max_retries`, + tile_fetch_kwargs (optional): The necessary kwargs like `max_retries`, `initial_delay` which helps while fetching data through calling ee.data.computePixels(). i.e. {'max_retries' : 6, 'initial_delay': 500}. - max_retries is maximum number of retry attempts for calling From 99a3d4519e5be55003b9d33794c902f592effb67 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Fri, 15 Mar 2024 06:35:07 +0000 Subject: [PATCH 48/51] tile_fetch_kwargs is updated with getitem_kwargs. --- xee/ext.py | 32 ++++++++++++-------------------- xee/ext_integration_test.py | 20 ++++++++++---------- 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 9df545e..1414e80 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -102,7 +102,7 @@ class EarthEngineStore(common.AbstractDataStore): 'height': 256, } - TILE_FETCH_KWARGS: Dict[str, int] = { + GETITEM_KWARGS: Dict[str, int] = { 'max_retries': 6, 'initial_delay': 500, } @@ -152,7 +152,7 @@ def open( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, executor_kwargs: Optional[Dict[str, Any]] = None, - tile_fetch_kwargs: Optional[Dict[str, int]] = None, + getitem_kwargs: Optional[Dict[str, int]] = None, ) -> 'EarthEngineStore': if mode != 'r': raise ValueError( @@ -174,7 +174,7 @@ def open( ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, executor_kwargs=executor_kwargs, - tile_fetch_kwargs=tile_fetch_kwargs, + getitem_kwargs=getitem_kwargs, ) def __init__( @@ -193,7 +193,7 @@ def __init__( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, executor_kwargs: Optional[Dict[str, Any]] = None, - tile_fetch_kwargs: Optional[Dict[str, int]] = None, + getitem_kwargs: Optional[Dict[str, int]] = None, ): self.ee_init_kwargs = ee_init_kwargs self.ee_init_if_necessary = ee_init_if_necessary @@ -203,11 +203,7 @@ def __init__( executor_kwargs = {} self.executor_kwargs = executor_kwargs - self.tile_fetch_kwargs = ( - self.TILE_FETCH_KWARGS - if tile_fetch_kwargs is None - else tile_fetch_kwargs - ) + self.getitem_kwargs = {**self.GETITEM_KWARGS, **(getitem_kwargs or {})} self.image_collection = image_collection if n_images != -1: @@ -499,12 +495,8 @@ def image_to_array( pixels_getter, params, catch=ee.ee_exception.EEException, - max_retries=self.tile_fetch_kwargs.get( - 'max_retries', self.TILE_FETCH_KWARGS.get('max_retries') - ), - initial_delay=self.tile_fetch_kwargs.get( - 'initial_delay', self.TILE_FETCH_KWARGS.get('initial_delay') - ), + max_retries=self.getitem_kwargs['max_retries'], + initial_delay=self.getitem_kwargs['initial_delay'], ) # Extract out the shape information from EE response. @@ -987,7 +979,7 @@ def open_dataset( ee_init_if_necessary: bool = False, ee_init_kwargs: Optional[Dict[str, Any]] = None, executor_kwargs: Optional[Dict[str, Any]] = None, - tile_fetch_kwargs: Optional[Dict[str, int]] = None, + getitem_kwargs: Optional[Dict[str, int]] = None, ) -> xarray.Dataset: # type: ignore """Open an Earth Engine ImageCollection as an Xarray Dataset. @@ -1060,9 +1052,9 @@ def open_dataset( executor_kwargs (optional): A dictionary of keyword arguments to pass to the ThreadPoolExecutor that handles the parallel computation of pixels i.e. {'max_workers': 2}. - tile_fetch_kwargs (optional): The necessary kwargs like `max_retries`, - `initial_delay` which helps while fetching data through calling - ee.data.computePixels(). i.e. {'max_retries' : 6, 'initial_delay': 500}. + getitem_kwargs (optional): Exponential backoff kwargs passed into + the xarray function to index the array (`robust_getitem`). + i.e. {'max_retries' : 6, 'initial_delay': 500}. - max_retries is maximum number of retry attempts for calling ee.data.computePixels().By default, it is 6. - initial_delay is the initial delay in milliseconds before the first @@ -1097,7 +1089,7 @@ def open_dataset( ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, executor_kwargs=executor_kwargs, - tile_fetch_kwargs=tile_fetch_kwargs, + getitem_kwargs=getitem_kwargs, ) store_entrypoint = backends_store.StoreBackendEntrypoint() diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index a94f524..771ab78 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -69,7 +69,7 @@ def setUp(self): '2017-01-01', '2017-01-03' ), n_images=64, - tile_fetch_kwargs={'max_retries': 10, 'initial_delay': 1500}, + getitem_kwargs={'max_retries': 10, 'initial_delay': 1500}, ) self.lnglat_store = xee.EarthEngineStore( ee.ImageCollection.fromImages([ee.Image.pixelLonLat()]), @@ -81,7 +81,7 @@ def setUp(self): '2020-03-30', '2020-04-01' ), n_images=64, - tile_fetch_kwargs={'max_retries': 9}, + getitem_kwargs={'max_retries': 9}, ) self.all_img_store = xee.EarthEngineStore( ee.ImageCollection('LANDSAT/LC08/C01/T1').filterDate( @@ -257,18 +257,18 @@ def __getitem__(self, params): self.assertEqual(getter.count, 3) - def test_tile_fetch_kwargs(self): + def test_getitem_kwargs(self): arr = xee.EarthEngineBackendArray('B4', self.store) - self.assertEqual(arr.store.tile_fetch_kwargs['initial_delay'], 1500) - self.assertEqual(arr.store.tile_fetch_kwargs['max_retries'], 10) + self.assertEqual(arr.store.getitem_kwargs['initial_delay'], 1500) + self.assertEqual(arr.store.getitem_kwargs['max_retries'], 10) arr1 = xee.EarthEngineBackendArray('longitude', self.lnglat_store) - self.assertEqual(arr1.store.tile_fetch_kwargs['initial_delay'], 500) - self.assertEqual(arr1.store.tile_fetch_kwargs['max_retries'], 6) + self.assertEqual(arr1.store.getitem_kwargs['initial_delay'], 500) + self.assertEqual(arr1.store.getitem_kwargs['max_retries'], 6) - arr1 = xee.EarthEngineBackendArray('spi2y', self.conus_store) - self.assertNotIn('initial_delay', arr1.store.tile_fetch_kwargs) - self.assertEqual(arr1.store.tile_fetch_kwargs['max_retries'], 9) + arr2 = xee.EarthEngineBackendArray('spi2y', self.conus_store) + self.assertEqual(arr2.store.getitem_kwargs['initial_delay'], 500) + self.assertEqual(arr2.store.getitem_kwargs['max_retries'], 9) class EEBackendEntrypointTest(absltest.TestCase): From e3edcec8b25eb466dfe8acab5469728362fa2d67 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Fri, 15 Mar 2024 14:48:22 +0000 Subject: [PATCH 49/51] Nit changes done. --- xee/ext.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 1414e80..00b3dbf 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -495,8 +495,7 @@ def image_to_array( pixels_getter, params, catch=ee.ee_exception.EEException, - max_retries=self.getitem_kwargs['max_retries'], - initial_delay=self.getitem_kwargs['initial_delay'], + **self.getitem_kwargs, ) # Extract out the shape information from EE response. @@ -1055,10 +1054,9 @@ def open_dataset( getitem_kwargs (optional): Exponential backoff kwargs passed into the xarray function to index the array (`robust_getitem`). i.e. {'max_retries' : 6, 'initial_delay': 500}. - - max_retries is maximum number of retry attempts for calling - ee.data.computePixels().By default, it is 6. - - initial_delay is the initial delay in milliseconds before the first - retry of calling ee.data.computePixels(). By default, it is 500. + - max_retries, the maximum number of retry attempts.By default, it is 6. + - initial_delay, the initial delay in milliseconds before the first + retry. By default, it is 500. (https://github.com/pydata/xarray/blob/main/xarray/backends/common.py#L181). Returns: An xarray.Dataset that streams in remote data from Earth Engine. From f13b6a2e01aac8f6e6799c6f4d01c710b9fa4664 Mon Sep 17 00:00:00 2001 From: Nathaniel Schmitz Date: Fri, 15 Mar 2024 15:10:32 +0000 Subject: [PATCH 50/51] Update getitem_kwargs documentation comment. --- xee/ext.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 00b3dbf..51976bc 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -1051,13 +1051,11 @@ def open_dataset( executor_kwargs (optional): A dictionary of keyword arguments to pass to the ThreadPoolExecutor that handles the parallel computation of pixels i.e. {'max_workers': 2}. - getitem_kwargs (optional): Exponential backoff kwargs passed into - the xarray function to index the array (`robust_getitem`). - i.e. {'max_retries' : 6, 'initial_delay': 500}. - - max_retries, the maximum number of retry attempts.By default, it is 6. - - initial_delay, the initial delay in milliseconds before the first - retry. By default, it is 500. - (https://github.com/pydata/xarray/blob/main/xarray/backends/common.py#L181). + getitem_kwargs (optional): Exponential backoff kwargs passed into the + xarray function to index the array (`robust_getitem`). + - 'max_retries', the maximum number of retry attempts. Defaults to 6. + - 'initial_delay', the initial delay in milliseconds before the first + retry. Defaults to 500. Returns: An xarray.Dataset that streams in remote data from Earth Engine. """ From b640d9589227cb6db0b0c5517e4ced73051f494c Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Sat, 16 Mar 2024 15:21:26 +0000 Subject: [PATCH 51/51] nits + pull out logic + typing --- xee/ext.py | 78 ++++++++++++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 41 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index c1a9055..cb7d635 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -139,9 +139,7 @@ def open( crs: Optional[str] = None, scale: Optional[float] = None, projection: Optional[ee.Projection] = None, - geometry: Union[ - ee.Geometry, Tuple[float, float, float, float], None - ] = None, + geometry: ee.Geometry | Tuple[float, float, float, float] | None = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, mask_value: Optional[float] = None, @@ -178,9 +176,7 @@ def __init__( crs: Optional[str] = None, scale: Union[float, int, None] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[ - Union[ee.Geometry, Tuple[float, float, float, float]] - ] = None, + geometry: ee.Geometry | Tuple[float, float, float, float] | None = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, mask_value: Optional[float] = None, @@ -232,38 +228,7 @@ def __init__( self.scale_x, self.scale_y = transform.a, transform.e self.scale = np.sqrt(np.abs(transform.determinant)) - # Parse the dataset bounds from the native projection (either from the CRS - # or the image geometry) and translate it to the representation that will be - # used for all internal `computePixels()` calls. - if geometry is None: - try: - x_min_0, y_min_0, x_max_0, y_max_0 = self.crs.area_of_use.bounds - except AttributeError: - # `area_of_use` is probable `None`. Parse the geometry from the first - # image instead (calculated in self.get_info()) - x_min_0, y_min_0, x_max_0, y_max_0 = _ee_bounds_to_bounds( - self.get_info['bounds'] - ) - elif isinstance(geometry, ee.Geometry): - x_min_0, y_min_0, x_max_0, y_max_0 = _ee_bounds_to_bounds( - self.get_info['bounds'] - ) - elif isinstance(geometry, Union[List, Tuple]): - if len(geometry) != 4: - raise ValueError( - 'geometry must be a tuple or list of length 4, or a ee.Geometry, ' - f'but got {geometry!r}' - ) - x_min_0, y_min_0, x_max_0, y_max_0 = geometry - else: - raise ValueError( - 'geometry must be a tuple or list of length 4, a ee.Geometry, or' - f' None but got {type(geometry)}' - ) - - x_min, y_min = self.transform(x_min_0, y_min_0) - x_max, y_max = self.transform(x_max_0, y_max_0) - self.bounds = x_min, y_min, x_max, y_max + self.bounds = self._determine_bounds(geometry=geometry) max_dtype = self._max_itemsize() @@ -626,6 +591,39 @@ def _process_coordinate_data( tiles[i] = arr.flatten() return np.concatenate(tiles) + def _determine_bounds( + self, + geometry: ee.Geometry | Tuple[float, float, float, float] | None = None, + ) -> Tuple[float, float, float, float]: + if geometry is None: + try: + x_min_0, y_min_0, x_max_0, y_max_0 = self.crs.area_of_use.bounds + except AttributeError: + # `area_of_use` is probably `None`. Parse the geometry from the first + # image instead (calculated in self.get_info()) + x_min_0, y_min_0, x_max_0, y_max_0 = _ee_bounds_to_bounds( + self.get_info['bounds'] + ) + elif isinstance(geometry, ee.Geometry): + x_min_0, y_min_0, x_max_0, y_max_0 = _ee_bounds_to_bounds( + self.get_info['bounds'] + ) + elif isinstance(geometry, Sequence): + if len(geometry) != 4: + raise ValueError( + 'geometry must be a tuple or list of length 4, or a ee.Geometry, ' + f'but got {geometry!r}' + ) + x_min_0, y_min_0, x_max_0, y_max_0 = geometry + else: + raise ValueError( + 'geometry must be a tuple or list of length 4, a ee.Geometry, or' + f' None but got {type(geometry)}' + ) + x_min, y_min = self.transform(x_min_0, y_min_0) + x_max, y_max = self.transform(x_max_0, y_max_0) + return x_min, y_min, x_max, y_max + def get_variables(self) -> utils.Frozen[str, xarray.Variable]: vars_ = [(name, self.open_store_variable(name)) for name in self._bands()] @@ -964,9 +962,7 @@ def open_dataset( crs: Optional[str] = None, scale: Union[float, int, None] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[ - Union[ee.Geometry, Tuple[float, float, float, float]] - ] = None, + geometry: ee.Geometry | Tuple[float, float, float, float] | None = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, ee_mask_value: Optional[float] = None,