From 0b78180b3077028a38937ce32853de98eec57530 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Tue, 13 Feb 2024 02:57:42 +0000 Subject: [PATCH 01/21] add option for using double precision coords --- xee/ext.py | 14 ++++++++-- xee/ext_integration_test.py | 54 +++++++++++++++++++++++++++++++------ 2 files changed, 58 insertions(+), 10 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 3929b54..260ccfe 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -146,6 +146,7 @@ def open( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, + use_coords_double_precision: bool = False ) -> 'EarthEngineStore': if mode != 'r': raise ValueError( @@ -166,6 +167,7 @@ def open( request_byte_limit=request_byte_limit, ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, + use_coords_double_precision=use_coords_double_precision ) def __init__( @@ -183,6 +185,7 @@ def __init__( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, + use_coords_double_precision: bool = False ): self.ee_init_kwargs = ee_init_kwargs self.ee_init_if_necessary = ee_init_if_necessary @@ -195,6 +198,7 @@ def __init__( self.geometry = geometry self.primary_dim_name = primary_dim_name or 'time' self.primary_dim_property = primary_dim_property or 'system:time_start' + self.use_coords_double_precision = use_coords_double_precision self.n_images = self.get_info['size'] self._props = self.get_info['props'] @@ -581,8 +585,9 @@ def _get_tile_from_ee( else (0, tile_coords_start, 1, tile_coords_end) ) target_image = ee.Image.pixelCoordinates(ee.Projection(self.crs_arg)) + dtype = np.float64 if self.use_coords_double_precision else np.float32 return tile_index, self.image_to_array( - target_image, grid=bbox, dtype=np.float32, bandIds=[band_id] + target_image, grid=bbox, dtype=dtype, bandIds=[band_id] ) def _process_coordinate_data( @@ -689,7 +694,7 @@ def _parse_dtype(data_type: types.DataType): def _ee_bounds_to_bounds(bounds: ee.Bounds) -> types.Bounds: - coords = np.array(bounds['coordinates'], dtype=np.float32)[0] + coords = np.array(bounds['coordinates'], dtype=np.float64)[0] x_min, y_min, x_max, y_max = ( min(coords[:, 0]), min(coords[:, 1]), @@ -951,6 +956,7 @@ def open_dataset( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_if_necessary: bool = False, ee_init_kwargs: Optional[Dict[str, Any]] = None, + use_coords_double_precision: bool = False, ) -> xarray.Dataset: # type: ignore """Open an Earth Engine ImageCollection as an Xarray Dataset. @@ -1020,6 +1026,9 @@ def open_dataset( frameworks. ee_init_kwargs: keywords to pass to Earth Engine Initialize when attempting to auto init for remote workers. + use_coords_double_precision: Whether to use double precision for coordinates + and bounds from provided geometry. False by default, but True may be + helpful when hoping to match a transform of an existing dataset. Returns: An xarray.Dataset that streams in remote data from Earth Engine. @@ -1049,6 +1058,7 @@ def open_dataset( request_byte_limit=request_byte_limit, ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, + use_coords_double_precision=use_coords_double_precision ) store_entrypoint = backends_store.StoreBackendEntrypoint() diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index d1eafc0..9eff73b 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -19,7 +19,7 @@ import tempfile from absl.testing import absltest -from google.auth import identity_pool +import google.auth import numpy as np import xarray as xr from xarray.core import indexing @@ -41,17 +41,14 @@ ] -def _read_identity_pool_creds() -> identity_pool.Credentials: - credentials_path = os.environ[_CREDENTIALS_PATH_KEY] - with open(credentials_path) as file: - json_file = json.load(file) - credentials = identity_pool.Credentials.from_info(json_file) - return credentials.with_scopes(_SCOPES) +def _read_default_creds(): + credentials, _ = google.auth.default(scopes=_SCOPES) + return credentials def init_ee_for_tests(): ee.Initialize( - credentials=_read_identity_pool_creds(), + credentials=_read_default_creds(), opt_url=ee.data.HIGH_VOLUME_API_BASE_URL, ) @@ -358,6 +355,47 @@ def test_honors_projection(self): self.assertEqual(ds.dims, {'time': 4248, 'lon': 3600, 'lat': 1800}) self.assertNotEqual(ds.dims, standard_ds.dims) + @absltest.skipIf(_SKIP_RASTERIO_TESTS, 'rioxarray module not loaded') + def test_honors_transform_precisely(self): + data = np.empty((162, 120), dtype=np.float32) + # An example of a double precision bbox + bbox = ( + -53.94158617595226, + -12.078281822698678, + -53.67209159071253, + -11.714464132625046, + ) + x_res = (bbox[2] - bbox[0]) / data.shape[1] + y_res = (bbox[3] - bbox[1]) / data.shape[0] + raster = xr.DataArray( + data, + coords={ + 'y': np.linspace(bbox[3], bbox[1] + x_res, data.shape[0]), + 'x': np.linspace(bbox[0], bbox[2] - y_res, data.shape[1]), + }, + dims=('y', 'x'), + ) + + geo = ee.Geometry.Rectangle(*raster.rio.bounds()) + ic = ( + ee.ImageCollection('UCSB-CHG/CHIRPS/DAILY') + .filterDate(ee.DateRange('2014-01-01', '2014-01-02')) + .select('precipitation') + ) + xee_dataset = xr.open_dataset( + ee.ImageCollection(ic), + engine='ee', + geometry=geo, + scale=raster.rio.resolution()[0], + crs='EPSG:4326', + use_coords_double_precision=True, + ).rename({'lon': 'x', 'lat': 'y'}) + np.testing.assert_almost_equal( + np.array(xee_dataset.rio.transform()), + np.array(raster.rio.transform()), + decimal=13, + ) + def test_parses_ee_url(self): ds = self.entry.open_dataset( 'ee://LANDSAT/LC08/C01/T1', From 6983737a56092ab68b8db4ed65e60ad68dda65c4 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Tue, 13 Feb 2024 03:53:20 +0000 Subject: [PATCH 02/21] add match xarray support --- xee/ext.py | 34 ++++++++++++++++++++++++++----- xee/ext_integration_test.py | 40 +++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 5 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 260ccfe..b8bc0b5 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -40,6 +40,7 @@ from xarray.backends import store as backends_store from xarray.core import indexing from xarray.core import utils +import xarray as xr from xee import types import ee @@ -146,7 +147,9 @@ def open( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, - use_coords_double_precision: bool = False + use_coords_double_precision: bool = False, + match_xarray: xarray.DataArray | xarray.Dataset | None = None + ) -> 'EarthEngineStore': if mode != 'r': raise ValueError( @@ -167,7 +170,8 @@ def open( request_byte_limit=request_byte_limit, ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, - use_coords_double_precision=use_coords_double_precision + use_coords_double_precision=use_coords_double_precision, + match_xarray=match_xarray ) def __init__( @@ -185,7 +189,8 @@ def __init__( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, - use_coords_double_precision: bool = False + use_coords_double_precision: bool = False, + match_xarray: xr.DataArray | xr.Dataset | None = None ): self.ee_init_kwargs = ee_init_kwargs self.ee_init_if_necessary = ee_init_if_necessary @@ -209,12 +214,20 @@ def __init__( self.crs_arg = crs or proj.get('crs', proj.get('wkt', 'EPSG:4326')) self.crs = CRS(self.crs_arg) + if match_xarray is not None: + if match_xarray.rio.crs is None: + raise ValueError("If matching to xarray, we require `.rio.crs` is set.") + self.crs = CRS(match_xarray.rio.crs) + if match_xarray[match_xarray.rio.x_dim].dtype == np.float64: + self.use_coords_double_precision = True # Gets the unit i.e. meter, degree etc. self.scale_units = self.crs.axis_info[0].unit_name # Get the dimensions name based on the CRS (scale units). self.dimension_names = self.DIMENSION_NAMES.get( self.scale_units, ('X', 'Y') ) + if match_xarray is not None: + self.dimension_names = (match_xarray.rio.x_dim, match_xarray.rio.y_dim) x_dim_name, y_dim_name = self.dimension_names self._props.update( coordinates=f'{self.primary_dim_name} {x_dim_name} {y_dim_name}', @@ -227,11 +240,14 @@ def __init__( if scale is None: scale = default_scale default_transform = affine.Affine.scale(scale, -1 * scale) - transform = affine.Affine(*proj.get('transform', default_transform)[:6]) self.scale_x, self.scale_y = transform.a, transform.e self.scale = np.sqrt(np.abs(transform.determinant)) + if match_xarray is not None: + self.scale_x, self.scale_y = match_xarray.rio.resolution() + self.scale = np.sqrt(np.abs(self.scale_x * self.scale_y)) + # Parse the dataset bounds from the native projection (either from the CRS # or the image geometry) and translate it to the representation that will be # used for all internal `computePixels()` calls. @@ -252,6 +268,8 @@ def __init__( x_min, y_min = self.transform(x_min_0, y_min_0) x_max, y_max = self.transform(x_max_0, y_max_0) self.bounds = x_min, y_min, x_max, y_max + if match_xarray is not None: + self.bounds = match_xarray.rio.bounds() max_dtype = self._max_itemsize() @@ -957,6 +975,7 @@ def open_dataset( ee_init_if_necessary: bool = False, ee_init_kwargs: Optional[Dict[str, Any]] = None, use_coords_double_precision: bool = False, + match_xarray: xarray.DataArray | xarray.Dataset | None = None, ) -> xarray.Dataset: # type: ignore """Open an Earth Engine ImageCollection as an Xarray Dataset. @@ -1029,6 +1048,10 @@ def open_dataset( use_coords_double_precision: Whether to use double precision for coordinates and bounds from provided geometry. False by default, but True may be helpful when hoping to match a transform of an existing dataset. + match_xarray: An xarray.DataArray or xarray.Dataset to use as a template + with rioxarray-based schema to extract the crs and transform to specify + the spatial extent and crs of the output dataset. Using this arg requires + that rioxarray is installed. Returns: An xarray.Dataset that streams in remote data from Earth Engine. @@ -1058,7 +1081,8 @@ def open_dataset( request_byte_limit=request_byte_limit, ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, - use_coords_double_precision=use_coords_double_precision + use_coords_double_precision=use_coords_double_precision, + match_xarray=match_xarray ) store_entrypoint = backends_store.StoreBackendEntrypoint() diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 9eff73b..41fed4b 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -390,11 +390,51 @@ def test_honors_transform_precisely(self): crs='EPSG:4326', use_coords_double_precision=True, ).rename({'lon': 'x', 'lat': 'y'}) + # This is off slightly due to bounds determined by geometry e.g. .getInfo() + # seems to cause a super slight shift in the bounds. Thhe coords change before + # and after the call to .getInfo()! np.testing.assert_almost_equal( np.array(xee_dataset.rio.transform()), np.array(raster.rio.transform()), decimal=13, ) + + @absltest.skipIf(_SKIP_RASTERIO_TESTS, 'rioxarray module not loaded') + def test_match_xarray(self): + data = np.empty((162, 120), dtype=np.float32) + # An example of a double precision bbox + bbox = ( + -53.94158617595226, + -12.078281822698678, + -53.67209159071253, + -11.714464132625046, + ) + x_res = (bbox[2] - bbox[0]) / data.shape[1] + y_res = (bbox[3] - bbox[1]) / data.shape[0] + raster = xr.DataArray( + data, + coords={ + 'y': np.linspace(bbox[3], bbox[1] + x_res, data.shape[0]), + 'x': np.linspace(bbox[0], bbox[2] - y_res, data.shape[1]), + }, + dims=('y', 'x'), + ) + raster.rio.write_crs('EPSG:4326', inplace=True) + ic = ( + ee.ImageCollection('UCSB-CHG/CHIRPS/DAILY') + .filterDate(ee.DateRange('2014-01-01', '2014-01-02')) + .select('precipitation') + ) + xee_dataset = xr.open_dataset( + ee.ImageCollection(ic), + engine='ee', + scale=raster.rio.resolution()[0], + match_xarray=raster, + ) + np.testing.assert_equal( + np.array(xee_dataset.rio.transform()), + np.array(raster.rio.transform()), + ) def test_parses_ee_url(self): ds = self.entry.open_dataset( From 7efe99233f52d916e75f3fe7a0c990aeb039106c Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Tue, 13 Feb 2024 04:05:43 +0000 Subject: [PATCH 03/21] run pyint --- xee/ext.py | 13 ++++++------- xee/ext_integration_test.py | 4 ++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index b8bc0b5..3afe2c7 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -148,8 +148,7 @@ def open( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, use_coords_double_precision: bool = False, - match_xarray: xarray.DataArray | xarray.Dataset | None = None - + match_xarray: xarray.DataArray | xarray.Dataset | None = None, ) -> 'EarthEngineStore': if mode != 'r': raise ValueError( @@ -171,7 +170,7 @@ def open( ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, use_coords_double_precision=use_coords_double_precision, - match_xarray=match_xarray + match_xarray=match_xarray, ) def __init__( @@ -190,7 +189,7 @@ def __init__( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, use_coords_double_precision: bool = False, - match_xarray: xr.DataArray | xr.Dataset | None = None + match_xarray: xr.DataArray | xr.Dataset | None = None, ): self.ee_init_kwargs = ee_init_kwargs self.ee_init_if_necessary = ee_init_if_necessary @@ -216,7 +215,7 @@ def __init__( self.crs = CRS(self.crs_arg) if match_xarray is not None: if match_xarray.rio.crs is None: - raise ValueError("If matching to xarray, we require `.rio.crs` is set.") + raise ValueError('If matching to xarray, we require `.rio.crs` is set.') self.crs = CRS(match_xarray.rio.crs) if match_xarray[match_xarray.rio.x_dim].dtype == np.float64: self.use_coords_double_precision = True @@ -1046,7 +1045,7 @@ def open_dataset( ee_init_kwargs: keywords to pass to Earth Engine Initialize when attempting to auto init for remote workers. use_coords_double_precision: Whether to use double precision for coordinates - and bounds from provided geometry. False by default, but True may be + and bounds from provided geometry. False by default, but True may be helpful when hoping to match a transform of an existing dataset. match_xarray: An xarray.DataArray or xarray.Dataset to use as a template with rioxarray-based schema to extract the crs and transform to specify @@ -1082,7 +1081,7 @@ def open_dataset( ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, use_coords_double_precision=use_coords_double_precision, - match_xarray=match_xarray + match_xarray=match_xarray, ) store_entrypoint = backends_store.StoreBackendEntrypoint() diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 41fed4b..a24bb61 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -390,7 +390,7 @@ def test_honors_transform_precisely(self): crs='EPSG:4326', use_coords_double_precision=True, ).rename({'lon': 'x', 'lat': 'y'}) - # This is off slightly due to bounds determined by geometry e.g. .getInfo() + # This is off slightly due to bounds determined by geometry e.g. .getInfo() # seems to cause a super slight shift in the bounds. Thhe coords change before # and after the call to .getInfo()! np.testing.assert_almost_equal( @@ -398,7 +398,7 @@ def test_honors_transform_precisely(self): np.array(raster.rio.transform()), decimal=13, ) - + @absltest.skipIf(_SKIP_RASTERIO_TESTS, 'rioxarray module not loaded') def test_match_xarray(self): data = np.empty((162, 120), dtype=np.float32) From e891f0aefb984f4dac2bc84d4dc141fe6da4da34 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Wed, 14 Feb 2024 18:51:14 +0000 Subject: [PATCH 04/21] revert any credential changes --- xee/ext_integration_test.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index a24bb61..fdb10f7 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -19,7 +19,7 @@ import tempfile from absl.testing import absltest -import google.auth +from google.auth import identity_pool import numpy as np import xarray as xr from xarray.core import indexing @@ -41,16 +41,19 @@ ] -def _read_default_creds(): - credentials, _ = google.auth.default(scopes=_SCOPES) - return credentials +def _read_identity_pool_creds() -> identity_pool.Credentials: + credentials_path = os.environ[_CREDENTIALS_PATH_KEY] + with open(credentials_path) as file: + json_file = json.load(file) + credentials = identity_pool.Credentials.from_info(json_file) + return credentials.with_scopes(_SCOPES) def init_ee_for_tests(): - ee.Initialize( - credentials=_read_default_creds(), - opt_url=ee.data.HIGH_VOLUME_API_BASE_URL, - ) + ee.Initialize( + credentials=_read_identity_pool_creds(), + opt_url=ee.data.HIGH_VOLUME_API_BASE_URL, + ) class EEBackendArrayTest(absltest.TestCase): From 5e19d99298634d9a590d62021c7687f62ec7a60c Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Wed, 14 Feb 2024 18:52:29 +0000 Subject: [PATCH 05/21] Revert "run pyint" This reverts commit 7efe99233f52d916e75f3fe7a0c990aeb039106c. --- xee/ext.py | 13 +++++++------ xee/ext_integration_test.py | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 3afe2c7..b8bc0b5 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -148,7 +148,8 @@ def open( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, use_coords_double_precision: bool = False, - match_xarray: xarray.DataArray | xarray.Dataset | None = None, + match_xarray: xarray.DataArray | xarray.Dataset | None = None + ) -> 'EarthEngineStore': if mode != 'r': raise ValueError( @@ -170,7 +171,7 @@ def open( ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, use_coords_double_precision=use_coords_double_precision, - match_xarray=match_xarray, + match_xarray=match_xarray ) def __init__( @@ -189,7 +190,7 @@ def __init__( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, use_coords_double_precision: bool = False, - match_xarray: xr.DataArray | xr.Dataset | None = None, + match_xarray: xr.DataArray | xr.Dataset | None = None ): self.ee_init_kwargs = ee_init_kwargs self.ee_init_if_necessary = ee_init_if_necessary @@ -215,7 +216,7 @@ def __init__( self.crs = CRS(self.crs_arg) if match_xarray is not None: if match_xarray.rio.crs is None: - raise ValueError('If matching to xarray, we require `.rio.crs` is set.') + raise ValueError("If matching to xarray, we require `.rio.crs` is set.") self.crs = CRS(match_xarray.rio.crs) if match_xarray[match_xarray.rio.x_dim].dtype == np.float64: self.use_coords_double_precision = True @@ -1045,7 +1046,7 @@ def open_dataset( ee_init_kwargs: keywords to pass to Earth Engine Initialize when attempting to auto init for remote workers. use_coords_double_precision: Whether to use double precision for coordinates - and bounds from provided geometry. False by default, but True may be + and bounds from provided geometry. False by default, but True may be helpful when hoping to match a transform of an existing dataset. match_xarray: An xarray.DataArray or xarray.Dataset to use as a template with rioxarray-based schema to extract the crs and transform to specify @@ -1081,7 +1082,7 @@ def open_dataset( ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, use_coords_double_precision=use_coords_double_precision, - match_xarray=match_xarray, + match_xarray=match_xarray ) store_entrypoint = backends_store.StoreBackendEntrypoint() diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index fdb10f7..12bb9bf 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -393,7 +393,7 @@ def test_honors_transform_precisely(self): crs='EPSG:4326', use_coords_double_precision=True, ).rename({'lon': 'x', 'lat': 'y'}) - # This is off slightly due to bounds determined by geometry e.g. .getInfo() + # This is off slightly due to bounds determined by geometry e.g. .getInfo() # seems to cause a super slight shift in the bounds. Thhe coords change before # and after the call to .getInfo()! np.testing.assert_almost_equal( @@ -401,7 +401,7 @@ def test_honors_transform_precisely(self): np.array(raster.rio.transform()), decimal=13, ) - + @absltest.skipIf(_SKIP_RASTERIO_TESTS, 'rioxarray module not loaded') def test_match_xarray(self): data = np.empty((162, 120), dtype=np.float32) From f83c749ed4f33ba7f3b3133db3d8019ecc3f9bfb Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Tue, 13 Feb 2024 04:05:43 +0000 Subject: [PATCH 06/21] run pyint --- xee/ext.py | 13 ++++++------- xee/ext_integration_test.py | 4 ++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index b8bc0b5..3afe2c7 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -148,8 +148,7 @@ def open( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, use_coords_double_precision: bool = False, - match_xarray: xarray.DataArray | xarray.Dataset | None = None - + match_xarray: xarray.DataArray | xarray.Dataset | None = None, ) -> 'EarthEngineStore': if mode != 'r': raise ValueError( @@ -171,7 +170,7 @@ def open( ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, use_coords_double_precision=use_coords_double_precision, - match_xarray=match_xarray + match_xarray=match_xarray, ) def __init__( @@ -190,7 +189,7 @@ def __init__( ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, use_coords_double_precision: bool = False, - match_xarray: xr.DataArray | xr.Dataset | None = None + match_xarray: xr.DataArray | xr.Dataset | None = None, ): self.ee_init_kwargs = ee_init_kwargs self.ee_init_if_necessary = ee_init_if_necessary @@ -216,7 +215,7 @@ def __init__( self.crs = CRS(self.crs_arg) if match_xarray is not None: if match_xarray.rio.crs is None: - raise ValueError("If matching to xarray, we require `.rio.crs` is set.") + raise ValueError('If matching to xarray, we require `.rio.crs` is set.') self.crs = CRS(match_xarray.rio.crs) if match_xarray[match_xarray.rio.x_dim].dtype == np.float64: self.use_coords_double_precision = True @@ -1046,7 +1045,7 @@ def open_dataset( ee_init_kwargs: keywords to pass to Earth Engine Initialize when attempting to auto init for remote workers. use_coords_double_precision: Whether to use double precision for coordinates - and bounds from provided geometry. False by default, but True may be + and bounds from provided geometry. False by default, but True may be helpful when hoping to match a transform of an existing dataset. match_xarray: An xarray.DataArray or xarray.Dataset to use as a template with rioxarray-based schema to extract the crs and transform to specify @@ -1082,7 +1081,7 @@ def open_dataset( ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, use_coords_double_precision=use_coords_double_precision, - match_xarray=match_xarray + match_xarray=match_xarray, ) store_entrypoint = backends_store.StoreBackendEntrypoint() diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 12bb9bf..fdb10f7 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -393,7 +393,7 @@ def test_honors_transform_precisely(self): crs='EPSG:4326', use_coords_double_precision=True, ).rename({'lon': 'x', 'lat': 'y'}) - # This is off slightly due to bounds determined by geometry e.g. .getInfo() + # This is off slightly due to bounds determined by geometry e.g. .getInfo() # seems to cause a super slight shift in the bounds. Thhe coords change before # and after the call to .getInfo()! np.testing.assert_almost_equal( @@ -401,7 +401,7 @@ def test_honors_transform_precisely(self): np.array(raster.rio.transform()), decimal=13, ) - + @absltest.skipIf(_SKIP_RASTERIO_TESTS, 'rioxarray module not loaded') def test_match_xarray(self): data = np.empty((162, 120), dtype=np.float32) From af977501a4e6e75b2ec7a1f4398053e537d1d7c0 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Wed, 14 Feb 2024 19:01:11 +0000 Subject: [PATCH 07/21] formatted change --- xee/ext_integration_test.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index fdb10f7..d6a5fd7 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -42,18 +42,18 @@ def _read_identity_pool_creds() -> identity_pool.Credentials: - credentials_path = os.environ[_CREDENTIALS_PATH_KEY] - with open(credentials_path) as file: - json_file = json.load(file) - credentials = identity_pool.Credentials.from_info(json_file) - return credentials.with_scopes(_SCOPES) + credentials_path = os.environ[_CREDENTIALS_PATH_KEY] + with open(credentials_path) as file: + json_file = json.load(file) + credentials = identity_pool.Credentials.from_info(json_file) + return credentials.with_scopes(_SCOPES) def init_ee_for_tests(): - ee.Initialize( - credentials=_read_identity_pool_creds(), - opt_url=ee.data.HIGH_VOLUME_API_BASE_URL, - ) + ee.Initialize( + credentials=_read_identity_pool_creds(), + opt_url=ee.data.HIGH_VOLUME_API_BASE_URL, + ) class EEBackendArrayTest(absltest.TestCase): From 103fd7ddd544f50862b828d85adcfa36addd499e Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Fri, 16 Feb 2024 06:08:49 +0000 Subject: [PATCH 08/21] Geometry bounds are updated with the projection. --- xee/ext.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 3929b54..028f2d8 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -279,9 +279,9 @@ def get_info(self) -> Dict[str, Any]: rpcs.append(('projection', self.projection)) if isinstance(self.geometry, ee.Geometry): - rpcs.append(('bounds', self.geometry.bounds())) + rpcs.append(('bounds', self.geometry.bounds(proj = self.projection))) else: - rpcs.append(('bounds', self.image_collection.first().geometry().bounds())) + rpcs.append(('bounds', self.image_collection.first().geometry().bounds(proj = self.projection))) # TODO(#29, #30): This RPC call takes the longest time to compute. This # requires a full scan of the images in the collection, which happens on the From 2ea35e55595253fc2d050a91e49748816a7c794f Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Fri, 16 Feb 2024 06:26:42 +0000 Subject: [PATCH 09/21] lint error fixed. --- xee/ext.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 028f2d8..b82163e 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -279,9 +279,12 @@ def get_info(self) -> Dict[str, Any]: rpcs.append(('projection', self.projection)) if isinstance(self.geometry, ee.Geometry): - rpcs.append(('bounds', self.geometry.bounds(proj = self.projection))) + rpcs.append(('bounds', self.geometry.bounds(proj=self.projection))) else: - rpcs.append(('bounds', self.image_collection.first().geometry().bounds(proj = self.projection))) + rpcs.append(( + 'bounds', + self.image_collection.first().geometry().bounds(proj=self.projection), + )) # TODO(#29, #30): This RPC call takes the longest time to compute. This # requires a full scan of the images in the collection, which happens on the From 7131655a4ee923b471db84a5dfb37b4c035312f8 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Fri, 16 Feb 2024 07:10:07 +0000 Subject: [PATCH 10/21] Added maxError as a 1 in the bounds for better. --- xee/ext.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index b82163e..87072d7 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -279,11 +279,13 @@ def get_info(self) -> Dict[str, Any]: rpcs.append(('projection', self.projection)) if isinstance(self.geometry, ee.Geometry): - rpcs.append(('bounds', self.geometry.bounds(proj=self.projection))) + rpcs.append(('bounds', self.geometry.bounds(1, proj=self.projection))) else: rpcs.append(( 'bounds', - self.image_collection.first().geometry().bounds(proj=self.projection), + self.image_collection.first() + .geometry() + .bounds(1, proj=self.projection), )) # TODO(#29, #30): This RPC call takes the longest time to compute. This From 83731718fb52b415c8a2429f24e7b8d9f6e1b670 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Sat, 17 Feb 2024 15:17:07 +0000 Subject: [PATCH 11/21] checkout `ext.py` from main + create failing integration test --- xee/ext.py | 39 +++----------------------------- xee/ext_integration_test.py | 44 +------------------------------------ 2 files changed, 4 insertions(+), 79 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 3afe2c7..3929b54 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -40,7 +40,6 @@ from xarray.backends import store as backends_store from xarray.core import indexing from xarray.core import utils -import xarray as xr from xee import types import ee @@ -147,8 +146,6 @@ def open( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, - use_coords_double_precision: bool = False, - match_xarray: xarray.DataArray | xarray.Dataset | None = None, ) -> 'EarthEngineStore': if mode != 'r': raise ValueError( @@ -169,8 +166,6 @@ def open( request_byte_limit=request_byte_limit, ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, - use_coords_double_precision=use_coords_double_precision, - match_xarray=match_xarray, ) def __init__( @@ -188,8 +183,6 @@ def __init__( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_kwargs: Optional[Dict[str, Any]] = None, ee_init_if_necessary: bool = False, - use_coords_double_precision: bool = False, - match_xarray: xr.DataArray | xr.Dataset | None = None, ): self.ee_init_kwargs = ee_init_kwargs self.ee_init_if_necessary = ee_init_if_necessary @@ -202,7 +195,6 @@ def __init__( self.geometry = geometry self.primary_dim_name = primary_dim_name or 'time' self.primary_dim_property = primary_dim_property or 'system:time_start' - self.use_coords_double_precision = use_coords_double_precision self.n_images = self.get_info['size'] self._props = self.get_info['props'] @@ -213,20 +205,12 @@ def __init__( self.crs_arg = crs or proj.get('crs', proj.get('wkt', 'EPSG:4326')) self.crs = CRS(self.crs_arg) - if match_xarray is not None: - if match_xarray.rio.crs is None: - raise ValueError('If matching to xarray, we require `.rio.crs` is set.') - self.crs = CRS(match_xarray.rio.crs) - if match_xarray[match_xarray.rio.x_dim].dtype == np.float64: - self.use_coords_double_precision = True # Gets the unit i.e. meter, degree etc. self.scale_units = self.crs.axis_info[0].unit_name # Get the dimensions name based on the CRS (scale units). self.dimension_names = self.DIMENSION_NAMES.get( self.scale_units, ('X', 'Y') ) - if match_xarray is not None: - self.dimension_names = (match_xarray.rio.x_dim, match_xarray.rio.y_dim) x_dim_name, y_dim_name = self.dimension_names self._props.update( coordinates=f'{self.primary_dim_name} {x_dim_name} {y_dim_name}', @@ -239,14 +223,11 @@ def __init__( if scale is None: scale = default_scale default_transform = affine.Affine.scale(scale, -1 * scale) + transform = affine.Affine(*proj.get('transform', default_transform)[:6]) self.scale_x, self.scale_y = transform.a, transform.e self.scale = np.sqrt(np.abs(transform.determinant)) - if match_xarray is not None: - self.scale_x, self.scale_y = match_xarray.rio.resolution() - self.scale = np.sqrt(np.abs(self.scale_x * self.scale_y)) - # Parse the dataset bounds from the native projection (either from the CRS # or the image geometry) and translate it to the representation that will be # used for all internal `computePixels()` calls. @@ -267,8 +248,6 @@ def __init__( x_min, y_min = self.transform(x_min_0, y_min_0) x_max, y_max = self.transform(x_max_0, y_max_0) self.bounds = x_min, y_min, x_max, y_max - if match_xarray is not None: - self.bounds = match_xarray.rio.bounds() max_dtype = self._max_itemsize() @@ -602,9 +581,8 @@ def _get_tile_from_ee( else (0, tile_coords_start, 1, tile_coords_end) ) target_image = ee.Image.pixelCoordinates(ee.Projection(self.crs_arg)) - dtype = np.float64 if self.use_coords_double_precision else np.float32 return tile_index, self.image_to_array( - target_image, grid=bbox, dtype=dtype, bandIds=[band_id] + target_image, grid=bbox, dtype=np.float32, bandIds=[band_id] ) def _process_coordinate_data( @@ -711,7 +689,7 @@ def _parse_dtype(data_type: types.DataType): def _ee_bounds_to_bounds(bounds: ee.Bounds) -> types.Bounds: - coords = np.array(bounds['coordinates'], dtype=np.float64)[0] + coords = np.array(bounds['coordinates'], dtype=np.float32)[0] x_min, y_min, x_max, y_max = ( min(coords[:, 0]), min(coords[:, 1]), @@ -973,8 +951,6 @@ def open_dataset( request_byte_limit: int = REQUEST_BYTE_LIMIT, ee_init_if_necessary: bool = False, ee_init_kwargs: Optional[Dict[str, Any]] = None, - use_coords_double_precision: bool = False, - match_xarray: xarray.DataArray | xarray.Dataset | None = None, ) -> xarray.Dataset: # type: ignore """Open an Earth Engine ImageCollection as an Xarray Dataset. @@ -1044,13 +1020,6 @@ def open_dataset( frameworks. ee_init_kwargs: keywords to pass to Earth Engine Initialize when attempting to auto init for remote workers. - use_coords_double_precision: Whether to use double precision for coordinates - and bounds from provided geometry. False by default, but True may be - helpful when hoping to match a transform of an existing dataset. - match_xarray: An xarray.DataArray or xarray.Dataset to use as a template - with rioxarray-based schema to extract the crs and transform to specify - the spatial extent and crs of the output dataset. Using this arg requires - that rioxarray is installed. Returns: An xarray.Dataset that streams in remote data from Earth Engine. @@ -1080,8 +1049,6 @@ def open_dataset( request_byte_limit=request_byte_limit, ee_init_kwargs=ee_init_kwargs, ee_init_if_necessary=ee_init_if_necessary, - use_coords_double_precision=use_coords_double_precision, - match_xarray=match_xarray, ) store_entrypoint = backends_store.StoreBackendEntrypoint() diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index d6a5fd7..1306171 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -359,7 +359,7 @@ def test_honors_projection(self): self.assertNotEqual(ds.dims, standard_ds.dims) @absltest.skipIf(_SKIP_RASTERIO_TESTS, 'rioxarray module not loaded') - def test_honors_transform_precisely(self): + def test_expected_double_precision_transform(self): data = np.empty((162, 120), dtype=np.float32) # An example of a double precision bbox bbox = ( @@ -391,49 +391,7 @@ def test_honors_transform_precisely(self): geometry=geo, scale=raster.rio.resolution()[0], crs='EPSG:4326', - use_coords_double_precision=True, ).rename({'lon': 'x', 'lat': 'y'}) - # This is off slightly due to bounds determined by geometry e.g. .getInfo() - # seems to cause a super slight shift in the bounds. Thhe coords change before - # and after the call to .getInfo()! - np.testing.assert_almost_equal( - np.array(xee_dataset.rio.transform()), - np.array(raster.rio.transform()), - decimal=13, - ) - - @absltest.skipIf(_SKIP_RASTERIO_TESTS, 'rioxarray module not loaded') - def test_match_xarray(self): - data = np.empty((162, 120), dtype=np.float32) - # An example of a double precision bbox - bbox = ( - -53.94158617595226, - -12.078281822698678, - -53.67209159071253, - -11.714464132625046, - ) - x_res = (bbox[2] - bbox[0]) / data.shape[1] - y_res = (bbox[3] - bbox[1]) / data.shape[0] - raster = xr.DataArray( - data, - coords={ - 'y': np.linspace(bbox[3], bbox[1] + x_res, data.shape[0]), - 'x': np.linspace(bbox[0], bbox[2] - y_res, data.shape[1]), - }, - dims=('y', 'x'), - ) - raster.rio.write_crs('EPSG:4326', inplace=True) - ic = ( - ee.ImageCollection('UCSB-CHG/CHIRPS/DAILY') - .filterDate(ee.DateRange('2014-01-01', '2014-01-02')) - .select('precipitation') - ) - xee_dataset = xr.open_dataset( - ee.ImageCollection(ic), - engine='ee', - scale=raster.rio.resolution()[0], - match_xarray=raster, - ) np.testing.assert_equal( np.array(xee_dataset.rio.transform()), np.array(raster.rio.transform()), From f6048f6540cbdaca7eacbce4e7dbba4b9e8d3802 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Sat, 17 Feb 2024 15:57:40 +0000 Subject: [PATCH 12/21] add support for bbox in geometry + demo passing integration test --- xee/ext.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 3929b54..aebe2c1 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -139,7 +139,7 @@ def open( crs: Optional[str] = None, scale: Optional[float] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[ee.Geometry] = None, + geometry: Optional[Union[ee.Geometry, types.types.BBox]] = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, mask_value: Optional[float] = None, @@ -176,7 +176,7 @@ def __init__( crs: Optional[str] = None, scale: Union[float, int, None] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[ee.Geometry] = None, + geometry: Optional[Union[ee.Geometry, types.BBox]] = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, mask_value: Optional[float] = None, @@ -231,19 +231,31 @@ def __init__( # Parse the dataset bounds from the native projection (either from the CRS # or the image geometry) and translate it to the representation that will be # used for all internal `computePixels()` calls. - try: - if isinstance(geometry, ee.Geometry): + if geometry is None: + try: + x_min_0, y_min_0, x_max_0, y_max_0 = self.crs.area_of_use.bounds + except AttributeError: + # `area_of_use` is probable `None`. Parse the geometry from the first + # image instead (calculated in self.get_info()) x_min_0, y_min_0, x_max_0, y_max_0 = _ee_bounds_to_bounds( self.get_info['bounds'] ) - else: - x_min_0, y_min_0, x_max_0, y_max_0 = self.crs.area_of_use.bounds - except AttributeError: - # `area_of_use` is probable `None`. Parse the geometry from the first - # image instead (calculated in self.get_info()) + elif isinstance(geometry, ee.Geometry): x_min_0, y_min_0, x_max_0, y_max_0 = _ee_bounds_to_bounds( self.get_info['bounds'] ) + elif isinstance(geometry, Union[List, Tuple, np.ndarray]): + if len(geometry) != 4: + raise ValueError( + 'geometry must be a 4-tuple of floats or a ee.Geometry, ' + f'but got {geometry!r}' + ) + x_min_0, y_min_0, x_max_0, y_max_0 = geometry + else: + raise ValueError( + 'geometry must be a 4-tuple of floats or a ee.Geometry or None, ' + f'but got {type(geometry)}' + ) x_min, y_min = self.transform(x_min_0, y_min_0) x_max, y_max = self.transform(x_max_0, y_max_0) @@ -689,7 +701,7 @@ def _parse_dtype(data_type: types.DataType): def _ee_bounds_to_bounds(bounds: ee.Bounds) -> types.Bounds: - coords = np.array(bounds['coordinates'], dtype=np.float32)[0] + coords = np.array(bounds['coordinates'], dtype=np.float64)[0] x_min, y_min, x_max, y_max = ( min(coords[:, 0]), min(coords[:, 1]), @@ -1003,7 +1015,8 @@ def open_dataset( coalesce all variables upon opening. By default, the scale and reference system is set by the the `crs` and `scale` arguments. geometry (optional): Specify an `ee.Geometry` to define the regional - bounds when opening the data. When not set, the bounds are defined by + bounds when opening the data or a bbox specifying [x_min, y_min, x_max, + y_max] in EPSG:4326. When not set, the bounds are defined by the CRS's 'area_of_use` boundaries. If those aren't present, the bounds are derived from the geometry of the first image of the collection. primary_dim_name (optional): Override the name of the primary dimension of From 00b34e790fe0cebf15b1c1712de252be6ac89278 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Sat, 17 Feb 2024 16:00:49 +0000 Subject: [PATCH 13/21] update integation test to pass bounds --- xee/ext_integration_test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 1306171..08e8155 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -378,8 +378,6 @@ def test_expected_double_precision_transform(self): }, dims=('y', 'x'), ) - - geo = ee.Geometry.Rectangle(*raster.rio.bounds()) ic = ( ee.ImageCollection('UCSB-CHG/CHIRPS/DAILY') .filterDate(ee.DateRange('2014-01-01', '2014-01-02')) @@ -388,7 +386,7 @@ def test_expected_double_precision_transform(self): xee_dataset = xr.open_dataset( ee.ImageCollection(ic), engine='ee', - geometry=geo, + geometry=tuple(raster.rio.bounds()), scale=raster.rio.resolution()[0], crs='EPSG:4326', ).rename({'lon': 'x', 'lat': 'y'}) From b8960a44d8f19a6e35b5504b0ab4346f424cd174 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Sun, 18 Feb 2024 16:19:03 +0000 Subject: [PATCH 14/21] Geometry bounds testcases are added. --- xee/ext_integration_test.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 9d0b857..5f09e38 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -259,6 +259,32 @@ def __getitem__(self, params): self.assertEqual(getter.count, 3) + def test_geometry_bounds_with_and_without_projection(self): + image = ( + ee.ImageCollection('LANDSAT/LC08/C01/T1') + .filterDate('2017-01-01', '2017-01-03') + .first() + ) + point = ee.Geometry.Point(-40.2414893624401, 105.48790177216375) + distance = 311.5 + scale = 5000 + projection = ee.Projection('EPSG:4326', [1, 0, 0, 0, -1, 0]).atScale(scale) + image = image.reproject(projection) + + geometry = point.buffer(distance, proj=projection).bounds(proj=projection) + + data_store = xee.EarthEngineStore( + ee.ImageCollection(image), + projection=image.projection(), + geometry=geometry, + ) + data_store_bounds = data_store.get_info['bounds'] + + self.assertNotEqual(geometry.bounds().getInfo(), data_store_bounds) + self.assertEqual( + geometry.bounds(1, proj=projection).getInfo(), data_store_bounds + ) + class EEBackendEntrypointTest(absltest.TestCase): From a2b3c55723a26f15c4156911773ab8b50aaad5b4 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Sun, 18 Feb 2024 17:11:34 +0000 Subject: [PATCH 15/21] demonstrate xy scale control through projection in test --- xee/ext.py | 2 +- xee/ext_integration_test.py | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index aebe2c1..b3b4fcd 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -594,7 +594,7 @@ def _get_tile_from_ee( ) target_image = ee.Image.pixelCoordinates(ee.Projection(self.crs_arg)) return tile_index, self.image_to_array( - target_image, grid=bbox, dtype=np.float32, bandIds=[band_id] + target_image, grid=bbox, dtype=np.float64, bandIds=[band_id] ) def _process_coordinate_data( diff --git a/xee/ext_integration_test.py b/xee/ext_integration_test.py index 08e8155..9c6022c 100644 --- a/xee/ext_integration_test.py +++ b/xee/ext_integration_test.py @@ -359,9 +359,8 @@ def test_honors_projection(self): self.assertNotEqual(ds.dims, standard_ds.dims) @absltest.skipIf(_SKIP_RASTERIO_TESTS, 'rioxarray module not loaded') - def test_expected_double_precision_transform(self): - data = np.empty((162, 120), dtype=np.float32) - # An example of a double precision bbox + def test_expected_precise_transform(self): + data = np.empty((162, 121), dtype=np.float32) bbox = ( -53.94158617595226, -12.078281822698678, @@ -378,6 +377,7 @@ def test_expected_double_precision_transform(self): }, dims=('y', 'x'), ) + raster.rio.write_crs('EPSG:4326', inplace=True) ic = ( ee.ImageCollection('UCSB-CHG/CHIRPS/DAILY') .filterDate(ee.DateRange('2014-01-01', '2014-01-02')) @@ -387,9 +387,11 @@ def test_expected_double_precision_transform(self): ee.ImageCollection(ic), engine='ee', geometry=tuple(raster.rio.bounds()), - scale=raster.rio.resolution()[0], - crs='EPSG:4326', + projection=ee.Projection( + crs=str(raster.rio.crs), transform=raster.rio.transform()[:6] + ), ).rename({'lon': 'x', 'lat': 'y'}) + self.assertNotEqual(abs(x_res), abs(y_res)) np.testing.assert_equal( np.array(xee_dataset.rio.transform()), np.array(raster.rio.transform()), From 4db088745ff99b32840738814436ec5efcad656c Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Sun, 18 Feb 2024 17:14:44 +0000 Subject: [PATCH 16/21] nits --- xee/ext.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index b3b4fcd..bc4f947 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -25,7 +25,7 @@ import math import os import sys -from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, Union +from typing import Any, Dict, Iterable, List, Literal, Optional, Sequence, Tuple, Union from urllib import parse import warnings @@ -139,7 +139,7 @@ def open( crs: Optional[str] = None, scale: Optional[float] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[Union[ee.Geometry, types.types.BBox]] = None, + geometry: Optional[Union[ee.Geometry, types.BBox]] = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, mask_value: Optional[float] = None, @@ -244,7 +244,7 @@ def __init__( x_min_0, y_min_0, x_max_0, y_max_0 = _ee_bounds_to_bounds( self.get_info['bounds'] ) - elif isinstance(geometry, Union[List, Tuple, np.ndarray]): + elif isinstance(geometry, Union[List, Tuple, np.ndarray, Sequence]): if len(geometry) != 4: raise ValueError( 'geometry must be a 4-tuple of floats or a ee.Geometry, ' From 4c0dd6b5bce2aacc3fbe44f7e6d8377ebf9a3971 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Tue, 20 Feb 2024 15:43:27 +0000 Subject: [PATCH 17/21] narrow supported types --- xee/ext.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index bc4f947..f4aa7d4 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -139,7 +139,7 @@ def open( crs: Optional[str] = None, scale: Optional[float] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[Union[ee.Geometry, types.BBox]] = None, + geometry: Optional[Union[ee.Geometry, Tuple[float, float, float, float]]] = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, mask_value: Optional[float] = None, @@ -176,7 +176,7 @@ def __init__( crs: Optional[str] = None, scale: Union[float, int, None] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[Union[ee.Geometry, types.BBox]] = None, + geometry: Optional[Union[ee.Geometry, Tuple[float, float, float, float]]] = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, mask_value: Optional[float] = None, @@ -244,16 +244,16 @@ def __init__( x_min_0, y_min_0, x_max_0, y_max_0 = _ee_bounds_to_bounds( self.get_info['bounds'] ) - elif isinstance(geometry, Union[List, Tuple, np.ndarray, Sequence]): + elif isinstance(geometry, Union[List, Tuple]): if len(geometry) != 4: raise ValueError( - 'geometry must be a 4-tuple of floats or a ee.Geometry, ' + 'geometry must be a tuple or list of length 4, or a ee.Geometry, ' f'but got {geometry!r}' ) x_min_0, y_min_0, x_max_0, y_max_0 = geometry else: raise ValueError( - 'geometry must be a 4-tuple of floats or a ee.Geometry or None, ' + f'geometry must be a tuple or list of length 4, a ee.Geometry, or None ' f'but got {type(geometry)}' ) @@ -956,7 +956,7 @@ def open_dataset( crs: Optional[str] = None, scale: Union[float, int, None] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[ee.Geometry] = None, + geometry: Optional[Union[ee.Geometry, Tuple[float, float, float, float]]] = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, ee_mask_value: Optional[float] = None, From a766527f1f779ae75c274ff8c869ecbfc88269e2 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Tue, 20 Feb 2024 15:43:39 +0000 Subject: [PATCH 18/21] add readme example --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index 4aef1cd..ad42410 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,20 @@ i = ee.ImageCollection(ee.Image("LANDSAT/LC08/C02/T1_TOA/LC08_044034_20140318")) ds = xarray.open_dataset(i, engine='ee') ``` +Open any Earth Engine ImageCollection to match an existing transform: + +```python +raster = rioxarray.open_rasterio(...) # assume crs + transform is set +ds = xr.open_dataset( + 'ee://ECMWF/ERA5_LAND/HOURLY', + engine='ee', + geometry=tuple(raster.rio.bounds()), # must be in EPSG:4326 + projection=ee.Projection( + crs=str(raster.rio.crs), transform=raster.rio.transform()[:6] + ), +) +``` + See [examples](examples/) or [docs](docs/) for more uses and integrations. ## License From 23ea0c9b402f775c18874e47a8a7283929b5b070 Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Wed, 21 Feb 2024 14:22:06 +0000 Subject: [PATCH 19/21] typing nit --- xee/ext.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 58cbd0d..c1a9055 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -139,7 +139,9 @@ def open( crs: Optional[str] = None, scale: Optional[float] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[Union[ee.Geometry, Tuple[float, float, float, float]]] = None, + geometry: Union[ + ee.Geometry, Tuple[float, float, float, float], None + ] = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, mask_value: Optional[float] = None, @@ -176,7 +178,9 @@ def __init__( crs: Optional[str] = None, scale: Union[float, int, None] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[Union[ee.Geometry, Tuple[float, float, float, float]]] = None, + geometry: Optional[ + Union[ee.Geometry, Tuple[float, float, float, float]] + ] = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, mask_value: Optional[float] = None, @@ -253,8 +257,8 @@ def __init__( x_min_0, y_min_0, x_max_0, y_max_0 = geometry else: raise ValueError( - f'geometry must be a tuple or list of length 4, a ee.Geometry, or None ' - f'but got {type(geometry)}' + 'geometry must be a tuple or list of length 4, a ee.Geometry, or' + f' None but got {type(geometry)}' ) x_min, y_min = self.transform(x_min_0, y_min_0) @@ -960,7 +964,9 @@ def open_dataset( crs: Optional[str] = None, scale: Union[float, int, None] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[Union[ee.Geometry, Tuple[float, float, float, float]]] = None, + geometry: Optional[ + Union[ee.Geometry, Tuple[float, float, float, float]] + ] = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, ee_mask_value: Optional[float] = None, From b640d9589227cb6db0b0c5517e4ced73051f494c Mon Sep 17 00:00:00 2001 From: ljstrnadiii Date: Sat, 16 Mar 2024 15:21:26 +0000 Subject: [PATCH 20/21] nits + pull out logic + typing --- xee/ext.py | 78 ++++++++++++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 41 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index c1a9055..cb7d635 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -139,9 +139,7 @@ def open( crs: Optional[str] = None, scale: Optional[float] = None, projection: Optional[ee.Projection] = None, - geometry: Union[ - ee.Geometry, Tuple[float, float, float, float], None - ] = None, + geometry: ee.Geometry | Tuple[float, float, float, float] | None = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, mask_value: Optional[float] = None, @@ -178,9 +176,7 @@ def __init__( crs: Optional[str] = None, scale: Union[float, int, None] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[ - Union[ee.Geometry, Tuple[float, float, float, float]] - ] = None, + geometry: ee.Geometry | Tuple[float, float, float, float] | None = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, mask_value: Optional[float] = None, @@ -232,38 +228,7 @@ def __init__( self.scale_x, self.scale_y = transform.a, transform.e self.scale = np.sqrt(np.abs(transform.determinant)) - # Parse the dataset bounds from the native projection (either from the CRS - # or the image geometry) and translate it to the representation that will be - # used for all internal `computePixels()` calls. - if geometry is None: - try: - x_min_0, y_min_0, x_max_0, y_max_0 = self.crs.area_of_use.bounds - except AttributeError: - # `area_of_use` is probable `None`. Parse the geometry from the first - # image instead (calculated in self.get_info()) - x_min_0, y_min_0, x_max_0, y_max_0 = _ee_bounds_to_bounds( - self.get_info['bounds'] - ) - elif isinstance(geometry, ee.Geometry): - x_min_0, y_min_0, x_max_0, y_max_0 = _ee_bounds_to_bounds( - self.get_info['bounds'] - ) - elif isinstance(geometry, Union[List, Tuple]): - if len(geometry) != 4: - raise ValueError( - 'geometry must be a tuple or list of length 4, or a ee.Geometry, ' - f'but got {geometry!r}' - ) - x_min_0, y_min_0, x_max_0, y_max_0 = geometry - else: - raise ValueError( - 'geometry must be a tuple or list of length 4, a ee.Geometry, or' - f' None but got {type(geometry)}' - ) - - x_min, y_min = self.transform(x_min_0, y_min_0) - x_max, y_max = self.transform(x_max_0, y_max_0) - self.bounds = x_min, y_min, x_max, y_max + self.bounds = self._determine_bounds(geometry=geometry) max_dtype = self._max_itemsize() @@ -626,6 +591,39 @@ def _process_coordinate_data( tiles[i] = arr.flatten() return np.concatenate(tiles) + def _determine_bounds( + self, + geometry: ee.Geometry | Tuple[float, float, float, float] | None = None, + ) -> Tuple[float, float, float, float]: + if geometry is None: + try: + x_min_0, y_min_0, x_max_0, y_max_0 = self.crs.area_of_use.bounds + except AttributeError: + # `area_of_use` is probably `None`. Parse the geometry from the first + # image instead (calculated in self.get_info()) + x_min_0, y_min_0, x_max_0, y_max_0 = _ee_bounds_to_bounds( + self.get_info['bounds'] + ) + elif isinstance(geometry, ee.Geometry): + x_min_0, y_min_0, x_max_0, y_max_0 = _ee_bounds_to_bounds( + self.get_info['bounds'] + ) + elif isinstance(geometry, Sequence): + if len(geometry) != 4: + raise ValueError( + 'geometry must be a tuple or list of length 4, or a ee.Geometry, ' + f'but got {geometry!r}' + ) + x_min_0, y_min_0, x_max_0, y_max_0 = geometry + else: + raise ValueError( + 'geometry must be a tuple or list of length 4, a ee.Geometry, or' + f' None but got {type(geometry)}' + ) + x_min, y_min = self.transform(x_min_0, y_min_0) + x_max, y_max = self.transform(x_max_0, y_max_0) + return x_min, y_min, x_max, y_max + def get_variables(self) -> utils.Frozen[str, xarray.Variable]: vars_ = [(name, self.open_store_variable(name)) for name in self._bands()] @@ -964,9 +962,7 @@ def open_dataset( crs: Optional[str] = None, scale: Union[float, int, None] = None, projection: Optional[ee.Projection] = None, - geometry: Optional[ - Union[ee.Geometry, Tuple[float, float, float, float]] - ] = None, + geometry: ee.Geometry | Tuple[float, float, float, float] | None = None, primary_dim_name: Optional[str] = None, primary_dim_property: Optional[str] = None, ee_mask_value: Optional[float] = None, From 7beabcfe75fef22fa291159d066c5c7804431228 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Thu, 28 Mar 2024 06:41:05 +0000 Subject: [PATCH 21/21] lint error fixed. --- xee/ext.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 8880418..c54b1cc 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -280,12 +280,14 @@ def get_info(self) -> Dict[str, Any]: if isinstance(self.geometry, ee.Geometry): rpcs.append(('bounds', self.geometry.bounds(1, proj=self.projection))) else: - rpcs.append(( - 'bounds', - self.image_collection.first() - .geometry() - .bounds(1, proj=self.projection), - )) + rpcs.append( + ( + 'bounds', + self.image_collection.first() + .geometry() + .bounds(1, proj=self.projection), + ) + ) # TODO(#29, #30): This RPC call takes the longest time to compute. This # requires a full scan of the images in the collection, which happens on the @@ -298,14 +300,16 @@ def get_info(self) -> Dict[str, Any]: # client-side. Ideally, this would live behind a xarray-backend-specific # feature flag, since it's not guaranteed that data is this consistent. columns = ['system:id', self.primary_dim_property] - rpcs.append(( - 'properties', + rpcs.append( ( - self.image_collection.reduceColumns( - ee.Reducer.toList().repeat(len(columns)), columns - ).get('list') - ), - )) + 'properties', + ( + self.image_collection.reduceColumns( + ee.Reducer.toList().repeat(len(columns)), columns + ).get('list') + ), + ) + ) info = ee.List([rpc for _, rpc in rpcs]).getInfo()