From 12844d3ca774c534ca0053b0264535086b3cc967 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Tue, 28 Nov 2023 12:01:05 +0000 Subject: [PATCH 1/6] Pixel grid dimensions (40818x1) error fixed. --- xee/ext.py | 64 +++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 8 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index cdc2dfe..63d8260 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -29,6 +29,7 @@ import warnings import affine +from itertools import cycle import numpy as np import pandas as pd import pyproj @@ -232,7 +233,7 @@ def __init__( x_min_0, y_min_0, x_max_0, y_max_0 = _ee_bounds_to_bounds( self.get_info['bounds'] ) - # TODO(#40): Investigate data discrepancy (off-by-one) issue. + x_min, y_min = self.transform(x_min_0, y_min_0) x_max, y_max = self.transform(x_max_0, y_max_0) self.bounds = x_min, y_min, x_max, y_max @@ -536,6 +537,44 @@ def _get_primary_coordinates(self) -> List[Any]: ] return primary_coords + def _get_tile_from_EE( + self, tile_index: Tuple[Any, Union[str, int]] + ) -> Tuple[slice, np.ndarray]: + """Get a numpy array from EE for a specific 3D bounding box (a 'tile').""" + tile_index, BandIds = tile_index + bbox = self.project( + (tile_index[0], 0, tile_index[1], 1) + if BandIds == 'longitude' + else (0, tile_index[0], 1, tile_index[1]) + ) + tile_idx = slice(tile_index[0], tile_index[1]) + target_image = ee.Image.pixelLonLat() + return tile_idx, self.image_to_array( + target_image, grid=bbox, dtype=np.float32, bandIds=[BandIds] + ) + + def process_coordinate_data( + self, + total_tile: int, + tile_size: int, + end_point: int, + coordinate_type: str, + ) -> np.ndarray: + """Process coordinate data using multithreading for longitude or latitude.""" + data = [ + (tile_size * i, min(tile_size * (i + 1), end_point)) + for i in range(total_tile) + ] + tiles = [None for _ in range(total_tile)] + with concurrent.futures.ThreadPoolExecutor() as pool: + for i, arr in pool.map( + self._get_tile_from_EE, list(zip(data, cycle([coordinate_type]))) + ): + tiles[i] = ( + arr.tolist() if coordinate_type == 'longitude' else arr.tolist()[0] + ) + return np.concatenate(tiles) + def get_variables(self) -> utils.Frozen[str, xarray.Variable]: vars_ = [(name, self.open_store_variable(name)) for name in self._bands()] @@ -551,15 +590,24 @@ def get_variables(self) -> utils.Frozen[str, xarray.Variable]: f'ImageCollection due to: {e}.' ) - lnglat_img = ee.Image.pixelLonLat() - lon_grid = self.project((0, 0, v0.shape[1], 1)) - lat_grid = self.project((0, 0, 1, v0.shape[2])) - lon = self.image_to_array( - lnglat_img, grid=lon_grid, dtype=np.float32, bandIds=['longitude'] + if isinstance(self.chunks, dict): + # when the value of self.chunks = 'auto' or user-define. + self._apparent_chunks = self.chunks.copy() + else: + # when the value of self.chunks = -1 + self._apparent_chunks = {k: 1 for k in self.PREFERRED_CHUNKS.keys()} + self._apparent_chunks['width'] = v0.shape[1] + self._apparent_chunks['height'] = v0.shape[2] + + lon_total_tile = math.ceil(v0.shape[1] / self._apparent_chunks['width']) + lon = self.process_coordinate_data( + lon_total_tile, self._apparent_chunks['width'], v0.shape[1], 'longitude' ) - lat = self.image_to_array( - lnglat_img, grid=lat_grid, dtype=np.float32, bandIds=['latitude'] + lat_total_tile = math.ceil(v0.shape[2] / self._apparent_chunks['height']) + lat = self.process_coordinate_data( + lat_total_tile, self._apparent_chunks['height'], v0.shape[2], 'latitude' ) + width_coord = np.squeeze(lon) height_coord = np.squeeze(lat) From f8f02e02abeffd18bdb026751ff3b66eb77e79ce Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Wed, 29 Nov 2023 07:56:40 +0000 Subject: [PATCH 2/6] nit changes done. --- xee/ext.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 63d8260..56e71e4 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -540,17 +540,17 @@ def _get_primary_coordinates(self) -> List[Any]: def _get_tile_from_EE( self, tile_index: Tuple[Any, Union[str, int]] ) -> Tuple[slice, np.ndarray]: - """Get a numpy array from EE for a specific 3D bounding box (a 'tile').""" - tile_index, BandIds = tile_index + """Get a numpy array from EE for a specific bounding box (a 'tile').""" + tile_index, band_id = tile_index bbox = self.project( (tile_index[0], 0, tile_index[1], 1) - if BandIds == 'longitude' + if band_id == 'longitude' else (0, tile_index[0], 1, tile_index[1]) ) tile_idx = slice(tile_index[0], tile_index[1]) target_image = ee.Image.pixelLonLat() return tile_idx, self.image_to_array( - target_image, grid=bbox, dtype=np.float32, bandIds=[BandIds] + target_image, grid=bbox, dtype=np.float32, bandIds=[band_id] ) def process_coordinate_data( From 61b6d76dff405f7dcddfb71ddb92ee2215812717 Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Tue, 5 Dec 2023 04:24:12 +0000 Subject: [PATCH 3/6] Necessary Code updated. --- xee/ext.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 56e71e4..6a65c10 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -21,6 +21,7 @@ import concurrent.futures import functools import importlib +import itertools import math import os import sys @@ -29,7 +30,6 @@ import warnings import affine -from itertools import cycle import numpy as np import pandas as pd import pyproj @@ -537,7 +537,7 @@ def _get_primary_coordinates(self) -> List[Any]: ] return primary_coords - def _get_tile_from_EE( + def _get_tile_from_ee( self, tile_index: Tuple[Any, Union[str, int]] ) -> Tuple[slice, np.ndarray]: """Get a numpy array from EE for a specific bounding box (a 'tile').""" @@ -553,7 +553,7 @@ def _get_tile_from_EE( target_image, grid=bbox, dtype=np.float32, bandIds=[band_id] ) - def process_coordinate_data( + def _process_coordinate_data( self, total_tile: int, tile_size: int, @@ -565,10 +565,10 @@ def process_coordinate_data( (tile_size * i, min(tile_size * (i + 1), end_point)) for i in range(total_tile) ] - tiles = [None for _ in range(total_tile)] + tiles = [None] * total_tile with concurrent.futures.ThreadPoolExecutor() as pool: for i, arr in pool.map( - self._get_tile_from_EE, list(zip(data, cycle([coordinate_type]))) + self._get_tile_from_ee, list(zip(data, itertools.cycle([coordinate_type]))) ): tiles[i] = ( arr.tolist() if coordinate_type == 'longitude' else arr.tolist()[0] @@ -592,20 +592,20 @@ def get_variables(self) -> utils.Frozen[str, xarray.Variable]: if isinstance(self.chunks, dict): # when the value of self.chunks = 'auto' or user-define. - self._apparent_chunks = self.chunks.copy() + width_chunk = self.chunks['width'] + height_chunk = self.chunks['height'] else: # when the value of self.chunks = -1 - self._apparent_chunks = {k: 1 for k in self.PREFERRED_CHUNKS.keys()} - self._apparent_chunks['width'] = v0.shape[1] - self._apparent_chunks['height'] = v0.shape[2] + width_chunk = v0.shape[1] + height_chunk = v0.shape[2] - lon_total_tile = math.ceil(v0.shape[1] / self._apparent_chunks['width']) - lon = self.process_coordinate_data( - lon_total_tile, self._apparent_chunks['width'], v0.shape[1], 'longitude' + lon_total_tile = math.ceil(v0.shape[1] / width_chunk) + lon = self._process_coordinate_data( + lon_total_tile, width_chunk, v0.shape[1], 'longitude' ) - lat_total_tile = math.ceil(v0.shape[2] / self._apparent_chunks['height']) - lat = self.process_coordinate_data( - lat_total_tile, self._apparent_chunks['height'], v0.shape[2], 'latitude' + lat_total_tile = math.ceil(v0.shape[2] / height_chunk) + lat = self._process_coordinate_data( + lat_total_tile, height_chunk, v0.shape[2], 'latitude' ) width_coord = np.squeeze(lon) From 9f5b7fc833004db1c20126d0373513496ca3d8cd Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Tue, 5 Dec 2023 06:29:28 +0000 Subject: [PATCH 4/6] nit changes done. --- xee/ext.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xee/ext.py b/xee/ext.py index 6a65c10..1bc770e 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -568,7 +568,8 @@ def _process_coordinate_data( tiles = [None] * total_tile with concurrent.futures.ThreadPoolExecutor() as pool: for i, arr in pool.map( - self._get_tile_from_ee, list(zip(data, itertools.cycle([coordinate_type]))) + self._get_tile_from_ee, + list(zip(data, itertools.cycle([coordinate_type]))) ): tiles[i] = ( arr.tolist() if coordinate_type == 'longitude' else arr.tolist()[0] From 9fa4422c98ab70b5cc273c6a1503b863a0fb5d6a Mon Sep 17 00:00:00 2001 From: dabhicusp Date: Fri, 8 Dec 2023 05:38:46 +0000 Subject: [PATCH 5/6] replace 'total_tile' with 'tile_count'. --- xee/ext.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 1bc770e..a51c7e4 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -555,7 +555,7 @@ def _get_tile_from_ee( def _process_coordinate_data( self, - total_tile: int, + tile_count: int, tile_size: int, end_point: int, coordinate_type: str, @@ -563,13 +563,13 @@ def _process_coordinate_data( """Process coordinate data using multithreading for longitude or latitude.""" data = [ (tile_size * i, min(tile_size * (i + 1), end_point)) - for i in range(total_tile) + for i in range(tile_count) ] - tiles = [None] * total_tile + tiles = [None] * tile_count with concurrent.futures.ThreadPoolExecutor() as pool: for i, arr in pool.map( self._get_tile_from_ee, - list(zip(data, itertools.cycle([coordinate_type]))) + list(zip(data, itertools.cycle([coordinate_type]))), ): tiles[i] = ( arr.tolist() if coordinate_type == 'longitude' else arr.tolist()[0] From 185e29910b057cbd7dbcdba55a8c3b65417cff51 Mon Sep 17 00:00:00 2001 From: Nathaniel Schmitz Date: Tue, 19 Dec 2023 10:37:13 -0500 Subject: [PATCH 6/6] Fix minor grammatical issues in ext.py. --- xee/ext.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index a51c7e4..e805770 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -592,11 +592,11 @@ def get_variables(self) -> utils.Frozen[str, xarray.Variable]: ) if isinstance(self.chunks, dict): - # when the value of self.chunks = 'auto' or user-define. + # when the value of self.chunks = 'auto' or user-defined. width_chunk = self.chunks['width'] height_chunk = self.chunks['height'] else: - # when the value of self.chunks = -1 + # when the value of self.chunks = -1. width_chunk = v0.shape[1] height_chunk = v0.shape[2]