-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This adds an Earth Engine initialization check for dataset operations…
… for remote workers to call Earth Engine. Also adds docs for submitting a Dataflow job using Xee. Dataflow jobs would fail with Xee because the remote workers did not have the EE client library initialized. This adds a check to all calls on the `EarthEngineBackendArray` object so that if there is a call to EE, it will be initialized if it is not already. There was discussion on issue #99 regarding documentation for how to initialize/authenticate on a distributed cluster, and this also includes a Dataflow example that users can start from. close #51 PiperOrigin-RevId: 596966033
- Loading branch information
Xee authors
committed
Jan 9, 2024
1 parent
f05e82b
commit a174bfe
Showing
6 changed files
with
201 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Custom Beam SDK worker container for running Xee pipelines on Dataflow.
# Base image must match the Beam SDK version used to submit the job.
FROM apache/beam_python3.9_sdk:2.51.0

# Copy the pipeline's Python dependency list into the image.
COPY requirements.txt ./

# Install dependencies at build time so Dataflow workers start with
# earthengine-api, xarray-beam, etc. already available.
RUN pip install -r requirements.txt
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Cloud Build config: builds the custom Beam SDK container image and pushes
# it to Artifact Registry. Replace REGION, YOUR_PROJECT, REPO, and CONTAINER
# with your own values before running `gcloud builds submit`.
steps:
- name: 'gcr.io/cloud-builders/docker'
  args: [ 'build', '-t', 'REGION-docker.pkg.dev/YOUR_PROJECT/REPO/CONTAINER', '.' ]
# Listing the image here makes Cloud Build push it after a successful build.
images: ['REGION-docker.pkg.dev/YOUR_PROJECT/REPO/CONTAINER']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
# Copyright 2023 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
r"""Exports EE ImageCollections to Zarr using Xarray-Beam.""" | ||
|
||
# example: | ||
# python ee_to_zarr.py | ||
# --input NASA/GPM_L3/IMERG_V06 | ||
# --output gs://xee-out-${PROJECT_NUMBER} | ||
# --target_chunks='time=6' | ||
# --runner DataflowRunner | ||
# --project $PROJECT | ||
# --region $REGION | ||
# --temp_location gs://xee-out-${PROJECT_NUMBER}/tmp/ | ||
# --service_account_email $SERVICE_ACCOUNT | ||
# --sdk_location=container | ||
# --sdk_container_image=${REGION}-docker.pkg.dev/${PROJECT_NAME}/${REPO}/${CONTAINER} | ||
# --subnetwork regions/${REGION}/subnetworks/${NETWORK_NAME} | ||
# --job_name imerg-dataflow-test-$(date '+%Y%m%d%H%M%S') | ||
|
||
import logging | ||
|
||
from absl import app | ||
from absl import flags | ||
import apache_beam as beam | ||
from apache_beam.internal import pickler | ||
import xarray as xr | ||
import xarray_beam as xbeam | ||
import xee | ||
|
||
import ee | ||
|
||
# Beam must use cloudpickle (rather than the default dill-based pickler) so
# the Earth Engine / Xee objects captured by the pipeline can be serialized
# to remote Dataflow workers.
pickler.set_library(pickler.USE_CLOUDPICKLE)

# Module-level logger for pipeline progress messages.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


# Command-line flags controlling the export job.
_INPUT = flags.DEFINE_string(
    'input', '', help='The input Earth Engine ImageCollection.'
)
_CRS = flags.DEFINE_string(
    'crs',
    'EPSG:4326',
    help='Coordinate Reference System for output Zarr.',
)
_SCALE = flags.DEFINE_float('scale', 0.25, help='Scale factor for output Zarr.')
_TARGET_CHUNKS = flags.DEFINE_string(
    'target_chunks',
    '',
    help=(
        'chunks on the input Zarr dataset to change on the outputs, in the '
        'form of a comma separated dimension=size pairs, e.g., '
        "--target_chunks='x=10,y=10'. Omitted dimensions are not changed and a "
        'chunksize of -1 indicates not to chunk a dimension.'
    ),
)
_OUTPUT = flags.DEFINE_string('output', '', help='The output zarr path.')
_RUNNER = flags.DEFINE_string('runner', None, help='beam.runners.Runner')
|
||
|
||
def parse_dataflow_flags(argv: list[str]) -> list[str]:
  """Splits the command line into absl flags and Dataflow runner arguments.

  Used as the `flags_parser` for `app.run`: absl-defined flags (--input,
  --output, ...) are consumed by the parser, and all remaining arguments
  (--runner, --project, --region, ...) are returned so `app.run` forwards
  them to `main` as `argv` for the Beam pipeline.

  Args:
    argv: Full command line, with the program name at `argv[0]`.

  Returns:
    The arguments not recognized as absl flags.
  """
  parser = flags.argparse_flags.ArgumentParser(
      description='parser for dataflow flags',
      allow_abbrev=False,
  )
  # Parse the argv we were handed (skipping the program name) instead of
  # implicitly reading sys.argv, so the function honors its parameter.
  _, dataflow_args = parser.parse_known_args(argv[1:])
  return dataflow_args
|
||
|
||
# Borrowed from the xbeam examples: | ||
# https://github.com/google/xarray-beam/blob/4f4fcb965a65b5d577601af311d0e0142ee38076/examples/xbeam_rechunk.py#L41 | ||
def _parse_chunks_str(chunks_str: str) -> dict[str, int]: | ||
chunks = {} | ||
parts = chunks_str.split(',') | ||
for part in parts: | ||
k, v = part.split('=') | ||
chunks[k] = int(v) | ||
return chunks | ||
|
||
|
||
def main(argv: list[str]) -> None:
  """Exports an EE ImageCollection to Zarr via an Xarray-Beam pipeline.

  Args:
    argv: Leftover (non-absl) command-line arguments, forwarded to the Beam
      pipeline so runner options like --runner and --project take effect.

  Raises:
    app.UsageError: If --input or --output was not supplied.
  """
  # Use UsageError rather than `assert`: asserts are stripped under
  # `python -O`, and UsageError gives a proper usage message via absl.
  if not _INPUT.value:
    raise app.UsageError('Must specify --input')
  if not _OUTPUT.value:
    raise app.UsageError('Must specify --output')

  source_chunks = {'time': 24}
  # Flag-supplied chunk sizes override the source chunking per dimension.
  target_chunks = dict(source_chunks, **_parse_chunks_str(_TARGET_CHUNKS.value))

  # Initialize EE on the launcher; workers initialize via the Xee backend.
  ee.Initialize()

  # Keep the 100 earliest images (ascending by system:time_start) and only
  # the 'precipitationCal' band.
  input_coll = (
      ee.ImageCollection(_INPUT.value)
      .limit(100, 'system:time_start', True)
      .select('precipitationCal')
  )

  ds = xr.open_dataset(
      input_coll,
      crs=_CRS.value,
      scale=_SCALE.value,
      engine=xee.EarthEngineBackendEntrypoint,
  )
  # Lazy, zero-size template describing the output dataset's structure.
  template = xbeam.make_template(ds)
  # Largest element size; Rechunk uses it to budget intermediate chunks.
  itemsize = max(variable.dtype.itemsize for variable in template.values())

  with beam.Pipeline(runner=_RUNNER.value, argv=argv) as root:
    _ = (
        root
        | xbeam.DatasetToChunks(ds, source_chunks)
        | xbeam.Rechunk(
            ds.sizes,
            source_chunks,
            target_chunks,
            itemsize=itemsize,
        )
        | xbeam.ChunksToZarr(_OUTPUT.value, template, target_chunks)
    )
|
||
|
||
if __name__ == '__main__':
  # parse_dataflow_flags consumes the absl flags and forwards the remaining
  # (Dataflow runner) arguments to main() as argv.
  app.run(main, flags_parser=parse_dataflow_flags)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Python dependencies baked into the custom Beam SDK worker container.
# NOTE(review): 'xee' itself is not listed here even though the pipeline
# imports it — confirm it is installed into the container some other way.
absl-py
earthengine-api
gcsfs
google-cloud
google-cloud-storage
xarray
xarray-beam
apache-beam[gcp]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters