diff --git a/examples/tsunami/chile2010_fgmax-fgout/use_xarray_backends.py b/examples/tsunami/chile2010_fgmax-fgout/use_xarray_backends.py new file mode 100644 index 000000000..6d7fa9128 --- /dev/null +++ b/examples/tsunami/chile2010_fgmax-fgout/use_xarray_backends.py @@ -0,0 +1,87 @@ +import glob + +try: + import rioxarray + import xarray as xr +except ImportError: + "You must install xarray and rioxarray in order to use the xarray backends" + raise + +from clawpack.geoclaw.xarray_backends import FGMaxBackend, FGOutBackend + +# epsg code for lat-lon +# Optionally, provide an epsg code to assign the associated coordinate system to the file. +# default behavior assigns no coordinate system. Note that if no coordinate system is provided, +# it will come in a GIS with coordinates associated with row and column number (not the x and y position +# encoded in the netcdf). + +epsg_code = 4326 + +# An example of a fgout grid. +filename = "_output/fgout0001.b0001" +# provide the .bxxx file if binary format is used or the +# .qxxx file if ascii format is used. +# the format, fg number, and frame number are inferred from the filename. + +ds = xr.open_dataset( + filename, + engine=FGOutBackend, + backend_kwargs={ + "epsg": epsg_code, + "qmap": "geoclaw", + # qmap is the qmap specified to the fgout object in setrun.py see + # the following documentation page for more details. + # https://www.clawpack.org/dev/fgout.html#specifying-q-out-vars + "dry_tolerance": None, + # variables that are not eta and B are masked + # where h>dry_tolerance. To turn this functionality + # off set dry_tolerance = None. + }, +) +# ds is now an xarray object. It can be interacted with directly or written to netcdf using +ds.to_netcdf("fgout0001_0001.nc") + +# It is possible to combine all fgout files into a single netcdf file +# using xr.open_mfdataset (requires dask) or xr.concat (does not require dask) +# https://docs.xarray.dev/en/stable/generated/xarray.open_mfdataset.html +# https://docs.xarray.dev/en/latest/generated/xarray.concat.html +# for instructions on installing xarray with dask see: +# https://docs.xarray.dev/en/latest/getting-started-guide/installing.html#instructions + +fgout_files = glob.glob("_output/fgout0001.b*") + +try: + ds_all = xr.open_mfdataset( + fgout_files, + engine=FGOutBackend, + backend_kwargs={"epsg": epsg_code, "qmap": "geoclaw"}, + ) +except ValueError: # if dask is not available, use xr.concat. +# if dask is not installed xr.open_mfdataset() will fail with something like +# ValueError: unrecognized chunk manager dask - must be one of: [] + fgouts = [] + for filename in fgout_files: + ds = xr.open_dataset( + filename, + engine=FGOutBackend, + backend_kwargs={"epsg": epsg_code, "qmap": "geoclaw"}, + ) + fgouts.append(ds) + + ds_all = xr.concat(fgouts, dim="time") + +# save out. +ds_all.to_netcdf("fgout_all.nc") + +# An example of a fgmax grid. +filename = "_output/fgmax0001.txt" +ds = xr.open_dataset(filename, engine=FGMaxBackend, backend_kwargs={"epsg": epsg_code}) +ds.to_netcdf("fgmax0001.nc") + +# To see the use of clipping, change the tfinal in setrun to something like 2*3600.0 +# the fgmax0001_clipped.nc will only be the area where the wave arrived within the considered time. +filename = "_output/fgmax0001.txt" +ds = xr.open_dataset( + filename, engine=FGMaxBackend, backend_kwargs={"epsg": epsg_code, "clip": True} +) +ds.to_netcdf("fgmax0001_clipped.nc") diff --git a/src/python/geoclaw/fgout_tools.py b/src/python/geoclaw/fgout_tools.py index 7fdc47e19..de21b51a8 100644 --- a/src/python/geoclaw/fgout_tools.py +++ b/src/python/geoclaw/fgout_tools.py @@ -18,7 +18,7 @@ fgout frames, as a single netCDF file - function read_netcdf: Read a netCDF file and return a list of fgout frames, assuming the file contains all the qoi's needed to reconstruct q. -- function read_netcdf_arrays: Read a netCDF file and extract the +- function read_netcdf_arrays: Read a netCDF file and extract the requested quantities of interest as numpy arrays. - print_netcdf_info: Print info about the contents of a netCDF file containing some fgout frames. @@ -56,7 +56,7 @@ def __init__(self, fgout_grid, frameno=None): # Define shortcuts to attributes of self.fgout_grid that are the same # for all frames (e.g. X,Y) to avoid storing grid for every frame. - + @property def x(self): return self.fgout_grid.x @@ -64,15 +64,15 @@ def x(self): @property def y(self): return self.fgout_grid.y - + @property def X(self): return self.fgout_grid.X - + @property def Y(self): return self.fgout_grid.Y - + @property def delta(self): return self.fgout_grid.delta @@ -80,19 +80,19 @@ def delta(self): @property def extent_centers(self): return self.fgout_grid.extent_centers - + @property def extent_edges(self): return self.fgout_grid.extent_edges - + @property def drytol(self): return self.fgout_grid.drytol - + @property def qmap(self): return self.fgout_grid.qmap - + # Define attributes such as h as @properties with lazy evaluation: # the corresponding array is created and stored only when first # accessed by the user. Those not needed are not created. @@ -105,14 +105,14 @@ def h(self): try: i_h = q_out_vars.index(self.qmap['h']) self._h = self.q[i_h,:,:] - except: + except ValueError: # if q_out_vars does not contain qmap['h'], a value error is thrown try: i_eta = q_out_vars.index(self.qmap['eta']) i_B = q_out_vars.index(self.qmap['B']) self._h = self.q[i_eta,:,:] - self.q[i_B,:,:] - except: + except ValueError: print('*** Could not find h or eta-B in q_out_vars') - raise + raise AttributeError return self._h @property @@ -322,7 +322,7 @@ def __init__(self,fgno=None,outdir='.',output_format=None, 'bdif':7, 'eta':8, 'B':9} else: raise ValueError('Invalid qmap: %s' % qmap) - + # GeoClaw input values: # Many of these should be read from fgout_grids.data # using read_fgout_grids_data before using read_frame @@ -344,12 +344,12 @@ def __init__(self,fgno=None,outdir='.',output_format=None, self.q_out_vars = [1,2,3,4] # list of which components to print # default: h,hu,hv,eta (5=topo) self.nqout = None # number of vars to print out - + self.drytol = 1e-3 # used for computing u,v from hu,hv # private attributes for those that are only created if # needed by the user: - + self._X = None self._Y = None self._x = None @@ -375,7 +375,7 @@ def y(self): dy = (self.y2 - self.y1)/self.ny self._y = numpy.linspace(self.y1+dy/2, self.y2-dy/2, self.ny) return self._y - + @property def X(self): """2D array X of longitudes (cell centers)""" @@ -397,7 +397,7 @@ def delta(self): dy = (self.y2 - self.y1)/self.ny self._delta = (dx,dy) return self._delta - + @property def extent_centers(self): """Extent of cell centers [xmin,xmax,ymin,ymax]""" @@ -405,7 +405,7 @@ def extent_centers(self): self._extent_centers = [self.x.min(), self.x.max(),\ self.y.min(), self.y.max()] return self._extent_centers - + @property def extent_edges(self): """Extent of cell edges [xmin,xmax,ymin,ymax]""" @@ -414,10 +414,10 @@ def extent_edges(self): self._extent_edges = [self.x.min()-dx/2, self.x.max()+dx/2,\ self.y.min()-dy/2, self.y.max()+dy/2] return self._extent_edges - + # Create plotdata of class clawpack.visclaw.ClawPlotData - # only when needed for reading GeoClaw output, + # only when needed for reading GeoClaw output, # since this must be done after fgno, outdir, output_format # have been specified: @@ -450,7 +450,7 @@ def set_plotdata(self): file_prefix_str = plotdata.file_prefix + '.b' else: file_prefix_str = plotdata.file_prefix + '.q' - + # Contrary to usual default, set save_frames to False so that all # the fgout frames read in are not saved in plotdata.framesoln_dict. # Otherwise this might be a memory hog when making an animation with @@ -472,7 +472,7 @@ def read_fgout_grids_data(self, fgno=None, data_file='fgout_grids.data'): data_path = os.path.join(self.outdir, data_file) print('Reading fgout grid info from \n %s' % data_path) - + with open(data_path) as filep: lines = filep.readlines() fgout_input = None @@ -577,7 +577,7 @@ def read_fgout_grids_data_pre511(self, fgno=None, data_path = os.path.join(self.outdir, data_file) print('Reading fgout grid info from \n %s' % data_path) - + with open(data_path) as filep: lines = filep.readlines() fgout_input = None @@ -628,7 +628,7 @@ def write_to_fgout_data(self, fid): errmsg = 'point_style %s is not implement, only point_style==2' \ % point_style raise NotImplementedError(errmsg) - + if self.output_format == 'ascii': output_format = 1 elif self.output_format == 'binary32': @@ -640,7 +640,7 @@ def write_to_fgout_data(self, fid): raise NotImplementedError(errmsg) - + # write header, independent of point_style: #fid = open(self.input_file_name,'w') fid.write("\n") @@ -648,7 +648,7 @@ def write_to_fgout_data(self, fid): fid.write("%i # output_style\n" \ % self.output_style) - + if self.output_style == 1: assert self.tstart is not None, 'Need to set tstart' assert self.tend is not None, 'Need to set tend' @@ -659,7 +659,7 @@ def write_to_fgout_data(self, fid): elif self.output_style == 2: self.nout = len(self.output_times) fid.write("%i %s # nout\n" % (self.nout, 11*" ")) - + # remove [] and , from list of times: output_times_str = repr(list(self.output_times))[1:-1] output_times_str = output_times_str.replace(',','') @@ -695,7 +695,7 @@ def write_to_fgout_data(self, fid): print(" lower left = (%15.10f,%15.10f)" % (x1,y1)) print(" upper right = (%15.10f,%15.10f)" % (x2,y2)) print(" dx = %15.10e, dy = %15.10e" % (dx,dy)) - + # q_out_vars is a list of q components to print, e.g. [1,4] format = len(self.q_out_vars) * '%s ' @@ -725,7 +725,7 @@ def read_frame(self, frameno): self.read_fgout_grids_data() fgoutX = self.X fgoutY = self.Y - + try: fr = self.plotdata.getframe(frameno) except: @@ -741,11 +741,11 @@ def read_frame(self, frameno): fgout_frame.q = state.q fgout_frame.t = state.t - + fgout_frame.frameno = frameno - - X,Y = patch.grid.p_centers[:2] - + + X,Y = patch.grid.p_centers[:2] + if not numpy.allclose(X, fgoutX): errmsg = '*** X read from output does not match fgout_grid.X' raise ValueError(errmsg) @@ -753,7 +753,7 @@ def read_frame(self, frameno): if not numpy.allclose(Y, fgoutY): errmsg = '*** Y read from output does not match fgout_grid.Y' raise ValueError(errmsg) - + return fgout_frame @@ -766,29 +766,29 @@ def make_fgout_fcn_xy(fgout, qoi, method='nearest', """ Create a function that can be called at (x,y) and return the qoi interpolated in space from the fgout array. - - qoi should be a string (e.g. 'h', 'u' or 'v') corresponding to + + qoi should be a string (e.g. 'h', 'u' or 'v') corresponding to an attribute of fgout. - - The function returned takes arguments x,y that can be floats or + + The function returned takes arguments x,y that can be floats or (equal length) 1D arrays of values that lie within the spatial - extent of fgout. - - bounds_error and fill_value determine the behavior if (x,y) is not in + extent of fgout. + + bounds_error and fill_value determine the behavior if (x,y) is not in the bounds of the data, as in scipy.interpolate.RegularGridInterpolator. """ - + from scipy.interpolate import RegularGridInterpolator - + try: q = getattr(fgout,qoi) except: print('*** fgout missing attribute qoi = %s?' % qoi) - + err_msg = '*** q must have same shape as fgout.X\n' \ + 'fgout.X.shape = %s, q.shape = %s' % (fgout.X.shape,q.shape) assert fgout.X.shape == q.shape, err_msg - + x1 = fgout.X[:,0] y1 = fgout.Y[0,:] fgout_fcn1 = RegularGridInterpolator((x1,y1), q, method=method, @@ -816,39 +816,39 @@ def make_fgout_fcn_xyt(fgout1, fgout2, qoi, method_xy='nearest', """ Create a function that can be called at (x,y,t) and return the qoi interpolated in space and time between the two frames fgout1 and fgout2. - - qoi should be a string (e.g. 'h', 'u' or 'v') corresponding to + + qoi should be a string (e.g. 'h', 'u' or 'v') corresponding to an attribute of fgout. - + method_xy is the method used in creating the spatial interpolator, and is passed to make_fgout_fcn_xy. - + method_t is the method used for interpolation in time, currently only 'linear' is supported, which linearly interpolates. - - bounds_error and fill_value determine the behavior if (x,y,t) is not in + + bounds_error and fill_value determine the behavior if (x,y,t) is not in the bounds of the data. - + The function returned takes arguments x,y (floats or equal-length 1D arrays) of values that lie within the spatial extent of fgout1, fgout2 (which are assumed to cover the same uniform grid at different times) - and t should be a float that lies between fgout1.t and fgout2.t. + and t should be a float that lies between fgout1.t and fgout2.t. """ - + assert numpy.allclose(fgout1.X, fgout2.X), \ '*** fgout1 and fgout2 must have same X' assert numpy.allclose(fgout1.Y, fgout2.Y), \ '*** fgout1 and fgout2 must have same Y' - + t1 = fgout1.t t2 = fgout2.t #assert t1 < t2, '*** expected fgout1.t < fgout2.t' - + fgout1_fcn_xy = make_fgout_fcn_xy(fgout1, qoi, method=method_xy, bounds_error=bounds_error, fill_value=fill_value) fgout2_fcn_xy = make_fgout_fcn_xy(fgout2, qoi, method=method_xy, bounds_error=bounds_error, fill_value=fill_value) - + def fgout_fcn(x,y,t): """ Function that can be evaluated at single point or arrays (x,y) @@ -877,15 +877,15 @@ def fgout_fcn(x,y,t): qout = (1-alpha)*qout1 + alpha*qout2 else: raise NotImplementedError('method_t = %s not supported' % method_t) - + return qout - + return fgout_fcn # =============================== # Functions for writing a set of fgout frames as a netCDF file, and -# reading such a file: - +# reading such a file: + def write_netcdf(fgout_frames, fname_nc='fgout_frames.nc', qois = ['h','hu','hv','eta'], datatype='f4', include_B0=False, include_Bfinal=False, @@ -894,63 +894,63 @@ def write_netcdf(fgout_frames, fname_nc='fgout_frames.nc', Write a list of fgout frames (at different times on the same rectangular grid) to a single netCDF file, with some metadata and the topography, if desired. - + fgout_frames should be a list of FGoutFrame objects, all of the same size and at increasing times. - + fname_nc is the name of the file to write. - + qois is a list of strings, the quantities of interest to include in file. - This could include any of: + This could include any of: 'h', 'eta', 'hu', 'hv', 'u', 'v', 's', 'hss', 'B'. All other quantities can be computed from h, hu, hv, eta, the original fgout variables from GeoClaw, but for some applications you might only want to save 'h' and 's', for example. - + datatype should be 'f4' [default] or 'f8', specifying bytes per qoi value. 'f8' has full precision of the original data, but the file will be twice as large and may not be needed for downstream applications. - + Note that the topography B = eta - h, so it is not necessary to store all three of these. Also, B is often the same for all frames, so rather than storing B at each frame as a qoi, two other options are also provided (and then storing eta or h for all frames allows calculating the other): - + include_Bfinal: If True, include the topography B array from the final frame as the Bfinal array. - + include_B0: If True, include the topography B array from the first frame - as the B0 array. This is only useful if, e.g., the first frame is initial + as the B0 array. This is only useful if, e.g., the first frame is initial topography before co-seismic deformation, and at later times the topography - is always equal to Bfinal. - + is always equal to Bfinal. + `description` is a string that will be added as metadata. - A metadata field `history` will also be added, which includes the - time the file was created and the path to the directory where it was made. - """ - + A metadata field `history` will also be added, which includes the + time the file was created and the path to the directory where it was made. + """ + import netCDF4 from datetime import datetime, timedelta from cftime import num2date, date2num import time timestr = time.ctime(time.time()) # current time for metadata - + fg_times = numpy.array([fg.t for fg in fgout_frames]) - + if verbose: print('Creating %s with fgout frames at times: ' % fname_nc) print(fg_times) - + fg0 = fgout_frames[0] x = fg0.x y = fg0.y - + xs = numpy.array([fg.x for fg in fgout_frames]) ys = numpy.array([fg.y for fg in fgout_frames]) # assert same for all times - + units = {'h':'meters', 'eta':'meters', 'hu':'m^2/s', 'hv':'m^2/s', 'u':'m/s', 'v':'m/s', 's':'m/s', 'hss':'m^3/s^2', 'B':'meters'} @@ -969,12 +969,12 @@ def write_netcdf(fgout_frames, fname_nc='fgout_frames.nc', latitudes = rootgrp.createVariable('lat','f8',('lat',)) latitudes[:] = y latitudes.units = 'degrees_north' - + time = rootgrp.createDimension('time', len(fg_times)) times = rootgrp.createVariable('time','f8',('time',)) times[:] = fg_times times.units = 'seconds' - + if 0: # Could make times be datetimes relative to some event time, e.g.: times.units = 'seconds since 1700-01-26 21:00:00.0' @@ -982,7 +982,7 @@ def write_netcdf(fgout_frames, fname_nc='fgout_frames.nc', dates = [datetime(1700,1,26,21) + timedelta(seconds=ss) \ for ss in fg_times] times[:] = date2num(dates,units=times.units,calendar=times.calendar) - + if include_B0: B0 = rootgrp.createVariable('B0',datatype,('lon','lat',)) B0[:,:] = fg0.B @@ -993,7 +993,7 @@ def write_netcdf(fgout_frames, fname_nc='fgout_frames.nc', Bfinal = rootgrp.createVariable('Bfinal',datatype,('lon','lat',)) Bfinal[:,:] = fg_final.B Bfinal.units = 'meters' - + for qoi in qois: qoi_frames = [getattr(fgout,qoi) for fgout in fgout_frames] qoi_var = rootgrp.createVariable(qoi,datatype,('time','lon','lat',)) @@ -1002,7 +1002,7 @@ def write_netcdf(fgout_frames, fname_nc='fgout_frames.nc', def get_as_array(var, rootgrp, verbose=True): """ - Utility function to retrieve variable from netCDF file and convert to + Utility function to retrieve variable from netCDF file and convert to numpy array. """ a = rootgrp.variables.get(var, None) @@ -1011,7 +1011,7 @@ def get_as_array(var, rootgrp, verbose=True): return numpy.array(a) else: if verbose: print(' Did not find %s' % var) - return None + return None def read_netcdf_arrays(fname_nc, qois, verbose=True): @@ -1022,19 +1022,19 @@ def read_netcdf_arrays(fname_nc, qois, verbose=True): 'h', 'eta', 'hu', 'hv', 'u', 'v', 's', 'hss', 'B'. qois can also include the time-independent 'B0' and/or 'Bfinal' - - Returns + + Returns x, y, t, qoi_arrays where x,y define the longitude, latitudes, t is the times of the frames, and qoi_arrays is a dictionary indexed by the strings from qois. - + Each dict element is an array with shape (len(t), len(x), len(y)) for time-dependent qoi's, or (len(x), len(y)) for B0 or Bfinal, or None if that qoi was not found in the netCDF file. """ - + import netCDF4 with netCDF4.Dataset(fname_nc, 'r') as rootgrp: @@ -1046,17 +1046,17 @@ def read_netcdf_arrays(fname_nc, qois, verbose=True): x = get_as_array('lon', rootgrp, verbose) y = get_as_array('lat', rootgrp, verbose) t = get_as_array('time', rootgrp, verbose) - + vars = list(rootgrp.variables) - + qoi_arrays = {} for qoi in qois: qoi_array = get_as_array(qoi, rootgrp, verbose) qoi_arrays[qoi] = qoi_array - + return x,y,t,qoi_arrays - - + + def read_netcdf(fname_nc, fgout_grid=None, verbose=True): """ @@ -1065,7 +1065,7 @@ def read_netcdf(fname_nc, fgout_grid=None, verbose=True): the qoi's 'h','hu','hv','eta' required to reconstruct the q array as output by GeoClaw. """ - + import netCDF4 with netCDF4.Dataset(fname_nc, 'r') as rootgrp: @@ -1079,7 +1079,7 @@ def read_netcdf(fname_nc, fgout_grid=None, verbose=True): t = get_as_array('time', rootgrp, verbose) vars = list(rootgrp.variables) - + for qoi in ['h','hu','hv','eta']: errmsg = '*** Cannot reconstruct fgout frame without %s' % qoi @@ -1088,14 +1088,14 @@ def read_netcdf(fname_nc, fgout_grid=None, verbose=True): hu = get_as_array('hu', rootgrp, verbose) hv = get_as_array('hv', rootgrp, verbose) eta = get_as_array('eta', rootgrp, verbose) - + if (x is None) or (y is None): print('*** Could not create grid') else: X,Y = numpy.meshgrid(x,y) - + fgout_frames = [] - + for k in range(eta.shape[0]): fgout = FGoutFrame(fgout_grid=fgout_grid, frameno=k) fgout.x = x @@ -1109,9 +1109,9 @@ def read_netcdf(fname_nc, fgout_grid=None, verbose=True): fgout.X = X fgout.Y = Y fgout_frames.append(fgout) - + print('Created fgout_frames as list of length %i' % len(fgout_frames)) - + return fgout_frames def print_netcdf_info(fname_nc): @@ -1127,7 +1127,7 @@ def print_netcdf_info(fname_nc): t = get_as_array('time', rootgrp, verbose=False) vars = list(rootgrp.variables) - + print('===================================================') print('netCDF file %s contains:' % fname_nc) print('description: \n', rootgrp.description) @@ -1137,5 +1137,3 @@ def print_netcdf_info(fname_nc): print('%i times from %.3f to %.3f' % (len(t),t[0],t[-1])) print('variables: ',vars) print('===================================================') - - diff --git a/src/python/geoclaw/xarray_backends.py b/src/python/geoclaw/xarray_backends.py new file mode 100644 index 000000000..326d0d908 --- /dev/null +++ b/src/python/geoclaw/xarray_backends.py @@ -0,0 +1,583 @@ +r""" +xarray backends module: $CLAW/geoclaw/src/python/geoclaw/xarray_backends.py + +Xarray backends for GeoClaw fixed grids and fgmax grids. + +These only work for point_style = 2 (uniform regular) and have a dependency on +xarray and rioxarray. Xarray provides the core datastructure and rioxarray +provides an interface to rasterio, used to assign geospatial projection +information. + +- https://docs.xarray.dev/en/stable/index.html +- https://corteva.github.io/rioxarray/stable/readme.html +- https://rasterio.readthedocs.io/en/latest/index.html + +The expectation is that you run commands that use these backends from the same +directory you run "make output". + +Includes: + +- class FGMaxBackend: Xarray backend for fgmax grids. +- class FGOutBackend: Xarray backend for fgout grids. + +Usage: + +.. code-block:: python + import rioxarray # activate the rio accessor + import xarray as xr + from clawpack.geoclaw.xarray_backends import FGOutBackend, FGMaxBackend + + # An example of a fgout grid. + + filename = '_output/fgout0001.b0001 + # provide the .bxxx file if binary format is used or the + # .qxxx file if ascii format is used. + # the format, fg number, and frame number are inferred from the filename. + + ds = xr.open_dataset( + filename, + engine=FGOutBackend, + backend_kwargs={ + 'qmap': 'geoclaw', + 'epsg':epsg_code + }) + # ds is now an xarray object. It can be interacted with directly or written to netcdf using + ds.write_netcdf('filename.nc') + + # A 'qmap' backend_kwargs is required as it indicates the qmap used for + # selecting elements of q to include in fgout. See + # https://www.clawpack.org/dev/fgout.html#specifying-q-out-vars + + # Optionally, provide an epsg code to assign the associated coordinate system to the file. + # default behavior assigns no coordinate system. + + # An example of a fgmax grid. + filename = "_output/fgmax0001.txt" + ds = xr.open_dataset(filename, engine=FGMaxBackend, backend_kwargs={'epsg':epsg_code}) + + +Dimensions: + +Files opened with FGOutBackend will have dimensions (time, y, x). +Files opened with FGMaxBackend will have dimensions (y, x). + +Variable naming: + +For fixed grid files, the dataset will have the elements of q specified +by qmap and q_out_vars. This may be as many as the full list described below or +as few as specified by the user. + +For fixed grid geoclaw files, the full set of variables is: +- h +- hu +- hv +- eta +- B + +For fixed grid geoclaw-bouss files, the full set of variables is: +- h +- hu +- hv +- huc +- hvc +- eta +- B + +Fixed grid dclaw files, the full set of variables is: +- h +- hu +- hv +- hm +- pb +- hchi +- bdif +- eta +- B + +Depending on the number of variables specified in the setrun.py fgmax files will +have a portion of the following variables: + +If rundata.fgmax_data.num_fgmax_val == 1 + +- arrival_time, Wave arrival time (based on eta>sea_level + fg.arrival_tol) +- h_max, Maximum water depth +- eta_max, Maximum water surface elevation +- h_max_time, Time of maximum water depth +- B, Basal topography at the first time fgmax first monitored maximum amr level +- level, Maximum amr level + +If rundata.fgmax_data.num_fgmax_val == 2: + +- s_max, Maximum velocity +- s_max_time, Time of maximum velocity + +If rundata.fgmax_data.num_fgmax_val == 5: + +- hs_max, Maximum momentum +- hs_max_time, Time of maximum momentum +- hss_max, Maximum momentum flux +- hss_max_time, Time of maximum momentum flux +- h_min, Minimum depth +- h_min_time, Time of minimum depth + + +See the following links for additional information about xarray Backends. + +- https://docs.xarray.dev/en/stable/generated/xarray.backends.BackendEntrypoint.html#xarray.backends.BackendEntrypoint +- https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html +""" + +import os + +import numpy as np + +try: + import rioxarray # activate the rio accessor + import xarray as xr +except ImportError: + raise ImportError( + "rioxarray and xarray are required to use the FGOutBackend and FGMaxBackend" + ) + +from clawpack.geoclaw import fgmax_tools, fgout_tools +from xarray.backends import BackendEntrypoint + +_qunits = { + "h": "meters", + "hu": "meters squared per second", + "hv": "meters squared per second", + "hm": "meters", + "pb": "newton per meter squared", + "hchi": "meters", + "bdif": "meters", + "eta": "meters", + "B": "meters", + "huc": "varies", + "hvc": "varies", +} + +time_unit = "seconds" +reference_time = "model start" +space_unit = "meters" +nodata = np.nan + + +def _prepare_var(data, mask, dims, units, nodata, long_name): + "Reorient array into the format xarray expects and generate the mapping." + if mask is not None: + data[mask] = nodata + data = data.T + data = np.flipud(data) + mapping = ( + dims, + data, + {"units": units, "_FillValue": nodata, "long_name": long_name}, + ) + return mapping + + +class FGOutBackend(BackendEntrypoint): + "Xarray Backend for Clawpack fixed grid format." + + def open_dataset( + self, + filename, # path to fgout file. + qmap="geoclaw", # qmap value for FGoutGrid ('geoclaw', 'dclaw', or 'geoclaw-bouss') + epsg=None, # epsg code + dry_tolerance=0.001, + # dry tolerance used for for masking elements of q that are not + # eta or B. Default behavior is to mask all elements of q except for + # eta and B where h>0.001. + # used only if h or eta-B is available based on q_out_vars. + # if dry_tolerance = None, no masking is applied to any variable. + drop_variables=None, # name of any elements of q to drop. + ): + + if drop_variables is None: + drop_variables = [] + + full_path = os.path.abspath(filename) + filename = os.path.basename(full_path) + outdir = os.path.basename(os.path.dirname(full_path)) + + # filename has the format fgoutXXXX.qYYYY (ascii) + # or fgoutXXXX.bYYYY (binary) + # where XXXX is the fixed grid number and YYYY is the frame + # number. + type_code = filename.split(".")[-1][0] + fgno = int(filename.split(".")[0][-4:]) + frameno = int(filename.split(".")[-1][-4:]) + if type_code == "q": # TODO, is this correct? + output_format = "ascii" + elif type_code == "b": + output_format = "binary32" # format of fgout grid output + else: + raise ValueError("Invalid FGout output format. Must be ascii or binary.") + + fgout_grid = fgout_tools.FGoutGrid( + fgno=fgno, outdir=outdir, output_format=output_format, qmap=qmap + ) + + if fgout_grid.point_style != 2: + raise ValueError("FGOutBackend only works with fg.point_style=2") + + fgout_grid.read_fgout_grids_data() + fgout = fgout_grid.read_frame(frameno) + + time = fgout.t + # both come in ascending. flip to give expected order. + x = fgout.x + y = np.flipud(fgout.y) + nj = len(x) + ni = len(y) + + # mask based on dry tolerance + if dry_tolerance is not None: + try: + mask = fgout.h < dry_tolerance + # internally fgout_tools will try fgou.eta-fgout.B if h is not present. + except AttributeError: + print("FGOutBackend: No h, eta, or B. No mask applied.") + mask = np.ones((nj, ni), dtype=bool) + + # create data_vars dictionary + data_vars = {} + for i, i_var in enumerate(fgout_grid.q_out_vars): + + # construct variable + + # Find the varname in fgout.qmap associated with + # q_out_vars[i] + varname = None + for name, index in fgout_grid.qmap.items(): + if i_var == index: + varname = name + + # construct xarray dataset if varname not in drop vars. + if varname not in drop_variables: + + Q = fgout.q[i, :, :] + + # mask all but eta based on h presence. + if (varname not in ("B", "eta")) and dry_tolerance is not None: + Q[mask] = nodata + + # to keep xarray happy, need to transpose and flip ud. + Q = Q.T + Q = np.flipud(Q) + Q = Q.reshape((1, ni, nj)) # reshape to add a time dimension. + + data_array_attrs = {"units": _qunits[varname], "_FillValue": nodata} + + data_vars[varname] = ( + [ + "time", + "y", + "x", + ], + Q, + data_array_attrs, + ) + + ds_attrs = {"description": "Clawpack model output"} + + ds = xr.Dataset( + data_vars=data_vars, + coords=dict( + x=(["x"], x, {"units": space_unit}), + y=(["y"], y, {"units": space_unit}), + time=("time", [time], {"units": "seconds"}), + reference_time=reference_time, + ), + attrs=ds_attrs, + ) + + if epsg is not None: + ds.rio.write_crs( + epsg, + inplace=True, + ).rio.set_spatial_dims( + x_dim="x", + y_dim="y", + inplace=True, + ).rio.write_coordinate_system(inplace=True) + # https://corteva.github.io/rioxarray/stable/getting_started/crs_management.html#Spatial-dimensions + # https://gis.stackexchange.com/questions/470207/how-to-write-crs-info-to-netcdf-in-a-way-qgis-can-read-python-xarray + + return ds + + open_dataset_parameters = ["filename", "drop_variables"] + + description = "Use Clawpack fixed grid output files in Xarray" + url = "https://www.clawpack.org/fgout.html" + + +class FGMaxBackend(BackendEntrypoint): + "Xarray Backend for Clawpack fgmax grid format." + + def open_dataset( + self, + filename, + epsg=None, + drop_variables=None, + clip=False, # if True, clip the entire array to the extent where arrival_time is defined. + ): + + if drop_variables is None: + drop_variables = [] + + # expectation is for standard clawpack organization, + # e.g., output and 'fgmax_grids.data' in _output + fgno = int(os.path.basename(filename).split(".")[0][-4:]) + outdir = os.path.dirname(filename) + data_file = os.path.join(outdir, "fgmax_grids.data") + + fg = fgmax_tools.FGmaxGrid() + fg.read_fgmax_grids_data(fgno=fgno, data_file=data_file) + if fg.point_style != 2: + raise ValueError("FGMaxBackend only works with fg.point_style=2") + + fg.read_output(outdir=outdir) + + # Construct the x and y coordinates + # Both come in ascending, therefore flip y so that it is ordered as expected. + x = fg.x + y = np.flipud(fg.y) + + # Construct the data_vars array. To organize the + # data in the way expected by netcdf standards, need + # to both transpose and flipud the array. + + data_vars = {} + + data_vars["arrival_time"] = _prepare_var( + fg.arrival_time.data, + fg.arrival_time.mask, + [ + "y", + "x", + ], + "seconds", + nodata, + "Wave arrival time", + ) + + data_vars["h_max"] = _prepare_var( + fg.h.data, + fg.h.mask, + [ + "y", + "x", + ], + "meters", + nodata, + "Maximum water depth", + ) + + data_vars["eta_max"] = _prepare_var( + fg.h.data + fg.B.data, + fg.h.mask, + [ + "y", + "x", + ], + "meters", + nodata, + "Maximum water surface elevation", + ) + + data_vars["h_max_time"] = _prepare_var( + fg.h_time.data, + fg.h_time.mask, + [ + "y", + "x", + ], + "seconds", + nodata, + "Time of maximum water depth", + ) + + data_vars["B"] = _prepare_var( + fg.B, + None, + [ + "y", + "x", + ], + "meters", + nodata, + "Basal topography at the first time fgmax first monitored maximum amr level", + ) + + data_vars["level"] = _prepare_var( + fg.level, + None, + [ + "y", + "x", + ], + "(no units)", + -1, + "Maximum amr level", + ) + + if hasattr(fg, "s"): + if hasattr(fg.s, "data"): + data_vars["s_max"] = _prepare_var( + fg.s.data, + fg.s.mask, + [ + "y", + "x", + ], + "meters per second", + nodata, + "Maximum velocity", + ) + data_vars["s_max_time"] = _prepare_var( + fg.s_time.data, + fg.s_time.mask, + [ + "y", + "x", + ], + "seconds", + nodata, + "Time of maximum velocity", + ) + if hasattr(fg, "hs"): + if hasattr(fg.hs, "data"): + data_vars["hs_max"] = _prepare_var( + fg.hs.data, + fg.hs.mask, + [ + "y", + "x", + ], + "meters squared per second", + nodata, + "Maximum momentum", + ) + data_vars["hs_max_time"] = _prepare_var( + fg.hs_time.data, + fg.hs_time.mask, + [ + "y", + "x", + ], + "seconds", + nodata, + "Time of maximum momentum", + ) + + data_vars["hss_max"] = _prepare_var( + fg.hss.data, + fg.hss.mask, + [ + "y", + "x", + ], + "meters cubed per second squared", + nodata, + "Maximum momentum flux", + ) + data_vars["hss_max_time"] = _prepare_var( + fg.hss_time.data, + fg.hss_time.mask, + [ + "y", + "x", + ], + "seconds", + nodata, + "Time of maximum momentum flux", + ) + + data_vars["h_min"] = _prepare_var( + fg.hmin.data, + fg.hmin.mask, + [ + "y", + "x", + ], + "meters", + nodata, + "Minimum depth", + ) + data_vars["h_min_time"] = _prepare_var( + fg.hmin_time.data, + fg.hmin_time.mask, + [ + "y", + "x", + ], + "seconds", + nodata, + "Time of minimum depth", + ) + + # drop requested variables + for var in drop_variables: + if var in data_vars.keys(): + del data_vars[var] + + # Construct the values from + ds_attrs = {"description": "D-Claw model output"} + + ds = xr.Dataset( + data_vars=data_vars, + coords=dict( + x=(["x"], x, {"units": "meters"}), + y=(["y"], y, {"units": "meters"}), + ), + attrs=ds_attrs, + ) + + if epsg is not None: + + ds.rio.write_crs( + epsg, + inplace=True, + ).rio.set_spatial_dims( + x_dim="x", + y_dim="y", + inplace=True, + ).rio.write_coordinate_system(inplace=True) + # https://corteva.github.io/rioxarray/stable/getting_started/crs_management.html#Spatial-dimensions + # https://gis.stackexchange.com/questions/470207/how-to-write-crs-info-to-netcdf-in-a-way-qgis-can-read-python-xarray + + # clip + if clip: + clip_data = fg.arrival_time.data >= 0 + clip_data = clip_data.T + clip_data = np.flipud(clip_data) + ds = _clip(ds, clip_data) + + return ds + + open_dataset_parameters = ["filename", "drop_variables", "clip"] + + description = "Use Clawpack fix grid monitoring files in Xarray" + url = "https://www.clawpack.org/fgmax.html#fgmax" + + +def _clip(ds, clip_data): + # clip DS to the extent where clip_data is true. + # useful for when there are large areas where nothing happened. + + nrow, ncol = clip_data.shape + + valid_col = np.sum(clip_data, axis=0) > 0 + valid_row = np.sum(clip_data, axis=1) > 0 + + sel_rows = np.nonzero(valid_row)[0] + sel_cols = np.nonzero(valid_col)[0] + + first_row = sel_rows[0] + last_row = sel_rows[-1] + first_col = sel_cols[0] + last_col = sel_cols[-1] + + ds = ds.isel(y=np.arange(first_row, last_row), x=np.arange(first_col, last_col)) + return ds