+"""
+Extract a field from a time series of NetCDF files as VTK files for plotting in
+ParaView.
+
+It can extract fields across multiple files by passing in a regular expression
+for the filename pattern. As an example, one can run the script using:
+
+ ./paraview_vtk_field_extractor.py -v areaCell,latVertex -f "hist.comp.*.nc"
+
+to extract a time series of areaCell and latVertex that spans multiple files.
+By default, time-independent fields on cells are written to a file
+vtk_files/staticFieldsOnCells.vtp
+and time-dependent fields on cells are written to
+vtk_files/timeDependentFieldsOnCells.pvd
+vtk_files/time_series/timeDependentFieldsOnCells.0.vtp
+vtk_files/time_series/timeDependentFieldsOnCells.1.vtp
+...
+and similarly for edges and vertices. Time-independent fields can be
+included in each time step of the time-dependent fields with
+the --combine flag. This allows time-dependent and -independent fields
+to be combined in filters within ParaView at the expense of considerable
+additional storage space.
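+For example, adding --combine to the command above:
+
+ ./paraview_vtk_field_extractor.py -v areaCell,latVertex -f "hist.comp.*.nc" --combine
+
+writes the time-independent fields into each time step's file along with any
+time-dependent fields.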
+
+Indices for extra dimensions can either be supplied at runtime at a prompt
+or through the -d flag. For each extra dimension, the user can specify
+nothing (an empty string, meaning skip any fields with this dimension),
+a single index, a comma-separated list of indices, or a range of indices
+(separated by 1 or 2 colons). For example,
+
+ -d maxEdges= nVertLevels=0:10:2 nParticles=0,2,4,6,8
+
+will ignore any fields with dimension maxEdges, extract every other layer from
+the first 10 vertical levels (each into its own field) and extract the five
+specified particles.
+
+An index array can also be specified in this way (and these can be mixed with
+integer indices in a comma-separated list but not in a colon-separated range):
+
+ -d nVertLevels=0,maxLevelCell
+
+will extract fields from the first vertical level and the vertical level with
+index given by maxLevelCell.
+
+The extractor includes optional support for extracting geometry appropriate
+for displaying variables at the depth of a topographic feature (typically the
+top or bottom of the domain) for MPAS components with a spatially variable
+top or bottom index (e.g. `maxLevelCell` in MPAS-Ocean). This is accomplished
+with flags such as:
+
+ --topo_dim=nVertLevels --topo_cell_index=maxLevelCell
+
+Fields on cells are sampled at the topographic index and the geometry includes
+polygons corresponding to edges so that vertical faces between adjacent cells
+can be displayed. Fields are extracted as normal except that they are sampled
+as point data rather than cell data, allowing computations in ParaView to
+display the topography. A mask field is also included indicating which parts
+of edge polygons correspond to the boundary of the domain (boundaryMask == 1)
+and which parts of cell and edge polygons are interior (boundaryMask == 0).
+Together, this can be used to plot topography by using a calculator filter like
+the following:
+
+ coords*(1.0 + 100.0/mag(coords)*((1 - boundaryMask)*(-bottomDepth)
+ + 10.0*boundaryMask))
+
+If this is entered into a Calculator Filter in ParaView with the "coordinate
+result" box checked, the result will to display the MPAS-Ocean topography,
+exaggerated by a factor of 100, with a value equivalent to 10 m along boundary
+points of edge polygons (a "water-tight" surface).
+"""
+
+from __future__ import absolute_import, division, print_function, \
+ unicode_literals
+
+from pyevtk.vtk import VtkFile, VtkPolyData
+
+import sys
+import os
+import glob
+import numpy
+import argparse
+from datetime import datetime
+from netCDF4 import date2num
+
+from builtins import input
+
+from netCDF4 import Dataset as NetCDFFile
+
+import xarray
+import json
+from geometric_features import FeatureCollection
+import logging
+from io import StringIO
+
+from progressbar import ProgressBar, Percentage, Bar, ETA
+from mpas_tools.conversion import mask, cull
+from mpas_tools.io import write_netcdf
+from mpas_tools.logging import check_call
+
+
+
+def main():
+ parser = \
+ argparse.ArgumentParser(description=__doc__,
+ formatter_class=argparse.RawTextHelpFormatter)
+ parser.add_argument("-f", "--file_pattern", dest="filename_pattern",
+ help="MPAS Filename pattern.", metavar="FILE",
+ required=True)
+ parser.add_argument("-m", "--mesh_file", dest="mesh_filename",
+ help="MPAS Mesh filename. If not set, it will use the "
+ "first file in the -f flag as the mesh file.")
+ parser.add_argument("-b", "--blocking", dest="blocking", type=int,
+ help="Size of blocks when reading MPAS file",
+ metavar="BLK", default=32000)
+ parser.add_argument("-v", "--variable_list", dest="variable_list",
+ help="List of variables to extract ('all' for all "
+ "variables, 'allOnCells' for all variables on "
+ "cells, etc.)", metavar="VAR", required=True)
+ parser.add_argument("-3", "--32bit", dest="output_32bit",
+ help="If set, the vtk files will be written using "
+ "32bit floats.", action="store_true")
+ parser.add_argument("-c", "--combine", dest="combine_output",
+ help="If set, time-independent fields are written to "
+ "each file along with time-dependent fields.",
+ action="store_true")
+ parser.add_argument("-a", "--append", dest="append",
+ help="If set, only vtp files that do not already "
+ "exist are written out.", action="store_true")
+ parser.add_argument("-d", "--dim_list", dest="dimension_list", nargs="+",
+ help="A list of dimensions and associated indices.",
+ metavar="DIM")
+ parser.add_argument("-o", "--out_dir", dest="out_dir",
+ help="the output directory.", default='vtk_files',
+ metavar="DIR")
+ parser.add_argument("-x", "--xtime", dest="xtime", default='xtime',
+ metavar="XTIME",
+ help="the name of the xtime variable or 'none' to "
+ "extract Time dim without xtime")
+ parser.add_argument("-l", "--lonlat", dest="lonlat",
+ help="If set, the resulting points are in lon-lat "
+ "space, not Cartesian.", action="store_true")
+ parser.add_argument("-t", "--time", dest="time",
+ help="Indices for the time dimension", metavar="TIME",
+ required=False)
+ parser.add_argument("--ignore_time", dest="ignore_time", required=False,
+ action="store_true",
+ help="ignore the Time dimension if it exists "
+                             "for files with a Time dimension but no xtime "
+                             "variable (e.g. mesh file)")
+ parser.add_argument("--topo_dim", dest="topo_dim", required=False,
+ help="Dimension and range for topography dimension")
+ parser.add_argument("--topo_cell_index", dest="topo_cell_index",
+ required=False,
+ help="Index array indicating the bottom of the domain "
+ "(default is the topo_dim-1 for all cells)")
+ parser.add_argument("--include_mesh_vars", dest="include_mesh_vars",
+ action="store_true",
+                        help="Whether to extract mesh variables as well as "
+                             "time-series variables")
+ parser.add_argument("--region_mask", dest="region_mask", required=False,
+                        help="A geojson file defining a region that the data "
+                             "should be masked to before extraction. Make one "
+                             "easily at https://geojson.io")
+ parser.add_argument("--temp_dir", dest="temp_dir", required=False,
+ default="./culled_region",
+ help="If --region_mask is provided, a temporary "
+ "directory for the culled files")
+ args = parser.parse_args()
+
+ if args.region_mask is not None:
+ fc_region_mask = FeatureCollection()
+ with open(args.region_mask) as f:
+ featuresDict = json.load(f)
+ defaults = {'component': 'ocean',
+ 'name': 'mask',
+ 'object': 'region'}
+ for feature in featuresDict['features']:
+ if feature['geometry']['type'] not in ['Polygon',
+ 'MultiPolygon']:
+ raise ValueError('All masking features must be regions '
+ '(Polygons or MultiPolygons)')
+ # assign the default values if they're not already present
+ for key, value in defaults.items():
+ if key not in feature['properties']:
+ feature['properties'][key] = value
+ fc_region_mask.add_feature(feature)
+ else:
+ fc_region_mask = None
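+
+    # For reference, a minimal region geojson file (e.g. one drawn at
+    # https://geojson.io) might look something like:
+    #
+    # {
+    #     "type": "FeatureCollection",
+    #     "features": [
+    #         {
+    #             "type": "Feature",
+    #             "properties": {},
+    #             "geometry": {
+    #                 "type": "Polygon",
+    #                 "coordinates": [[[-30.0, 0.0], [30.0, 0.0],
+    #                                  [0.0, 40.0], [-30.0, 0.0]]]
+    #             }
+    #         }
+    #     ]
+    # }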
+
+ extract_vtk(filename_pattern=args.filename_pattern,
+ variable_list=args.variable_list,
+ dimension_list=args.dimension_list,
+ mesh_filename=args.mesh_filename,
+ blocking=args.blocking,
+ output_32bit=args.output_32bit,
+ combine=args.combine_output,
+ append=args.append,
+ out_dir=args.out_dir,
+ xtime=args.xtime,
+ lonlat=args.lonlat,
+ time=args.time,
+ ignore_time=args.ignore_time,
+ topo_dim=args.topo_dim,
+ topo_cell_index=args.topo_cell_index,
+ include_mesh_vars=args.include_mesh_vars,
+ fc_region_mask=fc_region_mask,
+ temp_dir=args.temp_dir)
+
+
+def build_field_time_series(local_time_indices, file_names, mesh_file,
+ out_dir, blocking, all_dim_vals, blockDimName,
+ variable_list, vertices, connectivity, offsets,
+ valid_mask, output_32bit, combine_output, append,
+ xtimeName, use_progress_bar, topo_dim=None,
+ topo_cell_indices=None, cell_to_point_map=None,
+ boundary_mask=None): # {{{
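+    """
+    Write one .vtp file per output time frame for the time-dependent fields
+    (indexed by a .pvd collection file) and, unless output is combined, a
+    single .vtp file for the time-independent fields. Fields are read in
+    blocks of ``blocking`` entries along the cell/edge/vertex dimension to
+    limit memory use.
+    """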
+
+ if len(variable_list) == 0:
+ return
+
+ if output_32bit:
+ outType = 'float32'
+ else:
+ outType = 'float64'
+
+ # Get dimension info to allocate the size of Colors
+ time_series_file = open_netcdf(file_names[0])
+
+ if mesh_file is not None:
+ # blockDim may not exist in time series file
+ blockDim = len(mesh_file.dimensions[blockDimName])
+ else:
+ blockDim = len(time_series_file.dimensions[blockDimName])
+
+ if boundary_mask is not None:
+ variable_list.append('boundaryMask')
+ all_dim_vals['boundaryMask'] = None
+ pointData = True
+ cellData = False
+ else:
+ pointData = False
+ cellData = True
+
+ # Pre-compute the number of blocks
+ nBlocks = int(numpy.ceil(blockDim / blocking))
+
+ nPolygons = len(offsets)
+ nPoints = len(vertices[0])
+ nTimes = len(local_time_indices)
+ nVars = len(variable_list)
+
+ var_has_time_dim = numpy.zeros(nVars, bool)
+ nHyperSlabs = 0
+ for iVar in range(nVars):
+ var_name = variable_list[iVar]
+ if boundary_mask is not None and var_name == 'boundaryMask':
+ var_has_time_dim[iVar] = False
+ elif xtimeName is not None:
+ if var_name in time_series_file.variables:
+ var_has_time_dim[iVar] = \
+ 'Time' in time_series_file.variables[var_name].dimensions
+ else:
+ # we can't support time dependence in the mesh file
+ assert('Time' not in mesh_file.variables[var_name].dimensions)
+ var_has_time_dim[iVar] = False
+
+ extra_dim_vals = all_dim_vals[var_name]
+ if (extra_dim_vals is None) or (len(extra_dim_vals) == 0):
+ nHyperSlabs += 1
+ else:
+ nHyperSlabs += len(extra_dim_vals)
+
+ any_var_has_time_dim = numpy.any(var_has_time_dim)
+
+ if topo_dim is not None:
+ if (mesh_file is not None) and (topo_dim in mesh_file.dimensions):
+ nTopoLevels = len(mesh_file.dimensions[topo_dim])
+ else:
+ nTopoLevels = len(time_series_file.dimensions[topo_dim])
+ else:
+ nTopoLevels = None
+
+ time_series_file.close()
+
+ try:
+ os.makedirs(out_dir)
+ except OSError:
+ pass
+
+ if any_var_has_time_dim:
+ try:
+ os.makedirs('{}/time_series'.format(out_dir))
+ except OSError:
+ pass
+ else:
+ # there is no point in combining output if no fields have Time dim
+ combine_output = False
+ nTimes = 1
+
+ # Output time series
+ if use_progress_bar:
+ widgets = ['Writing time series: ', Percentage(), ' ', Bar(), ' ',
+ ETA()]
+ field_bar = ProgressBar(widgets=widgets,
+ maxval=nTimes*nHyperSlabs).start()
+ else:
+ print("Writing time series....")
+
+ suffix = blockDimName[1:]
+ if any_var_has_time_dim:
+ if combine_output or numpy.all(var_has_time_dim):
+ out_prefix = "fieldsOn{}".format(suffix)
+ else:
+ out_prefix = "timeDependentFieldsOn{}".format(suffix)
+ # start the pvd file
+ pvd_file = write_pvd_header(out_dir, out_prefix)
+ pvd_file.write('<Collection>\n')
+
+ if not combine_output and not numpy.all(var_has_time_dim):
+ static_prefix = "staticFieldsOn{}".format(suffix)
+ varIndices = numpy.arange(nVars)[numpy.logical_not(var_has_time_dim)]
+ timeIndependentFile = write_vtp_header(out_dir,
+ static_prefix,
+ varIndices[0],
+ varIndices,
+ variable_list,
+ all_dim_vals,
+ vertices,
+ connectivity,
+ offsets,
+ nPoints,
+ nPolygons,
+ outType,
+ cellData=cellData,
+ pointData=pointData,
+ xtime=None)
+
+ prev_file = ""
+ for time_index in range(nTimes):
+
+ if prev_file != file_names[time_index]:
+ if prev_file != "":
+ time_series_file.close()
+ time_series_file = open_netcdf(file_names[time_index])
+ prev_file = file_names[time_index]
+
+ if any_var_has_time_dim:
+ if xtimeName is None:
+ xtime = None
+ years = float(time_index)
+ else:
+ if xtimeName == 'none':
+ xtime = '{}'.format(time_index)
+ years = float(time_index)
+ else:
+ if xtimeName not in time_series_file.variables:
+ raise ValueError("xtime variable name {} not found in "
+ "{}".format(xtimeName,
+ time_series_file))
+ var = time_series_file.variables[xtimeName]
+ if len(var.shape) == 2:
+ xtime = var[local_time_indices[time_index], :]
+                        xtime = xtime.tobytes().decode('utf-8').strip().strip(
+                            '\x00')
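+                        # MPAS xtime entries are fixed-width strings of
+                        # the form 'YYYY-MM-DD_hh:mm:ss', so the date
+                        # components can be sliced out by position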
+ date = datetime(int(xtime[0:4]), int(xtime[5:7]),
+ int(xtime[8:10]), int(xtime[11:13]),
+ int(xtime[14:16]), int(xtime[17:19]))
+ years = date2num(date, units='days since 0000-01-01',
+ calendar='noleap')/365.
+ else:
+ xtime = var[local_time_indices[time_index]]
+ years = xtime/365.
+ xtime = str(xtime)
+
+ # write the header for the vtp file
+ vtp_file_prefix = "time_series/{}.{:d}".format(out_prefix,
+ time_index)
+ file_name = '{}/{}.vtp'.format(out_dir, vtp_file_prefix)
+ if append and os.path.exists(file_name):
+ pvd_file.write('<DataSet timestep="{:.16f}" group="" '
+ 'part="0"\n'.format(years))
+ pvd_file.write('\tfile="{}.vtp"/>\n'.format(vtp_file_prefix))
+ continue
+
+ if combine_output:
+ varIndices = numpy.arange(nVars)
+ else:
+ varIndices = numpy.arange(nVars)[var_has_time_dim]
+ timeDependentFile = write_vtp_header(out_dir,
+ vtp_file_prefix,
+ varIndices[0],
+ varIndices,
+ variable_list,
+ all_dim_vals,
+ vertices,
+ connectivity,
+ offsets,
+ nPoints,
+ nPolygons,
+ outType,
+ cellData=cellData,
+ pointData=pointData,
+ xtime=xtime)
+
+            # add time step to pvd file
+ pvd_file.write('<DataSet timestep="{:.16f}" group="" '
+ 'part="0"\n'.format(years))
+ pvd_file.write('\tfile="{}.vtp"/>\n'.format(vtp_file_prefix))
+
+ if time_index == 0 or combine_output:
+ varIndices = numpy.arange(nVars)
+ else:
+ # only the time-dependent variables
+ varIndices = numpy.arange(nVars)[var_has_time_dim]
+
+ iHyperSlabProgress = 0
+ for iVar in varIndices:
+ has_time = var_has_time_dim[iVar]
+
+ var_name = variable_list[iVar]
+ (out_var_names, dim_list) = \
+ get_hyperslab_name_and_dims(var_name, all_dim_vals[var_name])
+ if has_time or combine_output:
+ vtkFile = timeDependentFile
+ else:
+ vtkFile = timeIndependentFile
+ for iHyperSlab in range(len(out_var_names)):
+ if dim_list is not None:
+ dim_vals = dim_list[iHyperSlab]
+ else:
+ dim_vals = None
+
+ if boundary_mask is not None and var_name == 'boundaryMask':
+ field = numpy.array(boundary_mask, dtype=outType)
+ else:
+ field = numpy.zeros(blockDim, dtype=outType)
+
+ for iBlock in numpy.arange(0, nBlocks):
+ blockStart = iBlock * blocking
+ blockEnd = min((iBlock + 1) * blocking, blockDim)
+ block_indices = numpy.arange(blockStart, blockEnd)
+ if topo_cell_indices is None:
+ block_topo_cell_indices = None
+ else:
+ block_topo_cell_indices = \
+ topo_cell_indices[block_indices]
+ field_block = read_field(
+ var_name, mesh_file, time_series_file,
+ dim_vals, local_time_indices[time_index],
+ block_indices, outType, topo_dim=topo_dim,
+ topo_cell_indices=block_topo_cell_indices,
+ nTopoLevels=nTopoLevels)
+
+ field[blockStart:blockEnd] = field_block
+
+ field = field[valid_mask]
+
+ if cell_to_point_map is not None:
+ # map field from cells to points
+ field = field[cell_to_point_map]
+
+ vtkFile.appendData(field)
+
+ if use_progress_bar:
+ field_bar.update(time_index*nHyperSlabs +
+ iHyperSlabProgress)
+ iHyperSlabProgress += 1
+
+ del field
+
+ if any_var_has_time_dim:
+ timeDependentFile.save()
+ del timeDependentFile
+
+ if time_index == 0 and not combine_output and not \
+ numpy.all(var_has_time_dim):
+ timeIndependentFile.save()
+ del timeIndependentFile
+
+ time_series_file.close()
+ if use_progress_bar:
+ field_bar.finish()
+
+ if any_var_has_time_dim:
+        # finish the pvd file
+ pvd_file.write('</Collection>\n')
+ pvd_file.write('</VTKFile>\n')
+ pvd_file.close() # }}}
+
+
+def open_netcdf(file_name):
+ nc_file = NetCDFFile(file_name, 'r')
+ # turn off auto mask (if applicable)
+ try:
+ nc_file.set_auto_mask(False)
+ except AttributeError:
+ pass
+ return nc_file
+
+
+def is_valid_mesh_var(mesh_file, variable_name): # {{{
+ if mesh_file is None:
+ return False
+
+ if variable_name not in mesh_file.variables:
+ return False
+
+ return 'Time' not in mesh_file.variables[variable_name].dimensions # }}}
+
+
+def get_var(variable_name, mesh_file, time_series_file): # {{{
+ if is_valid_mesh_var(mesh_file, variable_name):
+ return mesh_file.variables[variable_name]
+ else:
+ return time_series_file.variables[variable_name] # }}}
+
+
+def setup_time_indices(fn_pattern, xtimeName, use_progress_bar): # {{{
+ """
+ This function finds a list of NetCDF files containing time-dependent
+ MPAS data and extracts the time indices in each file. The routine
+    ensures that each time is unique.
+ """
+ # Build file list and time indices
+ if ';' in fn_pattern:
+ file_list = []
+ for pattern in fn_pattern.split(';'):
+ file_list.extend(glob.glob(pattern))
+ else:
+ file_list = glob.glob(fn_pattern)
+ file_list.sort()
+
+ local_indices = []
+ file_names = []
+ all_times = []
+
+ if len(file_list) == 0:
+ print("No files to process.")
+ print("Exiting...")
+ sys.exit(0)
+
+ if use_progress_bar:
+ widgets = ['Build time indices: ', Percentage(), ' ', Bar(), ' ',
+ ETA()]
+ time_bar = ProgressBar(widgets=widgets, maxval=len(file_list)).start()
+ else:
+ print("Build time indices...")
+
+ i_file = 0
+ allTIndex = 0
+ for file_name in file_list:
+ try:
+ nc_file = open_netcdf(file_name)
+ except IOError:
+ print("Warning: could not open {}".format(file_name))
+ continue
+
+ if 'Time' not in nc_file.dimensions or xtimeName is None:
+ local_times = ['0']
+ else:
+ local_times = []
+ if xtimeName == 'none':
+ # no xtime variable so just use integers converted to strings
+ for index in range(len(nc_file.dimensions['Time'])):
+ local_times.append(allTIndex)
+ allTIndex += 1
+ else:
+ if xtimeName not in nc_file.variables:
+ raise ValueError("xtime variable name {} not found in "
+ "{}".format(xtimeName, file_name))
+ xtime = nc_file.variables[xtimeName]
+ if len(xtime.shape) == 2:
+ xtime = xtime[:, :]
+ for index in range(xtime.shape[0]):
+                        local_times.append(xtime[index, :].tobytes())
+ else:
+ local_times = xtime[:]
+
+ if len(local_times) == 0:
+ local_times = ['0']
+
+ nTime = len(local_times)
+
+ for time_idx in range(nTime):
+ if local_times[time_idx] not in all_times:
+ local_indices.append(time_idx)
+ file_names.append(file_name)
+ all_times.append(local_times[time_idx])
+
+ i_file = i_file + 1
+ nc_file.close()
+ if use_progress_bar:
+ time_bar.update(i_file)
+
+ if use_progress_bar:
+ time_bar.finish()
+
+ return local_indices, file_names # }}}
+
+
+def parse_extra_dim(dim_name, index_string, time_series_file, mesh_file):
+ # {{{
+ """
+ Parses the indices to be extracted along a given dimension.
+    The index_string can be formatted as follows:
+ <blank> -- no indices are to be extracted
+ n -- the index n is to be extracted
+ m,n,p -- the list of indices is to be extracted
+ m:n -- all indices from m to n are to be extracted (including m but
+ excluding n, in the typical python indexing convention)
+ m:n:s -- all indices from m to n are to be extracted (including m but
+ excluding n, in the typical python indexing convention) with
+ stride s between indices
+
+ Parameters
+ ----------
+ dim_name : str
+ The name of the dimension to be indexed
+
+ index_string : str
+        An index string indicating which indices are to be extracted
+
+    time_series_file : netCDF4.Dataset
+        A time series file that can be used to determine the size
+        of the dimension if ``mesh_file=None``.
+
+    mesh_file : netCDF4.Dataset
+        A mesh file that can be used to determine the size
+        of the dimension, or ``None`` if the time series file should be used
+
+ Returns
+ -------
+ indices : list of str
+ Indices into the given dimension formatted as zero-padded strings
+ (if indices are numerical, as opposed to the name of an index variable)
+ """
+ if index_string == '':
+ return []
+
+ if (mesh_file is not None) and (dim_name in mesh_file.dimensions):
+ nc_file = mesh_file
+ else:
+ nc_file = time_series_file
+
+ dim_size = len(nc_file.dimensions[dim_name])
+
+ indices, numerical_indices = parse_index_string(index_string, dim_size)
+
+ # zero-pad integer indices
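+    # (e.g. a maximum numerical index of 11 gives pad = 2, so '0' -> '00')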
+ if len(numerical_indices) > 0:
+ max_index = numpy.amax(numerical_indices)
+ pad = int(numpy.log10(max(max_index, 1)))+1
+ template = '%%0%dd' % pad
+ for i in range(len(indices)):
+ try:
+ val = int(indices[i])
+ except ValueError:
+ continue
+ indices[i] = template % (val)
+
+ return indices
+
+# }}}
+
+
+def parse_time_indices(index_string, time_indices, time_file_names): # {{{
+ """
+ Parses the indices to be extracted along the Time dimension.
+ The index_string can be formatted as follows:
+ <blank> -- no indices are to be extracted
+ n -- the index n is to be extracted
+ m,n,p -- the list of indices is to be extracted
+ m:n -- all indices from m to n are to be extracted (including m but
+ excluding n, in the typical python indexing convention)
+ m:n:s -- all indices from m to n are to be extracted (including m but
+ excluding n, in the typical python indexing convention) with
+ stride s between indices
+
+ Parameters
+ ----------
+ index_string : str or list of int
+        An index string indicating which indices are to be extracted
+
+ time_indices : list of int
+ The local time indices in each input NetCDF file
+
+ time_file_names : list of str
+ The name of the file associated with each time index
+
+ Returns
+ -------
+ time_indices : list of int
+ The local time indices in each input NetCDF file after reindexing
+
+ time_file_names : list of str
+ The name of the file associated with each time index after reindexing
+
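+    Examples
+    --------
+    An illustrative call (with hypothetical file names):
+
+    >>> parse_time_indices('0:3', [0, 1, 0, 1],
+    ...                    ['a.nc', 'a.nc', 'b.nc', 'b.nc'])
+    ([0, 1, 0], ['a.nc', 'a.nc', 'b.nc'])
+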
+ """
+ dim_size = len(time_indices)
+ _, numerical_indices = parse_index_string(index_string, dim_size)
+
+ time_indices = [time_indices[index] for index in numerical_indices]
+ time_file_names = [time_file_names[index] for index in numerical_indices]
+ return time_indices, time_file_names # }}}
+
+
+def parse_index_string(index_string, dim_size): # {{{
+ """
+ Parses an index string into a list of indices.
+    The index_string can be formatted as follows:
+ <blank> -- no indices are to be extracted
+ n -- the index n is to be extracted
+ m,n,p -- the list of indices is to be extracted
+ m:n -- all indices from m to n are to be extracted (including m but
+ excluding n, in the typical python indexing convention)
+ m:n:s -- all indices from m to n are to be extracted (including m but
+ excluding n, in the typical python indexing convention) with
+ stride s between indices
+
+ Parameters
+ ----------
+ index_string : str or list of int
+ An index string indicating with indices are to be extracted
+
+ dim_size : int
+ The size of the dimension
+
+    Returns
+    -------
+    indices : list of str
+        The indices corresponding to the given index string (integer
+        indices as strings, possibly mixed with the names of index
+        variables)
+
+    numerical_indices : list of int
+        The subset of ``indices`` that are integer indices
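+
+    Examples
+    --------
+    Illustrative calls (names such as maxLevelCell are resolved to index
+    arrays later, by ``read_field``):
+
+    >>> parse_index_string('0:10:2', 20)
+    (['0', '2', '4', '6', '8'], [0, 2, 4, 6, 8])
+    >>> parse_index_string('0,2,maxLevelCell', 20)
+    (['0', '2', 'maxLevelCell'], [0, 2])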
+ """
+ if not isinstance(index_string, str):
+ numerical_indices = index_string
+ indices = []
+ for index in numerical_indices:
+ if index < 0 or index >= dim_size:
+ raise ValueError("Index (or indices) out of bounds 0 <= "
+ "index < {}: {}".format(dim_size,
+ index_string))
+ indices.append('{}'.format(index))
+ else:
+ if index_string == '':
+ return [], []
+
+ if ',' in index_string:
+ indices = [index for index in index_string.split(',')]
+ elif ':' in index_string:
+ index_list = index_string.split(':')
+ if len(index_list) not in [2, 3]:
+ raise ValueError("Improperly formatted index string: "
+ "{}".format(index_string))
+ if index_list[0] == '':
+ first = 0
+ else:
+ first = int(index_list[0])
+ if index_list[1] == '':
+ last = dim_size
+ else:
+ last = int(index_list[1])
+ if (len(index_list) == 2) or (index_list[2] == ''):
+ step = 1
+ else:
+ step = int(index_list[2])
+ indices = [str(index) for index in numpy.arange(first, last, step)]
+ else:
+ indices = [index_string]
+
+ numerical_indices = []
+ for index in indices:
+ try:
+ val = int(index)
+ except ValueError:
+ continue
+
+ if val < 0 or val >= dim_size:
+ raise ValueError("Index (or indices) out of bounds 0 <= "
+ "index < {}: {}".format(dim_size,
+ index_string))
+
+ numerical_indices.append(val)
+
+ return indices, numerical_indices # }}}
+
+
+def parse_extra_dims(dimension_list, time_series_file, mesh_file,
+ topo_dim=None, topo_cell_index_name=None,
+ max_index_count=None):
+ # {{{
+ """
+ Parses a list of dimensions and corresponding indices separated by equals
+ signs. Optionally, a max_index_count (typically 1) can be provided,
+ indicating that indices beyond max_index_count-1 will be ignored in each
+ dimension. Optionally, topo_dim contains the name of a dimension associated
+    with the surface or bottom topography (e.g. nVertLevels for MPAS-Ocean).
+ If topo_dim is provided, topo_cell_index_name can optionally be either
+ a constant value for the vertical index to the topography or the name of a
+ field with dimension nCells that contains the vertical index of the
+ topography.
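+
+    For example, a dimension_list of ['nVertLevels=0:4', 'nParticles=0,2']
+    (illustrative names) yields extra_dims = {'nVertLevels': ['0', '1', '2',
+    '3'], 'nParticles': ['0', '2']}.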
+ """
+
+ extra_dims = {}
+ topo_cell_indices = None
+
+ if dimension_list is not None:
+ for dim_item in dimension_list:
+ print(dim_item)
+ (dimName, index_string) = dim_item.split('=')
+ indices = parse_extra_dim(dimName, index_string, time_series_file,
+ mesh_file)
+ if indices is not None:
+ if max_index_count is None or len(indices) <= max_index_count:
+ extra_dims[dimName] = indices
+ else:
+ extra_dims[dimName] = indices[0:max_index_count]
+
+ if topo_dim is not None:
+ if topo_cell_index_name is not None:
+ if (mesh_file is not None) and \
+ (topo_cell_index_name in mesh_file.variables):
+ topo_cell_indices = \
+ mesh_file.variables[topo_cell_index_name][:]-1
+ else:
+ topo_cell_indices = \
+ time_series_file.variables[topo_cell_index_name][:]-1
+ else:
+ index = len(mesh_file.dimensions[topo_dim])-1
+ nCells = len(mesh_file.dimensions['nCells'])
+ topo_cell_indices = index*numpy.ones(nCells, int)
+
+ return extra_dims, topo_cell_indices
+# }}}
+
+
+def setup_dimension_values_and_sort_vars(
+ time_series_file, mesh_file, variable_list, extra_dims,
+ include_mesh_vars, basic_dims=('nCells', 'nEdges', 'nVertices', 'Time'),
+ include_dims=('nCells', 'nEdges', 'nVertices')): # {{{
+ """
+ Creates a list of variables names to be extracted. Prompts for indices
+ of any extra dimensions that were not specified on the command line.
+ extra_dims should be a dictionary of indices along extra dimensions (as
+ opposed to "basic" dimensions). basic_dims is a list of dimension names
+ that should be excluded from extra_dims. include_dims is a list of
+    possible dimensions, one of which must be in each variable to be extracted
+ (used in expanding command line placeholders "all", "allOnCells", etc.)
+ """
+
+ time_series_variables = time_series_file.variables
+ if mesh_file is None or not include_mesh_vars:
+ mesh_variables = None
+ else:
+ mesh_variables = mesh_file.variables
+ variable_names = _expand_variable_list(variable_list, time_series_variables,
+ mesh_variables, include_dims)
+
+ all_dim_vals = {}
+ cellVars = []
+ vertexVars = []
+ edgeVars = []
+
+ promptDimNames = []
+ display_prompt = True
+ for variable_name in variable_names:
+ if is_valid_mesh_var(mesh_file, variable_name):
+ nc_file = mesh_file
+ else:
+ nc_file = time_series_file
+ field_dims = nc_file.variables[variable_name].dimensions
+ for dim in field_dims:
+ if ((dim in basic_dims) or (dim in extra_dims)
+ or (dim in promptDimNames)):
+ # this dimension has already been accounted for
+ continue
+ promptDimNames.append(str(dim))
+
+ if display_prompt:
+ print("")
+ print("Need to define additional dimension values")
+ display_prompt = False
+
+ dim_size = len(nc_file.dimensions[dim])
+ valid = False
+ while not valid:
+ print("Valid range for dimension {} between 0 and {}"
+ "".format(dim, dim_size-1))
+ index_string = input("Enter a value for dimension {}: "
+ "".format(dim))
+ indices = parse_extra_dim(str(dim), index_string,
+ time_series_file, mesh_file)
+ valid = indices is not None
+ if valid:
+ extra_dims[str(dim)] = indices
+ else:
+ print(" -- Invalid value, please re-enter --")
+
+ empty_dims = []
+ for dim in extra_dims:
+ if len(extra_dims[dim]) == 0:
+ empty_dims.append(dim)
+
+ for variable_name in variable_names:
+
+ field_dims = get_var(variable_name, mesh_file,
+ time_series_file).dimensions
+ skip = False
+ for dim in field_dims:
+ if dim in empty_dims:
+ skip = True
+ break
+ if skip:
+ continue
+
+ # Setting dimension values:
+ indices = []
+ for dim in field_dims:
+ if dim not in basic_dims:
+ indices.append(extra_dims[dim])
+ if len(indices) == 0:
+ dim_vals = None
+ elif len(indices) == 1:
+ dim_vals = []
+ for index0 in indices[0]:
+ dim_vals.append([index0])
+ elif len(indices) == 2:
+ dim_vals = []
+ for index0 in indices[0]:
+ for index1 in indices[1]:
+ dim_vals.append([index0, index1])
+ elif len(indices) == 3:
+ dim_vals = []
+ for index0 in indices[0]:
+ for index1 in indices[1]:
+ for index2 in indices[2]:
+ dim_vals.append([index0, index1, index2])
+ else:
+ print("variable {} has too many extra dimensions and will be "
+ "skipped.".format(variable_name))
+ continue
+
+ if "nCells" in field_dims:
+ cellVars.append(variable_name)
+ elif "nVertices" in field_dims:
+ vertexVars.append(variable_name)
+ elif "nEdges" in field_dims:
+ edgeVars.append(variable_name)
+
+ all_dim_vals[variable_name] = dim_vals
+ del dim_vals
+
+ return all_dim_vals, cellVars, vertexVars, edgeVars
+# }}}
+
+
+def summarize_extraction(mesh_file, time_indices, cellVars, vertexVars,
+ edgeVars, transects_file=None): # {{{
+ """
+ print a summary of the time levels, mesh file, transects file (optional)
+ and variables to be extracted.
+ """
+
+ print("")
+ print("Extracting a total of {} time levels.".format(len(time_indices)))
+ print("Using file '{}' as the mesh file for this extraction."
+ "".format(mesh_file))
+ if transects_file is not None:
+ print("Using file '{}' as the transects file.".format(transects_file))
+ print("")
+ print("")
+ print("The following variables will be extracted from the input file(s).")
+ print("")
+
+ if len(cellVars) > 0:
+ print(" Variables with 'nCells' as a dimension:")
+ for variable_name in cellVars:
+ print(" name: {}".format(variable_name))
+
+ if len(vertexVars) > 0:
+ print(" Variables with 'nVertices' as a dimension:")
+ for variable_name in vertexVars:
+ print(" name: {}".format(variable_name))
+
+ if len(edgeVars) > 0:
+        print("  Variables with 'nEdges' as a dimension:")
+ for variable_name in edgeVars:
+ print(" name: {}".format(variable_name))
+
+ print("")
+# }}}
+
+
+def write_pvd_header(path, prefix): # {{{
+ pvd_file = open('{}/{}.pvd'.format(path, prefix), 'w')
+ pvd_file.write('<?xml version="1.0"?>\n')
+ pvd_file.write('<VTKFile type="Collection" version="0.1"\n')
+ pvd_file.write('\tbyte_order="LittleEndian"\n')
+ pvd_file.write('\tcompressor="vtkZLibDataCompressor">\n')
+ return pvd_file # }}}
+
+
+def get_hyperslab_name_and_dims(var_name, extra_dim_vals): # {{{
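+    """
+    Build one output field name per hyperslab by joining the variable name
+    with the index strings for each extra dimension, e.g. (for a
+    hypothetical field):
+
+    >>> get_hyperslab_name_and_dims('temperature', [['00'], ['05']])
+    (['temperature_00', 'temperature_05'], [['00'], ['05']])
+    """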
+ if extra_dim_vals is None:
+ return [var_name], None
+ if len(extra_dim_vals) == 0:
+ return [], None
+ out_var_names = []
+ for hyper_slab in extra_dim_vals:
+ pieces = [var_name]
+ pieces.extend(hyper_slab)
+ out_var_names.append('_'.join(pieces))
+ return out_var_names, extra_dim_vals
+# }}}
+
+
+def write_vtp_header(path, prefix, active_var_index, var_indices,
+ variable_list, all_dim_vals, vertices, connectivity,
+ offsets, nPoints, nPolygons, outType, cellData=True,
+ pointData=False, xtime=None): # {{{
+ vtkFile = VtkFile("{}/{}".format(path, prefix), VtkPolyData)
+
+ if xtime is not None:
+ vtkFile.openElement(str("metadata"))
+ vtkFile.openElement(str("xtime"))
+ vtkFile.xml.addText(str(xtime))
+ vtkFile.closeElement(str("xtime"))
+ vtkFile.closeElement(str("metadata"))
+
+ vtkFile.openElement(vtkFile.ftype.name)
+ vtkFile.openPiece(npoints=nPoints, npolys=nPolygons)
+
+ vtkFile.openElement(str("Points"))
+ vtkFile.addData(str("points"), vertices)
+ vtkFile.closeElement(str("Points"))
+
+ vtkFile.openElement(str("Polys"))
+ vtkFile.addData(str("connectivity"), connectivity)
+ vtkFile.addData(str("offsets"), offsets)
+ vtkFile.closeElement(str("Polys"))
+
+ if cellData:
+ vtkFile.openData(str("Cell"),
+ scalars=variable_list[active_var_index])
+ for iVar in var_indices:
+ var_name = variable_list[iVar]
+ (out_var_names, dim_list) = \
+ get_hyperslab_name_and_dims(var_name, all_dim_vals[var_name])
+ for out_var_name in out_var_names:
+ vtkFile.addHeader(str(out_var_name), outType, nPolygons, 1)
+ vtkFile.closeData(str("Cell"))
+ if pointData:
+ vtkFile.openData(str("Point"),
+ scalars=variable_list[active_var_index])
+ for iVar in var_indices:
+ var_name = variable_list[iVar]
+ (out_var_names, dim_list) = \
+ get_hyperslab_name_and_dims(var_name, all_dim_vals[var_name])
+ for out_var_name in out_var_names:
+ vtkFile.addHeader(str(out_var_name), outType, nPoints, 1)
+ vtkFile.closeData(str("Point"))
+
+ vtkFile.closePiece()
+ vtkFile.closeElement(vtkFile.ftype.name)
+
+ vtkFile.appendData(vertices)
+ vtkFile.appendData(connectivity)
+ vtkFile.appendData(offsets)
+
+ return vtkFile # }}}
+
+
+def build_topo_point_and_polygon_lists(nc_file, output_32bit, lonlat,
+ use_progress_bar): # {{{
+
+ if output_32bit:
+ dtype = 'f4'
+ else:
+ dtype = 'f8'
+
+ xVertex, yVertex, zVertex = \
+ _build_location_list_xyz(nc_file, 'Vertex', output_32bit, lonlat)
+
+ nCells = len(nc_file.dimensions['nCells'])
+ nEdges = len(nc_file.dimensions['nEdges'])
+ maxEdges = len(nc_file.dimensions['maxEdges'])
+
+ nEdgesOnCell = nc_file.variables['nEdgesOnCell'][:]
+ verticesOnCell = nc_file.variables['verticesOnCell'][:, :]-1
+ edgesOnCell = nc_file.variables['edgesOnCell'][:, :]-1
+ verticesOnEdge = nc_file.variables['verticesOnEdge'][:] - 1
+ cellsOnEdge = nc_file.variables['cellsOnEdge'][:] - 1
+
+ # 4 points for each edge face
+ nPoints = 4*nEdges
+ # 1 polygon for each edge and cell
+ nPolygons = nEdges + nCells
+
+ X = numpy.zeros(nPoints, dtype)
+ Y = numpy.zeros(nPoints, dtype)
+ Z = numpy.zeros(nPoints, dtype)
+
+ # The points on an edge are vertex 0, 1, 1, 0 on that edge, making a
+ # vertical rectangle if the points are offset
+ iEdges, voe = numpy.meshgrid(numpy.arange(nEdges), [0, 1, 1, 0],
+ indexing='ij')
+ iVerts = verticesOnEdge[iEdges, voe].ravel()
+ X[:] = xVertex[iVerts]
+ Y[:] = yVertex[iVerts]
+ Z[:] = zVertex[iVerts]
+ vertices = (X, Y, Z)
+
+ verticesOnPolygon = -1*numpy.ones((nPolygons, maxEdges), int)
+ verticesOnPolygon[0:nEdges, 0:4] = \
+ numpy.arange(4*nEdges).reshape(nEdges, 4)
+
+ # Build cells
+ if use_progress_bar:
+ widgets = ['Build cell connectivity: ', Percentage(), ' ', Bar(), ' ',
+ ETA()]
+ bar = ProgressBar(widgets=widgets, maxval=nCells).start()
+ else:
+ print("Build cell connectivity...")
+
+ outIndex = nEdges
+
+ for iCell in range(nCells):
+ neoc = nEdgesOnCell[iCell]
+ eocs = edgesOnCell[iCell, 0:neoc]
+ vocs = verticesOnCell[iCell, 0:neoc]
+ for index in range(neoc):
+ iVert = vocs[index]
+ iEdge = eocs[index]
+ # which vertex on the edge corresponds to iVert?
+ coes = cellsOnEdge[iEdge, :]
+ voes = verticesOnEdge[iEdge, :]
+
+ if coes[0] == iCell:
+ if voes[0] == iVert:
+ voe = 0
+ else:
+ voe = 1
+ else:
+ if voes[0] == iVert:
+ voe = 3
+ else:
+ voe = 2
+
+ verticesOnPolygon[nEdges+iCell, index] = 4*iEdge + voe
+
+ outIndex += neoc
+
+ if use_progress_bar:
+ bar.update(iCell)
+
+ if use_progress_bar:
+ bar.finish()
+
+ validVerts = verticesOnPolygon >= 0
+
+ if lonlat:
+ lonEdge = numpy.rad2deg(nc_file.variables['lonEdge'][:])
+ latEdge = numpy.rad2deg(nc_file.variables['latEdge'][:])
+ lonCell = numpy.rad2deg(nc_file.variables['lonCell'][:])
+ latCell = numpy.rad2deg(nc_file.variables['latCell'][:])
+ lonPolygon = numpy.append(lonEdge, lonCell)
+ latPolygon = numpy.append(latEdge, latCell)
+
+ vertices, verticesOnPolygon = _fix_lon_lat_vertices(vertices,
+ verticesOnPolygon,
+ validVerts,
+ lonPolygon)
+
+ if nc_file.on_a_sphere.strip() == 'NO' and \
+ nc_file.is_periodic.strip() == 'YES':
+ if lonlat:
+ xcoord = lonPolygon
+ ycoord = latPolygon
+ else:
+            # planar coordinates are in length units, not radians
+            xEdge = nc_file.variables['xEdge'][:]
+            yEdge = nc_file.variables['yEdge'][:]
+            xCell = nc_file.variables['xCell'][:]
+            yCell = nc_file.variables['yCell'][:]
+ xcoord = numpy.append(xEdge, xCell)
+ ycoord = numpy.append(yEdge, yCell)
+
+ vertices, verticesOnPolygon = _fix_periodic_vertices(vertices,
+ verticesOnPolygon,
+ validVerts,
+ xcoord, ycoord,
+ nc_file.x_period,
+ nc_file.y_period)
+
+ nPoints = len(vertices[0])
+
+ # we want to know the cells corresponding to each point. The first two
+ # points correspond to the first cell, the second two to the second cell
+ # (if any).
+ cell_to_point_map = -1*numpy.ones((nPoints,), int)
+ boundary_mask = numpy.zeros((nPoints,), bool)
+
+ # first cell on edge always exists
+ coe = cellsOnEdge[:, 0].copy()
+ for index in range(2):
+ voe = verticesOnPolygon[0:nEdges, index]
+ cell_to_point_map[voe] = coe
+ boundary_mask[voe] = False
+
+ # second cell on edge may not exist
+ coe = cellsOnEdge[:, 1].copy()
+ mask = coe == -1
+ # use the first cell if the second doesn't exist
+ coe[mask] = cellsOnEdge[:, 0][mask]
+ for index in range(2, 4):
+ voe = verticesOnPolygon[0:nEdges, index]
+ cell_to_point_map[voe] = coe
+ boundary_mask[voe] = mask
+
+ # for good measure, make sure vertices on cell are also accounted for
+ for index in range(maxEdges):
+ iCells = numpy.arange(nCells)
+ voc = verticesOnPolygon[nEdges:nEdges+nCells, index]
+ mask = index < nEdgesOnCell
+ cell_to_point_map[voc[mask]] = iCells[mask]
+ boundary_mask[voc[mask]] = False
+
+ connectivity = verticesOnPolygon[validVerts]
+ validCount = numpy.sum(numpy.array(validVerts, int), axis=1)
+ offsets = numpy.cumsum(validCount, dtype=int)
+ valid_mask = numpy.ones(nCells, bool)
+
+ return vertices, connectivity, offsets, valid_mask, \
+ cell_to_point_map, boundary_mask.ravel() # }}}
+
+
+def build_cell_geom_lists(nc_file, output_32bit, lonlat): # {{{
+
+ print("Build geometry for fields on cells...")
+
+ vertices = _build_location_list_xyz(nc_file, 'Vertex', output_32bit,
+ lonlat)
+
+ if lonlat:
+ lonCell = numpy.rad2deg(nc_file.variables['lonCell'][:])
+ latCell = numpy.rad2deg(nc_file.variables['latCell'][:])
+
+ nCells = len(nc_file.dimensions['nCells'])
+
+ nEdgesOnCell = nc_file.variables['nEdgesOnCell'][:]
+ verticesOnCell = nc_file.variables['verticesOnCell'][:, :] - 1
+ # MPAS-O sets non-masked values to total number of vertices instead of 0
+ # (as produced in mesh workflow)
+ verticesOnCell[numpy.where(verticesOnCell == len(vertices[0]))] = 0
+
+ validVertices = numpy.zeros(verticesOnCell.shape, bool)
+ for vIndex in range(validVertices.shape[1]):
+ validVertices[:, vIndex] = vIndex < nEdgesOnCell
+
+ if lonlat:
+ vertices, verticesOnCell = _fix_lon_lat_vertices(vertices,
+ verticesOnCell,
+ validVertices,
+ lonCell)
+
+ if nc_file.on_a_sphere.strip() == 'NO' and \
+ nc_file.is_periodic.strip() == 'YES':
+ if lonlat:
+ xcoord = lonCell
+ ycoord = latCell
+ else:
+ xcoord = nc_file.variables['xCell'][:]
+ ycoord = nc_file.variables['yCell'][:]
+ vertices, verticesOnCell = _fix_periodic_vertices(vertices,
+ verticesOnCell,
+ validVertices,
+ xcoord, ycoord,
+ nc_file.x_period,
+ nc_file.y_period)
+
+ connectivity = numpy.array(verticesOnCell[validVertices], int)
+ offsets = numpy.cumsum(nEdgesOnCell, dtype=int)
+ valid_mask = numpy.ones(nCells, bool)
+
+ return vertices, connectivity, offsets, valid_mask # }}}
+
+
+def build_vertex_geom_lists(nc_file, output_32bit, lonlat): # {{{
+ print("Build geometry for fields on vertices....")
+
+ vertices = _build_location_list_xyz(nc_file, 'Cell', output_32bit, lonlat)
+
+ if lonlat:
+ lonVertex = numpy.rad2deg(nc_file.variables['lonVertex'][:])
+ latVertex = numpy.rad2deg(nc_file.variables['latVertex'][:])
+
+ vertexDegree = len(nc_file.dimensions['vertexDegree'])
+
+ cellsOnVertex = nc_file.variables['cellsOnVertex'][:, :] - 1
+
+ valid_mask = numpy.all(cellsOnVertex >= 0, axis=1)
+
+ cellsOnVertex = cellsOnVertex[valid_mask, :]
+
+ if lonlat:
+ # all remaining entries in cellsOnVertex are valid
+ validVertices = numpy.ones(cellsOnVertex.shape, bool)
+ vertices, cellsOnVertex = _fix_lon_lat_vertices(vertices,
+ cellsOnVertex,
+ validVertices,
+ lonVertex[valid_mask])
+
+ if nc_file.on_a_sphere.strip() == 'NO' and \
+ nc_file.is_periodic.strip() == 'YES':
+ # all remaining entries in cellsOnVertex are valid
+ validVertices = numpy.ones(cellsOnVertex.shape, bool)
+ if lonlat:
+ xcoord = lonVertex[valid_mask]
+ ycoord = latVertex[valid_mask]
+ else:
+ xcoord = nc_file.variables['xVertex'][valid_mask]
+ ycoord = nc_file.variables['yVertex'][valid_mask]
+ vertices, cellsOnVertex = _fix_periodic_vertices(vertices,
+ cellsOnVertex,
+ validVertices,
+ xcoord, ycoord,
+ nc_file.x_period,
+ nc_file.y_period)
+
+ connectivity = numpy.array(cellsOnVertex.ravel(), int)
+ validCount = cellsOnVertex.shape[0]
+ offsets = numpy.array(vertexDegree*numpy.arange(1, validCount+1), int)
+
+ return vertices, connectivity, offsets, valid_mask # }}}
+
+
+def build_edge_geom_lists(nc_file, output_32bit, lonlat): # {{{
+ (xCell, yCell, zCell) = \
+ _build_location_list_xyz(nc_file, 'Cell', output_32bit, lonlat)
+ (xVertex, yVertex, zVertex) = \
+ _build_location_list_xyz(nc_file, 'Vertex', output_32bit, lonlat)
+
+ vertices = (numpy.append(xCell, xVertex),
+ numpy.append(yCell, yVertex),
+ numpy.append(zCell, zVertex))
+
+ if lonlat:
+ lonEdge = numpy.rad2deg(nc_file.variables['lonEdge'][:])
+ latEdge = numpy.rad2deg(nc_file.variables['latEdge'][:])
+
+ nEdges = len(nc_file.dimensions['nEdges'])
+ nCells = len(nc_file.dimensions['nCells'])
+
+ cellsOnEdge = nc_file.variables['cellsOnEdge'][:] - 1
+ verticesOnEdge = nc_file.variables['verticesOnEdge'][:] - 1
+
+ vertsOnCell = -1*numpy.ones((nEdges, 4), int)
+ vertsOnCell[:, 0] = cellsOnEdge[:, 0]
+ vertsOnCell[:, 1] = verticesOnEdge[:, 0]
+ vertsOnCell[:, 2] = cellsOnEdge[:, 1]
+ vertsOnCell[:, 3] = verticesOnEdge[:, 1]
+
+ validVerts = vertsOnCell >= 0
+
+ vertsOnCell[:, 1] += nCells
+ vertsOnCell[:, 3] += nCells
+
+ validCount = numpy.sum(numpy.array(validVerts, int), axis=1)
+
+ # exclude any isolated points or lines -- we need only triangles or quads
+ valid_mask = validCount >= 3
+
+ vertsOnCell = vertsOnCell[valid_mask, :]
+ validVerts = validVerts[valid_mask, :]
+
+ if lonlat:
+ vertices, vertsOnCell = _fix_lon_lat_vertices(vertices,
+ vertsOnCell,
+ validVerts,
+ lonEdge[valid_mask])
+ if nc_file.on_a_sphere.strip() == 'NO' and \
+ nc_file.is_periodic.strip() == 'YES':
+ if lonlat:
+ xcoord = lonEdge[valid_mask]
+ ycoord = latEdge[valid_mask]
+ else:
+ xcoord = nc_file.variables['xEdge'][valid_mask]
+ ycoord = nc_file.variables['yEdge'][valid_mask]
+
+ vertices, vertsOnCell = _fix_periodic_vertices(vertices,
+ vertsOnCell,
+ validVerts,
+ xcoord, ycoord,
+ nc_file.x_period,
+ nc_file.y_period)
+
+ connectivity = numpy.array(vertsOnCell[validVerts], int)
+ validCount = numpy.sum(numpy.array(validVerts, int), axis=1)
+ offsets = numpy.cumsum(validCount, dtype=int)
+
+ return vertices, connectivity, offsets, valid_mask # }}}
+
+
+def get_field_sign(field_name):
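+    """
+    Strip a leading minus sign (if any) from a field name, returning the
+    bare name and a sign of -1 or 1:
+
+    >>> get_field_sign('-bottomDepth')
+    ('bottomDepth', -1)
+    >>> get_field_sign('temperature')
+    ('temperature', 1)
+    """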
+ if field_name[0] == '-':
+ field_name = field_name[1:]
+ sign = -1
+ else:
+ sign = 1
+
+ return field_name, sign
+
+
+def read_field(var_name, mesh_file, time_series_file, extra_dim_vals,
+ time_index, block_indices, outType, sign=1,
+ topo_dim=None, topo_cell_indices=None, nTopoLevels=None): # {{{
+
+ def read_field_with_dims(field_var, dim_vals, temp_shape, outType,
+ index_arrays): # {{{
+ temp_field = numpy.zeros(temp_shape, dtype=outType)
+ inDims = len(dim_vals)
+ if inDims <= 0 or inDims > 5:
+ print('reading field {} with {} dimensions not supported.'
+ ''.format(var_name, inDims))
+ sys.exit(1)
+
+ if inDims == 1:
+ temp_field = numpy.array(field_var[dim_vals[0]], dtype=outType)
+ elif inDims == 2:
+ temp_field = numpy.array(field_var[dim_vals[0], dim_vals[1]],
+ dtype=outType)
+ elif inDims == 3:
+ temp_field = numpy.array(field_var[dim_vals[0], dim_vals[1],
+ dim_vals[2]],
+ dtype=outType)
+ elif inDims == 4:
+ temp_field = numpy.array(field_var[dim_vals[0], dim_vals[1],
+ dim_vals[2], dim_vals[3]],
+ dtype=outType)
+ elif inDims == 5:
+ temp_field = numpy.array(field_var[dim_vals[0], dim_vals[1],
+ dim_vals[2], dim_vals[3],
+ dim_vals[4]],
+ dtype=outType)
+
+ if topo_dim is not None and topo_dim in field_var.dimensions:
+ if len(temp_field.shape) != 2:
+ raise ValueError('Field with dimensions {} not supported in '
+ 'topography extraction mode.'.format(
+ field_var.dimensions))
+ # sample the depth-dependent field at the index of the topography
+ temp_field = temp_field[numpy.arange(temp_field.shape[0]),
+ topo_cell_indices]
+
+ outDims = len(temp_field.shape)
+
+ if outDims <= 0 or outDims > 4:
+ print('something went wrong reading field {}, resulting in a temp '
+ 'array with {} dimensions.'.format(var_name, outDims))
+ sys.exit(1)
+ block_indices = numpy.arange(temp_field.shape[0])
+ if outDims == 1:
+ field = temp_field
+ elif outDims == 2:
+ field = temp_field[block_indices, index_arrays[0]]
+ elif outDims == 3:
+ field = temp_field[block_indices, index_arrays[0], index_arrays[1]]
+ elif outDims == 4:
+ field = temp_field[block_indices, index_arrays[0], index_arrays[1],
+ index_arrays[2]]
+
+ return field # }}}
+
+ field_var = get_var(var_name, mesh_file, time_series_file)
+ if 'missing_value' in field_var.ncattrs():
+ missing_val = field_var.missing_value
+ elif '_FillValue' in field_var.ncattrs():
+ missing_val = field_var._FillValue
+ else:
+ missing_val = None
+
+ dim_vals = []
+ extra_dim_index = 0
+ shape = field_var.shape
+ temp_shape = ()
+
+ index_arrays = []
+
+ for i in range(field_var.ndim):
+ dim = field_var.dimensions[i]
+ if dim == 'Time':
+ dim_vals.append(time_index)
+ elif dim in ['nCells', 'nEdges', 'nVertices']:
+ dim_vals.append(block_indices)
+ temp_shape = temp_shape + (len(block_indices),)
+ elif topo_dim is not None and dim == topo_dim:
+ dim_vals.append(numpy.arange(nTopoLevels))
+ else:
+ extra_dim_val = extra_dim_vals[extra_dim_index]
+ try:
+ index = int(extra_dim_val)
+ dim_vals.append(index)
+ except ValueError:
+ # we have an index array so we need to read the whole range
+ # first and then sample the result
+ dim_vals.append(numpy.arange(shape[i]))
+ temp_shape = temp_shape + (shape[i],)
+
+ index_array_var = get_var(extra_dim_val, mesh_file,
+ time_series_file)
+
+ # read the appropriate indices from the index_array_var
+ index_array = numpy.maximum(0, numpy.minimum(
+ shape[i]-1, index_array_var[block_indices]-1))
+
+ index_arrays.append(index_array)
+
+ extra_dim_index += 1
+
+ field = read_field_with_dims(field_var, dim_vals, temp_shape, outType,
+ index_arrays)
+
+ if missing_val is not None:
+ field[field == missing_val] = numpy.nan
+
+ return sign*field # }}}
+
+
+def compute_zInterface(minLevelCell, maxLevelCell, layerThicknessCell,
+ zMinCell, zMaxCell, dtype, cellsOnEdge=None):
+ # {{{
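+    """
+    Compute a (nCells, nLevels+1) array of interface depths by accumulating
+    layer thicknesses over the valid levels of each cell and then shifting
+    each column so that the reference interface matches zMinCell (or
+    zMaxCell if zMinCell is None). If cellsOnEdge is given, interface
+    depths on edges are also computed by averaging layer thicknesses from
+    the adjacent cell(s).
+    """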
+
+ (nCells, nLevels) = layerThicknessCell.shape
+
+ cellMask = numpy.ones((nCells, nLevels), bool)
+ for iLevel in range(nLevels):
+ if minLevelCell is not None:
+ cellMask[:, iLevel] = numpy.logical_and(cellMask[:, iLevel],
+ iLevel >= minLevelCell)
+ if maxLevelCell is not None:
+ cellMask[:, iLevel] = numpy.logical_and(cellMask[:, iLevel],
+ iLevel <= maxLevelCell)
+
+ zInterfaceCell = numpy.zeros((nCells, nLevels+1), dtype=dtype)
+ for iLevel in range(nLevels):
+ zInterfaceCell[:, iLevel+1] = \
+ zInterfaceCell[:, iLevel] \
+ + cellMask[:, iLevel]*layerThicknessCell[:, iLevel]
+
+ if zMinCell is not None:
+ minLevel = minLevelCell.copy()
+ minLevel[minLevel < 0] = nLevels-1
+ zOffsetCell = zMinCell - zInterfaceCell[numpy.arange(0, nCells),
+ minLevel]
+ else:
+ zOffsetCell = zMaxCell - zInterfaceCell[numpy.arange(0, nCells),
+ maxLevelCell+1]
+
+ for iLevel in range(nLevels+1):
+ zInterfaceCell[:, iLevel] += zOffsetCell
+
+ if cellsOnEdge is None:
+ return zInterfaceCell
+ else:
+ nEdges = cellsOnEdge.shape[0]
+ zInterfaceEdge = numpy.zeros((nEdges, nLevels+1), dtype=dtype)
+
+ # Get a list of valid cells on edges and a mask of which are valid
+ cellsOnEdgeMask = numpy.logical_and(cellsOnEdge >= 0,
+ cellsOnEdge < nCells)
+ cellIndicesOnEdge = list()
+ cellIndicesOnEdge.append(cellsOnEdge[cellsOnEdgeMask[:, 0], 0])
+ cellIndicesOnEdge.append(cellsOnEdge[cellsOnEdgeMask[:, 1], 1])
+
+ for iLevel in range(nLevels):
+ edgeMask = numpy.zeros(nEdges, bool)
+ layerThicknessEdge = numpy.zeros(nEdges, float)
+ denom = numpy.zeros(nEdges, float)
+ for index in range(2):
+ mask = cellsOnEdgeMask[:, index]
+ cellIndices = cellIndicesOnEdge[index]
+ cellMaskLocal = cellMask[cellIndices, iLevel]
+
+ edgeMask[mask] = numpy.logical_or(edgeMask[mask],
+ cellMaskLocal)
+
+ layerThicknessEdge[mask] += \
+ cellMaskLocal*layerThicknessCell[cellIndices, iLevel]
+ denom[mask] += 1.0*cellMaskLocal
+
+ layerThicknessEdge[edgeMask] /= denom[edgeMask]
+
+ zInterfaceEdge[:, iLevel+1] = (zInterfaceEdge[:, iLevel]
+ + edgeMask*layerThicknessEdge)
+
+ if zMinCell is not None:
+ refLevelEdge = numpy.zeros(nEdges, int)
+ for index in range(2):
+ mask = cellsOnEdgeMask[:, index]
+ cellIndices = cellIndicesOnEdge[index]
+ refLevelEdge[mask] = numpy.maximum(refLevelEdge[mask],
+ minLevel[cellIndices])
+ else:
+ refLevelEdge = (nLevels-1)*numpy.ones(nEdges, int)
+ for index in range(2):
+ mask = cellsOnEdgeMask[:, index]
+ cellIndices = cellIndicesOnEdge[index]
+ refLevelEdge[mask] = numpy.minimum(refLevelEdge[mask],
+ maxLevelCell[cellIndices]+1)
+
+ zOffsetEdge = numpy.zeros(nEdges, float)
+ # add the average of zInterfaceCell at each adjacent cell
+ denom = numpy.zeros(nEdges, float)
+ for index in range(2):
+ mask = cellsOnEdgeMask[:, index]
+ cellIndices = cellIndicesOnEdge[index]
+ zOffsetEdge[mask] += zInterfaceCell[cellIndices,
+ refLevelEdge[mask]]
+ denom[mask] += 1.0
+
+ mask = denom > 0.
+ zOffsetEdge[mask] /= denom[mask]
+
+ # subtract the depth of zInterfaceEdge at the level of the bottom
+ zOffsetEdge -= zInterfaceEdge[numpy.arange(nEdges), refLevelEdge]
+
+ for iLevel in range(nLevels+1):
+ zInterfaceEdge[:, iLevel] += zOffsetEdge
+
+ return zInterfaceCell, zInterfaceEdge # }}}
+
+
+def _build_location_list_xyz(nc_file, suffix, output_32bit, lonlat): # {{{
+
+ if lonlat:
+ X = numpy.rad2deg(nc_file.variables['lon{}'.format(suffix)][:])
+ Y = numpy.rad2deg(nc_file.variables['lat{}'.format(suffix)][:])
+ Z = numpy.zeros(X.shape)
+ else:
+ X = nc_file.variables['x{}'.format(suffix)][:]
+ Y = nc_file.variables['y{}'.format(suffix)][:]
+ Z = nc_file.variables['z{}'.format(suffix)][:]
+ if output_32bit:
+ X = numpy.array(X, 'f4')
+ Y = numpy.array(Y, 'f4')
+ Z = numpy.array(Z, 'f4')
+ return X, Y, Z
+
+# }}}
+
+
+def _fix_lon_lat_vertices(vertices, verticesOnCell, validVertices,
+ lonCell): # {{{
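+    """
+    Where a cell center and one of its vertices are more than 180 degrees
+    of longitude apart (i.e. they straddle the periodic boundary), append a
+    copy of the vertex shifted by +/-360 degrees and update verticesOnCell
+    to point at the copy.
+    """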
+
+ nCells = verticesOnCell.shape[0]
+ nVertices = len(vertices[0])
+
+ xVertex = vertices[0]
+ xDiff = xVertex[verticesOnCell] - lonCell.reshape(nCells, 1)
+
+ # which cells have vertices that are out of range?
+ outOfRange = numpy.logical_and(validVertices,
+ numpy.logical_or(xDiff >= 180.,
+ xDiff < -180.))
+
+ cellsOutOfRange = numpy.any(outOfRange, axis=1)
+
+ valid = validVertices[cellsOutOfRange, :]
+
+ verticesToChange = numpy.zeros(verticesOnCell.shape, bool)
+ verticesToChange[cellsOutOfRange, :] = valid
+
+ xDiff = xDiff[cellsOutOfRange, :][valid]
+ voc = verticesOnCell[cellsOutOfRange, :][valid]
+ nVerticesToAdd = numpy.count_nonzero(valid)
+ verticesToAdd = numpy.arange(nVerticesToAdd) + nVertices
+ xv = xVertex[voc]
+ verticesOnCell[verticesToChange] = verticesToAdd
+
+ # where the lon. difference between the cell center and the vertex
+ # is out of range (presumably because of the periodic boundary),
+ # move it to be within 180 degrees.
+ mask = xDiff >= 180.
+ xv[mask] -= 360.
+ mask = xDiff < -180.
+ xv[mask] += 360.
+
+ vertices = (numpy.append(xVertex, xv),
+ numpy.append(vertices[1], vertices[1][voc]),
+ numpy.append(vertices[2], vertices[2][voc]))
+
+ return vertices, verticesOnCell # }}}
+
+
+def _fix_periodic_vertices(vertices, verticesOnCell, validVertices,
+ xCell, yCell, xperiod, yperiod): # {{{
+
+ vertices, verticesOnCell = _fix_periodic_vertices_1D(
+ vertices, verticesOnCell, validVertices, xCell, xperiod, dim=0)
+ vertices, verticesOnCell = _fix_periodic_vertices_1D(
+ vertices, verticesOnCell, validVertices, yCell, yperiod, dim=1)
+
+ return vertices, verticesOnCell # }}}
+
+
+def _fix_periodic_vertices_1D(vertices, verticesOnCell, validVertices,
+ coordCell, coordPeriod, dim): # {{{
+
+ nCells = verticesOnCell.shape[0]
+ nVertices = len(vertices[0])
+
+ coordVertex = vertices[dim]
+
+ coordDiff = coordVertex[verticesOnCell] - coordCell.reshape(nCells, 1)
+
+ # which cells have vertices that are out of range?
+ coordOutOfRange = numpy.logical_and(
+ validVertices,
+ numpy.logical_or(coordDiff > coordPeriod / 2.0,
+ coordDiff < -coordPeriod / 2.0))
+
+ coordCellsOutOfRange = numpy.any(coordOutOfRange, axis=1)
+
+ coordValid = validVertices[coordCellsOutOfRange, :]
+
+ coordVerticesToChange = numpy.zeros(verticesOnCell.shape, bool)
+ coordVerticesToChange[coordCellsOutOfRange, :] = coordValid
+
+ coordDiff = coordDiff[coordCellsOutOfRange, :][coordValid]
+ coordVOC = verticesOnCell[coordCellsOutOfRange, :][coordValid]
+
+ coordNVerticesToAdd = numpy.count_nonzero(coordValid)
+
+ coordVerticesToAdd = numpy.arange(coordNVerticesToAdd) + nVertices
+ coordV = coordVertex[coordVOC]
+ verticesOnCell[coordVerticesToChange] = coordVerticesToAdd
+
+    # shift points outside the periodic domain (this assumes the mesh spans
+    # only one period; use mod if that is not the case in general)
+ coordMask = coordDiff > coordPeriod / 2.0
+ coordV[coordMask] -= coordPeriod
+ coordMask = coordDiff < -coordPeriod / 2.0
+ coordV[coordMask] += coordPeriod
+
+ outVertices = []
+ for outDim in range(3):
+ if outDim == dim:
+ outVertices.append(numpy.append(vertices[outDim], coordV))
+ else:
+ outVertices.append(numpy.append(vertices[outDim],
+ vertices[outDim][coordVOC]))
+
+ return tuple(outVertices), verticesOnCell # }}}
+
+
+def _expand_variable_list(variable_list, time_series_variables,
+ mesh_variables, include_dims):
+ if variable_list == 'all':
+ variable_names = []
+ exclude_dims = ['Time']
+ for variable_name in time_series_variables:
+ _add_var(time_series_variables, str(variable_name),
+ include_dims, variable_names, exc_dims=None)
+ if mesh_variables is not None:
+ for variable_name in mesh_variables:
+ _add_var(mesh_variables, str(variable_name), include_dims,
+ variable_names, exclude_dims)
+ elif isinstance(variable_list, str):
+ variable_names = variable_list.split(',')
+ else:
+ variable_names = variable_list
+
+ for suffix in ['Cells', 'Edges', 'Vertices']:
+ include_dim = 'n{}'.format(suffix)
+ all_on = 'allOn{}'.format(suffix)
+ if (all_on in variable_names) and (include_dim in include_dims):
+ variable_names.remove(all_on)
+ exclude_dims = ['Time']
+ for variable_name in time_series_variables:
+ _add_var(time_series_variables, str(variable_name),
+ inc_dims=[include_dim], variable_names=variable_names,
+ exc_dims=None)
+ if mesh_variables is not None:
+ for variable_name in mesh_variables:
+ _add_var(mesh_variables, str(variable_name),
+ inc_dims=[include_dim],
+ variable_names=variable_names,
+ exc_dims=exclude_dims)
+
+ variable_names.sort()
+ return variable_names
+
+
+def _add_var(variables, var_name, inc_dims, variable_names, exc_dims=None):
+ if var_name in variable_names:
+ return
+
+ dims = variables[var_name].dimensions
+ supported = False
+ for d in inc_dims:
+ if d in dims:
+ supported = True
+ if exc_dims is not None:
+ for d in exc_dims:
+ if d in dims:
+ supported = False
+ if supported:
+ variable_names.append(var_name)
+
+
+def _cull_files(fc_region_mask, temp_dir, mesh_filename, time_file_names,
+ separate_mesh_file, variable_list, include_mesh_vars, xtime,
+ use_progress_bar):
+
+ mesh_vars = [
+ 'areaCell', 'cellsOnCell', 'edgesOnCell', 'indexToCellID',
+ 'latCell', 'lonCell', 'nEdgesOnCell', 'verticesOnCell',
+ 'xCell', 'yCell', 'zCell', 'angleEdge', 'cellsOnEdge', 'dcEdge',
+ 'dvEdge', 'edgesOnEdge', 'indexToEdgeID', 'latEdge',
+        'lonEdge', 'nEdgesOnEdge', 'verticesOnEdge',
+ 'xEdge', 'yEdge', 'zEdge', 'areaTriangle',
+ 'cellsOnVertex', 'edgesOnVertex', 'indexToVertexID',
+ 'kiteAreasOnVertex', 'latVertex', 'lonVertex', 'xVertex', 'yVertex',
+ 'zVertex', 'weightsOnEdge']
+
+ try:
+ os.makedirs(temp_dir)
+ except OSError:
+ pass
+
+ log_stream = StringIO()
+ logger = logging.getLogger('_cull_files')
+ for handler in logger.handlers:
+ logger.removeHandler(handler)
+ handler = logging.StreamHandler(log_stream)
+ logger.addHandler(handler)
+ handler.setLevel(logging.INFO)
+
+ # Figure out the variable names we want to extract
+ with open_netcdf(time_file_names[0]) as time_series_file:
+ time_series_variables = time_series_file.variables
+ if separate_mesh_file and include_mesh_vars:
+ mesh_file = open_netcdf(mesh_filename)
+ mesh_variables = mesh_file.variables
+ else:
+ mesh_file = None
+ mesh_variables = None
+
+ include_dims = ('nCells', 'nEdges', 'nVertices')
+ variable_names = _expand_variable_list(variable_list,
+ time_series_variables,
+ mesh_variables, include_dims)
+
+ if mesh_file is not None:
+ mesh_file.close()
+
+ print('Including variables: {}'.format(', '.join(variable_names)))
+
+ with xarray.open_dataset(mesh_filename) as ds_mesh:
+ ds_mesh = ds_mesh[mesh_vars]
+ print('Making a region mask file')
+ ds_mask = mask(dsMesh=ds_mesh, fcMask=fc_region_mask, logger=logger,
+ dir=temp_dir)
+ write_netcdf(ds_mask, f'{temp_dir}/mask.nc')
+ print('Cropping mesh to region')
+ out_mesh_filename = '{}/mesh.nc'.format(temp_dir)
+ args = ['MpasCellCuller.x',
+ mesh_filename,
+ out_mesh_filename,
+ '-i', f'{temp_dir}/mask.nc']
+ check_call(args=args, logger=logger)
+
+ region_masks = dict()
+ cell_mask = ds_mask.regionCellMasks.sum(dim='nRegions') > 0
+ region_masks['nCells'] = cell_mask
+ region_masks['nVertices'] = \
+ ds_mask.regionVertexMasks.sum(dim='nRegions') > 0
+ coe = ds_mesh.cellsOnEdge - 1
+ valid_cell_on_edge = numpy.logical_and(coe >= 0, cell_mask[coe])
+ region_masks['nEdges'] = numpy.logical_or(
+ valid_cell_on_edge.isel(TWO=0),
+ valid_cell_on_edge.isel(TWO=1))
+
+ if use_progress_bar:
+ widgets = ['Cropping time series to region: ', Percentage(), ' ',
+ Bar(), ' ', ETA()]
+ bar = ProgressBar(widgets=widgets, maxval=len(time_file_names)).start()
+ else:
+ print('Cropping time series to region')
+ bar = None
+
+ out_time_file_names = []
+ already_culled = []
+ for index, filename in enumerate(time_file_names):
+        if filename in already_culled:
+            # point this time index at the existing culled file
+            file_index = already_culled.index(filename)
+            out_time_file_names.append(
+                f'{temp_dir}/time_series{file_index:04d}.nc')
+            continue
+        file_index = len(already_culled)
+        out_filename = f'{temp_dir}/time_series{file_index:04d}.nc'
+        out_time_file_names.append(out_filename)
+ ds_in = xarray.open_dataset(filename)
+ ds_out = xarray.Dataset()
+ if xtime is None:
+ ds_in = ds_in[variable_names]
+ else:
+ ds_in = ds_in[variable_names + [xtime]]
+ ds_out[xtime] = ds_in[xtime]
+ for var in ds_in.data_vars:
+ for dim in region_masks:
+ if dim in ds_in[var].dims:
+ ds_out[var] = ds_in[var].where(region_masks[dim], drop=True)
+ write_netcdf(ds_out, out_filename)
+ already_culled.append(filename)
+        if use_progress_bar:
+            bar.update(index+1)
+
+    if use_progress_bar:
+        bar.finish()
+
+ logger.removeHandler(handler)
+ handler.close()
+
+ return out_mesh_filename, out_time_file_names
+
+Comments in config files
+
+One of the main advantages of mpas_tools.config.MpasConfigParser over
+configparser.ConfigParser is that it keeps track of comments that are
+associated with config sections and options. There are a few "rules" that
+make this possible.
+
+Comments must begin with the # character. They must be placed before the
+config section or option in question (preferably without blank lines
+between). The comments can be any number of lines.
+
+Note: Inline comments (after a config option on the same line) are not
+allowed and will be parsed as part of the config option itself.
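+
+For example, a hypothetical config file that follows these rules:
+
+    # This section describes options related to output
+    [output]
+
+    # The output directory
+    out_dir = vtk_files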