From 65a8e6534a308afd3b6b968b435ca16393187c64 Mon Sep 17 00:00:00 2001 From: Bryna Hazelton Date: Fri, 1 Dec 2023 17:10:30 -0800 Subject: [PATCH] make partial reads work, modularize UVCal selection code Add testing! --- docs/conf.py | 1 + pyuvdata/hdf5_utils.py | 2 + pyuvdata/parameter.py | 2 +- pyuvdata/utils.py | 425 +++++++-- pyuvdata/uvcal/__init__.py | 1 + pyuvdata/uvcal/calfits.py | 6 +- pyuvdata/uvcal/calh5.py | 515 +++++------ pyuvdata/uvcal/fhd_cal.py | 20 +- pyuvdata/uvcal/tests/__init__.py | 79 ++ pyuvdata/uvcal/tests/conftest.py | 97 ++ pyuvdata/uvcal/tests/test_calh5.py | 302 +++++++ pyuvdata/uvcal/tests/test_fhd_cal.py | 17 +- pyuvdata/uvcal/tests/test_uvcal.py | 206 +---- pyuvdata/uvcal/uvcal.py | 1220 +++++++++++++++----------- pyuvdata/uvdata/tests/test_uvdata.py | 10 +- pyuvdata/uvdata/uvdata.py | 134 +-- pyuvdata/uvdata/uvh5.py | 2 +- 17 files changed, 1802 insertions(+), 1237 deletions(-) create mode 100644 pyuvdata/uvcal/tests/test_calh5.py diff --git a/docs/conf.py b/docs/conf.py index 79ea4d4ae1..956eef141d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -69,6 +69,7 @@ ("UVH5", "params_style"), ("MWA FITS", "params_style"), ("MIR", "params_style"), + ("CalH5", "params_style"), ] # Add any paths that contain templates here, relative to this directory. diff --git a/pyuvdata/hdf5_utils.py b/pyuvdata/hdf5_utils.py index 68fb7b83ba..3c603a9959 100644 --- a/pyuvdata/hdf5_utils.py +++ b/pyuvdata/hdf5_utils.py @@ -2,6 +2,8 @@ # Copyright (c) 2023 Radio Astronomy Software Group # Licensed under the 2-clause BSD License """Utilities for working with HDF5 files.""" +from __future__ import annotations + from functools import cached_property from pathlib import Path from typing import Any diff --git a/pyuvdata/parameter.py b/pyuvdata/parameter.py index 30305571e6..abf45bb1ed 100644 --- a/pyuvdata/parameter.py +++ b/pyuvdata/parameter.py @@ -496,7 +496,7 @@ def __eq__(self, other, *, silent=False): if not silent: print( f"{self.name} parameter value is not an array, " - "but other is not" + "but other is an array" ) return False try: diff --git a/pyuvdata/utils.py b/pyuvdata/utils.py index 78424350c6..f5d7805599 100644 --- a/pyuvdata/utils.py +++ b/pyuvdata/utils.py @@ -765,6 +765,253 @@ def _sort_freq_helper( return index_array +def _check_range_overlap(val_range, range_type="time"): + """ + Detect if any val_range in an array overlap. + + Parameters + ---------- + val_range : np.array of float + Array of ranges, shape (Nranges, 2). + range_type : str + Type of range (for good error messages) + + Returns + ------- + bool + True if any range overlaps. + """ + # first check that time ranges are well formed (stop is >= than start) + if np.any((val_range[:, 1] - val_range[:, 0]) < 0): + raise ValueError( + f"The {range_type} ranges are not well-formed, some stop {range_type}s " + f"are after start {range_type}s." + ) + + # Sort by start time + sorted_ranges = val_range[np.argsort(val_range[:, 0]), :] + + # then check if adjacent pairs overlap + for ind in range(sorted_ranges.shape[0] - 1): + range1 = sorted_ranges[ind] + range2 = sorted_ranges[ind + 1] + if range2[0] < range1[1]: + return True + + +def _select_times_helper( + *, + times, + time_range, + lsts, + lst_range, + obj_time_array, + obj_time_range, + obj_lst_array, + obj_lst_range, + time_tols, + lst_tols, +): + """ + Get time indices in a select. + + Parameters + ---------- + times : array_like of float + The times to keep in the object, each value passed here should exist in the + time_array. 
Can be None, cannot be set with `time_range`, `lsts` or `lst_array`. + time_range : array_like of float + The time range in Julian Date to keep in the object, must be length 2. Some of + the times in the object should fall between the first and last elements. Can be + None, cannot be set with `times`, `lsts` or `lst_array`. + lsts : array_like of float + The local sidereal times (LSTs) to keep in the object, each value passed here + should exist in the lst_array. Can be None, cannot be set with `times`, + `time_range`, or `lst_range`. + lst_range : array_like of float + The local sidereal time (LST) range in radians to keep in the + object, must be of length 2. Some of the LSTs in the object should + fall between the first and last elements. If the second value is + smaller than the first, the LSTs are treated as having phase-wrapped + around LST = 2*pi = 0, and the LSTs kept on the object will run from + the larger value, through 0, and end at the smaller value. Can be None, cannot + be set with `times`, `time_range`, or `lsts`. + obj_time_array : array_like of float + Time array on object. Can be None if `object_time_range` is set. + obj_time_range : array_like of float + Time range on object. Can be None if `object_time_array` is set. + obj_lst_array : array_like of float + LST array on object. Can be None if `object_lst_range` is set. + obj_lst_range : array_like of float + LST range on object. Can be None if `object_lst_array` is set. + time_tols : tuple of float + Length 2 tuple giving (rtol, atol) to use for time matching. + lst_tols : tuple of float + Length 2 tuple giving (rtol, atol) to use for lst matching. + + """ + have_times = times is not None + have_time_range = time_range is not None + have_lsts = lsts is not None + have_lst_range = lst_range is not None + n_time_params = np.count_nonzero( + [have_times, have_time_range, have_lsts, have_lst_range] + ) + if n_time_params > 1: + raise ValueError( + "Only one of [times, time_range, lsts, lst_range] may be " + "specified per selection operation." + ) + if n_time_params == 0: + return None + + time_inds = np.zeros(0, dtype=np.int64) + if times is not None: + times = _get_iterable(times) + if np.array(times).ndim > 1: + times = np.array(times).flatten() + + if obj_time_range is not None: + for jd in times: + this_ind = np.nonzero( + np.logical_and( + (obj_time_range[:, 0] <= jd), (obj_time_range[:, 1] >= jd) + ) + )[0] + if this_ind.size > 0: + time_inds = np.append(time_inds, this_ind) + else: + raise ValueError(f"Time {jd} does not fall in any time_range.") + else: + for jd in times: + if np.any( + np.isclose(obj_time_array, jd, rtol=time_tols[0], atol=time_tols[1]) + ): + time_inds = np.append( + time_inds, + np.where( + np.isclose( + obj_time_array, jd, rtol=time_tols[0], atol=time_tols[1] + ) + )[0], + ) + else: + raise ValueError(f"Time {jd} is not present in the time_array.") + + if time_range is not None: + if np.size(time_range) != 2: + raise ValueError("time_range must be length 2.") + + if obj_time_range is not None: + for tind, trange in enumerate(obj_time_range): + if _check_range_overlap(np.stack((trange, time_range), axis=0)): + time_inds = np.append(time_inds, tind) + attr_str = "time_range" + else: + time_inds = np.nonzero( + (obj_time_array <= time_range[1]) & (obj_time_array >= time_range[0]) + )[0] + attr_str = "time_array" + if time_inds.size == 0: + raise ValueError( + f"No elements in {attr_str} between {time_range[0]} and " + f"{time_range[1]}." 
+ ) + + if (lsts is not None or lst_range is not None) and obj_lst_range is not None: + # check for lsts wrapping around zero + lst_range_wrap = obj_lst_range[:, 0] > obj_lst_range[:, 1] + + if lsts is not None: + if np.any(np.asarray(lsts) > 2 * np.pi): + warnings.warn( + "The lsts parameter contained a value greater than 2*pi. " + "LST values are assumed to be in radians, not hours." + ) + lsts = _get_iterable(lsts) + if np.array(lsts).ndim > 1: + lsts = np.array(lsts).flatten() + + if obj_lst_range is not None: + for lst in lsts: + lst_ind = np.nonzero( + np.logical_and( + (obj_lst_range[:, 0] <= lst), (obj_lst_range[:, 1] >= lst) + ) + )[0] + if lst_ind.size == 0 and np.any(lst_range_wrap): + for lr_ind in np.nonzero(lst_range_wrap)[0]: + if (obj_lst_range[lr_ind, 0] <= lst and lst <= 2 * np.pi) or ( + lst >= 0 and lst <= obj_lst_range[lr_ind, 1] + ): + lst_ind = np.array([lr_ind]) + if lst_ind.size > 0: + time_inds = np.append(time_inds, lst_ind) + else: + raise ValueError(f"LST {lst} does not fall in any lst_range") + else: + for lst in lsts: + if np.any( + np.isclose(obj_lst_array, lst, rtol=lst_tols[0], atol=lst_tols[1]) + ): + time_inds = np.append( + time_inds, + np.where( + np.isclose( + obj_lst_array, lst, rtol=lst_tols[0], atol=lst_tols[1] + ) + )[0], + ) + else: + raise ValueError(f"LST {lst} is not present in the lst_array") + + if lst_range is not None: + if np.size(lst_range) != 2: + raise ValueError("lst_range must be length 2.") + if np.any(np.asarray(lst_range) > 2 * np.pi): + warnings.warn( + "The lst_range contained a value greater than 2*pi. " + "LST values are assumed to be in radians, not hours." + ) + if obj_lst_range is not None: + for lind, lrange in enumerate(obj_lst_range): + if not lst_range_wrap[lind]: + if _check_range_overlap(np.stack((lrange, lst_range), axis=0)): + time_inds = np.append(time_inds, lind) + else: + if (lst_range[0] >= lrange[0] and lst_range[0] <= 2 * np.pi) or ( + lst_range[1] <= lrange[1] and lst_range[1] >= 0 + ): + time_inds = np.append(time_inds, lind) + attr_str = "lst_range" + else: + if lst_range[1] < lst_range[0]: + # we're wrapping around LST = 2*pi = 0 + lst_range_1 = [lst_range[0], 2 * np.pi] + lst_range_2 = [0, lst_range[1]] + time_inds1 = np.nonzero( + (obj_lst_array <= lst_range_1[1]) + & (obj_lst_array >= lst_range_1[0]) + )[0] + time_inds2 = np.nonzero( + (obj_lst_array <= lst_range_2[1]) + & (obj_lst_array >= lst_range_2[0]) + )[0] + time_inds = np.union1d(time_inds1, time_inds2) + else: + time_inds = np.nonzero( + (obj_lst_array <= lst_range[1]) & (obj_lst_array >= lst_range[0]) + )[0] + attr_str = "lst_array" + + if time_inds.size == 0: + raise ValueError( + f"No elements in {attr_str} between {lst_range[0]} and " + f"{lst_range[1]}." + ) + return time_inds + + def baseline_to_antnums(baseline, *, Nants_telescope): """ Get the antenna numbers corresponding to a given baseline number. 
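# --- Illustrative aside, not part of the patch above: a minimal sketch of the
# adjacent-pair overlap test that the new _check_range_overlap helper performs.
# Once the ranges are sorted by start value, comparing each start against the
# previous stop is sufficient to detect any overlap.
import numpy as np


def ranges_overlap(val_range):
    val_range = np.asarray(val_range, dtype=float)
    if np.any(val_range[:, 1] < val_range[:, 0]):
        raise ValueError("ranges are not well-formed: some stops precede starts")
    sorted_ranges = val_range[np.argsort(val_range[:, 0])]
    return bool(np.any(sorted_ranges[1:, 0] < sorted_ranges[:-1, 1]))


# ranges_overlap([[0.0, 1.0], [0.5, 2.0]]) -> True
# ranges_overlap([[0.0, 1.0], [1.5, 2.0]]) -> False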
@@ -785,9 +1032,7 @@ def baseline_to_antnums(baseline, *, Nants_telescope): """ if Nants_telescope > 2147483648: - raise ValueError( - "error Nants={Nants}>2147483648 not supported".format(Nants=Nants_telescope) - ) + raise ValueError(f"error Nants={Nants_telescope}>2147483648 not supported") if np.any(np.asarray(baseline) < 0): raise ValueError("negative baseline numbers are not supported") if np.any(np.asarray(baseline) > 4611686018498691072): @@ -831,7 +1076,7 @@ def antnums_to_baseline(ant1, ant2, *, Nants_telescope, attempt256=False): if Nants_telescope is not None and Nants_telescope > 2147483648: raise ValueError( "cannot convert ant1, ant2 to a baseline index " - "with Nants={Nants}>2147483648.".format(Nants=Nants_telescope) + f"with Nants={Nants_telescope}>2147483648." ) if np.any(np.concatenate((np.unique(ant1), np.unique(ant2))) >= 2147483648): raise ValueError( @@ -948,9 +1193,7 @@ def polstr2num(pol: str | IterableType[str], *, x_orientation: str | None = None out = [poldict[key.lower()] for key in pol] else: raise ValueError( - "Polarization {p} cannot be converted to a polarization number.".format( - p=pol - ) + f"Polarization {pol} cannot be converted to a polarization number." ) return out @@ -1003,9 +1246,7 @@ def polnum2str(num, *, x_orientation=None): elif isinstance(num, Iterable): out = [dict_use[i] for i in num] else: - raise ValueError( - "Polarization {p} cannot be converted to string.".format(p=num) - ) + raise ValueError(f"Polarization {num} cannot be converted to string.") return out @@ -1056,9 +1297,7 @@ def jstr2num(jstr, *, x_orientation=None): elif isinstance(jstr, Iterable): out = [jdict[key.lower()] for key in jstr] else: - raise ValueError( - "Jones polarization {j} cannot be converted to index.".format(j=jstr) - ) + raise ValueError(f"Jones polarization {jstr} cannot be converted to index.") return out @@ -1108,9 +1347,7 @@ def jnum2str(jnum, *, x_orientation=None): elif isinstance(jnum, Iterable): out = [dict_use[i] for i in jnum] else: - raise ValueError( - "Jones polarization {j} cannot be converted to string.".format(j=jnum) - ) + raise ValueError(f"Jones polarization {jnum} cannot be converted to string.") return out @@ -5535,7 +5772,7 @@ def parse_ants(uv, ant_str, *, print_toggle=False, x_orientation=None): elif ant_str[str_pos:].upper().startswith("PV"): polarizations.append(polstr2num("pV")) else: - raise ValueError("Unparsible argument {s}".format(s=ant_str)) + raise ValueError(f"Unparsable argument {ant_str}") comma_cnt = ant_str[str_pos:].find(",") if comma_cnt >= 0: @@ -5733,9 +5970,41 @@ def _combine_filenames(filename1, filename2): return combined_filenames +def _get_slice_len(s, axlen): + """ + Get length of a slice s into array of len axlen. + + Parameters + ---------- + s : slice object + Slice object to index with + axlen : int + Length of axis s slices into + + Returns + ------- + int + Length of slice object + """ + if s.start is None: + start = 0 + else: + start = s.start + if s.stop is None: + stop = axlen + else: + stop = np.min([s.stop, axlen]) + if s.step is None: + step = 1 + else: + step = s.step + + return ((stop - 1 - start) // step) + 1 + + def _get_dset_shape(dset, indices): """ - Given a 3-tuple of indices, determine the indexed array shape. + Given a tuple of indices, determine the indexed array shape. Parameters ---------- @@ -5743,7 +6012,7 @@ def _get_dset_shape(dset, indices): A numpy array or a reference to an HDF5 dataset on disk. Requires the `dset.shape` attribute exists and returns a tuple. 
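# --- Illustrative aside, not part of the patch: the slice-length arithmetic used
# by the relocated _get_slice_len helper, cross-checked against Python's own
# range() for forward, non-empty slices (empty or reversed slices are not
# handled by this sketch).
def slice_len(s, axlen):
    start = 0 if s.start is None else s.start
    stop = axlen if s.stop is None else min(s.stop, axlen)
    step = 1 if s.step is None else s.step
    return ((stop - 1 - start) // step) + 1


assert slice_len(slice(2, 11, 3), 20) == len(range(2, 11, 3)) == 3
assert slice_len(slice(None, None, 4), 10) == len(range(0, 10, 4)) == 3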
indices : tuple - A 3-tuple with the indices to extract along each dimension of dset. + A tuple with the indices to extract along each dimension of dset. Each element should contain a list of indices, a slice element, or a list of slice elements that will be concatenated after slicing. For data arrays with 4 dimensions, the second dimension (the old spw axis) @@ -5752,10 +6021,10 @@ def _get_dset_shape(dset, indices): Returns ------- tuple - a 3- or 4-tuple with the shape of the indexed array + a tuple with the shape of the indexed array tuple - a 3- or 4-tuple with indices used (will be different than input if dset has - 4 dimensions) + a tuple with indices used (will be different than input if dset has + 4 dimensions and indices has 3 dimensions) """ dset_shape = list(dset.shape) if len(dset_shape) == 4 and len(indices) == 3: @@ -5863,58 +6132,35 @@ def _convert_to_slices(indices, *, max_nslice_frac=0.1): return slices, passed -def _get_slice_len(s, axlen): - """ - Get length of a slice s into array of len axlen. - - Parameters - ---------- - s : slice object - Slice object to index with - axlen : int - Length of axis s slices into - - Returns - ------- - int - Length of slice object - """ - if s.start is None: - start = 0 - else: - start = s.start - if s.stop is None: - stop = axlen - else: - stop = np.min([s.stop, axlen]) - if s.step is None: - step = 1 - else: - step = s.step - - return ((stop - 1 - start) // step) + 1 - - def _index_dset(dset, indices, *, input_array=None): """ - Index a UVH5 data, flags or nsamples h5py dataset. + Index a UVH5 data, flags or nsamples h5py dataset to get data or overwrite data. + + If no `input_array` is passed, this function extracts the data at the indices + and returns it. If `input_array` is passed, this function replaced the data at the + indices with the input array. Parameters ---------- dset : h5py dataset A reference to an HDF5 dataset on disk. indices : tuple - A 3-tuple with the indices to extract along each dimension of dset. + A tuple with the indices to extract along each dimension of dset. Each element should contain a list of indices, a slice element, or a list of slice elements that will be concatenated after slicing. Indices must be provided such that all dimensions can be indexed - simultaneously. For data arrays with 4 dimensions, the second dimension - (the old spw axis) should not be included because it can only be length one. + simultaneously. This should have a length equal to the length of the dset, + with an exception to support the old array shape for uvdata arrays (in that + case the dset is length 4 but the second dimension is shallow, so only three + indices need to be passed). + input_array : ndarray, optional + Array to be copied into the dset at the indices. If not provided, the data in + the dset is indexed and returned. Returns ------- - ndarray - The indexed dset + ndarray or None + The indexed dset if the `input_array` parameter is not used. 
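# --- Illustrative aside, not part of the patch: the kind of per-dimension
# "indices" tuple _index_dset accepts, mimicked here on a plain ndarray. Each
# entry may be an int, a slice, a list of ints, or a list of slices; a list of
# slices is concatenated after slicing each piece.
import numpy as np

data = np.arange(2 * 3 * 4 * 5).reshape(2, 3, 4, 5)
sub = data[:, [0, 2]][:, :, 1:3]  # list of ints on axis 1, slice on axis 2
parts = [sub[..., s] for s in (slice(0, 2), slice(3, 5))]  # list of slices, axis 3
out = np.concatenate(parts, axis=3)
assert out.shape == (2, 2, 2, 4)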
Notes ----- @@ -5938,22 +6184,26 @@ def _index_dset(dset, indices, *, input_array=None): # get arr and dset indices for each dimension in indices dset_indices = [] arr_indices = [] + nselects_per_dim = [] for i, dset_inds in enumerate(indices): if isinstance(dset_inds, (int, np.integer)): # this dimension is len 1, so slice is fine arr_indices.append([slice(None)]) dset_indices.append([[dset_inds]]) + nselects_per_dim.append(1) elif isinstance(dset_inds, slice): # this dimension is just a slice, so slice is fine arr_indices.append([slice(None)]) dset_indices.append([dset_inds]) + nselects_per_dim.append(1) elif isinstance(dset_inds, (list, np.ndarray)): if isinstance(dset_inds[0], (int, np.integer)): # this is a list of integers, append slice arr_indices.append([slice(None)]) dset_indices.append([dset_inds]) + nselects_per_dim.append(1) elif isinstance(dset_inds[0], slice): # this is a list of slices, need list of slice lens slens = [_get_slice_len(s, dset_shape[i]) for s in dset_inds] @@ -5961,38 +6211,29 @@ def _index_dset(dset, indices, *, input_array=None): arr_inds = [slice(s, s + l) for s, l in zip(ssums, slens)] arr_indices.append(arr_inds) dset_indices.append(dset_inds) - - if len(dset_shape) == 3: - freq_dim = 1 - pol_dim = 2 - else: - freq_dim = 2 - pol_dim = 3 - - # iterate over each of the 3 axes and fill the array - for blt_arr, blt_dset in zip(arr_indices[0], dset_indices[0]): - for freq_arr, freq_dset in zip(arr_indices[freq_dim], dset_indices[freq_dim]): - for pol_arr, pol_dset in zip(arr_indices[pol_dim], dset_indices[pol_dim]): - if input_array is None: - # index dset and assign to arr - if len(dset_shape) == 3: - arr[blt_arr, freq_arr, pol_arr] = dset[ - blt_dset, freq_dset, pol_dset - ] - else: - arr[blt_arr, :, freq_arr, pol_arr] = dset[ - blt_dset, :, freq_dset, pol_dset - ] - else: - # index arr and assign to dset - if len(dset_shape) == 3: - dset[blt_dset, freq_dset, pol_dset] = arr[ - blt_arr, freq_arr, pol_arr - ] - else: - dset[blt_dset, :, freq_dset, pol_dset] = arr[ - blt_arr, :, freq_arr, pol_arr - ] + nselects_per_dim.append(len(dset_inds)) + + # iterate through all selections and fill the array + total_selects = np.prod(nselects_per_dim) + axis_arrays = [] + for nsel in nselects_per_dim: + axis_arrays.append(np.arange(nsel, dtype=int)) + sel_index_arrays = np.meshgrid(*axis_arrays) + for ind, array in enumerate(sel_index_arrays): + sel_index_arrays[ind] = array.flatten() + for sel in np.arange(total_selects): + sel_arr_indices = [] + sel_dset_indices = [] + for dim in np.arange(len(dset_shape)): + this_index = (sel_index_arrays[dim])[sel] + sel_arr_indices.append(arr_indices[dim][this_index]) + sel_dset_indices.append(dset_indices[dim][this_index]) + if input_array is None: + # index dset and assign to arr + arr[(*sel_arr_indices,)] = dset[(*sel_dset_indices,)] + else: + # index arr and assign to dset + dset[(*sel_dset_indices,)] = arr[(*sel_arr_indices,)] if input_array is None: return arr diff --git a/pyuvdata/uvcal/__init__.py b/pyuvdata/uvcal/__init__.py index 64a8e833b3..64aba6e0d3 100644 --- a/pyuvdata/uvcal/__init__.py +++ b/pyuvdata/uvcal/__init__.py @@ -3,4 +3,5 @@ # Licensed under the 2-clause BSD License """Init file for UVCal.""" +from .calh5 import FastCalH5Meta # noqa from .uvcal import * # noqa diff --git a/pyuvdata/uvcal/calfits.py b/pyuvdata/uvcal/calfits.py index 8d4bde95bf..24cf98d8f2 100644 --- a/pyuvdata/uvcal/calfits.py +++ b/pyuvdata/uvcal/calfits.py @@ -618,9 +618,9 @@ def read_calfits( self.Nsources = hdr.pop("NSOURCES", None) 
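# --- Illustrative aside, not part of the patch: how the rewritten _index_dset
# loop above enumerates every combination of per-dimension selection groups with
# np.meshgrid, replacing the previous hard-coded three nested loops so that any
# number of dimensions can be handled.
import numpy as np

nselects_per_dim = [2, 1, 3]
axis_arrays = [np.arange(n, dtype=int) for n in nselects_per_dim]
sel_index_arrays = [arr.flatten() for arr in np.meshgrid(*axis_arrays)]
combos = list(zip(*sel_index_arrays))
assert len(combos) == np.prod(nselects_per_dim)  # one entry per group combination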
bl_range_string = hdr.pop("BL_RANGE", None) if bl_range_string is not None: - self.baseline_range = [ - float(b) for b in bl_range_string.strip("[").strip("]").split(",") - ] + self.baseline_range = np.asarray( + [float(b) for b in bl_range_string.strip("[").strip("]").split(",")] + ) self.diffuse_model = hdr.pop("DIFFUSE", None) self.observer = hdr.pop("OBSERVER", None) diff --git a/pyuvdata/uvcal/calh5.py b/pyuvdata/uvcal/calh5.py index f396b4d67c..c358d0f478 100644 --- a/pyuvdata/uvcal/calh5.py +++ b/pyuvdata/uvcal/calh5.py @@ -11,10 +11,12 @@ import h5py import numpy as np +from docstring_parser import DocstringStyle from .. import hdf5_utils from .. import utils as uvutils -from .uvcal import UVCal, _future_array_shapes_warning, radian_tol +from ..docstrings import copy_replace_short_description +from .uvcal import UVCal, _future_array_shapes_warning hdf5plugin_present = True try: @@ -91,26 +93,13 @@ class FastCalH5Meta(hdf5_utils.HDF5Meta): _bool_attrs = frozenset(("wide_band",)) - def check_lsts_against_times(self): - """Check that LSTs consistent with the time_array and telescope location.""" - lsts = uvutils.get_lst_for_time( - self.time_array, *self.telescope_location_lat_lon_alt_degrees - ) - - if not np.all(np.isclose(self.lsts, lsts, rtol=0, atol=radian_tol)): - warnings.warn( - f"LST values stored in {self.path} are not self-consistent " - "with time_array and telescope location. Consider " - "recomputing with utils.get_lst_for_time." - ) - @cached_property def antenna_names(self) -> list[str]: """The antenna names in the file.""" return [bytes(name).decode("utf8") for name in self.header["antenna_names"][:]] def has_key(self, antnum: int | None = None, jpol: str | int | None = None) -> bool: - """Check if the file has a given antpair or antpair-pol key.""" + """Check if the file has a given antenna number or antenna number-pol key.""" if antnum is not None: if antnum not in self.ant_array: return False @@ -144,12 +133,14 @@ def telescope_location_lat_lon_alt_degrees(self) -> tuple[float, float, float]: """The telescope location in latitude, longitude, and altitude, in degrees.""" return self.latitude, self.longitude, self.altitude - def to_uvcal(self, check_lsts: bool = False) -> UVCal: + def to_uvcal( + self, *, check_lsts: bool = False, astrometry_library: str | None = None + ) -> UVCal: """Convert the file to a UVData object. The object will be metadata-only. """ - uvc = CalH5() + uvc = UVCal() uvc.read_calh5( self, read_data=False, @@ -170,9 +161,11 @@ class CalH5(UVCal): def _read_header( self, - filename: str | Path | FastCalH5Meta, + meta: FastCalH5Meta, + *, background_lsts: bool = True, run_check_acceptability: bool = True, + astrometry_library: str | None = None, ): """ Read header information from a UVH5 file. @@ -182,10 +175,8 @@ def _read_header( Parameters ---------- - filename : string, path, FastCalH5Meta, h5py.File or h5py.Group - A file name or path or a FastCalH5Meta or h5py File or Group object that - contains the header information. Should be called "/Header" for CalH5 files - conforming to spec. + meta : FastCalH5Meta, h5py.File or h5py.Group + A FastCalH5Meta or object that contains the header information. background_lsts : bool When set to True, the lst_array is calculated in a background thread. 
run_check_acceptability : bool @@ -195,32 +186,31 @@ def _read_header( ------- None """ - if not isinstance(filename, FastCalH5Meta): - obj = FastCalH5Meta(filename) - else: - obj = filename - # First, get the things relevant for setting LSTs, so that can be run in the # background if desired. self.telescope_location_lat_lon_alt_degrees = ( - obj.telescope_location_lat_lon_alt_degrees + meta.telescope_location_lat_lon_alt_degrees ) - if "time_array" in obj.header: - self.time_array = obj.time_array - if "time_range" in obj.header: - self.time_range = obj.time_range + if "time_array" in meta.header: + self.time_array = meta.time_array + if "time_range" in meta.header: + self.time_range = meta.time_range - if "lst_array" in obj.header: - self.lst_array = obj.header["lst_array"][:] + if "lst_array" in meta.header: + self.lst_array = meta.header["lst_array"][:] proc = None - elif "time_array" in obj.header: - proc = self.set_lsts_from_time_array(background=background_lsts) + elif "time_array" in meta.header: + proc = self.set_lsts_from_time_array( + background=background_lsts, astrometry_library=astrometry_library + ) - if "lst_range" in obj.header: - self.lst_range = obj.header["lst_range"][:] + if "lst_range" in meta.header: + self.lst_range = meta.header["lst_range"][:] proc = None - elif "time_range" in obj.header: - proc = self.set_lsts_from_time_array(background=background_lsts) + elif "time_range" in meta.header: + proc = self.set_lsts_from_time_array( + background=background_lsts, astrometry_library=astrometry_library + ) # Required parameters for attr in [ @@ -239,16 +229,14 @@ def _read_header( "integration_time", "spw_array", "jones_array", - "channel_width", "cal_style", "cal_type", "gain_convention", "wide_band", "x_orientation", - "flex_spw_id_array", ]: try: - setattr(self, attr, getattr(obj, attr)) + setattr(self, attr, getattr(meta, attr)) except AttributeError as e: raise KeyError(str(e)) from e @@ -257,8 +245,11 @@ def _read_header( # Optional parameters for attr in [ + "channel_width", + "flex_spw_id_array", "Nsources", "baseline_range", + "diffuse_model", "extra_keywords", "freq_array", "freq_range", @@ -271,13 +262,10 @@ def _read_header( "sky_field", ]: try: - setattr(self, attr, getattr(obj, attr)) + setattr(self, attr, getattr(meta, attr)) except AttributeError: pass - if self.blt_order is not None: - self._blt_order.form = (len(self.blt_order),) - # set telescope params try: self.set_telescope_params() @@ -285,9 +273,32 @@ def _read_header( warnings.warn(str(ve)) if run_check_acceptability: - obj.check_lsts_against_times() - + lat, lon, alt = self.telescope_location_lat_lon_alt_degrees + if self.time_array is not None: + uvutils.check_lsts_against_times( + jd_array=self.time_array, + lst_array=self.lst_array, + latitude=lat, + longitude=lon, + altitude=alt, + lst_tols=(0, uvutils.LST_RAD_TOL), + frame=self._telescope_location.frame, + ) + if self.time_range is not None: + uvutils.check_lsts_against_times( + jd_array=self.time_range, + lst_array=self.lst_range, + latitude=lat, + longitude=lon, + altitude=alt, + lst_tols=(0, uvutils.LST_RAD_TOL), + frame=self._telescope_location.frame, + ) self._set_future_array_shapes() + if self.wide_band: + self._set_wide_band() + if self.Nspws > 1 and not self.wide_band: + self._set_flex_spw() if proc is not None: proc.join() @@ -336,7 +347,11 @@ def _get_data( """ # check for bitshuffle data; bitshuffle filter number is 32008 # TODO should we check for any other filters? 
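# --- Illustrative usage aside, not part of the patch. "example.calh5" and the
# selection values below are hypothetical; this is a sketch of the intended call
# pattern, assuming UVCal.read/from_file forward these keywords to read_calh5.
from pyuvdata import UVCal

# metadata-only read
meta_only = UVCal.from_file(
    "example.calh5", read_data=False, use_future_array_shapes=True
)
# partial read: only the requested antennas/channels/jones terms come off disk
subset = UVCal.from_file(
    "example.calh5",
    antenna_nums=[0, 1, 2],
    freq_chans=[0, 1, 2, 3],
    jones=["xx"],
    use_future_array_shapes=True,
)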
- if "32008" in dgrp["visdata"]._filters: + if self.cal_type == "gain": + data_name = "gains" + else: + data_name = "delays" + if "32008" in dgrp[data_name]._filters: if not hdf5plugin_present: # pragma: no cover raise ImportError( "hdf5plugin is not installed but is required to read this dataset" @@ -346,59 +361,49 @@ def _get_data( ( ant_inds, time_inds, + spw_inds, freq_inds, jones_inds, history_update_string, ) = self._select_preprocess( - antenna_nums, - antenna_names, - frequencies, - freq_chans, - times, - time_range, - lsts, - lst_range, - jones, + antenna_nums=antenna_nums, + antenna_names=antenna_names, + frequencies=frequencies, + freq_chans=freq_chans, + spws=spws, + times=times, + time_range=time_range, + lsts=lsts, + lst_range=lst_range, + jones=jones, ) - # figure out which axis is the most selective if ant_inds is not None: - ant_frac = len(ant_inds) / float(self.Nblts) + ant_frac = len(ant_inds) / float(self.Nants_data) else: ant_frac = 1 if time_inds is not None: - time_frac = len(time_inds) / float(self.Nblts) + time_frac = len(time_inds) / float(self.Ntimes) else: time_frac = 1 - if freq_inds is not None: + if freq_inds is not None and not self.wide_band: freq_frac = len(freq_inds) / float(self.Nfreqs) else: freq_frac = 1 + if spw_inds is not None and self.wide_band: + spw_frac = len(spw_inds) / float(self.Nspws) + else: + spw_frac = 1 + if jones_inds is not None: - jones_frac = len(jones_inds) / float(self.Npols) + jones_frac = len(jones_inds) / float(self.Njones) else: jones_frac = 1 - min_frac = np.min([ant_frac, time_frac, freq_frac, jones_frac]) - - if self.cal_type == "gain": - # get the fundamental datatype of the cal data; if integers, we need to - # cast to floats - caldata_dtype = dgrp["gains"].dtype - if caldata_dtype not in ("complex64", "complex128"): - hdf5_utils._check_uvh5_dtype(caldata_dtype) - if gain_array_dtype not in (np.complex64, np.complex128): - raise ValueError( - "gain_array_dtype must be np.complex64 or np.complex128" - ) - custom_dtype = True - else: - custom_dtype = False - else: - custom_dtype = False + min_frac = np.min([ant_frac, time_frac, freq_frac, jones_frac, spw_frac]) quality_present = False if "qualities" in dgrp: @@ -411,12 +416,7 @@ def _get_data( # no select, read in all the data inds = (np.s_[:], np.s_[:], np.s_[:], np.s_[:]) if self.cal_type == "gain": - if custom_dtype: - self.gain_array = hdf5_utils._read_complex_astype( - dgrp["gains"], inds, gain_array_dtype - ) - else: - self.gain_array = uvutils._index_dset(dgrp["gains"], inds) + self.gain_array = uvutils._index_dset(dgrp["gains"], inds) else: self.delay_array = uvutils._index_dset(dgrp["delays"], inds) self.flag_array = uvutils._index_dset(dgrp["flags"], inds) @@ -431,12 +431,16 @@ def _get_data( # do select operations on everything except data_array, flag_array # and nsample_array self._select_by_index( - ant_inds, time_inds, freq_inds, jones_inds, history_update_string + ant_inds=ant_inds, + time_inds=time_inds, + spw_inds=spw_inds, + freq_inds=freq_inds, + jones_inds=jones_inds, + history_update_string=history_update_string, ) # determine which axes can be sliced, rather than fancy indexed - # max_nslice_frac of 0.1 yields slice speedup over fancy index for HERA data - # See pyuvdata PR #805 + # max_nslice_frac of 0.1 is just copied from uvh5, not validated if ant_inds is not None: ant_slices, ant_sliceable = uvutils._convert_to_slices( ant_inds, max_nslice_frac=0.1 @@ -461,6 +465,14 @@ def _get_data( freq_inds, freq_slices = np.s_[:], np.s_[:] freq_sliceable = True 
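# --- Illustrative aside, not part of the patch: what the _convert_to_slices
# calls above do. An index list is rewritten as slice objects where possible,
# and the returned flag reports whether the slice count stayed under
# max_nslice_frac, i.e. whether slicing is expected to beat fancy indexing for
# the h5py read.
import numpy as np

from pyuvdata import utils as uvutils

inds = np.arange(10, 40, 2)  # 15 evenly spaced indices
slices, sliceable = uvutils._convert_to_slices(inds, max_nslice_frac=0.1)
# an evenly spaced list should collapse to a single slice, so sliceable is
# expected to be True here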
+ if spw_inds is not None: + spw_slices, spw_sliceable = uvutils._convert_to_slices( + spw_inds, max_nslice_frac=0.1 + ) + else: + spw_inds, spw_slices = np.s_[:], np.s_[:] + spw_sliceable = True + if jones_inds is not None: jones_slices, jones_sliceable = uvutils._convert_to_slices( jones_inds, max_nslice_frac=0.5 @@ -491,33 +503,43 @@ def _get_data( # change ant_frac so no more selects are done ant_frac = 1 - elif time_frac == min_frac: + elif freq_frac == min_frac: # construct inds list given simultaneous sliceability - inds = [np.s_[:], time_inds, np.s_[:], np.s_[:]] - if time_sliceable: - inds[1] = time_slices + inds = [np.s_[:], freq_inds, np.s_[:], np.s_[:]] + if freq_sliceable: + inds[1] = freq_slices inds = tuple(inds) - # change time_frac so no more selects are done - time_frac = 1 + # change freq_frac so no more selects are done + freq_frac = 1 - elif freq_frac == min_frac: + elif spw_frac == min_frac: # construct inds list given simultaneous sliceability - inds = [np.s_[:], np.s_[:], freq_inds, np.s_[:]] - if freq_sliceable: - inds[1] = freq_slices + inds = [np.s_[:], spw_inds, np.s_[:], np.s_[:]] + if spw_sliceable: + inds[1] = spw_slices inds = tuple(inds) # change freq_frac so no more selects are done - freq_frac = 1 + spw_frac = 1 + + elif time_frac == min_frac: + # construct inds list given simultaneous sliceability + inds = [np.s_[:], np.s_[:], time_inds, np.s_[:]] + if time_sliceable: + inds[2] = time_slices + + inds = tuple(inds) + # change time_frac so no more selects are done + time_frac = 1 else: # construct inds list given simultaneous sliceability inds = [np.s_[:], np.s_[:], np.s_[:], jones_inds] if jones_sliceable: - inds[2] = jones_slices + inds[3] = jones_slices inds = tuple(inds) @@ -525,17 +547,13 @@ def _get_data( jones_frac = 1 # index datasets - if custom_dtype: - cal_data = hdf5_utils._read_complex_astype( - caldata_dset, inds, gain_array_dtype - ) - else: - cal_data = uvutils._index_dset(caldata_dset, inds) + cal_data = uvutils._index_dset(caldata_dset, inds) flags = uvutils._index_dset(flags_dset, inds) if quality_present: qualities = uvutils._index_dset(qualities_dset, inds) if total_quality_present: - total_qualities = uvutils._index_dset(total_qualities_dset, inds) + tq_inds = inds[1:] + total_qualities = uvutils._index_dset(total_qualities_dset, tq_inds) # down select on other dimensions if necessary # use indices not slices here: generally not the bottleneck if ant_frac < 1: @@ -543,20 +561,27 @@ def _get_data( flags = flags[ant_inds] if quality_present: qualities = qualities[ant_inds] - if time_frac < 1: - cal_data = cal_data[:, time_inds] - flags = flags[:, time_inds] + if freq_frac < 1: + cal_data = cal_data[:, freq_inds] + flags = flags[:, freq_inds] if quality_present: - qualities = qualities[:, time_inds] + qualities = qualities[:, freq_inds] if total_quality_present: - total_qualities = total_qualities[time_inds] - if freq_frac < 1: - cal_data = cal_data[:, :, freq_inds] - flags = flags[:, :, freq_inds] + total_qualities = total_qualities[freq_inds] + if spw_frac < 1: + cal_data = cal_data[:, spw_inds] + flags = flags[:, spw_inds] if quality_present: - qualities = qualities[:, :, freq_inds] + qualities = qualities[:, spw_inds] if total_quality_present: - total_qualities = total_qualities[:, freq_inds] + total_qualities = total_qualities[spw_inds] + if time_frac < 1: + cal_data = cal_data[:, :, time_inds] + flags = flags[:, :, time_inds] + if quality_present: + qualities = qualities[:, :, time_inds] + if total_quality_present: + 
total_qualities = total_qualities[:, time_inds] if jones_frac < 1: cal_data = cal_data[:, :, :, jones_inds] flags = flags[:, :, :, jones_inds] @@ -569,7 +594,7 @@ def _get_data( if self.cal_type == "gain": self.gain_array = cal_data else: - self.gain_array = cal_data + self.delay_array = cal_data self.flag_array = flags if quality_present: self.quality_array = qualities @@ -578,15 +603,17 @@ def _get_data( return + @copy_replace_short_description(UVCal.read_calh5, style=DocstringStyle.NUMPYDOC) def read_calh5( self, - filename, + filename: str | Path | FastCalH5Meta, *, antenna_nums=None, antenna_names=None, ant_str=None, frequencies=None, freq_chans=None, + spws=None, times=None, time_range=None, lsts=None, @@ -599,99 +626,15 @@ def read_calh5( check_extra=True, run_check_acceptability=True, use_future_array_shapes=False, + astrometry_library=None, ): - """ - Read in data from a CalH5 file. - - Parameters - ---------- - filename : str - The UVH5 file to read from. - antenna_nums : array_like of int, optional - The antennas numbers to include when reading data into the object - (antenna positions and names for the removed antennas will be retained - unless `keep_all_metadata` is False). This cannot be provided if - `antenna_names` is also provided. Ignored if read_data is False. - antenna_names : array_like of str, optional - The antennas names to include when reading data into the object - (antenna positions and names for the removed antennas will be retained - unless `keep_all_metadata` is False). This cannot be provided if - `antenna_nums` is also provided. Ignored if read_data is False. - frequencies : array_like of float, optional - The frequencies to include when reading data into the object, each - value passed here should exist in the freq_array. Ignored if - read_data is False. - freq_chans : array_like of int, optional - The frequency channel numbers to include when reading data into the - object. Ignored if read_data is False. - times : array_like of float, optional - The times to include when reading data into the object, each value - passed here should exist in the time_array. Cannot be used with - `time_range`. - time_range : array_like of float, optional - The time range in Julian Date to keep in the object, must be - length 2. Some of the times in the object should fall between the - first and last elements. Cannot be used with `times`. - lsts : array_like of float, optional - The local sidereal times (LSTs) to keep in the object, each value - passed here should exist in the lst_array. Cannot be used with - `times`, `time_range`, or `lst_range`. - lst_range : array_like of float, optional - The local sidereal time (LST) range in radians to keep in the - object, must be of length 2. Some of the LSTs in the object should - fall between the first and last elements. If the second value is - smaller than the first, the LSTs are treated as having phase-wrapped - around LST = 2*pi = 0, and the LSTs kept on the object will run from - the larger value, through 0, and end at the smaller value. - jones : array_like of int, optional - The jones polarizations numbers to include when reading data into the - object, each value passed here should exist in the polarization_array. - Ignored if read_data is False. - read_data : bool - Read in the data-like arrays (gains/delays, flags, qualities). If set to - False, only the metadata will be read in. Setting read_data to False - results in a metadata only object. - gain_array_dtype : numpy dtype - Datatype to store the output gain_array as. 
Must be either - np.complex64 (single-precision real and imaginary) or np.complex128 (double- - precision real and imaginary). Only used for gain type files and if the - datatype of the gain data on-disk is not 'c8' or 'c16'. - background_lsts : bool - When set to True, the lst_array is calculated in a background thread. - run_check : bool - Option to check for the existence and proper shapes of parameters - after after reading in the file (the default is True, - meaning the check will be run). Ignored if read_data is False. - check_extra : bool - Option to check optional parameters as well as required ones (the - default is True, meaning the optional parameters will be checked). - Ignored if read_data is False. - run_check_acceptability : bool - Option to check acceptable range of the values of parameters after - reading in the file (the default is True, meaning the acceptable - range check will be done). Ignored if read_data is False. - use_future_array_shapes : bool - Option to convert to the future planned array shapes before the changes go - into effect by removing the spectral window axis. - - Returns - ------- - None - - Raises - ------ - IOError - If filename doesn't exist. - ValueError - If the data_array_dtype is not a complex dtype. - If incompatible select keywords are set (e.g. `times` and `time_range`) or - select keywords exclude all data or if keywords are set to the wrong type. - - """ + """Read in data from a CalH5 file.""" if isinstance(filename, FastCalH5Meta): meta = filename filename = str(meta.path) + close_meta = False else: + close_meta = True meta = FastCalH5Meta(filename) # update filename attribute @@ -704,24 +647,29 @@ def read_calh5( meta, run_check_acceptability=run_check_acceptability, background_lsts=background_lsts, + astrometry_library=astrometry_library, ) if read_data: # Now read in the data self._get_data( meta.datagrp, - antenna_nums, - antenna_names, - frequencies, - freq_chans, - times, - time_range, - lsts, - lst_range, - jones, - gain_array_dtype, + antenna_nums=antenna_nums, + antenna_names=antenna_names, + frequencies=frequencies, + freq_chans=freq_chans, + spws=spws, + times=times, + time_range=time_range, + lsts=lsts, + lst_range=lst_range, + jones=jones, + gain_array_dtype=gain_array_dtype, ) + if close_meta: + meta.close() + if not use_future_array_shapes: warnings.warn(_future_array_shapes_warning, DeprecationWarning) with warnings.catch_warnings(): @@ -776,16 +724,11 @@ def _write_header(self, header): header["Nspws"] = self.Nspws header["Ntimes"] = self.Ntimes header["antenna_numbers"] = self.antenna_numbers - header["channel_width"] = self.channel_width - header["time_array"] = self.time_array - header["freq_array"] = self.freq_array header["integration_time"] = self.integration_time - header["lst_array"] = self.lst_array - header["jones_array"] = self.polarization_array + header["jones_array"] = self.jones_array header["spw_array"] = self.spw_array - header["ant_array"] = self.ant_1_array + header["ant_array"] = self.ant_array header["antenna_positions"] = self.antenna_positions - header["flex_spw_id_array"] = self.flex_spw_id_array # handle antenna_names; works for lists or arrays header["antenna_names"] = np.asarray(self.antenna_names, dtype="bytes") header["x_orientation"] = np.string_(self.x_orientation) @@ -795,10 +738,26 @@ def _write_header(self, header): header["wide_band"] = self.wide_band # write out optional parameters + if self.channel_width is not None: + header["channel_width"] = self.channel_width + if 
self.flex_spw_id_array is not None: + header["flex_spw_id_array"] = self.flex_spw_id_array + + if self.time_array is not None: + header["time_array"] = self.time_array + if self.time_range is not None: + header["time_range"] = self.time_range + if self.lst_array is not None: + header["lst_array"] = self.lst_array + if self.lst_range is not None: + header["lst_range"] = self.lst_range + if self.Nsources is not None: header["Nsources"] = self.Nsources if self.baseline_range is not None: header["baseline_range"] = self.baseline_range + if self.diffuse_model is not None: + header["diffuse_model"] = np.string_(self.diffuse_model) if self.freq_array is not None: header["freq_array"] = self.freq_array if self.freq_range is not None: @@ -835,91 +794,30 @@ def _write_header(self, header): return + @copy_replace_short_description(UVCal.write_calh5, style=DocstringStyle.NUMPYDOC) def write_calh5( self, filename, *, - overwrite=False, + clobber=False, chunks=True, data_compression=None, flags_compression="lzf", quality_compression="lzf", - gain_write_dtype=None, add_to_history=None, run_check=True, check_extra=True, run_check_acceptability=True, ): - """ - Write an in-memory UVCal object to a CalH5 file. - - Parameters - ---------- - filename : str - The CalH5 file to write to. - overwrite : bool - Option to overwrite the file if it already exists. - chunks : tuple or bool - h5py.create_dataset chunks keyword. Tuple for chunk shape, - True for auto-chunking, None for no chunking. Default is True. - data_compression : str - HDF5 filter to apply when writing the gain_array or delay. Default is None - (no filter/compression). In addition to the normal HDF5 filter values, the - user may specify "bitshuffle" which will set the compression to `32008` for - bitshuffle and will set the `compression_opts` to `(0, 2)` to allow - bitshuffle to automatically determine the block size and to use the LZF - filter after bitshuffle. Using `bitshuffle` requires having the - `hdf5plugin` package installed. Dataset must be chunked to use compression. - flags_compression : str - HDF5 filter to apply when writing the flags_array. Default is the - LZF filter. Dataset must be chunked. - quality_compression : str - HDF5 filter to apply when writing the quality_array and/or - total_quality_array if they are defined. Default is the LZF filter. Dataset - must be chunked. - gain_write_dtype : numpy dtype - The datatype of output gain data (only applies if cal_type="gain"). If - 'None', then the same datatype as gain_array will be used. The user may - specify 'c8' for single-precision floats or 'c16' for double-presicion. - Otherwise, a numpy dtype object must be specified with an 'r' field and an - 'i' field for real and imaginary parts, respectively. See uvh5.py for - an example of defining such a datatype. - run_check : bool - Option to check for the existence and proper shapes of parameters - before writing the file. - check_extra : bool - Option to check optional parameters as well as required ones. - run_check_acceptability : bool - Option to check acceptable range of the values of parameters before - writing the file. - - Returns - ------- - None - - Raises - ------ - IOError - If the file located at `filename` already exists and overwrite=False, - an IOError is raised. - - Notes - ----- - The HDF5 library allows for the application of "filters" when writing - data, which can provide moderate to significant levels of compression - for the datasets in question. 
Testing has shown that for some typical - cases of UVData objects (empty/sparse flag_array objects, and/or uniform - nsample_arrays), the built-in LZF filter provides significant - compression for minimal computational overhead. - """ + """Write an in-memory UVCal object to a CalH5 file.""" if run_check: self.check( check_extra=check_extra, run_check_acceptability=run_check_acceptability ) if os.path.exists(filename): - if overwrite: - print("File exists; overwrite") + if clobber: + print("File exists; clobbering") else: raise IOError("File exists; skipping") @@ -943,32 +841,13 @@ def write_calh5( # write out data, flags, and nsample arrays dgrp = f.create_group("Data") if self.cal_type == "gain": - if gain_write_dtype is None: - if self.gain_array.dtype == "complex64": - gain_write_dtype = "c8" - else: - gain_write_dtype = "c16" - if gain_write_dtype not in ("c8", "c16"): - hdf5_utils._check_uvh5_dtype(gain_write_dtype) - gaindata = dgrp.create_dataset( - "gains", - self.gain_array.shape, - chunks=chunks, - compression=data_compression, - compression_opts=data_compression_opts, - dtype=gain_write_dtype, - ) - indices = (np.s_[:], np.s_[:], np.s_[:], np.s_[:]) - hdf5_utils._write_complex_astype(self.gain_array, gaindata, indices) - else: - gaindata = dgrp.create_dataset( - "gains", - chunks=chunks, - data=self.gain_array, - compression=data_compression, - compression_opts=data_compression_opts, - dtype=gain_write_dtype, - ) + dgrp.create_dataset( + "gains", + chunks=chunks, + data=self.gain_array, + compression=data_compression, + compression_opts=data_compression_opts, + ) else: dgrp.create_dataset( "delays", diff --git a/pyuvdata/uvcal/fhd_cal.py b/pyuvdata/uvcal/fhd_cal.py index 96d8299945..6d53b7d337 100644 --- a/pyuvdata/uvcal/fhd_cal.py +++ b/pyuvdata/uvcal/fhd_cal.py @@ -264,10 +264,12 @@ def read_fhd_cal( self.ref_antenna_name = settings_lines["ref_antenna_name"][0] self.Nsources = int(settings_lines["n_sources"][0]) self.sky_catalog = settings_lines["catalog_name"][0] - self.baseline_range = [ - float(settings_lines["min_cal_baseline"][0]), - float(settings_lines["max_cal_baseline"][0]), - ] + self.baseline_range = np.asarray( + [ + float(settings_lines["min_cal_baseline"][0]), + float(settings_lines["max_cal_baseline"][0]), + ] + ) galaxy_model = int(settings_lines["galaxy_model"][0]) if len(settings_lines["diffuse_model"]) > 0: diffuse_model = settings_lines["diffuse_model"][0] @@ -303,10 +305,12 @@ def read_fhd_cal( cal_data["ref_antenna_name"][0].decode("utf8").strip() ) self.Nsources = int(cal_data["skymodel"][0]["n_sources"][0]) - self.baseline_range = [ - float(cal_data["min_cal_baseline"][0]), - float(cal_data["max_cal_baseline"][0]), - ] + self.baseline_range = np.asarray( + [ + float(cal_data["min_cal_baseline"][0]), + float(cal_data["max_cal_baseline"][0]), + ] + ) galaxy_model = cal_data["skymodel"][0]["galaxy_model"][0] diffuse_model = cal_data["skymodel"][0]["diffuse_model"][0] diff --git a/pyuvdata/uvcal/tests/__init__.py b/pyuvdata/uvcal/tests/__init__.py index 7dd21f2142..4dd7a93cb0 100644 --- a/pyuvdata/uvcal/tests/__init__.py +++ b/pyuvdata/uvcal/tests/__init__.py @@ -16,3 +16,82 @@ def time_array_to_time_range(calobj_in, keep_time_array=False): calobj.set_lsts_from_time_array() return calobj + + +def extend_jones_axis(calobj, input_flag=True, total_quality=True): + while calobj.Njones < 4: + new_jones = np.min(calobj.jones_array) - 1 + calobj.jones_array = np.append(calobj.jones_array, new_jones) + calobj.Njones += 1 + if not calobj.metadata_only: + if 
calobj.future_array_shapes: + calobj.flag_array = np.concatenate( + (calobj.flag_array, calobj.flag_array[:, :, :, [-1]]), axis=3 + ) + if calobj.cal_type == "gain": + calobj.gain_array = np.concatenate( + (calobj.gain_array, calobj.gain_array[:, :, :, [-1]]), axis=3 + ) + else: + calobj.delay_array = np.concatenate( + (calobj.delay_array, calobj.delay_array[:, :, :, [-1]]), axis=3 + ) + if calobj.input_flag_array is not None: + calobj.input_flag_array = np.concatenate( + ( + calobj.input_flag_array, + calobj.input_flag_array[:, :, :, [-1]], + ), + axis=3, + ) + calobj.quality_array = np.concatenate( + (calobj.quality_array, calobj.quality_array[:, :, :, [-1]]), axis=3 + ) + if calobj.total_quality_array is not None: + calobj.total_quality_array = np.concatenate( + ( + calobj.total_quality_array, + calobj.total_quality_array[:, :, [-1]], + ), + axis=2, + ) + else: + calobj.flag_array = np.concatenate( + (calobj.flag_array, calobj.flag_array[:, :, :, :, [-1]]), axis=4 + ) + if calobj.cal_type == "gain": + calobj.gain_array = np.concatenate( + (calobj.gain_array, calobj.gain_array[:, :, :, :, [-1]]), axis=4 + ) + else: + calobj.delay_array = np.concatenate( + (calobj.delay_array, calobj.delay_array[:, :, :, :, [-1]]), + axis=4, + ) + if calobj.input_flag_array is not None: + calobj.input_flag_array = np.concatenate( + ( + calobj.input_flag_array, + calobj.input_flag_array[:, :, :, :, [-1]], + ), + axis=4, + ) + calobj.quality_array = np.concatenate( + (calobj.quality_array, calobj.quality_array[:, :, :, :, [-1]]), + axis=4, + ) + if calobj.total_quality_array is not None: + calobj.total_quality_array = np.concatenate( + ( + calobj.total_quality_array, + calobj.total_quality_array[:, :, :, [-1]], + ), + axis=3, + ) + if not calobj.metadata_only: + if calobj.input_flag_array is None and input_flag: + calobj.input_flag_array = calobj.flag_array + if calobj.total_quality_array is None and total_quality: + calobj.total_quality_array = np.ones( + calobj._total_quality_array.expected_shape(calobj) + ) diff --git a/pyuvdata/uvcal/tests/conftest.py b/pyuvdata/uvcal/tests/conftest.py index 51af6721e1..257ef0b70a 100644 --- a/pyuvdata/uvcal/tests/conftest.py +++ b/pyuvdata/uvcal/tests/conftest.py @@ -160,3 +160,100 @@ def fhd_cal_fit(fhd_cal_fit_main): yield fhd_cal del fhd_cal + + +@pytest.fixture +def multi_spw_gain(gain_data): + gain_obj = gain_data.copy() + gain_obj._set_flex_spw() + gain_obj.channel_width = ( + np.zeros(gain_obj.Nfreqs, dtype=np.float64) + gain_obj.channel_width + ) + gain_obj.Nspws = 2 + gain_obj.flex_spw_id_array = np.concatenate( + ( + np.ones(gain_obj.Nfreqs // 2, dtype=int), + np.full(gain_obj.Nfreqs // 2, 2, dtype=int), + ) + ) + gain_obj.spw_array = np.array([1, 2]) + spw2_inds = np.nonzero(gain_obj.flex_spw_id_array == 2)[0] + spw2_chan_width = gain_obj.channel_width[0] * 2 + gain_obj.freq_array[spw2_inds] = gain_obj.freq_array[ + spw2_inds[0] + ] + spw2_chan_width * np.arange(spw2_inds.size) + gain_obj.channel_width[spw2_inds] = spw2_chan_width + gain_obj.check(check_freq_spacing=True) + + yield gain_obj + + del gain_obj + + +@pytest.fixture +def wideband_gain(gain_data): + gain_obj = gain_data.copy() + gain_obj._set_wide_band() + + gain_obj.spw_array = np.array([1, 2, 3]) + gain_obj.Nspws = 3 + gain_obj.gain_array = gain_obj.gain_array[:, 0:3, :, :] + gain_obj.flag_array = gain_obj.flag_array[:, 0:3, :, :] + gain_obj.quality_array = gain_obj.quality_array[:, 0:3, :, :] + gain_obj.input_flag_array = np.zeros( + gain_obj._input_flag_array.expected_shape(gain_obj) + 
).astype(np.bool_) + + gain_obj.freq_range = np.zeros((gain_obj.Nspws, 2), dtype=gain_obj.freq_array.dtype) + gain_obj.freq_range[0, :] = gain_obj.freq_array[[0, 2]] + gain_obj.freq_range[1, :] = gain_obj.freq_array[[2, 4]] + gain_obj.freq_range[2, :] = gain_obj.freq_array[[4, 6]] + + gain_obj.channel_width = None + gain_obj.freq_array = None + gain_obj.flex_spw_id_array = None + gain_obj.Nfreqs = 1 + + with uvtest.check_warnings( + DeprecationWarning, + match="The input_flag_array is deprecated and will be removed in version 2.5", + ): + gain_obj.check(check_freq_spacing=True) + + yield gain_obj + + del gain_obj + + +@pytest.fixture +def multi_spw_delay(delay_data_inputflag): + delay_obj = delay_data_inputflag.copy() + delay_obj.Nspws = 3 + delay_obj.spw_array = np.array([1, 2, 3]) + + # copy the delay array to the second SPW + delay_obj.delay_array = np.repeat(delay_obj.delay_array, delay_obj.Nspws, axis=1) + delay_obj.flag_array = np.repeat(delay_obj.flag_array, delay_obj.Nspws, axis=1) + delay_obj.input_flag_array = np.repeat( + delay_obj.input_flag_array, delay_obj.Nspws, axis=1 + ) + delay_obj.quality_array = np.repeat( + delay_obj.quality_array, delay_obj.Nspws, axis=1 + ) + + delay_obj.freq_range = np.repeat(delay_obj.freq_range, delay_obj.Nspws, axis=0) + # Make the second & third SPWs be contiguous with a 10 MHz range + delay_obj.freq_range[1, 0] = delay_obj.freq_range[0, 1] + delay_obj.freq_range[1, 1] = delay_obj.freq_range[1, 0] + 10e6 + delay_obj.freq_range[2, 0] = delay_obj.freq_range[1, 1] + delay_obj.freq_range[2, 1] = delay_obj.freq_range[1, 1] + 10e6 + + with uvtest.check_warnings( + DeprecationWarning, + match="The input_flag_array is deprecated and will be removed in version 2.5", + ): + delay_obj.check() + + yield delay_obj + + del delay_obj diff --git a/pyuvdata/uvcal/tests/test_calh5.py b/pyuvdata/uvcal/tests/test_calh5.py new file mode 100644 index 0000000000..66b6c64b54 --- /dev/null +++ b/pyuvdata/uvcal/tests/test_calh5.py @@ -0,0 +1,302 @@ +# -*- mode: python; coding: utf-8 -*- +# Copyright (c) 2023 Radio Astronomy Software Group +# Licensed under the 2-clause BSD License + +"""Tests for calh5 object""" +import os + +import h5py +import numpy as np +import pytest + +import pyuvdata.tests as uvtest +from pyuvdata import UVCal +from pyuvdata.uvcal import FastCalH5Meta +from pyuvdata.uvcal.tests import extend_jones_axis, time_array_to_time_range +from pyuvdata.uvcal.uvcal import _future_array_shapes_warning + + +@pytest.mark.filterwarnings("ignore:" + _future_array_shapes_warning) +@pytest.mark.filterwarnings("ignore:This method will be removed in version 3.0") +@pytest.mark.parametrize("future_shapes", [True, False]) +@pytest.mark.parametrize("time_range", [True, False]) +def test_calh5_write_read_loop_gain(gain_data, tmp_path, time_range, future_shapes): + calobj = gain_data + if not future_shapes: + calobj.use_current_array_shapes() + if time_range: + calobj = time_array_to_time_range(calobj) + + # add total_quality_array so that can be tested as well + calobj.total_quality_array = np.ones( + calobj._total_quality_array.expected_shape(calobj) + ) + + write_file = str(tmp_path / "outtest.calh5") + calobj.write_calh5(write_file, clobber=True) + calobj2 = UVCal.from_file(write_file, use_future_array_shapes=future_shapes) + + assert calobj == calobj2 + + cal_meta = FastCalH5Meta(write_file) + calobj3 = UVCal() + calobj3.read_calh5(cal_meta, use_future_array_shapes=future_shapes) + + assert calobj == calobj3 + + +def 
test_calh5_write_read_loop_multi_spw_gain(multi_spw_gain, tmp_path): + calobj = multi_spw_gain + + write_file = str(tmp_path / "outtest.calh5") + calobj.write_calh5(write_file, clobber=True) + calobj2 = UVCal.from_file(write_file, use_future_array_shapes=True) + + assert calobj == calobj2 + + +def test_calh5_write_read_loop_wideband_gain(wideband_gain, tmp_path): + calobj = wideband_gain + calobj.input_flag_array = None + + write_file = str(tmp_path / "outtest.calh5") + calobj.write_calh5(write_file, clobber=True) + calobj2 = UVCal.from_file(write_file, use_future_array_shapes=True) + + assert calobj == calobj2 + + +@pytest.mark.filterwarnings("ignore:" + _future_array_shapes_warning) +@pytest.mark.filterwarnings("ignore:This method will be removed in version 3.0") +@pytest.mark.filterwarnings("ignore:When converting a delay-style cal to future array") +@pytest.mark.parametrize("future_shapes", [True, False]) +@pytest.mark.parametrize("time_range", [True, False]) +def test_calh5_write_read_loop_delay(delay_data, tmp_path, time_range, future_shapes): + calobj = delay_data + if not future_shapes: + calobj.use_current_array_shapes() + if time_range: + calobj = time_array_to_time_range(calobj) + + write_file = str(tmp_path / "outtest.calh5") + calobj.write_calh5(write_file, clobber=True) + calobj2 = UVCal.from_file(write_file, use_future_array_shapes=future_shapes) + + assert calobj == calobj2 + + +def test_calh5_write_read_loop_multi_spw_delay(multi_spw_delay, tmp_path): + calobj = multi_spw_delay + calobj.input_flag_array = None + + write_file = str(tmp_path / "outtest.calh5") + calobj.write_calh5(write_file, clobber=True) + calobj2 = UVCal.from_file(write_file, use_future_array_shapes=True) + + assert calobj == calobj2 + + +def test_calh5_loop_bitshuffle(gain_data, tmp_path): + pytest.importorskip("hdf5plugin") + + calobj = gain_data + write_file = str(tmp_path / "outtest.calh5") + calobj.write_calh5(write_file, clobber=True, data_compression="bitshuffle") + calobj2 = UVCal.from_file(write_file, use_future_array_shapes=True) + + assert calobj == calobj2 + + +def test_calh5_meta(gain_data, tmp_path): + calobj = gain_data + + write_file = str(tmp_path / "outtest.calh5") + calobj.write_calh5(write_file, clobber=True) + cal_meta = FastCalH5Meta(write_file) + + ant_nums = cal_meta.antenna_numbers + jpol_nums = cal_meta.jones_array + jpol_names = cal_meta.pols + + assert cal_meta.has_key(ant_nums[5]) + assert cal_meta.has_key(ant_nums[5], jpol_nums[0]) + assert cal_meta.has_key(ant_nums[5], jpol_names[0]) + + assert not cal_meta.has_key(600) + assert not cal_meta.has_key(ant_nums[5], "ll") + + assert np.allclose(cal_meta.telescope_location, calobj.telescope_location) + + # remove history to test adding pyuvdata version + cal_meta.history = "" + calobj2 = cal_meta.to_uvcal() + calobj3 = calobj.copy(metadata_only=True) + calobj3.history = calobj2.history + + assert calobj2 == calobj3 + + +@pytest.mark.parametrize("time_range", [True, False]) +def test_calh5_no_lsts(gain_data, tmp_path, time_range): + calobj = gain_data + if time_range: + calobj = time_array_to_time_range(calobj) + + write_file = str(tmp_path / "outtest.calh5") + calobj.write_calh5(write_file, clobber=True) + + # remove lst_array from file; check that it's correctly computed on read + with h5py.File(write_file, "r+") as h5f: + if time_range: + del h5f["/Header/lst_range"] + else: + del h5f["/Header/lst_array"] + + calobj2 = UVCal.from_file(write_file, use_future_array_shapes=True) + + assert calobj == calobj2 + + +def 
test_none_extra_keywords(gain_data, tmp_path): + """Test that we can round-trip None values in extra_keywords""" + cal_obj = gain_data + test_calh5 = UVCal() + testfile = str(tmp_path / "none_extra_keywords.calh5") + + cal_obj.extra_keywords["foo"] = None + + cal_obj.write_calh5(testfile) + test_calh5.read(testfile, use_future_array_shapes=True) + + assert test_calh5 == cal_obj + + # also confirm dataset is empty/null + with h5py.File(testfile, "r") as h5f: + assert h5f["Header/extra_keywords/foo"].shape is None + + return + + +def test_calh5_unknown_telescope(gain_data, tmp_path): + cal_obj = gain_data + + cal_obj.telescope_name = "foo" + + testfile = str(tmp_path / "unknown_telescope.calh5") + cal_obj.write_calh5(testfile) + + with uvtest.check_warnings( + UserWarning, match="Telescope foo is not in known_telescopes." + ): + cal_obj2 = UVCal.from_file(testfile, use_future_array_shapes=True) + + assert cal_obj2 == cal_obj + + +@pytest.mark.filterwarnings("ignore:This method will be removed in version 3.0 when") +def test_write_calh5_errors(gain_data, tmp_path): + """ + Test raising errors in write_calh5 function. + """ + cal_obj = gain_data + cal_obj.use_current_array_shapes() + + cal_out = UVCal() + testfile = str(tmp_path / "outtest.calh5") + with open(testfile, "a"): + os.utime(testfile, None) + + # assert IOError if file exists + with pytest.raises(IOError, match="File exists; skipping"): + cal_obj.write_calh5(testfile, clobber=False) + + # use clobber=True to write out anyway + cal_obj.write_calh5(testfile, clobber=True) + with uvtest.check_warnings(DeprecationWarning, match=_future_array_shapes_warning): + cal_out.read(testfile) + + # make sure filenames are what we expect + assert cal_obj.filename == ["zen.2457698.40355.xx.gain.calfits"] + assert cal_out.filename == ["outtest.calh5"] + + assert cal_obj == cal_out + + # check error if missing required params + with h5py.File(testfile, "r+") as h5f: + del h5f["/Header/telescope_name"] + + with pytest.raises(KeyError, match="telescope_name not found in"): + cal_out.read(testfile) + + +@pytest.mark.parametrize( + ["caltype", "param_dict"], + [ + ["gain", {"antenna_nums": np.array([65, 96, 9, 97, 89, 22, 20, 72])}], + ["delay", {"antenna_names": np.array(["ant9"])}], + ["delay", {"times": np.arange(2, 5)}], + ["gain", {"times": 0}], + ["gain", {"freq_chans": np.arange(4, 8)}], + ["delay", {"spws": np.array([1, 3])}], + ["gain", {"freq_chans": 1}], + ["delay", {"spws": np.array([1, 2])}], + ["gain", {"jones": ["xx", "yy"]}], + ["delay", {"jones": -5}], + [ + "gain", + { + "antenna_nums": np.array([65, 96, 9, 97, 89, 22, 20, 72]), + "freq_chans": np.arange(2, 9), + "times": np.arange(2, 5), + "jones": ["xx", "yy"], + }, + ], + [ + "delay", + { + "antenna_nums": np.array([65, 96, 9, 97, 89, 22, 20, 72]), + "spws": [1, 2], + "times": 0, + "jones": -5, + }, + ], + [ + "gain", + { + "freq_chans": np.arange(2, 9), + "times": np.arange(2, 5), + "jones": ["xx", "yy"], + }, + ], + ["delay", {"spws": [1, 2], "times": 0, "jones": [-5, -6]}], + ], +) +def test_calh5_partial_read(gain_data, multi_spw_delay, tmp_path, caltype, param_dict): + if caltype == "gain": + calobj = gain_data + else: + calobj = multi_spw_delay + calobj.input_flag_array = None + + orig_time_array = calobj.time_array + total_quality = True + + for par, val in param_dict.items(): + if par == "times": + param_dict[par] = orig_time_array[val] + + if par.startswith("antenna"): + total_quality = False + + extend_jones_axis(calobj, input_flag=False, total_quality=total_quality) + + 
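+    # Write the full object to CalH5, then verify that applying the same
+    # selection in memory (via select) and at read time (select-on-read)
+    # yields identical objects.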
write_file = str(tmp_path / "outtest.calh5") + calobj.write_calh5(write_file, clobber=True) + + calobj2 = calobj.copy() + + calobj2.select(**param_dict) + + calobj3 = UVCal.from_file(write_file, use_future_array_shapes=True, **param_dict) + + assert calobj2 == calobj3 diff --git a/pyuvdata/uvcal/tests/test_fhd_cal.py b/pyuvdata/uvcal/tests/test_fhd_cal.py index 8fe5610cfd..aa0083cec0 100644 --- a/pyuvdata/uvcal/tests/test_fhd_cal.py +++ b/pyuvdata/uvcal/tests/test_fhd_cal.py @@ -35,9 +35,12 @@ @pytest.mark.parametrize("raw", [True, False]) -def test_read_fhdcal_write_read_calfits(raw, fhd_cal_raw, fhd_cal_fit, tmp_path): +@pytest.mark.parametrize("file_type", ["calfits", "calh5"]) +def test_read_fhdcal_write_read_calfits_h5( + raw, fhd_cal_raw, fhd_cal_fit, tmp_path, file_type +): """ - FHD cal to calfits loopback test. + FHD cal to calfits/calh5 loopback test. Read in FHD cal files, write out as calfits, read back in and check for object equality. @@ -52,10 +55,12 @@ def test_read_fhdcal_write_read_calfits(raw, fhd_cal_raw, fhd_cal_fit, tmp_path) assert np.max(fhd_cal.gain_array) < 2.0 - outfile = str(tmp_path / "outtest_FHDcal_1061311664.calfits") - fhd_cal.write_calfits(outfile, clobber=True) - calfits_cal = UVCal.from_file(outfile, use_future_array_shapes=True) - assert fhd_cal == calfits_cal + outfile = str(tmp_path / ("outtest_FHDcal_1061311664." + file_type)) + write_method = "write_" + file_type + getattr(fhd_cal, write_method)(outfile) + + cal_out = UVCal.from_file(outfile, use_future_array_shapes=True) + assert fhd_cal == cal_out @pytest.mark.filterwarnings("ignore:Telescope location derived from obs lat/lon/alt") diff --git a/pyuvdata/uvcal/tests/test_uvcal.py b/pyuvdata/uvcal/tests/test_uvcal.py index cf524e4346..079bb77860 100644 --- a/pyuvdata/uvcal/tests/test_uvcal.py +++ b/pyuvdata/uvcal/tests/test_uvcal.py @@ -19,7 +19,7 @@ import pyuvdata.utils as uvutils from pyuvdata import UVCal from pyuvdata.data import DATA_PATH -from pyuvdata.uvcal.tests import time_array_to_time_range +from pyuvdata.uvcal.tests import extend_jones_axis, time_array_to_time_range from pyuvdata.uvcal.uvcal import _future_array_shapes_warning pytestmark = pytest.mark.filterwarnings( @@ -107,182 +107,6 @@ def uvcal_data(): return -@pytest.fixture -def multi_spw_gain(gain_data): - gain_obj = gain_data.copy() - gain_obj._set_flex_spw() - gain_obj.channel_width = ( - np.zeros(gain_obj.Nfreqs, dtype=np.float64) + gain_obj.channel_width - ) - gain_obj.Nspws = 2 - gain_obj.flex_spw_id_array = np.concatenate( - ( - np.ones(gain_obj.Nfreqs // 2, dtype=int), - np.full(gain_obj.Nfreqs // 2, 2, dtype=int), - ) - ) - gain_obj.spw_array = np.array([1, 2]) - spw2_inds = np.nonzero(gain_obj.flex_spw_id_array == 2)[0] - spw2_chan_width = gain_obj.channel_width[0] * 2 - gain_obj.freq_array[spw2_inds] = gain_obj.freq_array[ - spw2_inds[0] - ] + spw2_chan_width * np.arange(spw2_inds.size) - gain_obj.channel_width[spw2_inds] = spw2_chan_width - gain_obj.check(check_freq_spacing=True) - - yield gain_obj - - del gain_obj - - -@pytest.fixture -def wideband_gain(gain_data): - gain_obj = gain_data.copy() - gain_obj._set_wide_band() - - gain_obj.spw_array = np.array([1, 2, 3]) - gain_obj.Nspws = 3 - gain_obj.gain_array = gain_obj.gain_array[:, 0:3, :, :] - gain_obj.flag_array = gain_obj.flag_array[:, 0:3, :, :] - gain_obj.quality_array = gain_obj.quality_array[:, 0:3, :, :] - gain_obj.input_flag_array = np.zeros( - gain_obj._input_flag_array.expected_shape(gain_obj) - ).astype(np.bool_) - - gain_obj.freq_range = 
np.zeros((gain_obj.Nspws, 2), dtype=gain_obj.freq_array.dtype) - gain_obj.freq_range[0, :] = gain_obj.freq_array[[0, 2]] - gain_obj.freq_range[1, :] = gain_obj.freq_array[[2, 4]] - gain_obj.freq_range[2, :] = gain_obj.freq_array[[4, 6]] - - gain_obj.channel_width = None - gain_obj.freq_array = None - gain_obj.flex_spw_id_array = None - gain_obj.Nfreqs = 1 - - with uvtest.check_warnings( - DeprecationWarning, - match="The input_flag_array is deprecated and will be removed in version 2.5", - ): - gain_obj.check(check_freq_spacing=True) - - yield gain_obj - - del gain_obj - - -@pytest.fixture -def multi_spw_delay(delay_data_inputflag): - delay_obj = delay_data_inputflag.copy() - delay_obj.Nspws = 3 - delay_obj.spw_array = np.array([1, 2, 3]) - - # copy the delay array to the second SPW - delay_obj.delay_array = np.repeat(delay_obj.delay_array, delay_obj.Nspws, axis=1) - delay_obj.flag_array = np.repeat(delay_obj.flag_array, delay_obj.Nspws, axis=1) - delay_obj.input_flag_array = np.repeat( - delay_obj.input_flag_array, delay_obj.Nspws, axis=1 - ) - delay_obj.quality_array = np.repeat( - delay_obj.quality_array, delay_obj.Nspws, axis=1 - ) - - delay_obj.freq_range = np.repeat(delay_obj.freq_range, delay_obj.Nspws, axis=0) - # Make the second & third SPWs be contiguous with a 10 MHz range - delay_obj.freq_range[1, 0] = delay_obj.freq_range[0, 1] - delay_obj.freq_range[1, 1] = delay_obj.freq_range[1, 0] + 10e6 - delay_obj.freq_range[2, 0] = delay_obj.freq_range[1, 1] - delay_obj.freq_range[2, 1] = delay_obj.freq_range[1, 1] + 10e6 - - with uvtest.check_warnings( - DeprecationWarning, - match="The input_flag_array is deprecated and will be removed in version 2.5", - ): - delay_obj.check() - - yield delay_obj - - del delay_obj - - -def extend_jones_axis(calobj, input_flag=True, total_quality=True): - while calobj.Njones < 4: - new_jones = np.min(calobj.jones_array) - 1 - calobj.jones_array = np.append(calobj.jones_array, new_jones) - calobj.Njones += 1 - if not calobj.metadata_only: - if calobj.future_array_shapes: - calobj.flag_array = np.concatenate( - (calobj.flag_array, calobj.flag_array[:, :, :, [-1]]), axis=3 - ) - if calobj.cal_type == "gain": - calobj.gain_array = np.concatenate( - (calobj.gain_array, calobj.gain_array[:, :, :, [-1]]), axis=3 - ) - else: - calobj.delay_array = np.concatenate( - (calobj.delay_array, calobj.delay_array[:, :, :, [-1]]), axis=3 - ) - if calobj.input_flag_array is not None: - calobj.input_flag_array = np.concatenate( - ( - calobj.input_flag_array, - calobj.input_flag_array[:, :, :, [-1]], - ), - axis=3, - ) - calobj.quality_array = np.concatenate( - (calobj.quality_array, calobj.quality_array[:, :, :, [-1]]), axis=3 - ) - if calobj.total_quality_array is not None: - calobj.total_quality_array = np.concatenate( - ( - calobj.total_quality_array, - calobj.total_quality_array[:, :, [-1]], - ), - axis=2, - ) - else: - calobj.flag_array = np.concatenate( - (calobj.flag_array, calobj.flag_array[:, :, :, :, [-1]]), axis=4 - ) - if calobj.cal_type == "gain": - calobj.gain_array = np.concatenate( - (calobj.gain_array, calobj.gain_array[:, :, :, :, [-1]]), axis=4 - ) - else: - calobj.delay_array = np.concatenate( - (calobj.delay_array, calobj.delay_array[:, :, :, :, [-1]]), - axis=4, - ) - if calobj.input_flag_array is not None: - calobj.input_flag_array = np.concatenate( - ( - calobj.input_flag_array, - calobj.input_flag_array[:, :, :, :, [-1]], - ), - axis=4, - ) - calobj.quality_array = np.concatenate( - (calobj.quality_array, calobj.quality_array[:, :, :, :, [-1]]), - 
axis=4, - ) - if calobj.total_quality_array is not None: - calobj.total_quality_array = np.concatenate( - ( - calobj.total_quality_array, - calobj.total_quality_array[:, :, :, [-1]], - ), - axis=3, - ) - if not calobj.metadata_only: - if calobj.input_flag_array is None and input_flag: - calobj.input_flag_array = calobj.flag_array - if calobj.total_quality_array is None and total_quality: - calobj.total_quality_array = np.ones( - calobj._total_quality_array.expected_shape(calobj) - ) - - def test_parameter_iter(uvcal_data): """Test expected parameters.""" (uv_cal_object, required_parameters, _, extra_parameters, _, _) = uvcal_data @@ -728,7 +552,7 @@ def test_set_redundant(gain_data): def test_convert_filetype(gain_data): # error testing - with pytest.raises(ValueError, match="filetype must be calfits."): + with pytest.raises(ValueError, match="filetype must be calh5 or calfits."): gain_data._convert_to_filetype("uvfits") @@ -1183,14 +1007,19 @@ def test_select_times( # check for errors associated with times not included in data early_time = np.min(orig_time_array) - np.max(calobj.integration_time) if time_range: - msg = "No times or time_ranges matching requested times." + msg = f"Time {early_time} does not fall in any time_range." else: msg = f"Time {early_time} is not present in the time_array" + with pytest.raises(ValueError, match=msg): calobj.select(times=[early_time]) with pytest.raises( - ValueError, match="Only one of times and time_range can be provided." + ValueError, + match=re.escape( + "Only one of [times, time_range, lsts, lst_range] may be specified per " + "selection operation." + ), ): calobj.select(times=times_to_keep, time_range=time_range_to_keep) @@ -1432,6 +1261,7 @@ def test_select_frequencies_multispw(future_shapes, multi_spw_gain, tmp_path): ], ): calobj3.select(spws=1) + assert calobj3 == calobj2 calobj3 = UVCal.from_file(write_file_calfits, use_future_array_shapes=future_shapes) @@ -3949,7 +3779,8 @@ def test_uvcal_get_methods(future_shapes, gain_data): @pytest.mark.filterwarnings("ignore:This method will be removed in version 3.0 when") -def test_write_read_optional_attrs(gain_data, tmp_path): +@pytest.mark.parametrize("file_type", ["calfits", "calh5"]) +def test_write_read_optional_attrs(gain_data, tmp_path, file_type): # read a test file cal_in = gain_data @@ -3958,13 +3789,14 @@ def test_write_read_optional_attrs(gain_data, tmp_path): cal_in.sky_field = "GLEAM" # write - write_file_calfits = str(tmp_path / "test.calfits") + outfile = str(tmp_path / ("test." + file_type)) + write_method = "write_" + file_type with uvtest.check_warnings( DeprecationWarning, match="The sky_field parameter is deprecated and will be removed in version " "2.5", ): - cal_in.write_calfits(write_file_calfits, clobber=True) + getattr(cal_in, write_method)(outfile) # read and compare # also check that passing a single file in a list works properly @@ -3973,7 +3805,7 @@ def test_write_read_optional_attrs(gain_data, tmp_path): match="The sky_field parameter is deprecated and will be removed in version " "2.5", ): - cal_in2 = UVCal.from_file([write_file_calfits], use_future_array_shapes=True) + cal_in2 = UVCal.from_file([outfile], use_future_array_shapes=True) assert cal_in == cal_in2 @@ -4157,7 +3989,8 @@ def test_read_errors(): UVCal.from_file([[gainfile]]) with pytest.raises( - ValueError, match="The only supported file_types are 'calfits' and 'fhd'." 
+ ValueError, + match="The only supported file_types are 'calfits', 'calh5', and 'fhd'.", ): UVCal.from_file(gainfile, file_type="foo") @@ -4252,6 +4085,9 @@ def test_init_from_uvdata_setfreqs( if not uvcal_future_shapes: uvc.use_current_array_shapes() + print(uvc.cal_type) + print(uvc.freq_range) + uvc2 = uvc.copy(metadata_only=True) uvc2.select(frequencies=freqs_use) diff --git a/pyuvdata/uvcal/uvcal.py b/pyuvdata/uvcal/uvcal.py index c3dd28b89b..4d3d49f9bb 100644 --- a/pyuvdata/uvcal/uvcal.py +++ b/pyuvdata/uvcal/uvcal.py @@ -14,7 +14,7 @@ from .. import parameter as uvp from .. import telescopes as uvtel from .. import utils as uvutils -from ..docstrings import combine_docstrings +from ..docstrings import combine_docstrings, copy_replace_short_description from ..uvbase import UVBase from . import initializers @@ -30,40 +30,6 @@ ) -def _check_range_overlap(val_range, range_type="time"): - """ - Detect if any val_range in an array overlap. - - Parameters - ---------- - val_range : np.array of float - Array of ranges, shape (Nranges, 2). - range_type : str - Type of range (for good error messages) - - Returns - ------- - bool - True if any range overlaps. - """ - # first check that time ranges are well formed (stop is >= than start) - if np.any((val_range[:, 1] - val_range[:, 0]) < 0): - raise ValueError( - f"The {range_type} ranges are not well-formed, some stop {range_type}s " - f"are after start {range_type}s." - ) - - # Sort by start time - sorted_ranges = val_range[np.argsort(val_range[:, 0]), :] - - # then check if adjacent pairs overlap - for ind in range(sorted_ranges.shape[0] - 1): - range1 = sorted_ranges[ind] - range2 = sorted_ranges[ind + 1] - if range2[0] < range1[1]: - return True - - def _time_param_check(this, other): """Check if any time parameter is defined in this but not in other or vice-versa.""" if not isinstance(other, list): @@ -242,7 +208,6 @@ def __init__(self): desc = ( "Array of frequencies, center of the channel, " "shape (1, Nfreqs) or (Nfreqs,) if future_array_shapes=True, units Hz." - "Not required if future_array_shapes=True and wide_band=True." ) # TODO: Spw axis to be collapsed in future release self._freq_array = uvp.UVParameter( @@ -258,7 +223,7 @@ def __init__(self): "future_array_shapes=False, then it is a " "single value of type = float, otherwise it is an array of shape " "(Nfreqs,), type = float." - "Not required if future_array_shapes=True and wide_band=True." + "Should not be set if future_array_shapes=True and wide_band=True." 
) self._channel_width = uvp.UVParameter( "channel_width", description=desc, expected_type=float, tols=1e-3 @@ -1379,8 +1344,7 @@ def check( check_extra=check_extra, run_check_acceptability=run_check_acceptability ) - # deprecate having both time_array and time_range set - # check that corresponding lst parameters are set + # deprecate having both arrays and ranges set for times and lsts time_like_pairs = [("time_array", "time_range"), ("lst_array", "lst_range")] for pair in time_like_pairs: if ( @@ -1395,6 +1359,7 @@ def check( elif getattr(self, pair[0]) is None and getattr(self, pair[1]) is None: raise ValueError(f"Either {pair[0]} or {pair[1]} must be set.") + # check that corresponding lst/time parameters are set for tp_ind, param in enumerate(time_like_pairs[0]): lst_param = time_like_pairs[1][tp_ind] if getattr(self, param) is not None and getattr(self, lst_param) is None: @@ -1404,7 +1369,7 @@ def check( # check that time ranges are well formed and do not overlap if self.time_range is not None: - if _check_range_overlap(self.time_range): + if uvutils._check_range_overlap(self.time_range): raise ValueError("Some time_ranges overlap.") # note: do not check lst range overlap because of branch cut. # Assume they are ok if time_ranges are ok. @@ -2557,7 +2522,7 @@ def __add__( DeprecationWarning, ) if this.time_range is not None: - if _check_range_overlap( + if uvutils._check_range_overlap( np.concatenate((this.time_range, other.time_range), axis=0) ): raise ValueError("A time_range overlaps in the two objects.") @@ -4152,21 +4117,19 @@ def fast_concat( if not inplace: return this - def select( + def _select_preprocess( self, *, - antenna_nums=None, - antenna_names=None, - frequencies=None, - freq_chans=None, - spws=None, - times=None, - time_range=None, - jones=None, - run_check=True, - check_extra=True, - run_check_acceptability=True, - inplace=True, + antenna_nums, + antenna_names, + frequencies, + freq_chans, + spws, + times, + time_range, + lsts, + lst_range, + jones, ): """ Downselect data to keep on the object along various axes. @@ -4204,33 +4167,40 @@ def select( The time range in Julian Date to keep in the object, must be length 2. Some of the times in the object should fall between the first and last elements. Cannot be used with `times`. + lsts : array_like of float, optional + The local sidereal times (LSTs) to keep in the object, each value + passed here should exist in the lst_array. Cannot be used with + `times`, `time_range`, or `lst_range`. + lst_range : array_like of float, optional + The local sidereal time (LST) range in radians to keep in the + object, must be of length 2. Some of the LSTs in the object should + fall between the first and last elements. If the second value is + smaller than the first, the LSTs are treated as having phase-wrapped + around LST = 2*pi = 0, and the LSTs kept on the object will run from + the larger value, through 0, and end at the smaller value. jones : array_like of int or str, optional The antenna polarizations numbers to keep in the object, each value passed here should exist in the jones_array. If passing strings, the canonical polarization strings (e.g. "Jxx", "Jrr") are supported and if the `x_orientation` attribute is set, the physical dipole strings (e.g. "Jnn", "Jee") are also supported. - run_check : bool - Option to check for the existence and proper shapes of parameters - after downselecting data on this object (the default is True, - meaning the check will be run). 
- check_extra : bool - Option to check optional parameters as well as required ones (the - default is True, meaning the optional parameters will be checked). - run_check_acceptability : bool - Option to check acceptable range of the values of parameters after - downselecting data on this object (the default is True, meaning the - acceptable range check will be done). - inplace : bool - Option to perform the select directly on self or return a new UVCal - object with just the selected data (the default is True, meaning the - select will be done on self). - """ - if inplace: - cal_object = self - else: - cal_object = self.copy() + Returns + ------- + ant_inds : list of int + list of antenna indices to keep. Can be None (to keep everything). + time_inds : list of int + list of time indices to keep. Can be None (to keep everything). + spw_inds : list of int + list of spw indices to keep. Can be None (to keep everything). + freq_inds : list of int + list of frequency indices to keep. Can be None (to keep everything). + pol_inds : list of int + list of polarization indices to keep. Can be None (to keep everything). + history_update_string : str + string to append to the end of the history. + + """ # build up history string as we go history_update_string = " Downselected to specific " n_selects = 0 @@ -4244,12 +4214,12 @@ def select( antenna_names = uvutils._get_iterable(antenna_names) antenna_nums = [] for s in antenna_names: - if s not in cal_object.antenna_names: + if s not in self.antenna_names: raise ValueError( f"Antenna name {s} is not present in the antenna_names array" ) - ind = np.where(np.array(cal_object.antenna_names) == s)[0][0] - antenna_nums.append(cal_object.antenna_numbers[ind]) + ind = np.where(np.array(self.antenna_names) == s)[0][0] + antenna_nums.append(self.antenna_numbers[ind]) if antenna_nums is not None: antenna_nums = uvutils._get_iterable(antenna_nums) @@ -4258,42 +4228,19 @@ def select( ant_inds = np.zeros(0, dtype=np.int64) for ant in antenna_nums: - if ant in cal_object.ant_array: - ant_inds = np.append( - ant_inds, np.where(cal_object.ant_array == ant)[0] - ) + if ant in self.ant_array: + ant_inds = np.append(ant_inds, np.where(self.ant_array == ant)[0]) else: raise ValueError( f"Antenna number {ant} is not present in the array" ) ant_inds = sorted(set(ant_inds)) - cal_object.Nants_data = len(ant_inds) - cal_object.ant_array = cal_object.ant_array[ant_inds] - if not self.metadata_only: - cal_object.flag_array = cal_object.flag_array[ant_inds] - if cal_object.quality_array is not None: - cal_object.quality_array = cal_object.quality_array[ant_inds] - if cal_object.cal_type == "delay": - cal_object.delay_array = cal_object.delay_array[ant_inds] - else: - cal_object.gain_array = cal_object.gain_array[ant_inds] - - if cal_object.input_flag_array is not None: - cal_object.input_flag_array = cal_object.input_flag_array[ant_inds] - - if cal_object.total_quality_array is not None: - warnings.warn( - "Cannot preserve total_quality_array when changing " - "number of antennas; discarding" - ) - cal_object.total_quality_array = None - - if times is not None and time_range is not None: - raise ValueError("Only one of times and time_range can be provided.") + else: + ant_inds = None if (times is not None or time_range is not None) and ( - cal_object.time_array is not None and cal_object.time_range is not None + self.time_array is not None and self.time_range is not None ): warnings.warn( "The time_array and time_range attributes are both set. 
" @@ -4302,6 +4249,21 @@ def select( DeprecationWarning, ) + time_inds = uvutils._select_times_helper( + times=times, + time_range=time_range, + lsts=lsts, + lst_range=lst_range, + obj_time_array=self.time_array, + obj_time_range=self.time_range, + obj_lst_array=self.lst_array, + obj_lst_range=self.lst_range, + time_tols=self._time_array.tols, + lst_tols=self._lst_array.tols, + ) + if time_inds is not None: + time_inds = sorted(set(time_inds.tolist())) + if times is not None or time_range is not None: if n_selects > 0: history_update_string += ", times" @@ -4309,124 +4271,34 @@ def select( history_update_string += "times" n_selects += 1 - time_inds = np.zeros(0, dtype=np.int64) - if times is not None: - times = uvutils._get_iterable(times) - if cal_object.time_range is not None: - for jd in times: - time_inds = np.append( - time_inds, - np.nonzero( - np.logical_and( - (cal_object.time_range[:, 0] <= jd), - (cal_object.time_range[:, 1] >= jd), - ) - )[0], - ) - else: - for jd in times: - if jd in cal_object.time_array: - time_inds = np.append( - time_inds, np.nonzero(cal_object.time_array == jd)[0] - ) - else: - raise ValueError( - f"Time {jd} is not present in the time_array" - ) + if lsts is not None or lst_range is not None: + if n_selects > 0: + history_update_string += ", lsts" else: - if cal_object.time_range is not None: - for tind, trange in enumerate(cal_object.time_range): - if _check_range_overlap(np.stack((trange, time_range), axis=0)): - time_inds = np.append(time_inds, tind) - else: - time_inds = np.nonzero( - np.logical_and( - (cal_object.time_array >= time_range[0]), - (cal_object.time_array <= time_range[1]), - ) - )[0] - - if time_inds.size == 0: - raise ValueError("No times or time_ranges matching requested times.") - - time_inds = np.array(sorted(set(time_inds.tolist()))) - cal_object.Ntimes = time_inds.size - if cal_object.time_array is not None: - cal_object.time_array = cal_object.time_array[time_inds] - if cal_object.time_range is not None: - cal_object.time_range = cal_object.time_range[time_inds] - if cal_object.lst_array is not None: - cal_object.lst_array = cal_object.lst_array[time_inds] - if cal_object.lst_range is not None: - cal_object.lst_range = cal_object.lst_range[time_inds] - if self.future_array_shapes: - cal_object.integration_time = cal_object.integration_time[time_inds] + history_update_string += "lsts" + n_selects += 1 - if cal_object.Ntimes > 1 and self.time_array is not None: - if not uvutils._test_array_constant_spacing(cal_object._time_array): + if time_inds is not None and self.time_range is None: + # don't warn if time_range is not None because calfits does not support + # multiple time_range values + time_inds_arr = np.array(time_inds) + if time_inds_arr.size > 1: + time_ind_separation = time_inds_arr[1:] - time_inds_arr[:-1] + if not uvutils._test_array_constant(time_ind_separation): warnings.warn( "Selected times are not evenly spaced. This " "is not supported by the calfits format." 
) - if not self.metadata_only: - if self.future_array_shapes: - cal_object.flag_array = cal_object.flag_array[:, :, time_inds, :] - if cal_object.quality_array is not None: - cal_object.quality_array = cal_object.quality_array[ - :, :, time_inds, : - ] - if cal_object.cal_type == "delay": - cal_object.delay_array = cal_object.delay_array[ - :, :, time_inds, : - ] - else: - cal_object.gain_array = cal_object.gain_array[ - :, :, time_inds, : - ] - - if cal_object.input_flag_array is not None: - cal_object.input_flag_array = cal_object.input_flag_array[ - :, :, time_inds, : - ] - - if cal_object.total_quality_array is not None: - cal_object.total_quality_array = cal_object.total_quality_array[ - :, time_inds, : - ] - else: - cal_object.flag_array = cal_object.flag_array[:, :, :, time_inds, :] - if cal_object.quality_array is not None: - cal_object.quality_array = cal_object.quality_array[ - :, :, :, time_inds, : - ] - if cal_object.cal_type == "delay": - cal_object.delay_array = cal_object.delay_array[ - :, :, :, time_inds, : - ] - else: - cal_object.gain_array = cal_object.gain_array[ - :, :, :, time_inds, : - ] - - if cal_object.input_flag_array is not None: - cal_object.input_flag_array = cal_object.input_flag_array[ - :, :, :, time_inds, : - ] - - if cal_object.total_quality_array is not None: - cal_object.total_quality_array = cal_object.total_quality_array[ - :, :, time_inds, : - ] - if spws is not None: - if cal_object.Nspws == 1: + if self.Nspws == 1: warnings.warn( "Cannot select on spws if Nspws=1. Ignoring the spw parameter." ) + spw_inds = None else: - if not cal_object.wide_band: - assert cal_object.flex_spw is True, ( + if not self.wide_band: + assert self.flex_spw is True, ( "The `flex_spw` parameter must be True if there are multiple " "spectral windows and the `wide_band` parameter is not True." 
) @@ -4434,12 +4306,13 @@ def select( if frequencies is None: if self.future_array_shapes: frequencies = self.freq_array[ - np.isin(cal_object.flex_spw_id_array, spws) + np.isin(self.flex_spw_id_array, spws) ] else: frequencies = self.freq_array[ - 0, np.isin(cal_object.flex_spw_id_array, spws) + 0, np.isin(self.flex_spw_id_array, spws) ] + spw_inds = None else: assert self.future_array_shapes, ( "The `future_array_shapes` parameter must be True if the " @@ -4452,42 +4325,18 @@ def select( n_selects += 1 # Check and see that all requested spws are available - spw_check = np.isin(spws, cal_object.spw_array) + spw_check = np.isin(spws, self.spw_array) if not np.all(spw_check): raise ValueError( f"SPW number {spws[np.where(~spw_check)[0][0]]} is not " "present in the spw_array" ) - spw_inds = np.where(np.isin(cal_object.spw_array, spws))[0] + spw_inds = np.where(np.isin(self.spw_array, spws))[0] spw_inds = sorted(set(spw_inds)) - cal_object.Nspws = len(spw_inds) - cal_object.freq_range = cal_object.freq_range[spw_inds, :] - cal_object.spw_array = cal_object.spw_array[spw_inds] - - if not cal_object.metadata_only: - if cal_object.cal_type == "delay": - cal_object.delay_array = cal_object.delay_array[ - :, spw_inds, :, : - ] - else: - cal_object.gain_array = cal_object.gain_array[ - :, spw_inds, :, : - ] - - cal_object.flag_array = cal_object.flag_array[:, spw_inds, :, :] - if cal_object.input_flag_array is not None: - cal_object.input_flag_array = cal_object.input_flag_array[ - :, spw_inds, :, : - ] - if cal_object.quality_array is not None: - cal_object.quality_array = cal_object.quality_array[ - :, spw_inds, :, : - ] - if cal_object.total_quality_array is not None: - tqa = cal_object.total_quality_array[spw_inds, :, :] - cal_object.total_quality_array = tqa + else: + spw_inds = None if self.freq_array is None and ( freq_chans is not None or frequencies is not None @@ -4500,20 +4349,18 @@ def select( freq_chans = uvutils._get_iterable(freq_chans) if frequencies is None: if self.future_array_shapes: - frequencies = cal_object.freq_array[freq_chans] + frequencies = self.freq_array[freq_chans] else: - frequencies = cal_object.freq_array[0, freq_chans] + frequencies = self.freq_array[0, freq_chans] else: frequencies = uvutils._get_iterable(frequencies) if self.future_array_shapes: frequencies = np.sort( - list(set(frequencies) | set(cal_object.freq_array[freq_chans])) + list(set(frequencies) | set(self.freq_array[freq_chans])) ) else: frequencies = np.sort( - list( - set(frequencies) | set(cal_object.freq_array[0, freq_chans]) - ) + list(set(frequencies) | set(self.freq_array[0, freq_chans])) ) if frequencies is not None: @@ -4524,7 +4371,7 @@ def select( history_update_string += "frequencies" n_selects += 1 - if cal_object.future_array_shapes: + if self.future_array_shapes: freq_arr_use = self.freq_array else: freq_arr_use = self.freq_array[0, :] @@ -4538,105 +4385,28 @@ def select( ) freq_inds = np.where(np.isin(freq_arr_use, frequencies))[0] - freq_inds = sorted(set(freq_inds)) - cal_object.Nfreqs = len(freq_inds) - if cal_object.future_array_shapes: - cal_object.freq_array = cal_object.freq_array[freq_inds] - else: - cal_object.freq_array = cal_object.freq_array[:, freq_inds] - - if cal_object.future_array_shapes or cal_object.flex_spw: - cal_object.channel_width = cal_object.channel_width[freq_inds] - - if cal_object.flex_spw_id_array is not None: - cal_object.flex_spw_id_array = cal_object.flex_spw_id_array[freq_inds] - - if cal_object.flex_spw: - spw_mask = np.isin(cal_object.spw_array, 
cal_object.flex_spw_id_array) - cal_object.spw_array = cal_object.spw_array[spw_mask] - cal_object.Nspws = cal_object.spw_array.size - if cal_object.freq_range is not None and cal_object.future_array_shapes: - cal_object.freq_range = np.zeros( - (cal_object.Nspws, 2), dtype=cal_object.freq_array.dtype - ) - for index, spw in enumerate(cal_object.spw_array): - spw_inds = np.nonzero(cal_object.flex_spw_id_array == spw)[0] - cal_object.freq_range[index, 0] = np.min( - cal_object.freq_array[spw_inds] - ) - cal_object.freq_range[index, 1] = np.max( - cal_object.freq_array[spw_inds] - ) - else: - if cal_object.freq_range is not None: - cal_object.freq_range = [ - np.min(cal_object.freq_array), - np.max(cal_object.freq_array), + if len(frequencies) > 1: + freq_ind_separation = freq_inds[1:] - freq_inds[:-1] + if self.flex_spw_id_array is not None: + freq_ind_separation = freq_ind_separation[ + np.diff(self.flex_spw_id_array[freq_inds]) == 0 ] - if cal_object.future_array_shapes: - cal_object.freq_range = np.asarray(cal_object.freq_range)[ - np.newaxis, : - ] - - if cal_object.Nfreqs > 1: - spacing_error, chanwidth_error = cal_object._check_freq_spacing( - raise_errors=False - ) - if spacing_error: + if not uvutils._test_array_constant(freq_ind_separation): warnings.warn( "Selected frequencies are not evenly spaced. This " "will make it impossible to write this data out to " "some file types" ) - elif chanwidth_error: + elif np.max(freq_ind_separation) > 1: warnings.warn( "Selected frequencies are not contiguous. This " "will make it impossible to write this data out to " "some file types." ) - if not cal_object.metadata_only: - if not cal_object.future_array_shapes: - cal_object.flag_array = cal_object.flag_array[:, :, freq_inds, :, :] - if cal_object.input_flag_array is not None: - cal_object.input_flag_array = cal_object.input_flag_array[ - :, :, freq_inds, :, : - ] - - if cal_object.cal_type == "delay": - pass - else: - if cal_object.future_array_shapes: - cal_object.flag_array = cal_object.flag_array[ - :, freq_inds, :, : - ] - if cal_object.input_flag_array is not None: - cal_object.input_flag_array = cal_object.input_flag_array[ - :, freq_inds, :, : - ] - if cal_object.quality_array is not None: - cal_object.quality_array = cal_object.quality_array[ - :, freq_inds, :, : - ] - cal_object.gain_array = cal_object.gain_array[ - :, freq_inds, :, : - ] - - if cal_object.total_quality_array is not None: - tqa = cal_object.total_quality_array[freq_inds, :, :] - cal_object.total_quality_array = tqa - else: - if cal_object.quality_array is not None: - cal_object.quality_array = cal_object.quality_array[ - :, :, freq_inds, :, : - ] - cal_object.gain_array = cal_object.gain_array[ - :, :, freq_inds, :, : - ] - - if cal_object.total_quality_array is not None: - tqa = cal_object.total_quality_array[:, freq_inds, :, :] - cal_object.total_quality_array = tqa + freq_inds = sorted(set(freq_inds)) + else: + freq_inds = None if jones is not None: jones = uvutils._get_iterable(jones) @@ -4654,9 +4424,9 @@ def select( j_num = uvutils.jstr2num(j, x_orientation=self.x_orientation) else: j_num = j - if j_num in cal_object.jones_array: + if j_num in self.jones_array: jones_inds = np.append( - jones_inds, np.where(cal_object.jones_array == j_num)[0] + jones_inds, np.where(self.jones_array == j_num)[0] ) else: raise ValueError( @@ -4664,73 +4434,271 @@ def select( ) jones_inds = sorted(set(jones_inds)) - cal_object.Njones = len(jones_inds) - cal_object.jones_array = cal_object.jones_array[jones_inds] - if 
len(jones_inds) > 2: - jones_separation = ( - cal_object.jones_array[1:] - cal_object.jones_array[:-1] + if not uvutils._test_array_constant_spacing(self.jones_array[jones_inds]): + warnings.warn( + "Selected jones polarization terms are not evenly spaced. This " + "will make it impossible to write this data out to some file types." ) - if not uvutils._test_array_constant(jones_separation): + else: + jones_inds = None + + history_update_string += " using pyuvdata." + + if n_selects == 0: + history_update_string = "" + + return ( + ant_inds, + time_inds, + spw_inds, + freq_inds, + jones_inds, + history_update_string, + ) + + def _select_by_index( + self, + *, + ant_inds, + time_inds, + spw_inds, + freq_inds, + jones_inds, + history_update_string, + ): + """ + Perform select based on indexing arrays. + + Parameters + ---------- + ant_inds : list of int + list of antenna indices to keep. Can be None (to keep everything). + time_inds : list of int + list of time indices to keep. Can be None (to keep everything). + freq_inds : list of int + list of frequency indices to keep. Can be None (to keep everything). + jones_inds : list of int + list of jones indices to keep. Can be None (to keep everything). + history_update_string : str + string to append to the end of the history. + """ + # Create a dictionary that we can loop over an update if need be + ind_dict = { + "Nants_data": ant_inds, + "Ntimes": time_inds, + "Nspws": spw_inds, + "Nfreqs": freq_inds, + "Njones": jones_inds, + } + + # During each loop interval, we pop off an element of this dict, so continue + # until the dict is empty. + while len(ind_dict): + # This is an easy way to grab the first key in the dict + key = next(iter(ind_dict)) + # Grab the corresponding index array + ind_arr = ind_dict.pop(key) + + # If nothing to select on, bail! + if ind_arr is None: + continue + + for param in self: + # For each attribute, if the value is None, then bail, otherwise + # attempt to figure out along which axis ind_arr will apply. + attr = getattr(self, param) + if attr.value is not None: + try: + sel_axis = attr.form.index(key) + except (AttributeError, ValueError): + # If form is not a tuple/list (and therefore not + # array-like), it'll throw an AttributeError, and if key is + # not found in the tuple/list, it'll throw a ValueError. + # In both cases, skip! + continue + + if isinstance(attr.value, np.ndarray): + # If we're working with an ndarray, use take to slice along + # the axis that we want to grab from. + attr.value = attr.value.take(ind_arr, axis=sel_axis) + elif isinstance(attr.value, list): + # If this is a list, it _should_ always have 1-dimension. + assert sel_axis == 0, ( + "Something is wrong, sel_axis != 0 when selecting on a " + "list, which should not be possible. Please file an " + "issue in our GitHub issue log so that we can fix it." + ) + attr.value = [attr.value[idx] for idx in ind_arr] + + if key == "Nants_data": + # Process post blt-specific selection actions, including counting + # unique times antennas/baselines in the data. + self.Nants_data = len(ind_arr) + if self.total_quality_array is not None: warnings.warn( - "Selected jones polarization terms are not evenly spaced. 
This " - "is not supported by the calfits format" + "Cannot preserve total_quality_array when changing " + "number of antennas; discarding" ) + self.total_quality_array = None + elif key == "Ntimes": + # Process post freq-specific selection actions + self.Ntimes = len(ind_arr) + elif key == "Nfreqs": + # Process post freq-specific selection actions + self.Nfreqs = len(ind_arr) + if self.flex_spw_id_array is not None: + # If we are dropping channels, then evaluate the spw axis + ind_dict["Nspws"] = np.where( + np.isin(self.spw_array, self.flex_spw_id_array) + )[0] + elif key == "Njones": + # Count the number of unique pols after pol-based selection + self.Njones = len(ind_arr) + elif key == "Nspws": + # Count the number of unique pols after spw-based selection + self.Nspws = len(ind_arr) + elif key == "Nants_telescope": + # Count the number of unique ants after ant-based selection + self.Nants_telescope = len(ind_arr) + + # Update the history string + self.history += history_update_string - if not cal_object.metadata_only: - if cal_object.future_array_shapes: - cal_object.flag_array = cal_object.flag_array[:, :, :, jones_inds] - if cal_object.quality_array is not None: - cal_object.quality_array = cal_object.quality_array[ - :, :, :, jones_inds - ] - if cal_object.cal_type == "delay": - cal_object.delay_array = cal_object.delay_array[ - :, :, :, jones_inds - ] - else: - cal_object.gain_array = cal_object.gain_array[ - :, :, :, jones_inds - ] + def select( + self, + *, + antenna_nums=None, + antenna_names=None, + frequencies=None, + freq_chans=None, + spws=None, + times=None, + time_range=None, + lsts=None, + lst_range=None, + jones=None, + run_check=True, + check_extra=True, + run_check_acceptability=True, + inplace=True, + ): + """ + Downselect data to keep on the object along various axes. - if cal_object.input_flag_array is not None: - cal_object.input_flag_array = cal_object.input_flag_array[ - :, :, :, jones_inds - ] + Axes that can be selected along include antennas, frequencies, times and + antenna polarization (jones). - if cal_object.total_quality_array is not None: - cal_object.total_quality_array = cal_object.total_quality_array[ - :, :, jones_inds - ] - else: - cal_object.flag_array = cal_object.flag_array[ - :, :, :, :, jones_inds - ] - if cal_object.quality_array is not None: - cal_object.quality_array = cal_object.quality_array[ - :, :, :, :, jones_inds - ] - if cal_object.cal_type == "delay": - cal_object.delay_array = cal_object.delay_array[ - :, :, :, :, jones_inds - ] - else: - cal_object.gain_array = cal_object.gain_array[ - :, :, :, :, jones_inds - ] + The history attribute on the object will be updated to identify the + operations performed. - if cal_object.input_flag_array is not None: - cal_object.input_flag_array = cal_object.input_flag_array[ - :, :, :, :, jones_inds - ] + Parameters + ---------- + antenna_nums : array_like of int, optional + The antennas numbers to keep in the object (antenna positions and + names for the removed antennas will be retained). + This cannot be provided if `antenna_names` is also provided. + antenna_names : array_like of str, optional + The antennas names to keep in the object (antenna positions and + names for the removed antennas will be retained). + This cannot be provided if `antenna_nums` is also provided. + frequencies : array_like of float, optional + The frequencies to keep in the object, each value passed here should + exist in the freq_array. 
+ freq_chans : array_like of int, optional + The frequency channel numbers to keep in the object. + spws : array_like of in, optional + The spectral window numbers to keep in the object. If this is not a + wide-band object and `frequencies` or `freq_chans` is not None, frequencies + that match any of the specifications will be kept (i.e. the selections will + be OR'ed together). + times : array_like of float, optional + The times to keep in the object, each value passed here should exist in + the time_array or be contained in a time_range on the object. + time_range : array_like of float, optional + The time range in Julian Date to keep in the object, must be + length 2. Some of the times in the object should fall between the + first and last elements. Cannot be used with `times`. + lsts : array_like of float, optional + The local sidereal times (LSTs) to keep in the object, each value + passed here should exist in the lst_array. Cannot be used with + `times`, `time_range`, or `lst_range`. + lst_range : array_like of float, optional + The local sidereal time (LST) range in radians to keep in the + object, must be of length 2. Some of the LSTs in the object should + fall between the first and last elements. If the second value is + smaller than the first, the LSTs are treated as having phase-wrapped + around LST = 2*pi = 0, and the LSTs kept on the object will run from + the larger value, through 0, and end at the smaller value. + jones : array_like of int or str, optional + The antenna polarizations numbers to keep in the object, each value + passed here should exist in the jones_array. If passing strings, the + canonical polarization strings (e.g. "Jxx", "Jrr") are supported and if the + `x_orientation` attribute is set, the physical dipole strings + (e.g. "Jnn", "Jee") are also supported. + run_check : bool + Option to check for the existence and proper shapes of parameters + after downselecting data on this object (the default is True, + meaning the check will be run). + check_extra : bool + Option to check optional parameters as well as required ones (the + default is True, meaning the optional parameters will be checked). + run_check_acceptability : bool + Option to check acceptable range of the values of parameters after + downselecting data on this object (the default is True, meaning the + acceptable range check will be done). + inplace : bool + Option to perform the select directly on self or return a new UVCal + object with just the selected data (the default is True, meaning the + select will be done on self). + """ + if inplace: + cal_object = self + else: + cal_object = self.copy() - if cal_object.total_quality_array is not None: - cal_object.total_quality_array = cal_object.total_quality_array[ - :, :, :, jones_inds - ] + # Figure out which index positions we want to hold on to. + ( + ant_inds, + time_inds, + spw_inds, + freq_inds, + jones_inds, + history_update_string, + ) = cal_object._select_preprocess( + antenna_nums=antenna_nums, + antenna_names=antenna_names, + frequencies=frequencies, + freq_chans=freq_chans, + spws=spws, + times=times, + time_range=time_range, + lsts=lsts, + lst_range=lst_range, + jones=jones, + ) - if n_selects > 0: - history_update_string += " using pyuvdata." - cal_object.history = cal_object.history + history_update_string + # Call the low-level selection method. 
+ cal_object._select_by_index( + ant_inds=ant_inds, + time_inds=time_inds, + freq_inds=freq_inds, + spw_inds=spw_inds, + jones_inds=jones_inds, + history_update_string=history_update_string, + ) + + # handle freq_range if selecting on freqs (can go away in v3.0) + if ( + cal_object.freq_range is not None + and cal_object.flex_spw is False + and freq_inds is not None + ): + cal_object.freq_range = [ + np.min(cal_object.freq_array), + np.max(cal_object.freq_array), + ] + if cal_object.future_array_shapes: + cal_object.freq_range = np.asarray(cal_object.freq_range)[np.newaxis, :] # check if object is self-consistent if run_check: @@ -4751,8 +4719,12 @@ def _convert_to_filetype(self, filetype): from . import calfits other_obj = calfits.CALFITS() + elif filetype == "calh5": + from . import calh5 + + other_obj = calh5.CalH5() else: - raise ValueError("filetype must be calfits.") + raise ValueError("filetype must be calh5 or calfits.") for p in self: param = getattr(self, p) setattr(other_obj, p, param) @@ -4892,6 +4864,109 @@ def read_calfits(self, filename, **kwargs): self._convert_from_filetype(calfits_obj) del calfits_obj + def read_calh5(self, filename, **kwargs): + """ + Read in data from calh5 file(s). + + Parameters + ---------- + filename : string, path, FastCalH5Meta, h5py.File + The CalH5 file to read from. A file name or path or a FastCalH5Meta or + h5py File object. Must contains a "/Header" group for CalH5 files + conforming to spec. + antenna_nums : array_like of int, optional + The antennas numbers to include when reading data into the object + (antenna positions and names for the removed antennas will be retained + unless `keep_all_metadata` is False). This cannot be provided if + `antenna_names` is also provided. Ignored if read_data is False. + antenna_names : array_like of str, optional + The antennas names to include when reading data into the object + (antenna positions and names for the removed antennas will be retained + unless `keep_all_metadata` is False). This cannot be provided if + `antenna_nums` is also provided. Ignored if read_data is False. + frequencies : array_like of float, optional + The frequencies to include when reading data into the object, each + value passed here should exist in the freq_array. Ignored if + read_data is False. + freq_chans : array_like of int, optional + The frequency channel numbers to include when reading data into the + object. Ignored if read_data is False. + times : array_like of float, optional + The times to include when reading data into the object, each value + passed here should exist in the time_array. Cannot be used with + `time_range`. + time_range : array_like of float, optional + The time range in Julian Date to keep in the object, must be + length 2. Some of the times in the object should fall between the + first and last elements. Cannot be used with `times`. + lsts : array_like of float, optional + The local sidereal times (LSTs) to keep in the object, each value + passed here should exist in the lst_array. Cannot be used with + `times`, `time_range`, or `lst_range`. + lst_range : array_like of float, optional + The local sidereal time (LST) range in radians to keep in the + object, must be of length 2. Some of the LSTs in the object should + fall between the first and last elements. If the second value is + smaller than the first, the LSTs are treated as having phase-wrapped + around LST = 2*pi = 0, and the LSTs kept on the object will run from + the larger value, through 0, and end at the smaller value. 
+ jones : array_like of int, optional + The jones polarizations numbers to include when reading data into the + object, each value passed here should exist in the polarization_array. + Ignored if read_data is False. + read_data : bool + Read in the data-like arrays (gains/delays, flags, qualities). If set to + False, only the metadata will be read in. Setting read_data to False + results in a metadata only object. + background_lsts : bool + When set to True, the lst_array is calculated in a background thread. + run_check : bool + Option to check for the existence and proper shapes of parameters + after after reading in the file (the default is True, + meaning the check will be run). Ignored if read_data is False. + check_extra : bool + Option to check optional parameters as well as required ones (the + default is True, meaning the optional parameters will be checked). + Ignored if read_data is False. + run_check_acceptability : bool + Option to check acceptable range of the values of parameters after + reading in the file (the default is True, meaning the acceptable + range check will be done). Ignored if read_data is False. + use_future_array_shapes : bool + Option to convert to the future planned array shapes before the changes go + into effect by removing the spectral window axis. + astrometry_library : str + Library used for calculating LSTs. Allowed options are 'erfa' (which uses + the pyERFA), 'novas' (which uses the python-novas library), and 'astropy' + (which uses the astropy utilities). Default is erfa unless the + telescope_location frame is MCMF (on the moon), in which case the default + is astropy. + + Returns + ------- + None + + Raises + ------ + IOError + If filename doesn't exist. + ValueError + If incompatible select keywords are set (e.g. `times` and `time_range`) or + select keywords exclude all data or if keywords are set to the wrong type. + + """ + from . import calh5 + + if isinstance(filename, (list, tuple)): + raise ValueError( + "Use the generic `UVCal.read` method to read multiple files." + ) + + calh5_obj = calh5.CalH5() + calh5_obj.read_calh5(filename, **kwargs) + self._convert_from_filetype(calh5_obj) + del calh5_obj + def read_fhd_cal( self, cal_file, *, obs_file, layout_file=None, settings_file=None, **kwargs ): @@ -5052,7 +5127,21 @@ def read( axis=None, file_type=None, read_data=True, + background_lsts=True, use_future_array_shapes=False, + astrometry_library=None, + # selecting parameters + antenna_nums=None, + antenna_names=None, + ant_str=None, + frequencies=None, + freq_chans=None, + spws=None, + times=None, + time_range=None, + lsts=None, + lst_range=None, + jones=None, # checking parameters run_check=True, check_extra=True, @@ -5095,9 +5184,60 @@ def read( Read in the gains or delays, quality arrays and flag arrays. If set to False, only the metadata will be read in. Setting read_data to False results in a metadata only object. + background_lsts : bool + When set to True, the lst_array is calculated in a background thread. use_future_array_shapes : bool Option to convert to the future planned array shapes before the changes go into effect by removing the spectral window axis. + astrometry_library : str + Library used for calculating LSTs. Allowed options are 'erfa' (which uses + the pyERFA), 'novas' (which uses the python-novas library), and 'astropy' + (which uses the astropy utilities). Default is erfa unless the + telescope_location frame is MCMF (on the moon), in which case the default + is astropy. 
+ + Selecting + --------- + antenna_nums : array_like of int, optional + The antennas numbers to include when reading data into the object + (antenna positions and names for the removed antennas will be retained + unless `keep_all_metadata` is False). This cannot be provided if + `antenna_names` is also provided. Ignored if read_data is False. + antenna_names : array_like of str, optional + The antennas names to include when reading data into the object + (antenna positions and names for the removed antennas will be retained + unless `keep_all_metadata` is False). This cannot be provided if + `antenna_nums` is also provided. Ignored if read_data is False. + frequencies : array_like of float, optional + The frequencies to include when reading data into the object, each + value passed here should exist in the freq_array. Ignored if + read_data is False. + freq_chans : array_like of int, optional + The frequency channel numbers to include when reading data into the + object. Ignored if read_data is False. + times : array_like of float, optional + The times to include when reading data into the object, each value + passed here should exist in the time_array. Cannot be used with + `time_range`. + time_range : array_like of float, optional + The time range in Julian Date to keep in the object, must be + length 2. Some of the times in the object should fall between the + first and last elements. Cannot be used with `times`. + lsts : array_like of float, optional + The local sidereal times (LSTs) to keep in the object, each value + passed here should exist in the lst_array. Cannot be used with + `times`, `time_range`, or `lst_range`. + lst_range : array_like of float, optional + The local sidereal time (LST) range in radians to keep in the + object, must be of length 2. Some of the LSTs in the object should + fall between the first and last elements. If the second value is + smaller than the first, the LSTs are treated as having phase-wrapped + around LST = 2*pi = 0, and the LSTs kept on the object will run from + the larger value, through 0, and end at the smaller value. + jones : array_like of int, optional + The jones polarizations numbers to include when reading data into the + object, each value passed here should exist in the polarization_array. + Ignored if read_data is False. Checking -------- @@ -5150,14 +5290,18 @@ def read( file_type = "fhd" elif "fits" in extension: file_type = "calfits" + elif "h5" in extension: + file_type = "calh5" else: raise ValueError( "File type could not be determined, use the " "file_type keyword to specify the type." ) - if file_type not in ["calfits", "fhd"]: - raise ValueError("The only supported file_types are 'calfits' and 'fhd'.") + if file_type not in ["calfits", "fhd", "calh5"]: + raise ValueError( + "The only supported file_types are 'calfits', 'calh5', and 'fhd'." 
+ ) obs_file_use = None layout_file_use = None @@ -5204,7 +5348,21 @@ def read( filename[0], file_type=file_type, read_data=read_data, + background_lsts=background_lsts, use_future_array_shapes=use_future_array_shapes, + astrometry_library=astrometry_library, + # selecting parameters + antenna_nums=antenna_nums, + antenna_names=antenna_names, + ant_str=ant_str, + frequencies=frequencies, + freq_chans=freq_chans, + spws=spws, + times=times, + time_range=time_range, + lsts=lsts, + lst_range=lst_range, + jones=jones, # checking parameters run_check=run_check, check_extra=check_extra, @@ -5232,7 +5390,21 @@ def read( file, read_data=read_data, file_type=file_type, + background_lsts=background_lsts, use_future_array_shapes=use_future_array_shapes, + astrometry_library=astrometry_library, + # selecting parameters + antenna_nums=antenna_nums, + antenna_names=antenna_names, + ant_str=ant_str, + frequencies=frequencies, + freq_chans=freq_chans, + spws=spws, + times=times, + time_range=time_range, + lsts=lsts, + lst_range=lst_range, + jones=jones, # checking parameters run_check=run_check, check_extra=check_extra, @@ -5273,14 +5445,55 @@ def read( # Because self was at the beginning of the list, # everything is merged into it at the end of this loop else: + if file_type in ["fhd", "calfits"]: + if ( + antenna_nums is not None + or antenna_names is not None + or ant_str is not None + or frequencies is not None + or freq_chans is not None + or spws is not None + or times is not None + or lsts is not None + or time_range is not None + or lst_range is not None + or jones is not None + ): + select = True + warnings.warn( + "Warning: select on read keyword set, but " + 'file_type is "{ftype}" which does not support select ' + "on read. Entire file will be read and then select " + "will be performed".format(ftype=file_type) + ) + # these file types do not have select on read, so set all + # select parameters + select_antenna_nums = antenna_nums + select_antenna_names = antenna_names + select_ant_str = ant_str + select_frequencies = frequencies + select_freq_chans = freq_chans + select_spws = spws + select_times = times + select_lsts = lsts + select_time_range = time_range + select_lst_range = lst_range + select_jones = jones + else: + select = False + elif file_type in ["calh5"]: + select = False + if file_type == "calfits": self.read_calfits( filename, read_data=read_data, + background_lsts=background_lsts, run_check=run_check, check_extra=check_extra, run_check_acceptability=run_check_acceptability, use_future_array_shapes=use_future_array_shapes, + astrometry_library=astrometry_library, ) elif file_type == "fhd": @@ -5292,113 +5505,60 @@ def read( raw=raw, read_data=read_data, extra_history=extra_history, + background_lsts=background_lsts, run_check=run_check, check_extra=check_extra, run_check_acceptability=run_check_acceptability, use_future_array_shapes=use_future_array_shapes, + astrometry_library=astrometry_library, + ) + elif file_type == "calh5": + self.read_calh5( + filename, + antenna_nums=antenna_nums, + antenna_names=antenna_names, + ant_str=ant_str, + frequencies=frequencies, + freq_chans=freq_chans, + spws=spws, + times=times, + time_range=time_range, + lsts=lsts, + lst_range=lst_range, + jones=jones, + read_data=read_data, + background_lsts=background_lsts, + run_check=run_check, + check_extra=check_extra, + run_check_acceptability=run_check_acceptability, + use_future_array_shapes=use_future_array_shapes, + astrometry_library=astrometry_library, ) - @classmethod - def from_file( - cls, - 
filename, - *, - axis=None, - file_type=None, - read_data=True, - use_future_array_shapes=False, - # checking parameters - run_check=True, - check_extra=True, - run_check_acceptability=True, - # file-type specific parameters - # FHD - obs_file=None, - layout_file=None, - settings_file=None, - raw=True, - extra_history=None, - ): - """ - Initialize a new UVCal object by reading the input file. - - This method supports a number of different types of files. - Universal parameters (required and optional) are listed directly below, - followed by parameters used by all file types related to checking. Each file - type also has its own set of optional parameters that are listed at the end of - this docstring. - - Parameters - ---------- - filename : str or array_like of str - The file(s) or list(s) (or array(s)) of files to read from. - file_type : str - One of ['calfits', 'fhd'] or None. If None, the code attempts to guess what - the file type is based on file extensions (FHD: .sav, .txt; - uvfits: .calfits). Note that if a list of datasets is passed, the file type - is determined from the first dataset. - axis : str - Axis to concatenate files along. This enables fast concatenation - along the specified axis without the normal checking that all other - metadata agrees. This method does not guarantee correct resulting - objects. Please see the docstring for fast_concat for details. - Allowed values are: 'antenna', 'time', 'freq', 'spw', 'jones' ('freq' is - not allowed for delay or wideband objects and 'spw' is only allowed for - wideband objects). Only used if multiple files are passed. - read_data : bool - Read in the gains or delays, quality arrays and flag arrays. - If set to False, only the metadata will be read in. Setting read_data to - False results in a metadata only object. - use_future_array_shapes : bool - Option to convert to the future planned array shapes before the changes go - into effect by removing the spectral window axis. - - Checking - -------- - run_check : bool - Option to check for the existence and proper shapes of - parameters after reading in the file. - check_extra : bool - Option to check optional parameters as well as required ones. - run_check_acceptability : bool - Option to check acceptable range of the values of - parameters after reading in the file. - - FHD - --- - obs_file : str or list of str - The obs.sav file or list of files to read from. This is required for FHD - files. - layout_file : str - The FHD layout file. Required for antenna_positions to be set. - settings_file : str or list of str, optional - The settings_file or list of files to read from. Optional, - but very useful for provenance. - raw : bool - Option to use the raw (per antenna, per frequency) solution or - to use the fitted (polynomial over phase/amplitude) solution. - Default is True (meaning use the raw solutions). 
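
As a side note, the read_data / metadata-only behavior described in the docstring being replaced here is unchanged by this patch. A hedged sketch (hypothetical file name) of inspecting the metadata before deciding what to select on read:

    from pyuvdata import UVCal

    # Read only the metadata to see what the file contains.
    meta = UVCal.from_file("night1.calh5", read_data=False)
    print(meta.Ntimes, meta.Nfreqs, meta.Njones)

    # Then do a full read keeping just the first half of the times.
    cal = UVCal.from_file(
        "night1.calh5", times=meta.time_array[: meta.Ntimes // 2]
    )
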
+ if select: + self.select( + antenna_nums=select_antenna_nums, + antenna_names=select_antenna_names, + ant_str=select_ant_str, + frequencies=select_frequencies, + freq_chans=select_freq_chans, + spws=select_spws, + times=select_times, + lsts=select_lsts, + time_range=select_time_range, + lst_range=select_lst_range, + jones=select_jones, + run_check=run_check, + check_extra=check_extra, + run_check_acceptability=run_check_acceptability, + ) - """ + @classmethod + @copy_replace_short_description(read, style=DocstringStyle.NUMPYDOC) + def from_file(cls, filename, **kwargs): + """Initialize a new UVCal object by reading the input file.""" uvc = cls() - uvc.read( - filename, - axis=axis, - file_type=file_type, - read_data=read_data, - use_future_array_shapes=use_future_array_shapes, - # checking parameters - run_check=run_check, - check_extra=check_extra, - run_check_acceptability=run_check_acceptability, - # file-type specific parameters - # FHD - obs_file=obs_file, - layout_file=layout_file, - settings_file=settings_file, - raw=raw, - extra_history=extra_history, - ) + uvc.read(filename, **kwargs) return uvc def write_calfits( @@ -5448,3 +5608,69 @@ def write_calfits( clobber=clobber, ) del calfits_obj + + def write_calh5(self, filename, **kwargs): + """ + Write the data to a calh5 file. + + Write an in-memory UVCal object to a CalH5 file. + + Parameters + ---------- + filename : str + The CalH5 file to write to. + clobber : bool + Option to overwrite the file if it already exists. + chunks : tuple or bool + h5py.create_dataset chunks keyword. Tuple for chunk shape, + True for auto-chunking, None for no chunking. Default is True. + data_compression : str + HDF5 filter to apply when writing the gain_array or delay. Default is None + (no filter/compression). In addition to the normal HDF5 filter values, the + user may specify "bitshuffle" which will set the compression to `32008` for + bitshuffle and will set the `compression_opts` to `(0, 2)` to allow + bitshuffle to automatically determine the block size and to use the LZF + filter after bitshuffle. Using `bitshuffle` requires having the + `hdf5plugin` package installed. Dataset must be chunked to use compression. + flags_compression : str + HDF5 filter to apply when writing the flags_array. Default is the + LZF filter. Dataset must be chunked. + quality_compression : str + HDF5 filter to apply when writing the quality_array and/or + total_quality_array if they are defined. Default is the LZF filter. Dataset + must be chunked. + run_check : bool + Option to check for the existence and proper shapes of parameters + before writing the file. + check_extra : bool + Option to check optional parameters as well as required ones. + run_check_acceptability : bool + Option to check acceptable range of the values of parameters before + writing the file. + + Returns + ------- + None + + Raises + ------ + IOError + If the file located at `filename` already exists and clobber=False, + an IOError is raised. + + Notes + ----- + The HDF5 library allows for the application of "filters" when writing + data, which can provide moderate to significant levels of compression + for the datasets in question. Testing has shown that for some typical + cases of UVData objects (empty/sparse flag_array objects, and/or uniform + nsample_arrays), the built-in LZF filter provides significant + compression for minimal computational overhead. 
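
To illustrate the fallback implemented just above: for file types without native select-on-read (calfits, FHD), passing selection keywords to read is intended to be equivalent to a full read followed by select(). A sketch with a hypothetical gain-type calfits file:

    import numpy as np
    from pyuvdata import UVCal

    # calfits has no select on read: this warns, reads the whole file,
    # then applies select() in memory via the branch added above.
    cal_a = UVCal.from_file("obs.calfits", jones=["Jxx"])

    # which should leave the same data as doing the two steps by hand
    cal_b = UVCal.from_file("obs.calfits")
    cal_b.select(jones=["Jxx"])
    assert np.array_equal(cal_a.gain_array, cal_b.gain_array)
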
+ + """ + if self.metadata_only: + raise ValueError("Cannot write out metadata only objects to a calh5 file.") + + calh5_obj = self._convert_to_filetype("calh5") + calh5_obj.write_calh5(filename, **kwargs) + del calh5_obj diff --git a/pyuvdata/uvdata/tests/test_uvdata.py b/pyuvdata/uvdata/tests/test_uvdata.py index e04aebe705..2672142c6d 100644 --- a/pyuvdata/uvdata/tests/test_uvdata.py +++ b/pyuvdata/uvdata/tests/test_uvdata.py @@ -2274,7 +2274,7 @@ def test_select_time_range_no_data(casa_uvfits): """Check for error associated with times not included in data.""" uv_object = casa_uvfits unique_times = np.unique(uv_object.time_array) - with pytest.raises(ValueError, match="No elements in time range"): + with pytest.raises(ValueError, match="No elements in time_array"): uv_object.select( time_range=[ np.min(unique_times) - uv_object.integration_time[0] * 2, @@ -2287,7 +2287,7 @@ def test_select_lst_range_no_data(casa_uvfits): """Check for error associated with LSTS not included in data.""" uv_object = casa_uvfits unique_lsts = np.unique(uv_object.lst_array) - with pytest.raises(ValueError, match="No elements in LST range"): + with pytest.raises(ValueError, match="No elements in lst_array"): uv_object.select( lst_range=[np.min(unique_lsts) - 0.2, np.min(unique_lsts) - 0.1] ) @@ -5599,9 +5599,9 @@ def test_parse_ants(casa_uvfits, hera_uvh5_xx): assert isinstance(ant_pairs_nums, type(None)) assert Counter(polarizations) == Counter(pols_expected) - # Unparsible string + # Unparsable string ant_str = "none" - with pytest.raises(ValueError, match="Unparsible argument none"): + with pytest.raises(ValueError, match="Unparsable argument none"): uv.parse_ants(ant_str) # Single antenna number @@ -6234,7 +6234,7 @@ def test_select_with_ant_str(casa_uvfits, hera_uvh5_xx): {"ant_str": "pI,pq,pU,pv"}, "Polarization 4 is not present in the polarization_array", ), - ({"ant_str": "none"}, "Unparsible argument none"), + ({"ant_str": "none"}, "Unparsable argument none"), ], ) def test_select_with_ant_str_errors(casa_uvfits, kwargs, message): diff --git a/pyuvdata/uvdata/uvdata.py b/pyuvdata/uvdata/uvdata.py index 05bd01ef10..1c7943fcc5 100644 --- a/pyuvdata/uvdata/uvdata.py +++ b/pyuvdata/uvdata/uvdata.py @@ -7898,6 +7898,7 @@ def _select_preprocess( list of polarization indices to keep. Can be None (to keep everything). history_update_string : str string to append to the end of the history. + """ # build up history string as we go history_update_string = " Downselected to specific " @@ -8103,127 +8104,18 @@ def _select_preprocess( else: blt_inds = ant_blt_inds - have_times = times is not None - have_time_range = time_range is not None - have_lsts = lsts is not None - have_lst_range = lst_range is not None - if ( - np.count_nonzero([have_times, have_time_range, have_lsts, have_lst_range]) - > 1 - ): - raise ValueError( - "Only one of [times, time_range, lsts, lst_range] may be " - "specified per selection operation." 
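
For reference, a hedged sketch of writing with the new method using the compression keywords documented above (output path hypothetical; datasets must be chunked for any filter, and "bitshuffle" additionally requires the hdf5plugin package; metadata-only objects are rejected by the check added just above):

    from pyuvdata import UVCal

    cal = UVCal.from_file("night1.calh5")  # hypothetical input file

    # LZF is already the default for flags and qualities; apply it to the
    # gain/delay data as well.
    cal.write_calh5(
        "night1_copy.calh5",
        clobber=True,
        chunks=True,
        data_compression="lzf",
    )
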
- ) - - if times is not None: - times = uvutils._get_iterable(times) - if np.array(times).ndim > 1: - times = np.array(times).flatten() - - time_blt_inds = np.zeros(0, dtype=np.int64) - for jd in times: - if np.any( - np.isclose( - self.time_array, - jd, - rtol=self._time_array.tols[0], - atol=self._time_array.tols[1], - ) - ): - time_blt_inds = np.append( - time_blt_inds, - np.where( - np.isclose( - self.time_array, - jd, - rtol=self._time_array.tols[0], - atol=self._time_array.tols[1], - ) - )[0], - ) - else: - raise ValueError( - "Time {t} is not present in the time_array".format(t=jd) - ) - - if time_range is not None: - if np.size(time_range) != 2: - raise ValueError("time_range must be length 2.") - - time_blt_inds = np.nonzero( - (self.time_array <= time_range[1]) & (self.time_array >= time_range[0]) - )[0] - if time_blt_inds.size == 0: - raise ValueError( - f"No elements in time range between {time_range[0]} and " - f"{time_range[1]}." - ) - - if lsts is not None: - if np.any(np.asarray(lsts) > 2 * np.pi): - warnings.warn( - "The lsts parameter contained a value greater than 2*pi. " - "LST values are assumed to be in radians, not hours." - ) - lsts = uvutils._get_iterable(lsts) - if np.array(lsts).ndim > 1: - lsts = np.array(lsts).flatten() - - time_blt_inds = np.zeros(0, dtype=np.int64) - for lst in lsts: - if np.any( - np.isclose( - self.lst_array, - lst, - rtol=self._lst_array.tols[0], - atol=self._lst_array.tols[1], - ) - ): - time_blt_inds = np.append( - time_blt_inds, - np.where( - np.isclose( - self.lst_array, - lst, - rtol=self._lst_array.tols[0], - atol=self._lst_array.tols[1], - ) - )[0], - ) - else: - raise ValueError(f"LST {lst} is not present in the lst_array") - - if lst_range is not None: - if np.size(lst_range) != 2: - raise ValueError("lst_range must be length 2.") - if np.any(np.asarray(lst_range) > 2 * np.pi): - warnings.warn( - "The lst_range contained a value greater than 2*pi. " - "LST values are assumed to be in radians, not hours." - ) - if lst_range[1] < lst_range[0]: - # we're wrapping around LST = 2*pi = 0 - lst_range_1 = [lst_range[0], 2 * np.pi] - lst_range_2 = [0, lst_range[1]] - time_blt_inds1 = np.nonzero( - (self.lst_array <= lst_range_1[1]) - & (self.lst_array >= lst_range_1[0]) - )[0] - time_blt_inds2 = np.nonzero( - (self.lst_array <= lst_range_2[1]) - & (self.lst_array >= lst_range_2[0]) - )[0] - time_blt_inds = np.union1d(time_blt_inds1, time_blt_inds2) - else: - time_blt_inds = np.nonzero( - (self.lst_array <= lst_range[1]) & (self.lst_array >= lst_range[0]) - )[0] - if time_blt_inds.size == 0: - raise ValueError( - f"No elements in LST range between {lst_range[0]} and " - f"{lst_range[1]}." - ) + time_blt_inds = uvutils._select_times_helper( + times=times, + time_range=time_range, + lsts=lsts, + lst_range=lst_range, + obj_time_array=self.time_array, + obj_time_range=None, + obj_lst_array=self.lst_array, + obj_lst_range=None, + time_tols=self._time_array.tols, + lst_tols=self._lst_array.tols, + ) if times is not None or time_range is not None: if n_selects > 0: diff --git a/pyuvdata/uvdata/uvh5.py b/pyuvdata/uvdata/uvh5.py index 92e0547a45..a8fd89ad6e 100644 --- a/pyuvdata/uvdata/uvh5.py +++ b/pyuvdata/uvdata/uvh5.py @@ -581,7 +581,7 @@ def to_uvdata( default is astropy. """ - uvd = UVH5() + uvd = UVData() uvd.read_uvh5( self, read_data=False,
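
Finally, a sketch of what the consolidated helper is doing for the UVData path above: it returns the baseline-time indices matching the requested times/LSTs. This is a private utility (leading underscore) shown only for illustration, with the keyword-only signature from the diff and a hypothetical input file:

    import numpy as np
    import pyuvdata.utils as uvutils
    from pyuvdata import UVData

    uvd = UVData.from_file("day1.uvh5")  # hypothetical file

    # Same call the refactored _select_preprocess now makes internally.
    blt_inds = uvutils._select_times_helper(
        times=None,
        time_range=[uvd.time_array.min(), uvd.time_array.min() + 0.01],
        lsts=None,
        lst_range=None,
        obj_time_array=uvd.time_array,
        obj_time_range=None,
        obj_lst_array=uvd.lst_array,
        obj_lst_range=None,
        time_tols=uvd._time_array.tols,
        lst_tols=uvd._lst_array.tols,
    )
    print(blt_inds.size, "baseline-times fall in that range")
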