From a84229cd2e0f59f0ffad88321dc652e81dc06622 Mon Sep 17 00:00:00 2001 From: Bryna Hazelton Date: Mon, 26 Jun 2023 16:15:17 -0700 Subject: [PATCH 1/4] add support for frequency averaging with multiple spws also update error messages now that frequency_average handles uneven freq spacing --- pyuvdata/utils.py | 18 +- pyuvdata/uvdata/tests/test_uvdata.py | 700 +++++++++++++++++---------- pyuvdata/uvdata/uvdata.py | 377 ++++++++++----- 3 files changed, 707 insertions(+), 388 deletions(-) diff --git a/pyuvdata/utils.py b/pyuvdata/utils.py index 458eb66421..6ad8c0f0a1 100644 --- a/pyuvdata/utils.py +++ b/pyuvdata/utils.py @@ -544,20 +544,16 @@ def _check_freq_spacing( chanwidth_error = True if raise_errors and spacing_error: raise ValueError( - "The frequencies are not evenly spaced (probably " - "because of a select operation) or has differing " - "values of channel widths. Some file formats " - "(e.g. uvfits, miriad) and methods (frequency_average) " - "do not support unevenly spaced frequencies." + "The frequencies are not evenly spaced (probably because of a select " + "operation) or has differing values of channel widths. Some file formats " + "(e.g. uvfits, miriad) do not support unevenly spaced frequencies." ) if raise_errors and chanwidth_error: raise ValueError( - "The frequencies are separated by more than their " - "channel width (probably because of a select operation). " - "Some file formats (e.g. uvfits, miriad) and " - "methods (frequency_average) do not support " - "frequencies that are spaced by more than their " - "channel width." + "The frequencies are separated by more than their channel width (probably " + "because of a select operation). Some file formats (e.g. uvfits, miriad) " + "do not support frequencies that are spaced by more than their channel " + "width." ) return spacing_error, chanwidth_error diff --git a/pyuvdata/uvdata/tests/test_uvdata.py b/pyuvdata/uvdata/tests/test_uvdata.py index 55b421327a..e0532e1eb6 100644 --- a/pyuvdata/uvdata/tests/test_uvdata.py +++ b/pyuvdata/uvdata/tests/test_uvdata.py @@ -9422,37 +9422,74 @@ def test_resample_in_time_warning(): @pytest.mark.filterwarnings("ignore:This method will be removed in version 3.0 when") @pytest.mark.filterwarnings("ignore:The uvw_array does not match the expected values") @pytest.mark.parametrize("future_shapes", [True, False]) -def test_frequency_average(casa_uvfits, future_shapes): +@pytest.mark.parametrize("flex_spw", [True, False]) +@pytest.mark.parametrize("sum_corr", [True, False]) +def test_frequency_average(casa_uvfits, future_shapes, flex_spw, sum_corr): """Test averaging in frequency.""" uvobj = casa_uvfits if not future_shapes: uvobj.use_current_array_shapes() + + if flex_spw: + # Make multiple spws + uvobj._set_flex_spw() + spw_nchan = int(uvobj.Nfreqs / 4) + uvobj.flex_spw_id_array = np.concatenate( + ( + np.full(spw_nchan, 0, dtype=int), + np.full(spw_nchan, 1, dtype=int), + np.full(spw_nchan, 2, dtype=int), + np.full(spw_nchan, 3, dtype=int), + ) + ) + uvobj.spw_array = np.arange(4) + uvobj.Nspws = 4 + if not future_shapes: + uvobj.channel_width = np.full(uvobj.Nfreqs, uvobj.channel_width) + uvobj2 = uvobj.copy() eq_coeffs = np.tile( np.arange(uvobj.Nfreqs, dtype=np.float64), (uvobj.Nants_telescope, 1) ) uvobj.eq_coeffs = eq_coeffs - uvobj.check() # check that there's no flagging assert np.nonzero(uvobj.flag_array)[0].size == 0 + n_chan_to_avg = 2 with uvtest.check_warnings(UserWarning, "eq_coeffs vary by frequency"): - uvobj.frequency_average(2), + uvobj.frequency_average( + n_chan_to_avg=2, keep_ragged=True, summing_correlator_mode=sum_corr + ) - assert uvobj.Nfreqs == (uvobj2.Nfreqs / 2) + assert uvobj.Nfreqs == (uvobj2.Nfreqs / n_chan_to_avg) - if future_shapes: - expected_freqs = uvobj2.freq_array.reshape(int(uvobj2.Nfreqs / 2), 2).mean( - axis=1 + input_freqs = np.squeeze(uvobj2.freq_array) + + expected_freqs = input_freqs[ + np.arange((uvobj2.Nfreqs // n_chan_to_avg) * n_chan_to_avg) + ] + expected_freqs = expected_freqs.reshape( + int(uvobj2.Nfreqs // n_chan_to_avg), n_chan_to_avg + ).mean(axis=1) + + input_chan_width = uvobj2.channel_width + + if future_shapes or flex_spw: + expected_chan_widths = np.full( + int(uvobj2.Nfreqs // n_chan_to_avg), + input_chan_width[0] * n_chan_to_avg, + dtype=float, ) else: - expected_freqs = uvobj2.freq_array.reshape(1, int(uvobj2.Nfreqs / 2), 2).mean( - axis=2 - ) + expected_chan_widths = input_chan_width * n_chan_to_avg + + expected_freqs = expected_freqs[np.newaxis, :] + assert np.max(np.abs(uvobj.freq_array - expected_freqs)) == 0 + assert np.max(np.abs(uvobj.channel_width - expected_chan_widths)) == 0 expected_coeffs = eq_coeffs.reshape( uvobj2.Nants_telescope, int(uvobj2.Nfreqs / 2), 2 @@ -9460,25 +9497,16 @@ def test_frequency_average(casa_uvfits, future_shapes): assert np.max(np.abs(uvobj.eq_coeffs - expected_coeffs)) == 0 # no flagging, so the following is true - expected_data = uvobj2.get_data(1, 2, squeeze="none") - if future_shapes: - reshape_tuple = ( - expected_data.shape[0], - int(uvobj2.Nfreqs / 2), - 2, - uvobj2.Npols, - ) + expected_data = uvobj2.get_data(1, 2) + reshape_tuple = (expected_data.shape[0], int(uvobj2.Nfreqs / 2), 2, uvobj2.Npols) + if sum_corr: + expected_data = expected_data.reshape(reshape_tuple).sum(axis=2) + else: expected_data = expected_data.reshape(reshape_tuple).mean(axis=2) - else: - reshape_tuple = ( - expected_data.shape[0], - 1, - int(uvobj2.Nfreqs / 2), - 2, - uvobj2.Npols, - ) - expected_data = expected_data.reshape(reshape_tuple).mean(axis=3) + if not future_shapes: + expected_data = expected_data[:, np.newaxis] + assert np.allclose(uvobj.get_data(1, 2, squeeze="none"), expected_data) assert np.nonzero(uvobj.flag_array)[0].size == 0 @@ -9489,64 +9517,247 @@ def test_frequency_average(casa_uvfits, future_shapes): @pytest.mark.filterwarnings("ignore:This method will be removed in version 3.0 when") @pytest.mark.filterwarnings("ignore:The uvw_array does not match the expected values") -@pytest.mark.parametrize("future_shapes", [True, False]) -def test_frequency_average_uneven(casa_uvfits, future_shapes): +@pytest.mark.parametrize(["future_shapes", "sum_corr"], [[True, True], [False, False]]) +@pytest.mark.parametrize( + ["flex_spw", "respect_spws"], [[True, True], [True, False], [False, False]] +) +@pytest.mark.parametrize("keep_ragged", [True, False]) +def test_frequency_average_uneven( + casa_uvfits, future_shapes, keep_ragged, sum_corr, flex_spw, respect_spws +): """Test averaging in frequency with a number that is not a factor of Nfreqs.""" uvobj = casa_uvfits if not future_shapes: uvobj.use_current_array_shapes() + + eq_coeffs = np.tile( + np.arange(uvobj.Nfreqs, dtype=np.float64), (uvobj.Nants_telescope, 1) + ) + uvobj.eq_coeffs = eq_coeffs + + if flex_spw: + # Make multiple spws + uvobj._set_flex_spw() + spw_nchan = int(uvobj.Nfreqs / 4) + uvobj.flex_spw_id_array = np.concatenate( + ( + np.full(spw_nchan, 0, dtype=int), + np.full(spw_nchan, 1, dtype=int), + np.full(spw_nchan, 2, dtype=int), + np.full(spw_nchan, 3, dtype=int), + ) + ) + uvobj.spw_array = np.arange(4) + uvobj.Nspws = 4 + if not future_shapes: + uvobj.channel_width = np.full(uvobj.Nfreqs, uvobj.channel_width) + uvobj2 = uvobj.copy() # check that there's no flagging assert np.nonzero(uvobj.flag_array)[0].size == 0 + n_chan_to_avg = 7 - with uvtest.check_warnings( - UserWarning, - [ - "Nfreqs does not divide by `n_chan_to_avg` evenly. The final 1 " - "frequencies will be excluded, to control which frequencies to exclude, " - "use a select to control.", - "The uvw_array does not match the expected values", - ], - ): - uvobj.frequency_average(7) + warn = [UserWarning] + msg = [ + re.escape( + "eq_coeffs vary by frequency. They should be applied to the data using " + "`remove_eq_coeffs` before frequency averaging" + ) + ] + if keep_ragged and not future_shapes: + warn.append(UserWarning) + msg.append( + "Ragged frequencies will result in varying channel widths, so " + "this object will have future array shapes after averaging. " + "Use keep_ragged=False to avoid this." + ) + with uvtest.check_warnings(warn, match=msg): + uvobj.frequency_average( + n_chan_to_avg, + keep_ragged=keep_ragged, + summing_correlator_mode=sum_corr, + respect_spws=respect_spws, + ) + + assert uvobj2.Nfreqs % n_chan_to_avg != 0 + + input_freqs = np.squeeze(uvobj2.freq_array) + + input_chan_width = uvobj2.channel_width + + if flex_spw and respect_spws: + expected_freqs = np.array([]) + expected_chan_widths = np.array([]) + for spw_ind in range(uvobj2.Nspws): + start_chan = spw_ind * spw_nchan + n_reg_chan = (spw_nchan // n_chan_to_avg) * n_chan_to_avg + this_expected = input_freqs[start_chan : (start_chan + n_reg_chan)] + this_expected = this_expected.reshape( + int(spw_nchan // n_chan_to_avg), n_chan_to_avg + ).mean(axis=1) + this_expected_cw = input_chan_width[start_chan : (start_chan + n_reg_chan)] + this_expected_cw = this_expected_cw.reshape( + int(spw_nchan // n_chan_to_avg), n_chan_to_avg + ).sum(axis=1) + if keep_ragged: + this_expected = np.append( + this_expected, + np.mean( + input_freqs[ + (start_chan + n_reg_chan) : (spw_ind + 1) * spw_nchan + ] + ), + ) + this_expected_cw = np.append( + this_expected_cw, + np.sum( + input_chan_width[ + (start_chan + n_reg_chan) : (spw_ind + 1) * spw_nchan + ] + ), + ) + expected_freqs = np.append(expected_freqs, this_expected) + expected_chan_widths = np.append(expected_chan_widths, this_expected_cw) + if keep_ragged: + assert uvobj.Nfreqs == (spw_nchan // n_chan_to_avg + 1) * uvobj2.Nspws + else: + assert uvobj.Nfreqs == (spw_nchan // n_chan_to_avg) * uvobj2.Nspws + else: + expected_freqs = input_freqs[ + np.arange((uvobj2.Nfreqs // n_chan_to_avg) * n_chan_to_avg) + ] + expected_freqs = expected_freqs.reshape( + int(uvobj2.Nfreqs // n_chan_to_avg), n_chan_to_avg + ).mean(axis=1) - assert uvobj2.Nfreqs % 7 != 0 + if not future_shapes and not uvobj.flex_spw: + if not keep_ragged: + expected_chan_widths = input_chan_width * n_chan_to_avg + else: + expected_chan_widths = np.full( + (uvobj2.Nfreqs // n_chan_to_avg), + input_chan_width * n_chan_to_avg, + dtype=float, + ) + else: + expected_chan_widths = input_chan_width[ + np.arange((uvobj2.Nfreqs // n_chan_to_avg) * n_chan_to_avg) + ] + expected_chan_widths = expected_chan_widths.reshape( + int(uvobj2.Nfreqs // n_chan_to_avg), n_chan_to_avg + ).sum(axis=1) + + if keep_ragged: + assert uvobj.Nfreqs == (uvobj2.Nfreqs // n_chan_to_avg + 1) + expected_freqs = np.append( + expected_freqs, + np.mean( + input_freqs[(uvobj2.Nfreqs // n_chan_to_avg) * n_chan_to_avg :] + ), + ) + if not future_shapes and not uvobj.flex_spw: + nch_ragged = ( + uvobj2.Nfreqs - (uvobj2.Nfreqs // n_chan_to_avg) * n_chan_to_avg + ) + expected_chan_widths = np.append( + expected_chan_widths, input_chan_width * nch_ragged + ) + else: + expected_chan_widths = np.append( + expected_chan_widths, + np.sum( + input_chan_width[ + (uvobj2.Nfreqs // n_chan_to_avg) * n_chan_to_avg : + ] + ), + ) + else: + assert uvobj.Nfreqs == (uvobj2.Nfreqs // n_chan_to_avg) - assert uvobj.Nfreqs == (uvobj2.Nfreqs // 7) + if not future_shapes and not keep_ragged: + expected_freqs = expected_freqs[np.newaxis, :] - if future_shapes: - expected_freqs = uvobj2.freq_array[np.arange((uvobj2.Nfreqs // 7) * 7)] - expected_freqs = expected_freqs.reshape(int(uvobj2.Nfreqs // 7), 7).mean(axis=1) - else: - expected_freqs = uvobj2.freq_array[:, np.arange((uvobj2.Nfreqs // 7) * 7)] - expected_freqs = expected_freqs.reshape(1, int(uvobj2.Nfreqs // 7), 7).mean( - axis=2 - ) assert np.max(np.abs(uvobj.freq_array - expected_freqs)) == 0 + assert np.max(np.abs(uvobj.channel_width - expected_chan_widths)) == 0 # no flagging, so the following is true - expected_data = uvobj2.get_data(1, 2, squeeze="none") - if future_shapes: - expected_data = expected_data[:, 0 : ((uvobj2.Nfreqs // 7) * 7), :] + initial_data = uvobj2.get_data(1, 2) + if flex_spw and respect_spws: reshape_tuple = ( - expected_data.shape[0], - int(uvobj2.Nfreqs // 7), - 7, + initial_data.shape[0], + int(spw_nchan // n_chan_to_avg), + n_chan_to_avg, uvobj2.Npols, ) - expected_data = expected_data.reshape(reshape_tuple).mean(axis=2) + for spw_ind in range(uvobj2.Nspws): + start_chan = spw_ind * spw_nchan + n_reg_chan = (spw_nchan // n_chan_to_avg) * n_chan_to_avg + + this_expected = initial_data[:, start_chan : (start_chan + n_reg_chan)] + if sum_corr: + this_expected = this_expected.reshape(reshape_tuple).sum(axis=2) + else: + this_expected = this_expected.reshape(reshape_tuple).mean(axis=2) + + if keep_ragged: + if sum_corr: + this_expected = np.append( + this_expected, + initial_data[ + :, (start_chan + n_reg_chan) : (spw_ind + 1) * spw_nchan + ].sum(axis=1, keepdims=True), + axis=1, + ) + else: + this_expected = np.append( + this_expected, + initial_data[ + :, (start_chan + n_reg_chan) : (spw_ind + 1) * spw_nchan + ].mean(axis=1, keepdims=True), + axis=1, + ) + if spw_ind == 0: + expected_data = this_expected + else: + expected_data = np.append(expected_data, this_expected, axis=1) else: - expected_data = expected_data[:, :, 0 : ((uvobj2.Nfreqs // 7) * 7), :] + expected_data = initial_data[ + :, 0 : ((uvobj2.Nfreqs // n_chan_to_avg) * n_chan_to_avg) + ] reshape_tuple = ( - expected_data.shape[0], - 1, - int(uvobj2.Nfreqs // 7), - 7, + initial_data.shape[0], + int(uvobj2.Nfreqs // n_chan_to_avg), + n_chan_to_avg, uvobj2.Npols, ) - expected_data = expected_data.reshape(reshape_tuple).mean(axis=3) + if sum_corr: + expected_data = expected_data.reshape(reshape_tuple).sum(axis=2) + else: + expected_data = expected_data.reshape(reshape_tuple).mean(axis=2) + + if keep_ragged: + if sum_corr: + expected_data = np.append( + expected_data, + initial_data[ + :, ((uvobj2.Nfreqs // n_chan_to_avg) * n_chan_to_avg) : + ].sum(axis=1, keepdims=True), + axis=1, + ) + else: + expected_data = np.append( + expected_data, + initial_data[ + :, ((uvobj2.Nfreqs // n_chan_to_avg) * n_chan_to_avg) : + ].mean(axis=1, keepdims=True), + axis=1, + ) + + if not future_shapes and not keep_ragged: + expected_data = expected_data[:, np.newaxis] + assert np.allclose(uvobj.get_data(1, 2, squeeze="none"), expected_data) assert np.nonzero(uvobj.flag_array)[0].size == 0 @@ -9555,7 +9766,11 @@ def test_frequency_average_uneven(casa_uvfits, future_shapes): @pytest.mark.filterwarnings("ignore:This method will be removed in version 3.0 when") @pytest.mark.filterwarnings("ignore:The uvw_array does not match the expected values") @pytest.mark.parametrize("future_shapes", [True, False]) -def test_frequency_average_flagging(casa_uvfits, future_shapes): +@pytest.mark.parametrize("keep_ragged", [True, False]) +@pytest.mark.parametrize("fully_flagged", [True, False]) +def test_frequency_average_flagging( + casa_uvfits, future_shapes, keep_ragged, fully_flagged +): """Test averaging in frequency with flagging all samples averaged.""" uvobj = casa_uvfits @@ -9566,91 +9781,102 @@ def test_frequency_average_flagging(casa_uvfits, future_shapes): # check that there's no flagging assert np.nonzero(uvobj.flag_array)[0].size == 0 + n_chan_to_avg = 3 + # apply some flagging for testing inds01 = uvobj.antpair2ind(1, 2) - if future_shapes: - uvobj.flag_array[inds01[0], 0:2, :] = True + if fully_flagged: + if future_shapes: + uvobj.flag_array[inds01[0], 0:n_chan_to_avg, :] = True + else: + uvobj.flag_array[inds01[0], :, 0:n_chan_to_avg, :] = True + assert np.nonzero(uvobj.flag_array)[0].size == uvobj.Npols * n_chan_to_avg else: - uvobj.flag_array[inds01[0], :, 0:2, :] = True - assert np.nonzero(uvobj.flag_array)[0].size == uvobj.Npols * 2 - - uvobj.frequency_average(2) - - assert uvobj.Nfreqs == (uvobj2.Nfreqs / 2) + if future_shapes: + uvobj.flag_array[inds01[0], 1:n_chan_to_avg, :] = True + else: + uvobj.flag_array[inds01[0], 0, 1:n_chan_to_avg, :] = True + assert np.nonzero(uvobj.flag_array)[0].size == uvobj.Npols * (n_chan_to_avg - 1) - if future_shapes: - expected_freqs = uvobj2.freq_array.reshape(int(uvobj2.Nfreqs / 2), 2).mean( - axis=1 - ) + if keep_ragged and not future_shapes: + warn = UserWarning + msg = [ + "Ragged frequencies will result in varying channel widths, so " + "this object will have future array shapes after averaging. " + "Use keep_ragged=False to avoid this." + ] else: - expected_freqs = uvobj2.freq_array.reshape(1, int(uvobj2.Nfreqs / 2), 2).mean( - axis=2 - ) - assert np.max(np.abs(uvobj.freq_array - expected_freqs)) == 0 + warn = None + msg = "" + with uvtest.check_warnings(warn, match=msg): + uvobj.frequency_average(n_chan_to_avg, keep_ragged=keep_ragged) - expected_data = uvobj2.get_data(1, 2, squeeze="none") - if future_shapes: - reshape_tuple = ( - expected_data.shape[0], - int(uvobj2.Nfreqs / 2), - 2, - uvobj2.Npols, - ) - expected_data = expected_data.reshape(reshape_tuple).mean(axis=2) - else: - reshape_tuple = ( - expected_data.shape[0], - 1, - int(uvobj2.Nfreqs / 2), - 2, - uvobj2.Npols, - ) - expected_data = expected_data.reshape(reshape_tuple).mean(axis=3) - assert np.allclose(uvobj.get_data(1, 2, squeeze="none"), expected_data) + input_freqs = np.squeeze(uvobj2.freq_array) - if future_shapes: - assert np.sum(uvobj.flag_array[inds01[0], 0, :]) == 4 - else: - assert np.sum(uvobj.flag_array[inds01[0], :, 0, :]) == 4 - assert np.nonzero(uvobj.flag_array)[0].size == uvobj.Npols - if future_shapes: - assert np.nonzero(uvobj.flag_array[inds01[1:], 0, :])[0].size == 0 + expected_freqs = input_freqs[ + np.arange((uvobj2.Nfreqs // n_chan_to_avg) * n_chan_to_avg) + ] + expected_freqs = expected_freqs.reshape( + int(uvobj2.Nfreqs // n_chan_to_avg), n_chan_to_avg + ).mean(axis=1) + + if keep_ragged: + assert uvobj.Nfreqs == (uvobj2.Nfreqs // n_chan_to_avg + 1) + expected_freqs = np.append( + expected_freqs, + np.mean(input_freqs[(uvobj2.Nfreqs // n_chan_to_avg) * n_chan_to_avg :]), + ) else: - assert np.nonzero(uvobj.flag_array[inds01[1:], :, 0, :])[0].size == 0 - + assert uvobj.Nfreqs == (uvobj2.Nfreqs // n_chan_to_avg) -@pytest.mark.filterwarnings("ignore:The uvw_array does not match the expected values") -def test_frequency_average_flagging_partial(casa_uvfits): - """Test averaging in frequency with flagging only one sample averaged.""" - uvobj = casa_uvfits - uvobj2 = uvobj.copy() - # check that there's no flagging - assert np.nonzero(uvobj.flag_array)[0].size == 0 + if not future_shapes and not keep_ragged: + expected_freqs = expected_freqs[np.newaxis, :] - # apply some flagging for testing - inds01 = uvobj.antpair2ind(1, 2) - uvobj.flag_array[inds01[0], 0, :] = True - assert np.nonzero(uvobj.flag_array)[0].size == uvobj.Npols + assert np.max(np.abs(uvobj.freq_array - expected_freqs)) == 0 - uvobj.frequency_average(2) + initial_data = uvobj2.get_data(1, 2) + expected_data = initial_data[ + :, 0 : ((uvobj2.Nfreqs // n_chan_to_avg) * n_chan_to_avg) + ] + reshape_tuple = ( + initial_data.shape[0], + int(uvobj2.Nfreqs // n_chan_to_avg), + n_chan_to_avg, + uvobj2.Npols, + ) + expected_data = expected_data.reshape(reshape_tuple).mean(axis=2) - assert uvobj.Nfreqs == (uvobj2.Nfreqs / 2) + if keep_ragged: + expected_data = np.append( + expected_data, + initial_data[:, ((uvobj2.Nfreqs // n_chan_to_avg) * n_chan_to_avg) :].mean( + axis=1, keepdims=True + ), + axis=1, + ) + if not fully_flagged: + if future_shapes: + expected_data[0, 0, :] = uvobj2.data_array[inds01[0], 0, :] + else: + expected_data[0, 0, :] = uvobj2.data_array[inds01[0], 0, 0, :] - # TODO: Spw axis to be collapsed in future release - expected_freqs = uvobj2.freq_array.reshape(1, int(uvobj2.Nfreqs / 2), 2).mean( - axis=2 - ) - assert np.max(np.abs(uvobj.freq_array - expected_freqs)) == 0 + if not future_shapes and not keep_ragged: + expected_data = expected_data[:, np.newaxis] - expected_data = uvobj2.get_data(1, 2, squeeze="none") - # TODO: Spw axis to be collapsed in future release - reshape_tuple = (expected_data.shape[0], int(uvobj2.Nfreqs / 2), 2, uvobj2.Npols) - expected_data = expected_data.reshape(reshape_tuple).mean(axis=2) - expected_data[0, 0, :] = uvobj2.data_array[inds01[0], 1, :] assert np.allclose(uvobj.get_data(1, 2, squeeze="none"), expected_data) - # check that there's no flagging - assert np.nonzero(uvobj.flag_array)[0].size == 0 + if fully_flagged: + if future_shapes or keep_ragged: + assert np.sum(uvobj.flag_array[inds01[0], 0, :]) == 4 + else: + assert np.sum(uvobj.flag_array[inds01[0], :, 0, :]) == 4 + assert np.nonzero(uvobj.flag_array)[0].size == uvobj.Npols + if future_shapes or keep_ragged: + assert np.nonzero(uvobj.flag_array[inds01[1:], 0, :])[0].size == 0 + else: + assert np.nonzero(uvobj.flag_array[inds01[1:], :, 0, :])[0].size == 0 + else: + assert np.nonzero(uvobj.flag_array)[0].size == 0 @pytest.mark.filterwarnings("ignore:The uvw_array does not match the expected values") @@ -9669,7 +9895,7 @@ def test_frequency_average_flagging_full_and_partial(casa_uvfits): uvobj.flag_array[inds01[0], 0:3, :] = True assert np.nonzero(uvobj.flag_array)[0].size == uvobj.Npols * 3 - uvobj.frequency_average(2) + uvobj.frequency_average(n_chan_to_avg=2) assert uvobj.Nfreqs == (uvobj2.Nfreqs / 2) @@ -9706,10 +9932,10 @@ def test_frequency_average_flagging_partial_twostage(casa_uvfits): uv_object3 = uvobj.copy() - uvobj.frequency_average(2) - uvobj.frequency_average(2) + uvobj.frequency_average(n_chan_to_avg=2) + uvobj.frequency_average(n_chan_to_avg=2) - uv_object3.frequency_average(4) + uv_object3.frequency_average(n_chan_to_avg=4) assert uvobj == uv_object3 @@ -9717,61 +9943,8 @@ def test_frequency_average_flagging_partial_twostage(casa_uvfits): @pytest.mark.filterwarnings("ignore:This method will be removed in version 3.0 when") @pytest.mark.filterwarnings("ignore:The uvw_array does not match the expected values") @pytest.mark.parametrize("future_shapes", [True, False]) -def test_frequency_average_summing_corr_mode(casa_uvfits, future_shapes): - """Test averaging in frequency.""" - # check that there's no flagging - uvobj = casa_uvfits - - if not future_shapes: - uvobj.use_current_array_shapes() - uvobj2 = uvobj.copy() - - assert np.nonzero(uvobj.flag_array)[0].size == 0 - - uvobj.frequency_average(2, summing_correlator_mode=True) - - assert uvobj.Nfreqs == (uvobj2.Nfreqs / 2) - - if future_shapes: - expected_freqs = uvobj2.freq_array.reshape(int(uvobj2.Nfreqs / 2), 2).mean( - axis=1 - ) - else: - expected_freqs = uvobj2.freq_array.reshape(1, int(uvobj2.Nfreqs / 2), 2).mean( - axis=2 - ) - assert np.max(np.abs(uvobj.freq_array - expected_freqs)) == 0 - - # no flagging, so the following is true - expected_data = uvobj2.get_data(1, 2, squeeze="none") - if future_shapes: - reshape_tuple = ( - expected_data.shape[0], - int(uvobj2.Nfreqs / 2), - 2, - uvobj2.Npols, - ) - expected_data = expected_data.reshape(reshape_tuple).sum(axis=2) - else: - reshape_tuple = ( - expected_data.shape[0], - 1, - int(uvobj2.Nfreqs / 2), - 2, - uvobj2.Npols, - ) - expected_data = expected_data.reshape(reshape_tuple).sum(axis=3) - assert np.allclose(uvobj.get_data(1, 2, squeeze="none"), expected_data) - - assert np.nonzero(uvobj.flag_array)[0].size == 0 - assert not isinstance(uvobj.data_array, np.ma.MaskedArray) - assert not isinstance(uvobj.nsample_array, np.ma.MaskedArray) - - -@pytest.mark.filterwarnings("ignore:This method will be removed in version 3.0 when") -@pytest.mark.filterwarnings("ignore:The uvw_array does not match the expected values") -@pytest.mark.parametrize("future_shapes", [True, False]) -def test_frequency_average_propagate_flags(casa_uvfits, future_shapes): +@pytest.mark.parametrize("keep_ragged", [True, False]) +def test_frequency_average_propagate_flags(casa_uvfits, future_shapes, keep_ragged): """ Test averaging in frequency with flagging all of one and only one of another sample averaged, and propagating flags. Data should be identical, @@ -9787,51 +9960,86 @@ def test_frequency_average_propagate_flags(casa_uvfits, future_shapes): # check that there's no flagging assert np.nonzero(uvobj.flag_array)[0].size == 0 + n_chan_to_avg = 3 + # apply some flagging for testing inds01 = uvobj.antpair2ind(1, 2) if future_shapes: - uvobj.flag_array[inds01[0], 0:3, :] = True + uvobj.flag_array[inds01[0], 0 : (n_chan_to_avg * 2 - 1), :] = True else: - uvobj.flag_array[inds01[0], :, 0:3, :] = True + uvobj.flag_array[inds01[0], :, 0 : (n_chan_to_avg * 2 - 1), :] = True - assert np.nonzero(uvobj.flag_array)[0].size == uvobj.Npols * 3 + assert np.nonzero(uvobj.flag_array)[0].size == uvobj.Npols * (n_chan_to_avg * 2 - 1) - uvobj.frequency_average(2, propagate_flags=True) + if keep_ragged and not future_shapes: + warn = UserWarning + msg = [ + "Ragged frequencies will result in varying channel widths, so " + "this object will have future array shapes after averaging. " + "Use keep_ragged=False to avoid this." + ] + else: + warn = None + msg = "" + with uvtest.check_warnings(warn, match=msg): + uvobj.frequency_average( + n_chan_to_avg, propagate_flags=True, keep_ragged=keep_ragged + ) - assert uvobj.Nfreqs == (uvobj2.Nfreqs / 2) + input_freqs = np.squeeze(uvobj2.freq_array) - if future_shapes: - expected_freqs = uvobj2.freq_array.reshape(int(uvobj2.Nfreqs / 2), 2).mean( - axis=1 + expected_freqs = input_freqs[ + np.arange((uvobj2.Nfreqs // n_chan_to_avg) * n_chan_to_avg) + ] + expected_freqs = expected_freqs.reshape( + int(uvobj2.Nfreqs // n_chan_to_avg), n_chan_to_avg + ).mean(axis=1) + + if keep_ragged: + assert uvobj.Nfreqs == (uvobj2.Nfreqs // n_chan_to_avg + 1) + expected_freqs = np.append( + expected_freqs, + np.mean(input_freqs[(uvobj2.Nfreqs // n_chan_to_avg) * n_chan_to_avg :]), ) else: - expected_freqs = uvobj2.freq_array.reshape(1, int(uvobj2.Nfreqs / 2), 2).mean( - axis=2 - ) + assert uvobj.Nfreqs == (uvobj2.Nfreqs // n_chan_to_avg) + + if not future_shapes and not keep_ragged: + expected_freqs = expected_freqs[np.newaxis, :] + assert np.max(np.abs(uvobj.freq_array - expected_freqs)) == 0 - expected_data = uvobj2.get_data(1, 2, squeeze="none") - if future_shapes: - reshape_tuple = ( - expected_data.shape[0], - int(uvobj2.Nfreqs / 2), - 2, - uvobj2.Npols, - ) - expected_data = expected_data.reshape(reshape_tuple).mean(axis=2) + initial_data = uvobj2.get_data(1, 2) + expected_data = initial_data[ + :, 0 : ((uvobj2.Nfreqs // n_chan_to_avg) * n_chan_to_avg) + ] + reshape_tuple = ( + initial_data.shape[0], + int(uvobj2.Nfreqs // n_chan_to_avg), + n_chan_to_avg, + uvobj2.Npols, + ) + expected_data = expected_data.reshape(reshape_tuple).mean(axis=2) - expected_data[0, 1, :] = uvobj2.data_array[inds01[0], 3, :] + if future_shapes: + freq_axis = 0 else: - reshape_tuple = ( - expected_data.shape[0], - 1, - int(uvobj2.Nfreqs / 2), - 2, - uvobj2.Npols, + freq_axis = 1 + expected_data[0, 1, :] = np.take( + uvobj2.data_array[inds01[0]], n_chan_to_avg * 2 - 1, axis=freq_axis + ) + + if keep_ragged: + expected_data = np.append( + expected_data, + initial_data[:, ((uvobj2.Nfreqs // n_chan_to_avg) * n_chan_to_avg) :].mean( + axis=1, keepdims=True + ), + axis=1, ) - expected_data = expected_data.reshape(reshape_tuple).mean(axis=3) - expected_data[0, :, 1, :] = uvobj2.data_array[inds01[0], :, 3, :] + if not future_shapes and not keep_ragged: + expected_data = expected_data[:, np.newaxis] assert np.allclose(uvobj.get_data(1, 2, squeeze="none"), expected_data) # Twice as many flags should exist compared to test of previous name. @@ -9856,7 +10064,7 @@ def test_frequency_average_nsample_precision(casa_uvfits): uvobj.nsample_array = uvobj.nsample_array.astype(np.float16) with uvtest.check_warnings(UserWarning, "eq_coeffs vary by frequency"): - uvobj.frequency_average(2), + uvobj.frequency_average(n_chan_to_avg=2), assert uvobj.Nfreqs == (uvobj2.Nfreqs / 2) @@ -9886,44 +10094,40 @@ def test_frequency_average_nsample_precision(casa_uvfits): assert uvobj.nsample_array.dtype.type is np.float16 -def test_frequency_average_flex_spw(sma_mir, casa_uvfits): - """ - Test that freq averaging works correctly when called on a flex_spw data set - (currently not implented). - """ - with pytest.raises( - NotImplementedError, - match=re.escape( - "Frequency averaging not (yet) available for multiple spectral windows" - ), - ): - sma_mir.frequency_average(2) - - # also test errors with varying freq spacing but with one spw +def test_frequency_average_warnings(casa_uvfits): + # test errors with varying freq spacing but with one spw uvd = casa_uvfits.copy() uvd.use_future_array_shapes() uvd.freq_array[-1] += uvd.channel_width[0] - with pytest.raises( - NotImplementedError, - match=re.escape( - "Frequency averaging not (yet) available for unevenly spaced " - "frequencies or varying channel widths." - ), + with uvtest.check_warnings( + [DeprecationWarning, UserWarning], + match=[ + re.escape( + "Some spectral windows do not divide evenly by `n_chan_to_avg` and the " + "keep_ragged parameter was not set. The current default is False, but " + "that will be changing to True in version 2.5. Set `keep_ragged` to " + "True or False to silence this warning." + ), + re.escape( + "Frequencies spacing and/or channel widths vary, so after averaging " + "they will also vary." + ), + ], ): - uvd.frequency_average(2) + uvd.frequency_average(n_chan_to_avg=3) # also test errors with varying channel widths but with one spw uvd = casa_uvfits.copy() uvd.use_future_array_shapes() uvd.channel_width[-1] *= 2 - with pytest.raises( - NotImplementedError, + with uvtest.check_warnings( + UserWarning, match=re.escape( - "Frequency averaging not (yet) available for unevenly spaced " - "frequencies or varying channel widths." + "Frequencies spacing and/or channel widths vary, so after averaging " + "they will also vary." ), ): - uvd.frequency_average(2) + uvd.frequency_average(n_chan_to_avg=2) # test warning with freq spacing not equal to channel width uvd = casa_uvfits.copy() @@ -9936,7 +10140,7 @@ def test_frequency_average_flex_spw(sma_mir, casa_uvfits): "spacing." ), ): - uvd.frequency_average(2) + uvd.frequency_average(n_chan_to_avg=2) spacing_error, chanwidth_error = uvd._check_freq_spacing(raise_errors=False) assert not spacing_error diff --git a/pyuvdata/uvdata/uvdata.py b/pyuvdata/uvdata/uvdata.py index b2ad278dba..2dfa615f05 100644 --- a/pyuvdata/uvdata/uvdata.py +++ b/pyuvdata/uvdata/uvdata.py @@ -10401,7 +10401,12 @@ def resample_in_time( return def frequency_average( - self, n_chan_to_avg, summing_correlator_mode=False, propagate_flags=False + self, + n_chan_to_avg, + summing_correlator_mode=False, + propagate_flags=False, + respect_spws=True, + keep_ragged=None, ): """ Average in frequency. @@ -10417,30 +10422,52 @@ def frequency_average( Parameters ---------- n_chan_to_avg : int - Number of channels to average together. If Nfreqs does not divide - evenly by this number, the frequencies at the end of the freq_array - will be dropped to make it evenly divisable. To control which - frequencies are removed, use select before calling this method. + Number of channels to average together. See the keep_ragged parameter for + the handling if the number of frequencies per spectral window does not + divide evenly by this number. summing_correlator_mode : bool - Option to integrate or split the flux from the original samples - rather than average or duplicate the flux from the original samples - to emulate the behavior in some correlators (e.g. HERA). + Option to integrate the flux from the original samples rather than average + the flux from the original samples to emulate the behavior in some + correlators (e.g. HERA). propagate_flags: bool Option to flag an averaged entry even if some of its contributors are not flagged. The averaged result will still leave the flagged samples out of the average, except when all contributors are flagged. - """ - if self.Nspws > 1: - raise NotImplementedError( - "Frequency averaging not (yet) available for multiple spectral windows" - ) + respect_spws : bool + Option to respect spectral window boundaries when averaging. If True, do not + average across spectral window boundaries. Setting this to False will result + in the averaged object having a single spectral window. + keep_ragged : bool + If the number of frequencies in each spectral window (or Nfreqs if + respect_spw=False) does not divide evenly by n_chan_to_avg, this + option controls whether the frequencies at the end of the spectral window + will be dropped to make it evenly divisable (keep_ragged=False) or will be + combined into a smaller frequency bin (keep_ragged=True). Note that if + ragged frequencies are kept, the final averaged object will have + future_array_shapes=True because it will have varying channel widths. + """ + kr_use_default = False + if keep_ragged is None: + kr_use_default = True + keep_ragged = False + + reset_cs = False + if not self.future_array_shapes: + self.use_future_array_shapes() + reset_cs = True + + if self.Nspws > 1 and not respect_spws: + # Put everything in one spectral window. + self.Nspws = 1 + self.flex_spw_id_array = np.zeros(self.Nfreqs, dtype=int) + self.spw_array = np.array([0]) spacing_error, chanwidth_error = self._check_freq_spacing(raise_errors=False) if spacing_error: - raise NotImplementedError( - "Frequency averaging not (yet) available for unevenly spaced " - "frequencies or varying channel widths." + warnings.warn( + "Frequencies spacing and/or channel widths vary, so after averaging " + "they will also vary." ) elif chanwidth_error: warnings.warn( @@ -10449,37 +10476,6 @@ def frequency_average( "spacing." ) - n_final_chan = int(np.floor(self.Nfreqs / n_chan_to_avg)) - nfreq_mod_navg = self.Nfreqs % n_chan_to_avg - if nfreq_mod_navg != 0: - # not an even number of final channels - warnings.warn( - "Nfreqs does not divide by `n_chan_to_avg` evenly. " - "The final {} frequencies will be excluded, to " - "control which frequencies to exclude, use a " - "select to control.".format(nfreq_mod_navg) - ) - chan_to_keep = np.arange(n_final_chan * n_chan_to_avg) - self.select(freq_chans=chan_to_keep) - - if self.future_array_shapes: - self.freq_array = self.freq_array.reshape( - (n_final_chan, n_chan_to_avg) - ).mean(axis=1) - self.channel_width = self.channel_width.reshape( - (n_final_chan, n_chan_to_avg) - ).sum(axis=1) - else: - self.freq_array = self.freq_array.reshape( - (1, n_final_chan, n_chan_to_avg) - ).mean(axis=2) - self.channel_width = self.channel_width * n_chan_to_avg - self.Nfreqs = n_final_chan - if self.flex_spw_id_array is not None: - self.flex_spw_id_array = self.flex_spw_id_array.reshape( - (n_final_chan, n_chan_to_avg) - ).mean(axis=1) - if self.eq_coeffs is not None: eq_coeff_diff = np.diff(self.eq_coeffs, axis=1) if np.abs(np.max(eq_coeff_diff)) > 0: @@ -10488,109 +10484,232 @@ def frequency_average( "applied to the data using `remove_eq_coeffs` " "before frequency averaging." ) - self.eq_coeffs = self.eq_coeffs.reshape( - (self.Nants_telescope, n_final_chan, n_chan_to_avg) - ).mean(axis=2) - if not self.metadata_only: - if self.future_array_shapes: - shape_tuple = (self.Nblts, n_final_chan, n_chan_to_avg, self.Npols) + nchans_spw = np.zeros(self.Nspws, dtype=int) + final_nchan = 0 + spw_chans = {} + final_spw_chans = {} + some_uneven = False + for spw_ind, spw in enumerate(self.spw_array): + these_inds = np.nonzero(self.flex_spw_id_array == spw)[0] + spw_chans[spw] = these_inds + nchans_spw[spw_ind] = these_inds.size + if nchans_spw[spw_ind] % n_chan_to_avg: + some_uneven = True + if keep_ragged: + this_final_nchan = int(np.ceil(nchans_spw[spw_ind] / n_chan_to_avg)) else: - shape_tuple = (self.Nblts, 1, n_final_chan, n_chan_to_avg, self.Npols) + this_final_nchan = int(np.floor(nchans_spw[spw_ind] / n_chan_to_avg)) + final_spw_chans[spw] = np.arange( + final_nchan, final_nchan + this_final_nchan + ) + final_nchan += this_final_nchan - mask = self.flag_array.reshape(shape_tuple) + if some_uneven and kr_use_default: + warnings.warn( + "Some spectral windows do not divide evenly by `n_chan_to_avg` and the " + "keep_ragged parameter was not set. The current default is False, but " + "that will be changing to True in version 2.5. Set `keep_ragged` to " + "True or False to silence this warning.", + DeprecationWarning, + ) + if some_uneven and keep_ragged and reset_cs: + warnings.warn( + "Ragged frequencies will result in varying channel widths, so " + "this object will have future array shapes after averaging. " + "Use keep_ragged=False to avoid this." + ) - if propagate_flags: - # if any contributors are flagged, the result should be flagged - if self.future_array_shapes: - self.flag_array = np.any( - self.flag_array.reshape(shape_tuple), axis=2 - ) + final_freq_array = np.zeros(final_nchan, dtype=float) + final_channel_width = np.zeros(final_nchan, dtype=float) + final_flex_spw_id_array = np.zeros(final_nchan, dtype=int) + if self.eq_coeffs is not None: + final_eq_coeffs = np.zeros((self.Nants_telescope, final_nchan), dtype=float) + + if not self.metadata_only: + final_shape_tuple = (self.Nblts, final_nchan, self.Npols) + final_flag_array = np.full(final_shape_tuple, False, dtype=bool) + final_data_array = np.zeros(final_shape_tuple, dtype=self.data_array.dtype) + final_nsample_array = np.zeros( + final_shape_tuple, dtype=self.nsample_array.dtype + ) + + for spw_ind, spw in enumerate(self.spw_array): + n_final_chan_reg = int(np.floor(nchans_spw[spw_ind] / n_chan_to_avg)) + nfreq_mod_navg = nchans_spw[spw_ind] % n_chan_to_avg + these_inds = spw_chans[spw] + this_ragged = False + regular_inds = these_inds + irregular_inds = np.array([]) + this_final_reg_inds = final_spw_chans[spw] + if nfreq_mod_navg != 0: + # not an even number of final channels + regular_inds = these_inds[0 : n_final_chan_reg * n_chan_to_avg] + if not keep_ragged: + these_inds = regular_inds else: - self.flag_array = np.any( - self.flag_array.reshape(shape_tuple), axis=3 + this_ragged = True + irregular_inds = these_inds[n_final_chan_reg * n_chan_to_avg :] + this_final_reg_inds = this_final_reg_inds[:-1] + + final_freq_array[this_final_reg_inds] = ( + self.freq_array[regular_inds] + .reshape((n_final_chan_reg, n_chan_to_avg)) + .mean(axis=1) + ) + final_channel_width[this_final_reg_inds] = ( + self.channel_width[regular_inds] + .reshape((n_final_chan_reg, n_chan_to_avg)) + .sum(axis=1) + ) + if this_ragged: + final_freq_array[final_spw_chans[spw][-1]] = np.mean( + self.freq_array[irregular_inds] + ) + final_channel_width[final_spw_chans[spw][-1]] = np.sum( + self.channel_width[irregular_inds] + ) + + final_flex_spw_id_array[final_spw_chans[spw]] = spw + + if self.eq_coeffs is not None: + final_eq_coeffs[:, this_final_reg_inds] = ( + self.eq_coeffs[:, regular_inds] + .reshape((self.Nants_telescope, n_final_chan_reg, n_chan_to_avg)) + .mean(axis=2) + ) + if this_ragged: + final_eq_coeffs[:, final_spw_chans[spw][-1]] = np.mean( + self.eq_coeffs[:, irregular_inds], axis=1 ) - else: - # if all inputs are flagged, the flag array should be True, - # otherwise it should be False. - # The sum below will be zero if it's all flagged and - # greater than zero otherwise - # Then we use a test against 0 to turn it into a Boolean - if self.future_array_shapes: - self.flag_array = ( - np.sum(~self.flag_array.reshape(shape_tuple), axis=2) == 0 + + if not self.metadata_only: + shape_tuple = (self.Nblts, n_final_chan_reg, n_chan_to_avg, self.Npols) + + reg_mask = self.flag_array[:, regular_inds].reshape(shape_tuple) + if this_ragged: + irreg_mask = self.flag_array[:, irregular_inds] + + if propagate_flags: + # if any contributors are flagged, the result should be flagged + final_flag_array[:, this_final_reg_inds] = np.any( + self.flag_array[:, regular_inds].reshape(shape_tuple), axis=2 ) + if this_ragged: + final_flag_array[:, final_spw_chans[spw][-1]] = np.any( + self.flag_array[:, irregular_inds], axis=1 + ) else: - self.flag_array = ( - np.sum(~self.flag_array.reshape(shape_tuple), axis=3) == 0 + # if all inputs are flagged, the flag array should be True, + # otherwise it should be False. + final_flag_array[:, this_final_reg_inds] = np.all( + self.flag_array[:, regular_inds].reshape(shape_tuple), axis=2 ) + if this_ragged: + final_flag_array[:, final_spw_chans[spw][-1]] = np.all( + self.flag_array[:, irregular_inds], axis=1 + ) - # need to update mask if a downsampled visibility will be flagged - # so that we don't set it to zero - for n_chan in np.arange(n_final_chan): - if self.future_array_shapes: - if (self.flag_array[:, n_chan]).any(): - ax0_inds, ax2_inds = np.nonzero(self.flag_array[:, n_chan, :]) - # Only if all entries are masked - # May not happen due to propagate_flags keyword - # mask should be left alone otherwise - if np.all(mask[ax0_inds, n_chan, :, ax2_inds]): - mask[ax0_inds, n_chan, :, ax2_inds] = False - else: - if (self.flag_array[:, :, n_chan]).any(): - ax0_inds, ax1_inds, ax3_inds = np.nonzero( - self.flag_array[:, :, n_chan, :] + # need to update mask if a downsampled visibility will be flagged + # so that we don't set it to zero + for chan_ind in np.arange(n_final_chan_reg): + this_chan = final_spw_chans[spw][chan_ind] + if (final_flag_array[:, this_chan]).any(): + ax0_inds, ax2_inds = np.nonzero( + final_flag_array[:, this_chan, :] ) # Only if all entries are masked # May not happen due to propagate_flags keyword # mask should be left alone otherwise - if np.all(mask[ax0_inds, ax1_inds, n_chan, :, ax3_inds]): - mask[ax0_inds, ax1_inds, n_chan, :, ax3_inds] = False - - masked_data = np.ma.masked_array( - self.data_array.reshape(shape_tuple), mask=mask - ) + fully_flagged = np.all( + reg_mask[ax0_inds, this_chan, :, ax2_inds], axis=1 + ) + ff_inds = np.nonzero(fully_flagged) + reg_mask[ + ax0_inds[ff_inds], this_chan, :, ax2_inds[ff_inds] + ] = False + if this_ragged: + ax0_inds, ax2_inds = np.nonzero( + final_flag_array[:, final_spw_chans[spw][-1], :] + ) + fully_flagged = np.all(irreg_mask[ax0_inds, :, ax2_inds], axis=1) + ff_inds = np.nonzero(fully_flagged) + irreg_mask[ax0_inds[ff_inds], :, ax2_inds[ff_inds]] = False - self.nsample_array = self.nsample_array.reshape(shape_tuple) - # promote nsample dtype if half-precision - nsample_dtype = self.nsample_array.dtype.type - if nsample_dtype is np.float16: - masked_nsample_dtype = np.float32 - else: - masked_nsample_dtype = nsample_dtype - masked_nsample = np.ma.masked_array( - self.nsample_array, mask=mask, dtype=masked_nsample_dtype - ) + masked_reg_data = np.ma.masked_array( + self.data_array[:, regular_inds].reshape(shape_tuple), mask=reg_mask + ) + if this_ragged: + masked_irreg_data = np.ma.masked_array( + self.data_array[:, irregular_inds], mask=irreg_mask + ) - if summing_correlator_mode: - if self.future_array_shapes: - self.data_array = np.sum(masked_data, axis=2).data + # promote nsample dtype if half-precision + nsample_dtype = self.nsample_array.dtype.type + if nsample_dtype is np.float16: + masked_nsample_dtype = np.float32 else: - self.data_array = np.sum(masked_data, axis=3).data - else: - # need to weight by the nsample_array - if self.future_array_shapes: - self.data_array = ( - np.sum(masked_data * masked_nsample, axis=2) - / np.sum(masked_nsample, axis=2) + masked_nsample_dtype = nsample_dtype + masked_reg_nsample = np.ma.masked_array( + self.nsample_array[:, regular_inds].reshape(shape_tuple), + mask=reg_mask, + dtype=masked_nsample_dtype, + ) + if this_ragged: + masked_irreg_nsample = np.ma.masked_array( + self.nsample_array[:, irregular_inds], + mask=irreg_mask, + dtype=masked_nsample_dtype, + ) + + if summing_correlator_mode: + final_data_array[:, this_final_reg_inds] = np.sum( + masked_reg_data, axis=2 ).data + if this_ragged: + final_data_array[:, final_spw_chans[spw][-1]] = np.sum( + masked_irreg_data, axis=1 + ).data else: - self.data_array = ( - np.sum(masked_data * masked_nsample, axis=3) - / np.sum(masked_nsample, axis=3) + # need to weight by the nsample_array + final_data_array[:, this_final_reg_inds] = ( + np.sum(masked_reg_data * masked_reg_nsample, axis=2) + / np.sum(masked_reg_nsample, axis=2) ).data - - # nsample array is the fraction of data that we actually kept, - # relative to the amount that went into the sum or average. - # Need to take care to return precision back to original value. - if self.future_array_shapes: - self.nsample_array = ( - np.sum(masked_nsample, axis=2) / float(n_chan_to_avg) - ).data.astype(nsample_dtype) - else: - self.nsample_array = ( - np.sum(masked_nsample, axis=3) / float(n_chan_to_avg) + if this_ragged: + final_data_array[:, final_spw_chans[spw][-1]] = ( + np.sum(masked_irreg_data * masked_irreg_nsample, axis=1) + / np.sum(masked_irreg_nsample, axis=1) + ).data + + # nsample array is the fraction of data that we actually kept, + # relative to the amount that went into the sum or average. + # Need to take care to return precision back to original value. + final_nsample_array[:, this_final_reg_inds] = ( + np.sum(masked_reg_nsample, axis=2) / float(n_chan_to_avg) ).data.astype(nsample_dtype) + if this_ragged: + final_nsample_array[:, final_spw_chans[spw][-1]] = ( + np.sum(masked_irreg_nsample, axis=1) / irregular_inds.size + ).data.astype(nsample_dtype) + + self.freq_array = final_freq_array + self.channel_width = final_channel_width + self.flex_spw_id_array = final_flex_spw_id_array + if self.eq_coeffs is not None: + self.eq_coeffs = final_eq_coeffs + + if not self.metadata_only: + self.flag_array = final_flag_array + self.data_array = final_data_array + self.nsample_array = final_nsample_array + + self.Nfreqs = final_nchan + + if reset_cs and not (some_uneven and keep_ragged): + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "This method will be removed") + self.use_current_array_shapes() def get_redundancies( self, From 02120bfeca24a65780d0156a5b05806e6a90f9c3 Mon Sep 17 00:00:00 2001 From: Bryna Hazelton Date: Thu, 15 Jun 2023 16:24:25 -0700 Subject: [PATCH 2/4] fix the tutorial --- .circleci/config.yml | 2 +- docs/uvdata_tutorial.rst | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 2bba91e7c4..041cbbdf3b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -114,7 +114,7 @@ jobs: command: | source ./ci/_activate_current_env.sh cd docs - python -m pytest -n 2 --doctest-glob="*.rst" -W "error::DeprecationWarning" + python -m pytest -n auto --doctest-glob="*.rst" -W "error::DeprecationWarning" cd .. - save_cache: key: deps-{{ .Branch }}-{{ checksum "ci/pyuvdata_tests.yml" }} diff --git a/docs/uvdata_tutorial.rst b/docs/uvdata_tutorial.rst index 78f6149109..41676d31e3 100644 --- a/docs/uvdata_tutorial.rst +++ b/docs/uvdata_tutorial.rst @@ -768,6 +768,12 @@ c) Resampling a BDA dataset in time d) Averaging in frequency ************************* +The :meth:`pyuvdata.UVData.frequency_average` method takes a number of channels to +average together. Use the `keep_ragged` parameter to control the handling if the +number of frequencies in each spectral window does not divide evenly by the number of +channels to be averaged together. Use the `respect_spws` parameter to control whether +averaging will be done over spectral window boundaries. + .. code-block:: python >>> import os @@ -780,7 +786,7 @@ d) Averaging in frequency Channel width: [122070.3125 122070.3125 122070.3125 122070.3125] >>> # Average by a factor of 2 in frequency - >>> uvd.frequency_average(2) + >>> uvd.frequency_average(n_chan_to_avg=2, keep_ragged=True) >>> print("Channel width after frequency averaging: ", uvd.channel_width) Channel width after frequency averaging: [244140.625 244140.625] @@ -789,8 +795,7 @@ UVData: Plotting Making a simple waterfall plot. Note: there is now support for reading in only part of a uvfits, uvh5 or miriad file -(see :ref:`large_files`), so you need not read in the -entire file to plot one waterfall. +(see :ref:`large_files`), so you need not read in the entire file to plot one waterfall. .. code-block:: python @@ -802,7 +807,6 @@ entire file to plot one waterfall. >>> filename = os.path.join(DATA_PATH, 'day2_TDEM0003_10s_norx_1src_1spw.uvfits') >>> uvd = UVData.from_file(filename, use_future_array_shapes=True) - >>> # Note that the length of the array along axis=1 is always 1. >>> print(uvd.data_array.shape) (1360, 64, 4) >>> print(uvd.Ntimes) From 24f22d10f25b6e9879137f1f77ed179b612be209 Mon Sep 17 00:00:00 2001 From: Bryna Hazelton Date: Fri, 16 Jun 2023 15:08:18 -0700 Subject: [PATCH 3/4] update changelog for frequency_average improvements. --- CHANGELOG.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c7fccfd2fc..7e11d1845d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file. ## [Unreleased] ### Added +- Support for multiple spectral windows in `UVData.frequency_average`, including a new +parameter `respect_spws` for controlling whether averaging crosses spectral window +boundaries. +- Better handling in `UVData.frequency_average` when averaging by a number of channels +that does not divide evenly into the number of channels in each spectral window. - Compatibility with Python 3.11 ### Changed @@ -20,7 +25,8 @@ All notable changes to this project will be documented in this file. methods that mostly wrap other methods. - A new `UVCal.new()` method (based on new function `new_uvcal`) that creates a new, self-consistent `UVCal` object from scratch from a set of flexible input parameters. -- A new `UVData.new()` method (based on new function `new_uvdata`) that creates a new, self-consistent `UVData` object from scratch from a set of flexible input parameters. +- A new `UVData.new()` method (based on new function `new_uvdata`) that creates a new, +self-consistent `UVData` object from scratch from a set of flexible input parameters. - A new `fast_concat` method on `UVCal`. - A new generic `read` method on `UVCal` that, like the `read` methods on our other objects, supports all file types and a new `from_file` class method to allow one-line From 2bcf9349ddfe68911f6fd0f0ee4119576c2e262e Mon Sep 17 00:00:00 2001 From: Bryna Hazelton Date: Fri, 30 Jun 2023 13:36:02 -0700 Subject: [PATCH 4/4] Add comments to frequency_average method --- pyuvdata/uvdata/uvdata.py | 50 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/pyuvdata/uvdata/uvdata.py b/pyuvdata/uvdata/uvdata.py index 2dfa615f05..deeeb41cd3 100644 --- a/pyuvdata/uvdata/uvdata.py +++ b/pyuvdata/uvdata/uvdata.py @@ -10414,8 +10414,7 @@ def frequency_average( Does a simple average over an integer number of input channels, leaving flagged samples out of the average. - In the future, this method will support non-equally spaced channels - and varying channel widths. It will also support setting the frequency + In the future, this method will support setting the frequency to the true mean of the averaged non-flagged frequencies rather than the simple mean of the input channel frequencies. For now it does not. @@ -10446,12 +10445,18 @@ def frequency_average( combined into a smaller frequency bin (keep_ragged=True). Note that if ragged frequencies are kept, the final averaged object will have future_array_shapes=True because it will have varying channel widths. + """ + # The default will be changing soon, so it's currently set to None in the + # function signature so we can detect that it's not set and warn about the + # changing default. kr_use_default = False if keep_ragged is None: kr_use_default = True keep_ragged = False + # All the logic with future array shapes. + # Test to see if we need to restore it to current shapes at the end. reset_cs = False if not self.future_array_shapes: self.use_future_array_shapes() @@ -10485,9 +10490,15 @@ def frequency_average( "before frequency averaging." ) + # Figure out how many channels are in each spw so we can tell if we have a + # ragged situation (indicated by the some_uneven variable). + # While we're at it, build up some useful dicts for later, keyed on spw nchans_spw = np.zeros(self.Nspws, dtype=int) + # final_nchan will hold the number of Nfreqs after averaging. final_nchan = 0 + # spw_chans will hold the original channel indices for each spw spw_chans = {} + # final_spw_chans will hold the final channel indices for each spw final_spw_chans = {} some_uneven = False for spw_ind, spw in enumerate(self.spw_array): @@ -10505,6 +10516,7 @@ def frequency_average( ) final_nchan += this_final_nchan + # Warn about changing keep_ragged default if it applies if some_uneven and kr_use_default: warnings.warn( "Some spectral windows do not divide evenly by `n_chan_to_avg` and the " @@ -10513,6 +10525,8 @@ def frequency_average( "True or False to silence this warning.", DeprecationWarning, ) + # Cannot go back to current array shapes if we have ragged channels that we're + # keeping if some_uneven and keep_ragged and reset_cs: warnings.warn( "Ragged frequencies will result in varying channel widths, so " @@ -10520,6 +10534,9 @@ def frequency_average( "Use keep_ragged=False to avoid this." ) + # Since we have to loop through the spws, we cannot do the averaging with a + # simple reshape and average. So we need to create arrays to hold the + # various metadata & data after averaging final_freq_array = np.zeros(final_nchan, dtype=float) final_channel_width = np.zeros(final_nchan, dtype=float) final_flex_spw_id_array = np.zeros(final_nchan, dtype=int) @@ -10534,7 +10551,15 @@ def frequency_average( final_shape_tuple, dtype=self.nsample_array.dtype ) + # Now loop through the spws to actually do the averaging for spw_ind, spw in enumerate(self.spw_array): + # n_final_chan_reg is the number of regular (non-ragged) channels after + # averaging in this spw. + # For the regular channels, we can average more quickly by reshaping the + # frequency axis into two axes of lengths (n_final_chan_reg, n_chan_to_avg) + # followed by an average (or sum) over the axis of length n_chan_to_avg. + # Then we just have to do one more calculation for the remaining input + # channels if there are ragged channels. n_final_chan_reg = int(np.floor(nchans_spw[spw_ind] / n_chan_to_avg)) nfreq_mod_navg = nchans_spw[spw_ind] % n_chan_to_avg these_inds = spw_chans[spw] @@ -10546,23 +10571,28 @@ def frequency_average( # not an even number of final channels regular_inds = these_inds[0 : n_final_chan_reg * n_chan_to_avg] if not keep_ragged: + # only use the non-ragged inds these_inds = regular_inds else: + # find the irregular inds for this spw this_ragged = True irregular_inds = these_inds[n_final_chan_reg * n_chan_to_avg :] this_final_reg_inds = this_final_reg_inds[:-1] + # Now do the reshaping and combining across the n_chan_to_avg length axis final_freq_array[this_final_reg_inds] = ( self.freq_array[regular_inds] .reshape((n_final_chan_reg, n_chan_to_avg)) .mean(axis=1) ) + # take a sum here rather to get final channel width final_channel_width[this_final_reg_inds] = ( self.channel_width[regular_inds] .reshape((n_final_chan_reg, n_chan_to_avg)) .sum(axis=1) ) if this_ragged: + # deal with the final ragged channel final_freq_array[final_spw_chans[spw][-1]] = np.mean( self.freq_array[irregular_inds] ) @@ -10612,6 +10642,10 @@ def frequency_average( # need to update mask if a downsampled visibility will be flagged # so that we don't set it to zero + # This is a common radio astronomy convention that when averaging over + # entirely flagged channels, you include the flagged channels in the + # result (so it's not zero) whereas you exclude flagged channels if + # there are any unflagged channels in the average. for chan_ind in np.arange(n_final_chan_reg): this_chan = final_spw_chans[spw][chan_ind] if (final_flag_array[:, this_chan]).any(): @@ -10636,6 +10670,9 @@ def frequency_average( ff_inds = np.nonzero(fully_flagged) irreg_mask[ax0_inds[ff_inds], :, ax2_inds[ff_inds]] = False + # create a masked data array from the data_array and mask_array + # (based on the flag_array). + # This lets numpy handle the averaging with flags. masked_reg_data = np.ma.masked_array( self.data_array[:, regular_inds].reshape(shape_tuple), mask=reg_mask ) @@ -10650,6 +10687,7 @@ def frequency_average( masked_nsample_dtype = np.float32 else: masked_nsample_dtype = nsample_dtype + # create a masked nsample array from the data_array and mask_array masked_reg_nsample = np.ma.masked_array( self.nsample_array[:, regular_inds].reshape(shape_tuple), mask=reg_mask, @@ -10663,6 +10701,7 @@ def frequency_average( ) if summing_correlator_mode: + # sum rather than average final_data_array[:, this_final_reg_inds] = np.sum( masked_reg_data, axis=2 ).data @@ -10671,7 +10710,7 @@ def frequency_average( masked_irreg_data, axis=1 ).data else: - # need to weight by the nsample_array + # do a weighted average with the weights given by the nsample_array final_data_array[:, this_final_reg_inds] = ( np.sum(masked_reg_data * masked_reg_nsample, axis=2) / np.sum(masked_reg_nsample, axis=2) @@ -10684,6 +10723,8 @@ def frequency_average( # nsample array is the fraction of data that we actually kept, # relative to the amount that went into the sum or average. + # So it's a sum over the averaged channels divided by the number of + # averaged channels # Need to take care to return precision back to original value. final_nsample_array[:, this_final_reg_inds] = ( np.sum(masked_reg_nsample, axis=2) / float(n_chan_to_avg) @@ -10693,6 +10734,7 @@ def frequency_average( np.sum(masked_irreg_nsample, axis=1) / irregular_inds.size ).data.astype(nsample_dtype) + # Put the final arrays on the object self.freq_array = final_freq_array self.channel_width = final_channel_width self.flex_spw_id_array = final_flex_spw_id_array @@ -10704,8 +10746,10 @@ def frequency_average( self.data_array = final_data_array self.nsample_array = final_nsample_array + # update Nfreqs self.Nfreqs = final_nchan + # return to current shapes if needed and possible if reset_cs and not (some_uneven and keep_ragged): with warnings.catch_warnings(): warnings.filterwarnings("ignore", "This method will be removed")