Adding support for CORRECTED_DATA, MODEL_DATA read/write to CASA MS
kartographer committed Mar 5, 2024
1 parent 9cefc19 commit e3fbdca
Showing 2 changed files with 95 additions and 33 deletions.
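In effect, UVData.write_ms gains optional model_data and corrected_data arguments, and the reader can now pull any of the three MAIN-table data columns. A minimal usage sketch, assuming a pyuvdata version containing this commit; the input file name and placeholder arrays are illustrative, not part of the commit:

import numpy as np
from pyuvdata import UVData

uvd = UVData.from_file("input.uvfits")  # hypothetical input file

# Both optional columns must match data_array in shape; placeholder
# values are used here purely for illustration.
model = np.zeros_like(uvd.data_array)
corrected = uvd.data_array.copy()

uvd.write_ms("output.ms", model_data=model, corrected_data=corrected, clobber=True)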
34 changes: 33 additions & 1 deletion pyuvdata/ms_utils.py
@@ -1303,14 +1303,20 @@ def write_ms_polarization(
pol_table.putcell("NUM_CORR", 0, len(polarization_array))


def init_ms_file(filepath):
def init_ms_file(filepath, make_model_col=False, make_corr_col=False):
"""
Create a skeleton MS dataset to fill.
Parameters
----------
filepath : str
Path to MS to be created.
make_model_col : bool
If set to True, will construct a measurement set that contains a MODEL_DATA
column in addition to the DATA column. Default is False.
make_corr_col : bool
If set to True, will construct a measurement set that contains a CORRECTED_DATA
column in addition to the DATA column. Default is False.
"""
# The required_ms_desc returns the defaults for a CASA MS table
ms_desc = tables.required_ms_desc("MAIN")
@@ -1395,6 +1401,32 @@ def init_ms_file(filepath):
del datacoldesc["desc"]["shape"]
ms_desc.update(tables.maketabdesc(datacoldesc))

if make_model_col:
datacoldesc = tables.makearrcoldesc(
"MODEL_DATA",
0.0 + 0.0j,
valuetype="complex",
ndim=2,
datamanagertype="TiledShapeStMan",
datamanagergroup="TiledData",
comment="The data column",
)
del datacoldesc["desc"]["shape"]
ms_desc.update(tables.maketabdesc(datacoldesc))

if make_corr_col:
datacoldesc = tables.makearrcoldesc(
"CORRECTED_DATA",
0.0 + 0.0j,
valuetype="complex",
ndim=2,
datamanagertype="TiledShapeStMan",
datamanagergroup="TiledData",
comment="The data column",
)
del datacoldesc["desc"]["shape"]
ms_desc.update(tables.maketabdesc(datacoldesc))

# Now create a column for the weight spectrum, which we plug nsample_array into
weightspeccoldesc = tables.makearrcoldesc(
"WEIGHT_SPECTRUM",
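A short sketch of exercising the new init_ms_file keywords directly (assumes python-casacore is installed; the output path "skeleton.ms" is hypothetical):

from pyuvdata import ms_utils

# Build a skeleton MS whose MAIN table carries MODEL_DATA and
# CORRECTED_DATA columns alongside the usual DATA column.
ms = ms_utils.init_ms_file("skeleton.ms", make_model_col=True, make_corr_col=True)
print(ms.colnames())  # DATA, MODEL_DATA, and CORRECTED_DATA should all be listed
ms.close()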
94 changes: 62 additions & 32 deletions pyuvdata/uvdata/ms.py
@@ -47,6 +47,8 @@ def write_ms(
filepath,
*,
force_phase=False,
model_data=None,
corrected_data=None,
flip_conj=False,
clobber=False,
run_check=True,
@@ -66,14 +68,22 @@
force_phase : bool
Option to automatically phase drift scan data to zenith of the first
timestamp.
model_data : ndarray
Optional argument containing data to be written into the MODEL_DATA
column of the measurement set (alongside the data, which is written into
the DATA column). Must have the same shape as `data_array`.
corrected_data : ndarray
Optional argument containing data to be written into the CORRECTED_DATA
column of the measurement set (alongside the data, which is written into
the DATA column). Must have the same shape as `data_array`.
clobber : bool
Option to overwrite the file if it already exists.
flip_conj : bool
If set to True, the UVW coordinates are flipped (i.e., multiplied by
-1) and the visibilities are complex conjugated prior to write, such that
the data are written with the "opposite" conjugation scheme to what UVData
normally uses. Note that this is only needed for a specific subset of
applications that read MS-formated data, and should only be used by expert
applications that read MS-formatted data, and should only be used by expert
users. Default is False.
run_check : bool
Option to check for the existence and proper shapes of parameters
@@ -146,11 +156,34 @@
self._set_scan_numbers()

# Initialize a skeleton measurement set
ms = ms_utils.init_ms_file(filepath)
ms = ms_utils.init_ms_file(
filepath,
make_model_col=model_data is not None,
make_corr_col=corrected_data is not None,
)

attr_list = ["data_array", "nsample_array", "flag_array"]
arr_list = [self.data_array, self.nsample_array, self.flag_array]
col_list = ["DATA", "WEIGHT_SPECTRUM", "FLAG"]

if model_data is not None:
assert (
model_data.shape == self.data_array.shape
), "model_data must have the same shape as data_array."
arr_list.append(model_data)
col_list.append("MODEL_DATA")

if corrected_data is not None:
assert (
corrected_data.shape == self.data_array.shape
), "corrected_data must have the same shape as data_array."
arr_list.append(corrected_data)
col_list.append("CORRECTED_DATA")

if not self.future_array_shapes:
for idx in range(len(arr_list)):
# If not using future array shapes, squeeze out the length-1 spw axis now
# (which just returns a view, and therefore doesn't impact mem usage).
arr_list[idx] = np.squeeze(arr_list[idx], axis=1)

# Some tasks in CASA require a band-representative (band-averaged?) value for
# the weights and noise for all channels in each row in the MAIN table, which
# we will roughly calculate in temp_weights below.
@@ -172,14 +205,11 @@
if self.Nspws == 1:
# If we only have one spectral window, there is nothing to worry about
# regarding ordering, so just write the data-related arrays as-is to disk
for attr, col in zip(attr_list, col_list):
if self.future_array_shapes:
temp_vals = getattr(self, attr)[:, :, pol_order]
else:
temp_vals = np.squeeze(getattr(self, attr), axis=1)[..., pol_order]
for arr, col in zip(arr_list, col_list):
temp_vals = arr[:, :, pol_order]

if flip_conj and (attr == "data_array"):
temp_vals = np.conj(temp_vals)
if flip_conj and ("DATA" in col):
temp_vals = np.conj(temp_vals, out=temp_vals)

ms.putcol(col, temp_vals)

@@ -202,7 +232,7 @@
# (n.b., tables.putvarcol can write complex tables like these, but it's
# slower and more memory-intensive than putcol).

# Since muliple records trace back to a single baseline-time, we use this
# Since multiple records trace back to a single baseline-time, we use this
# array to map from arrays that store on a per-record basis to positions
# within arrays that record metadata on a per-blt basis.
blt_map_array = np.zeros((self.Nblts * self.Nspws), dtype=int)
@@ -244,20 +274,15 @@
# Extract out the relevant data out of our data-like arrays that
# belong to this scan number.
val_dict = {}
for attr, col in zip(attr_list, col_list):
if self.future_array_shapes:
val_dict[col] = getattr(self, attr)[scan_slice]
else:
val_dict[col] = np.squeeze(
getattr(self, attr)[scan_slice], axis=1
)
for arr, col in zip(arr_list, col_list):
temp_arr = arr[scan_slice]

if flip_conj and ("DATA" in col):
temp_arr = np.conjugate(temp_arr)

# Have to do this separately since you can't supply multiple index
# arrays at once.
val_dict[col] = val_dict[col][:, :, pol_order]

if flip_conj:
val_dict["DATA"] = np.conj(val_dict["DATA"])
val_dict[col] = temp_arr[:, :, pol_order]

# This is where the bulk of the heavy lifting is - use the per-spw
# channel masks to record one spectral window at a time.
@@ -277,7 +302,7 @@
)
last_row += Nrecs

# Now that we have an array to map baselime-time to individual records,
# Now that we have an array to map baseline-time to individual records,
# use our indexing array to map various metadata.
ant_1_array = self.ant_1_array[blt_map_array]
ant_2_array = self.ant_2_array[blt_map_array]
@@ -304,7 +329,7 @@
ms.putcol("ANTENNA1", ant_1_array)
ms.putcol("ANTENNA2", ant_2_array)

# "INVERVAL" refers to "width" of the window of time time over which data was
# "INTERVAL" refers to "width" of the window of time time over which data was
# collected, while "EXPOSURE" is the sum total of integration time. UVData
# does not differentiate between these concepts, hence why one array is used
# for both values.
@@ -378,7 +403,7 @@ def _read_ms_main(
The measurement set root directory to read from.
data_column : str
name of CASA data column to read into data_array. Options are:
'DATA', 'MODEL', or 'CORRECTED_DATA'
'DATA', 'MODEL_DATA', or 'CORRECTED_DATA'
data_desc_dict : dict
Dictionary describing the various rows in the DATA_DESCRIPTION table of
an MS file. Keys match to the individual rows, and the values are themselves
@@ -435,13 +460,18 @@
if "pyuvdata_xorient" in main_keywords.keys():
self.x_orientation = main_keywords["pyuvdata_xorient"]

default_vis_units = {"DATA": "uncalib", "CORRECTED_DATA": "Jy", "MODEL": "Jy"}
default_vis_units = {
"DATA": "uncalib",
"CORRECTED_DATA": "Jy",
"MODEL": "Jy",
"MODEL_DATA": "Jy",
}

# make sure user requests a valid data_column
if data_column not in default_vis_units.keys():
raise ValueError(
"Invalid data_column value supplied. Use 'Data','MODEL' or"
" 'CORRECTED_DATA'"
"Invalid data_column value supplied. Use 'DATA','MODEL_DATA', or"
"'CORRECTED_DATA'."
)

# set visibility units
@@ -491,11 +521,11 @@
data_desc_count = np.sum(np.isin(list(data_desc_dict.keys()), unique_data_desc))

if data_desc_count == 0:
# If there are no records selected, then there isnt a whole lot to do
# If there are no records selected, then there isn't a whole lot to do
return None, None, None, None
elif data_desc_count == 1:
# If we only have a single spectral window, then we can bypass a whole lot
# of slicing and dicing on account of there being a one-to-one releationship
# of slicing and dicing on account of there being a one-to-one relationship
# in rows of the MS to the per-blt records of UVData objects.
self.time_array = Time(
time_arr / 86400.0, format="mjd", scale=timescale.lower()
@@ -759,9 +789,9 @@ def _read_ms_main(
# Note that this operation has to be split in two because you can only use
# advanced slicing on one axis (which both blt_idx and pol_idx require).
if flip_conj:
temp_data[:, :, pol_idx] = np.conj(tb_main_sel.getcol("DATA"))
temp_data[:, :, pol_idx] = np.conj(tb_main_sel.getcol(data_column))
else:
temp_data[:, :, pol_idx] = tb_main_sel.getcol("DATA")
temp_data[:, :, pol_idx] = tb_main_sel.getcol(data_column)

temp_flags[:, :, pol_idx] = tb_main_sel.getcol("FLAG")

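On the read side, _read_ms_main now fetches whichever column data_column names, so the new columns can round-trip. A sketch, reusing the hypothetical "output.ms" from the example above:

from pyuvdata import UVData

# Read back the MODEL_DATA column written earlier; vis_units should
# default to "Jy" per default_vis_units in _read_ms_main.
uvd_model = UVData.from_file("output.ms", data_column="MODEL_DATA")
print(uvd_model.vis_units)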
