From 221aa91464bd55d1e714c2e0c8ea464ef0578050 Mon Sep 17 00:00:00 2001 From: Garrett 'Karto' Keating Date: Sat, 10 Feb 2024 20:31:56 -0500 Subject: [PATCH 1/6] Improving robustness of pol-split detection in MIR datasets --- pyuvdata/uvdata/mir.py | 32 ++++++++++++++++++++++--------- pyuvdata/uvdata/mir_parser.py | 2 +- pyuvdata/uvdata/tests/test_mir.py | 11 +++++++++++ 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/pyuvdata/uvdata/mir.py b/pyuvdata/uvdata/mir.py index 125fb7ecca..dab4631365 100644 --- a/pyuvdata/uvdata/mir.py +++ b/pyuvdata/uvdata/mir.py @@ -343,16 +343,26 @@ def _init_from_mir_parser( if len(pol_dict) == 2: # If dual-pol, then we need to check if the tunings are split, because # the two polarizations will effectively be concat'd across the freq - # axis instead of the pol axis. + # axis instead of the pol axis. First, see if we have two diff receivers rxa_mask = ant1_rxa_mask & ant2_rxa_mask rxb_mask = ~(ant1_rxa_mask | ant2_rxa_mask) if np.any(rxa_mask) and np.any(rxb_mask): - # If we have both VV and HH data, check to see that the tunings of the - # two receivers match. - loa_freq = np.median(mir_data.sp_data["gunnLO"][rxa_mask]) - lob_freq = np.median(mir_data.sp_data["gunnLO"][rxb_mask]) - pol_split_tuning = not np.isclose(loa_freq, lob_freq) + # If we have both VV and HH data, check to see that the frequencies of + # each of the spectral chunks match. If they do, then we can concat + # across the polarization axis, but if _not_, we should treat this as + # a pol-split data set. + fsky_vals = mir_data.sp_data["fsky"] + chunk_vals = mir_data.sp_data["corrchunk"] + loa_chunks = set(zip(fsky_vals[rxa_mask], chunk_vals[rxa_mask])) + lob_chunks = set(zip(fsky_vals[rxb_mask], chunk_vals[rxb_mask])) + pol_split_tuning = not ( + loa_chunks.issubset(lob_chunks) or lob_chunks.issubset(loa_chunks) + ) + print(loa_chunks) + print(lob_chunks) + print(pol_split_tuning) + print("hi!") # Map MIR pol code to pyuvdata/AIPS polarization number pol_code_dict = {} @@ -419,9 +429,13 @@ def _init_from_mir_parser( # Make sure that something weird hasn't happened with the metadata (this # really should never happen, only one value should exist per window). - assert np.allclose(spw_fsky, mir_data.sp_data["fsky"][data_mask]) - assert np.allclose(spw_fres, mir_data.sp_data["fres"][data_mask]) - assert np.allclose(spw_nchan, mir_data.sp_data["nch"][data_mask]) + for val, item in zip( + [spw_fsky, spw_fres, spw_nchan], ["fsky", "fres", "nch"] + ): + if not np.allclose(val, mir_data.sp_data[item][data_mask]): + warnings.warn( + "Discrepancy in %s for win %i sb %i pol %i." % (item, *spdx) + ) # Get the data in the right units and dtype spw_fsky = float(spw_fsky * 1e9) # GHz -> Hz diff --git a/pyuvdata/uvdata/mir_parser.py b/pyuvdata/uvdata/mir_parser.py index 6d2f6e5e2b..528590c116 100644 --- a/pyuvdata/uvdata/mir_parser.py +++ b/pyuvdata/uvdata/mir_parser.py @@ -4183,7 +4183,7 @@ def _make_v3_compliant(self): # bl_data updates: ant1rx, ant2rx, u, v, w # First, update the antenna receiver if these values are unfilled (true in some # earlier tracks, no version demarcation notes it). 
- if np.all(self.bl_data["ant1rx"] == 0) and np.all(self.bl_data["ant2rx"] == 0): + if np.all(self.bl_data["ant1rx"] == 0) or np.all(self.bl_data["ant2rx"] == 0): ipol = self.bl_data["ipol"] irec = self.bl_data["irec"] diff --git a/pyuvdata/uvdata/tests/test_mir.py b/pyuvdata/uvdata/tests/test_mir.py index ebac5dbb0e..1d8747843d 100644 --- a/pyuvdata/uvdata/tests/test_mir.py +++ b/pyuvdata/uvdata/tests/test_mir.py @@ -789,3 +789,14 @@ def test_generate_sma_antpos_dict(use_file, sma_mir): ant_dict = generate_sma_antpos_dict(filepath) for ant_num, xyz_pos in zip(sma_mir.antenna_numbers, sma_mir.antenna_positions): assert np.allclose(ant_dict[ant_num], xyz_pos) + + +def test_spw_consistency_warning(): + mir_data = MirParser(sma_mir_test_file) + mir_data.sp_data._data["fres"][1] *= 2 + mir_data.bl_data._data["ant1rx"][:] = 0 + mir_data.bl_data._data["ant2rx"][:] = 0 + + mir_uv = Mir() + with uvtest.check_warnings(UserWarning, match="Discrepancy in fres"): + mir_uv._init_from_mir_parser(mir_data) From a58c2f00563700b860ce4a7e5a3ac6802965b817 Mon Sep 17 00:00:00 2001 From: Garrett 'Karto' Keating Date: Sat, 10 Feb 2024 20:51:55 -0500 Subject: [PATCH 2/6] removing debugging print statements --- pyuvdata/uvdata/mir.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pyuvdata/uvdata/mir.py b/pyuvdata/uvdata/mir.py index dab4631365..6abb02046b 100644 --- a/pyuvdata/uvdata/mir.py +++ b/pyuvdata/uvdata/mir.py @@ -359,10 +359,6 @@ def _init_from_mir_parser( pol_split_tuning = not ( loa_chunks.issubset(lob_chunks) or lob_chunks.issubset(loa_chunks) ) - print(loa_chunks) - print(lob_chunks) - print(pol_split_tuning) - print("hi!") # Map MIR pol code to pyuvdata/AIPS polarization number pol_code_dict = {} From 9d5a35282026ea00e7564acf0bc0fe743a06e209 Mon Sep 17 00:00:00 2001 From: Garrett 'Karto' Keating Date: Sun, 11 Feb 2024 12:22:42 -0500 Subject: [PATCH 3/6] More minor code clean-up, moving vis read-in to the end of the Mir read operation --- pyuvdata/uvdata/mir.py | 118 ++++++++++++----------- pyuvdata/uvdata/mir_parser.py | 9 +- pyuvdata/uvdata/tests/test_mir.py | 3 +- pyuvdata/uvdata/tests/test_mir_parser.py | 12 --- 4 files changed, 71 insertions(+), 71 deletions(-) diff --git a/pyuvdata/uvdata/mir.py b/pyuvdata/uvdata/mir.py index 6abb02046b..ab0a0108e4 100644 --- a/pyuvdata/uvdata/mir.py +++ b/pyuvdata/uvdata/mir.py @@ -509,57 +509,6 @@ def _init_from_mir_parser( for key in spdx_dict: spdx_dict[key]["ch_slice"] = spw_dict[spdx_dict[key]["spw_id"]]["ch_slice"] - # Create arrays to plug visibilities and flags into. The array is shaped this - # way since when reading in a MIR file, we scan through the blt-axis the - # slowest and the freq-axis the fastest (i.e., the data is roughly ordered by - # blt, pol, freq). - self.data_array = np.zeros((Nblts, Npols, Nfreqs), dtype=np.complex64) - self.flag_array = np.ones((Nblts, Npols, Nfreqs), dtype=bool) - self.nsample_array = np.zeros((Nblts, Npols, Nfreqs), dtype=np.float32) - - # Get a list of the current inhid values for later - inhid_list = mir_data.in_data["inhid"].copy() - - # Store a backup of the selection masks - backup_masks = {} - for item, obj in mir_data._metadata_attrs.items(): - backup_masks[item] = obj._mask.copy() - - # If no data is loaded, we want to load subsets of data to start rather than - # the whole block in one go, since this will save us 2x in memory. 
- inhid_step = len(inhid_list) - if (mir_data.vis_data is None) and (mir_data.auto_data is None): - inhid_step = (inhid_step // 8) + 1 - - for start in range(0, len(inhid_list), inhid_step): - # If no data is loaded, load up a quarter of the data at a time. This - # keeps the "extra" memory usage below that for the nsample_array attr, - # which is generated and filled _after_ this step (thus no extra memory - # should be required) - if (mir_data.vis_data is None) and (mir_data.auto_data is None): - # Note that the masks are combined via and operation. - mir_data.select( - where=("inhid", "eq", inhid_list[start : start + inhid_step]) - ) - - # Call this convenience function in case we need to run the data filling - # multiple times (if we are loading up subsets of data) - self._prep_and_insert_data( - mir_data, - sphid_dict, - spdx_dict, - blhid_blt_order, - apply_flags=apply_flags, - apply_tsys=apply_tsys, - apply_dedoppler=apply_dedoppler, - ) - - for item, obj in mir_data._metadata_attrs.items(): - # Because the select operation messes with the masks, we want to restore - # those in case we mucked with them earlier (so that subsequent selects - # behave as expected). - obj._mask = backup_masks[item].copy() - # Now assign our flexible arrays to the object itself self.freq_array = freq_array self.Nfreqs = Nfreqs @@ -694,7 +643,6 @@ def _init_from_mir_parser( for blt_key in blt_temp_dict.keys(): temp_dict = blt_temp_dict[blt_key] integration_time[blt_key] = temp_dict["rinteg"] - # TODO: Using MIR V3 convention for lst, will need make it V2 compatible. lst_array[blt_key] = temp_dict["lst"] * (np.pi / 12.0) # Hours -> rad mjd_array[blt_key] = temp_dict["mjd"] ant_1_array[blt_key] = temp_dict["iant1"] @@ -712,9 +660,22 @@ def _init_from_mir_parser( self.baseline_array = self.antnums_to_baseline( self.ant_1_array, self.ant_2_array, attempt256=False ) - self.integration_time = integration_time - self.lst_array = lst_array self.time_array = Time(mjd_array, scale="tt", format="mjd").utc.jd + self.integration_time = integration_time + + # There is a minor issue w/ MIR datasets where the LSTs are calculated via + # a polled average rather than calculated for the mid-point, which results + # in some imprecision (and some nuisance warnings). Fix this by calculating the + # LSTs here, checking to make sure that they agree to within the expected + # precision (sampling rate is 20 Hz, and the max error to worry about is half a + # sample: 25 ms, or in radians, 2*pi/(40 * 86400)) = pi / 1728000. + # TODO: Re-evaluate this if/when MIR data writer stops calculating LST this way + self.set_lsts_from_time_array() + if not np.allclose(lst_array, self.lst_array, rtol=0, atol=np.pi / 1728000.0): + # If this check fails, it means that there's something off w/ the lst values + # (to a larger degree than expected), and we'll pass them back to the user, + # who can inspect them directly and decide what to do. + self.lst_array = lst_array self.polarization_array = polarization_array self.flex_spw_polarization_array = flex_pol @@ -799,6 +760,55 @@ def _init_from_mir_parser( self.filename = [os.path.basename(basename)] self._filename.form = (1,) + # Finally, start the heavy lifting of loading the full data. We start this by + # creating arrays to plug visibilities and flags into. The array is shaped this + # way since when reading in a MIR file, we scan through the blt-axis the + # slowest and the freq-axis the fastest (i.e., the data is roughly ordered by + # blt, pol, freq). 
+ self.data_array = np.zeros((Nblts, Npols, Nfreqs), dtype=np.complex64) + self.flag_array = np.ones((Nblts, Npols, Nfreqs), dtype=bool) + self.nsample_array = np.zeros((Nblts, Npols, Nfreqs), dtype=np.float32) + + # Get a list of the current inhid values for later + inhid_list = mir_data.in_data["inhid"].copy() + + # Store a backup of the selection masks + mir_data.save_mask("pre-select") + + # If no data is loaded, we want to load subsets of data to start rather than + # the whole block in one go, since this will save us 2x in memory. + inhid_step = len(inhid_list) + if (mir_data.vis_data is None) and (mir_data.auto_data is None): + inhid_step = (inhid_step // 8) + 1 + + for start in range(0, len(inhid_list), inhid_step): + # If no data is loaded, load up a quarter of the data at a time. This + # keeps the "extra" memory usage below that for the nsample_array attr, + # which is generated and filled _after_ this step (thus no extra memory + # should be required) + if (mir_data.vis_data is None) and (mir_data.auto_data is None): + # Note that the masks are combined via and operation. + mir_data.select( + where=("inhid", "eq", inhid_list[start : start + inhid_step]) + ) + + # Call this convenience function in case we need to run the data filling + # multiple times (if we are loading up subsets of data) + self._prep_and_insert_data( + mir_data, + sphid_dict, + spdx_dict, + blhid_blt_order, + apply_flags=apply_flags, + apply_tsys=apply_tsys, + apply_dedoppler=apply_dedoppler, + ) + + # Because the select operation messes with the masks, we want to restore + # those in case we mucked with them earlier (so that subsequent selects + # behave as expected). + mir_data.restore_mask("pre-select") + # We call transpose here since vis_data above is shape (Nblts, Npols, Nfreqs), # and we need to get it to (Nblts,Nfreqs, Npols) to match what UVData expects. 
self.data_array = np.transpose(self.data_array, (0, 2, 1)) diff --git a/pyuvdata/uvdata/mir_parser.py b/pyuvdata/uvdata/mir_parser.py index 528590c116..6270b575b9 100644 --- a/pyuvdata/uvdata/mir_parser.py +++ b/pyuvdata/uvdata/mir_parser.py @@ -4148,15 +4148,18 @@ def _make_v3_compliant(self): for key, value in mjd_day_dict.items(): if isinstance(value, str): mjd_day_dict[key] = Time( - datetime.strptime(value, "%b %d, %Y"), scale="tt" - ).tt.mjd + datetime.strptime(value, "%b %d, %Y"), scale="utc" + ).mjd mjd_arr = (self.in_data["dhrs"] / 24.0) + np.array( [mjd_day_dict[idx] for idx in self.in_data["iref_time"]] ) # Tally the JD dates, since that's used for various helper functions - jd_arr = Time(mjd_arr, format="mjd", scale="utc").utc.jd + jd_arr = Time(mjd_arr, format="mjd", scale="utc").jd + + # Also, convert MJD back into the expected TT timescale + mjd_arr = Time(mjd_arr, format="mjd", scale="utc").tt.mjd # Calculate the LST at the time of obs lst_arr = (12.0 / np.pi) * uvutils.get_lst_for_time( diff --git a/pyuvdata/uvdata/tests/test_mir.py b/pyuvdata/uvdata/tests/test_mir.py index 1d8747843d..4b86a7d8a5 100644 --- a/pyuvdata/uvdata/tests/test_mir.py +++ b/pyuvdata/uvdata/tests/test_mir.py @@ -791,8 +791,7 @@ def test_generate_sma_antpos_dict(use_file, sma_mir): assert np.allclose(ant_dict[ant_num], xyz_pos) -def test_spw_consistency_warning(): - mir_data = MirParser(sma_mir_test_file) +def test_spw_consistency_warning(mir_data): mir_data.sp_data._data["fres"][1] *= 2 mir_data.bl_data._data["ant1rx"][:] = 0 mir_data.bl_data._data["ant2rx"][:] = 0 diff --git a/pyuvdata/uvdata/tests/test_mir_parser.py b/pyuvdata/uvdata/tests/test_mir_parser.py index 8a68fc2eaa..0a39cc9f1e 100644 --- a/pyuvdata/uvdata/tests/test_mir_parser.py +++ b/pyuvdata/uvdata/tests/test_mir_parser.py @@ -30,18 +30,6 @@ ) -@pytest.fixture(scope="session") -def mir_data_main(): - mir_data = MirParser() - - yield mir_data._load_test_data(load_cross=True, load_auto=True, has_auto=True) - - -@pytest.fixture(scope="function") -def mir_data(mir_data_main): - yield mir_data_main.copy() - - @pytest.fixture(scope="module") def compass_soln_file(tmp_path_factory): tmp_path = tmp_path_factory.mktemp("mir_parser", numbered=True) From 943b2b96ea2700497f9c35deca37a44cf570d205 Mon Sep 17 00:00:00 2001 From: Garrett 'Karto' Keating Date: Sun, 11 Feb 2024 12:55:25 -0500 Subject: [PATCH 4/6] Updating CHANGELOG --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 53bcbb6b14..9612f968cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,12 @@ positions are near surface of whatever celestial body their positions are refere (either the Earth or Moon, currently). ### Changed +- Made the determination of whether or not to create a flex-pol dataset when reading +in a MIR more robust (particularly with pre-V3 data formats). +- Reading in of MIR data sets into `UVData` objects will now use pyuvdata-calculated +values for `lst_array` via (`UVData.set_lsts_from_time_array`) instead of those read in +from the file due to known precision issues in the later (~25 ms), so long as the two +agree within this known precision limit. - `UVFlag.to_baseline` and `UVFlag.to_antenna` are now more robust to differences in antenna metadata sorting. - Made `MirParser` more robust against metadata indexing errors. @@ -68,6 +74,8 @@ fully tested and didn't work properly. - Having `freq_array` and `channel_width` defined on wide-band UVCal objects. 
### Fixed +- A bug where `time_array` was not being correctly calculated for older (pre-V3) MIR +data formats. - A small bug in UVFlag that could occur when reading in an older UVFlag HDF5 file with missing antenna metadata. - A small bug (mostly affecting continuous integration) that threw an error when the From aa4bd2f25ebbf1b4896af92d8e507d6fb6201b6c Mon Sep 17 00:00:00 2001 From: Garrett 'Karto' Keating Date: Mon, 12 Feb 2024 23:36:17 -0500 Subject: [PATCH 5/6] Making requested changes following review --- pyuvdata/uvdata/mir.py | 32 ++++++++++------ pyuvdata/uvdata/tests/conftest.py | 6 ++- pyuvdata/uvdata/tests/test_mir.py | 56 +++++++++++++++++++++------- pyuvdata/uvdata/tests/test_uvdata.py | 21 ++++++++--- 4 files changed, 83 insertions(+), 32 deletions(-) diff --git a/pyuvdata/uvdata/mir.py b/pyuvdata/uvdata/mir.py index ab0a0108e4..f58156b8bd 100644 --- a/pyuvdata/uvdata/mir.py +++ b/pyuvdata/uvdata/mir.py @@ -416,6 +416,16 @@ def _init_from_mir_parser( spdx_dict = {} spw_dict = {} for spdx in set(spdx_list): + # We need to do a some extra handling here, because a single correlator + # can produce multiple spectral windows (e.g., LSB/USB). The scheme below + # will negate the corr band number if LSB, will set the corr band number to + # 255 if the values arise from the pseudo-wideband values, and will add 512 + # if the pols are split-tuned. This scheme, while a little funky, guarantees + # that each unique freq range has its own spectral window number. + spw_id = 255 if (spdx[0] == 0) else spdx[0] + spw_id *= (-1) ** (1 + spdx[1]) + spw_id += 512 if (pol_split_tuning and spdx[2] == 1) else 0 + data_mask = np.array([spdx == item for item in spdx_list]) # Grab values, get them into appropriate types @@ -430,7 +440,9 @@ def _init_from_mir_parser( ): if not np.allclose(val, mir_data.sp_data[item][data_mask]): warnings.warn( - "Discrepancy in %s for win %i sb %i pol %i." % (item, *spdx) + "Discrepancy in %s for win %i sb %i pol %i. Values of " + "`freq_array` and `channel_width` should be checked for " + "channels corresponding to spw_id %i." % (item, *spdx, spw_id) ) # Get the data in the right units and dtype @@ -438,16 +450,6 @@ def _init_from_mir_parser( spw_fres = float(spw_fres * 1e6) # MHz -> Hz spw_nchan = int(spw_nchan) - # We need to do a some extra handling here, because a single correlator - # can produce multiple spectral windows (e.g., LSB/USB). The scheme below - # will negate the corr band number if LSB, will set the corr band number to - # 255 if the values arise from the pseudo-wideband values, and will add 512 - # if the pols are split-tuned. This scheme, while a little funky, guarantees - # that each unique freq range has its own spectral window number. - spw_id = 255 if (spdx[0] == 0) else spdx[0] - spw_id *= (-1) ** (1 + spdx[1]) - spw_id += 512 if (pol_split_tuning and spdx[2] == 1) else 0 - # Populate the channel width array channel_width = abs(spw_fres) + np.zeros(spw_nchan, dtype=np.float64) @@ -675,6 +677,14 @@ def _init_from_mir_parser( # If this check fails, it means that there's something off w/ the lst values # (to a larger degree than expected), and we'll pass them back to the user, # who can inspect them directly and decide what to do. + warnings.warn( + "> 25 ms errors detected reading in LST values from MIR data. 
" + "This typically signifies a minor metadata recording error (which can " + "be mitigated by calling the `set_lsts_from_time_array` method with " + "`update_vis=False`), though additional errors about uvw-position " + "accuracy may signal more significant issues with metadata accuracy " + "that could have substantial impact on downstream analysis." + ) self.lst_array = lst_array self.polarization_array = polarization_array diff --git a/pyuvdata/uvdata/tests/conftest.py b/pyuvdata/uvdata/tests/conftest.py index cd5821d8bc..b3b2695a80 100644 --- a/pyuvdata/uvdata/tests/conftest.py +++ b/pyuvdata/uvdata/tests/conftest.py @@ -106,8 +106,10 @@ def sma_mir_main(): testfile = os.path.join(DATA_PATH, "sma_test.mir") with uvtest.check_warnings( UserWarning, - match="The lst_array is not self-consistent with the time_array and telescope " - "location. Consider recomputing with the `set_lsts_from_time_array` method.", + match=[ + "> 25 ms errors detected reading in LST values from MIR data. ", + "The lst_array is not self-consistent with the time_array and telescope ", + ], ): uv_object.read(testfile, use_future_array_shapes=True) uv_object.set_lsts_from_time_array() diff --git a/pyuvdata/uvdata/tests/test_mir.py b/pyuvdata/uvdata/tests/test_mir.py index 4b86a7d8a5..0e8e0438d3 100644 --- a/pyuvdata/uvdata/tests/test_mir.py +++ b/pyuvdata/uvdata/tests/test_mir.py @@ -29,8 +29,10 @@ def sma_mir_filt_main(): uv_object = UVData() with uvtest.check_warnings( UserWarning, - match="The lst_array is not self-consistent with the time_array and telescope " - "location. Consider recomputing with the `set_lsts_from_time_array` method.", + match=[ + "> 25 ms errors detected reading in LST values from MIR data. ", + "The lst_array is not self-consistent with the time_array and telescope ", + ], ): uv_object.read( sma_mir_test_file, @@ -265,10 +267,9 @@ def test_mir_partial_read(sma_mir): match=[ "Warning: a select on read keyword is set that is not supported by " "read_mir. This select will be done after reading the file.", - "The lst_array is not self-consistent with the time_array and telescope " - "location. Consider recomputing with the `set_lsts_from_time_array` method", - "The lst_array is not self-consistent with the time_array and telescope " - "location. Consider recomputing with the `set_lsts_from_time_array` method", + "> 25 ms errors detected reading in LST values from MIR data. ", + "The lst_array is not self-consistent with the time_array and telescope ", + "The lst_array is not self-consistent with the time_array and telescope ", ], ): uv3 = UVData.from_file( @@ -303,8 +304,10 @@ def test_multi_nchan_spw_read(tmp_path): uv_in = UVData() with uvtest.check_warnings( UserWarning, - match="The lst_array is not self-consistent with the time_array and telescope " - "location. Consider recomputing with the `set_lsts_from_time_array` method.", + match=[ + "> 25 ms errors detected reading in LST values from MIR data. ", + "The lst_array is not self-consistent with the time_array and telescope ", + ], ): uv_in.read_mir(sma_mir_test_file, corrchunk=[0, 1, 2, 3, 4]) uv_in.set_lsts_from_time_array() @@ -315,6 +318,7 @@ def test_multi_nchan_spw_read(tmp_path): @pytest.mark.filterwarnings("ignore:The lst_array is not self-consistent with the.") +@pytest.mark.filterwarnings("ignore:> 25 ms errors detected reading in LST values") def test_read_mir_write_ms_flex_pol(mir_data, tmp_path): """ Mir to MS loopback test with flex-pol. 
@@ -393,7 +397,13 @@ def test_inconsistent_sp_records(mir_data, sma_mir): mir_uv = UVData() mir_obj = Mir() - with uvtest.check_warnings(UserWarning, "Per-spectral window metadata differ."): + with uvtest.check_warnings( + UserWarning, + match=[ + "Per-spectral window metadata differ.", + "> 25 ms errors detected reading in LST values", + ], + ): mir_obj._init_from_mir_parser(mir_data) mir_uv._convert_from_filetype(mir_obj) mir_uv.use_future_array_shapes() @@ -411,7 +421,13 @@ def test_inconsistent_bl_records(mir_data, sma_mir): mir_data.load_data() mir_uv = UVData() mir_obj = Mir() - with uvtest.check_warnings(UserWarning, "Per-baseline metadata differ."): + with uvtest.check_warnings( + UserWarning, + match=[ + "> 25 ms errors detected reading in LST values", + "Per-baseline metadata differ.", + ], + ): mir_obj._init_from_mir_parser(mir_data) mir_uv._convert_from_filetype(mir_obj) mir_uv.use_future_array_shapes() @@ -420,6 +436,7 @@ def test_inconsistent_bl_records(mir_data, sma_mir): assert mir_uv == sma_mir +@pytest.mark.filterwarnings("ignore:> 25 ms errors detected reading in LST values") def test_multi_ipol(mir_data, sma_mir): """ Test that the MIR object does the right thing when different polarization types @@ -621,6 +638,7 @@ def test_flex_pol_spw_all_flag(sma_mir_filt): assert np.all(sma_mir_filt.flex_spw_polarization_array == -5) +@pytest.mark.filterwarnings("ignore:> 25 ms errors detected reading in LST values") def test_bad_sphid(mir_data): """ Test what bad values for sphid in sp_data result in an error. @@ -636,6 +654,7 @@ def test_bad_sphid(mir_data): assert str(err.value).startswith("'Mismatch between keys in vis_data and sphid") +@pytest.mark.filterwarnings("ignore:> 25 ms errors detected reading in LST values") def test_bad_pol_code(mir_data): """ Test that an extra (unused) pol code doesn't produce an error. Note that we want @@ -652,6 +671,7 @@ def test_bad_pol_code(mir_data): @pytest.mark.filterwarnings("ignore:The lst_array is not self-consistent") +@pytest.mark.filterwarnings("ignore:> 25 ms errors detected reading in LST values") def test_rechunk_on_read(): """Test that rechunking on read works as expected.""" uv_data = UVData.from_file( @@ -664,6 +684,7 @@ def test_rechunk_on_read(): @pytest.mark.filterwarnings("ignore:The lst_array is not self-consistent") +@pytest.mark.filterwarnings("ignore:> 25 ms errors detected reading in LST values") @pytest.mark.parametrize( "select_kwargs", [ @@ -688,6 +709,7 @@ def test_select_on_read(select_kwargs, sma_mir): assert sma_mir == uv_data +@pytest.mark.filterwarnings("ignore:> 25 ms errors detected reading in LST values") def test_non_icrs_coord_read(mir_data): # When fed a non-J2000 coordinate, we want to convert that so that it can easily mir_uv = UVData() @@ -714,6 +736,7 @@ def test_non_icrs_coord_read(mir_data): ) +@pytest.mark.filterwarnings("ignore:> 25 ms errors detected reading in LST values") def test_dedoppler_data(mir_data, sma_mir): mir_uv = UVData() mir_obj = Mir() @@ -763,14 +786,16 @@ def test_source_pos_change_warning(mir_data, tmp_path): ): mir_copy.__iadd__(mir_data, force=True, merge=False) - print(mir_copy.auto_data) - # Muck the ra coord mir_copy.in_data["rar"] = [0, 1] mir_obj = Mir() with uvtest.check_warnings( - UserWarning, "Position for 3c84 changes by more than an arcminute." 
+ UserWarning, + [ + "> 25 ms errors detected reading in LST values", + "Position for 3c84 changes by more than an arcminute.", + ], ): mir_obj._init_from_mir_parser(mir_copy) @@ -797,5 +822,8 @@ def test_spw_consistency_warning(mir_data): mir_data.bl_data._data["ant2rx"][:] = 0 mir_uv = Mir() - with uvtest.check_warnings(UserWarning, match="Discrepancy in fres"): + with uvtest.check_warnings( + UserWarning, + match=["Discrepancy in fres", "> 25 ms errors detected reading in LST values"], + ): mir_uv._init_from_mir_parser(mir_data) diff --git a/pyuvdata/uvdata/tests/test_uvdata.py b/pyuvdata/uvdata/tests/test_uvdata.py index 2aa8d3a1af..ee1fa7074b 100644 --- a/pyuvdata/uvdata/tests/test_uvdata.py +++ b/pyuvdata/uvdata/tests/test_uvdata.py @@ -11987,11 +11987,22 @@ def test_set_nsamples_wrong_shape_error(hera_uvh5): ["zen.2458661.23480.HH.uvh5", ""], [ "sma_test.mir", - ( - "The lst_array is not self-consistent with the time_array and telescope" - " location. Consider recomputing with the `set_lsts_from_time_array`" - " method" - ), + [ + ( + "The lst_array is not self-consistent with the time_array and " + "telescope location. Consider recomputing with the " + "`set_lsts_from_time_array` method" + ), + ( + "> 25 ms errors detected reading in LST values from MIR data. " + "This typically signifies a minor metadata recording error (which " + "can be mitigated by calling the `set_lsts_from_time_array` method " + "with `update_vis=False`), though additional errors about " + "uvw-position accuracy may signal more significant issues with " + "metadata accuracy that could have substantial impact on " + "downstream analysis." + ), + ], ], [ "carma_miriad", From f7db0899481ea314b18cd6979e4196ed55608b65 Mon Sep 17 00:00:00 2001 From: Garrett 'Karto' Keating Date: Tue, 13 Feb 2024 06:17:10 -0500 Subject: [PATCH 6/6] Updating test to filter new warning --- pyuvdata/tests/test_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyuvdata/tests/test_utils.py b/pyuvdata/tests/test_utils.py index d57fa924b9..d73548b813 100644 --- a/pyuvdata/tests/test_utils.py +++ b/pyuvdata/tests/test_utils.py @@ -4143,6 +4143,7 @@ def test_uvw_track_generator_errs(): @pytest.mark.parametrize("use_uvw", [False, True]) @pytest.mark.parametrize("use_earthloc", [False, True]) @pytest.mark.filterwarnings("ignore:The lst_array is not self-consistent") +@pytest.mark.filterwarnings("ignore:> 25 ms errors detected reading in LST values") def test_uvw_track_generator(flip_u, use_uvw, use_earthloc): sma_mir = UVData.from_file(os.path.join(DATA_PATH, "sma_test.mir")) sma_mir.set_lsts_from_time_array()