
Commit

Merge pull request #1259 from bhilbert4/dark-monitor-integ-limits-and-dates

Dark monitor threshold and cadence updates
mfixstsci authored Feb 21, 2024
2 parents 4d60962 + 1238e5c commit 7a4d932
Showing 9 changed files with 734 additions and 772 deletions.
561 changes: 433 additions & 128 deletions jwql/instrument_monitors/common_monitors/dark_monitor.py

Large diffs are not rendered by default.

19 changes: 15 additions & 4 deletions jwql/instrument_monitors/pipeline_tools.py
@@ -193,15 +193,22 @@ def get_pipeline_steps(instrument):
return required_steps


def image_stack(file_list):
"""Given a list of fits files containing 2D images, read in all data
def image_stack(file_list, skipped_initial_ints=0):
"""Given a list of fits files containing 2D or 3D images, read in all data
and place into a 3D stack
Parameters
----------
file_list : list
List of fits file names
skipped_initial_ints : int
Number of initial integrations from each file to skip over and
not include in the stack. Only works with files containing 3D
arrays (e.g. rateints files). This is primarily for MIRI, where
we want to skip the first N integrations due to dark current
instability.
Returns
-------
cube : numpy.ndarray
@@ -219,17 +226,21 @@ def image_stack(file_list):
if i == 0:
ndim_base = image.shape
if len(ndim_base) == 3:
cube = copy.deepcopy(image)
cube = copy.deepcopy(image[skipped_initial_ints:, :, :])
num_ints -= skipped_initial_ints
elif len(ndim_base) == 2:
cube = np.expand_dims(image, 0)
else:
ndim = image.shape
if ndim_base[-2:] == ndim[-2:]:
if len(ndim) == 2:
image = np.expand_dims(image, 0)
cube = np.vstack((cube, image))
elif len(ndim) == 3:
cube = np.vstack((cube, image[skipped_initial_ints:, :, :]))
num_ints -= skipped_initial_ints
elif len(ndim) > 3:
raise ValueError("4-dimensional input slope images not supported.")
cube = np.vstack((cube, image))
else:
raise ValueError("Input images are of inconsistent size in x/y dimension.")
exptimes.append([exptime] * num_ints)
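For context, a minimal usage sketch of the updated image_stack (the file names are hypothetical, and it is assumed, per the diff above, that the function returns the stacked cube along with per-integration exposure times):

```python
from jwql.instrument_monitors.pipeline_tools import image_stack

# Hypothetical MIRI rateints files, each holding a 3D (integrations, y, x) array
files = ['miri_dark_obs1_rateints.fits', 'miri_dark_obs2_rateints.fits']

# Skip the first integration of each file to sidestep MIRI dark current
# instability; the remaining integrations are stacked into a single cube
cube, exptimes = image_stack(files, skipped_initial_ints=1)
```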
150 changes: 150 additions & 0 deletions jwql/tests/test_dark_monitor.py
@@ -28,10 +28,150 @@
from jwql.instrument_monitors.common_monitors import dark_monitor
from jwql.tests.resources import has_test_db
from jwql.utils.monitor_utils import mast_query_darks
from jwql.utils.constants import DARK_MONITOR_BETWEEN_EPOCH_THRESHOLD_TIME
from jwql.utils.utils import get_config
from jwql.utils.constants import ON_GITHUB_ACTIONS


def generate_data_for_file_splitting_test():
# Define data for parameterized test_split_files_into_sub_lists calls
files = [f'file_{idx}.fits' for idx in range(10)]
now = Time.now().mjd
deltat = [26., 25., 24., 23., 22., 4., 3., 2., 1., 0.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
threshold = 5. # integrations
integration_list = [3, 3, 2, 2, 2, 1, 1, 1, 1, 1]
expected = [['file_0.fits', 'file_1.fits'],
['file_2.fits', 'file_3.fits', 'file_4.fits'],
['file_5.fits', 'file_6.fits', 'file_7.fits', 'file_8.fits', 'file_9.fits']
]
test1 = (files, start_times, end_times, integration_list, threshold, expected)

# Final epoch may not be over. Not enough ints in final epoch
deltat = [26., 25., 24., 23., 22., 4., 3., 2., 1., 0.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
threshold = 6. # integrations
integration_list = [3, 3, 2, 2, 2, 1, 1, 1, 1, 1]
expected = [['file_0.fits', 'file_1.fits'],
['file_2.fits', 'file_3.fits', 'file_4.fits']
]
test2 = (files, start_times, end_times, integration_list, threshold, expected)

# Final epoch may not be over. Not enough ints in final subgroup of final epoch
deltat = [26., 25., 24., 23., 22., 4., 3., 2., 1., 0.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
threshold = 6. # integrations
integration_list = [3, 3, 2, 2, 2, 1, 3, 3, 2, 2]
expected = [['file_0.fits', 'file_1.fits'],
['file_2.fits', 'file_3.fits', 'file_4.fits'],
['file_5.fits', 'file_6.fits', 'file_7.fits']
]
test3 = (files, start_times, end_times, integration_list, threshold, expected)

deltat = [40., 39., 38., 37., 36., 18., 17., 16., 15., 0.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
threshold = 5. # integrations
integration_list = [3, 3, 2, 2, 2, 1, 1, 1, 1, 1]
expected = [['file_0.fits', 'file_1.fits'],
['file_2.fits', 'file_3.fits', 'file_4.fits'],
['file_5.fits', 'file_6.fits', 'file_7.fits', 'file_8.fits']
]
test4 = (files, start_times, end_times, integration_list, threshold, expected)

deltat = [40., 39., 38., 37., 36., 18., 17., 16., 15., 0.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
threshold = 6. # integrations
integration_list = [3, 3, 2, 2, 2, 1, 1, 1, 1, 1]
expected = [['file_0.fits', 'file_1.fits'],
['file_2.fits', 'file_3.fits', 'file_4.fits'],
['file_5.fits', 'file_6.fits', 'file_7.fits', 'file_8.fits']
]
test5 = (files, start_times, end_times, integration_list, threshold, expected)

deltat = [9., 8., 7., 6., 5., 4., 3., 2., 1., 0.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
integration_list = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
threshold = 6
expected = [['file_0.fits', 'file_1.fits', 'file_2.fits', 'file_3.fits', 'file_4.fits', 'file_5.fits']]
test6 = (files, start_times, end_times, integration_list, threshold, expected)

threshold = 9
expected = [['file_0.fits', 'file_1.fits', 'file_2.fits', 'file_3.fits', 'file_4.fits', 'file_5.fits',
'file_6.fits', 'file_7.fits', 'file_8.fits']]
test7 = (files, start_times, end_times, integration_list, threshold, expected)

integration_list = [1] * len(start_times)
threshold = 10
expected = [['file_0.fits', 'file_1.fits', 'file_2.fits', 'file_3.fits', 'file_4.fits', 'file_5.fits',
'file_6.fits', 'file_7.fits', 'file_8.fits', 'file_9.fits']
]
test8 = (files, start_times, end_times, integration_list, threshold, expected)

deltat = [23., 22., 21., 20., 19., 18., 17., 16., 15., 0.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
integration_list = [1] * len(start_times)
threshold = 10
expected = [['file_0.fits', 'file_1.fits', 'file_2.fits', 'file_3.fits', 'file_4.fits', 'file_5.fits',
'file_6.fits', 'file_7.fits', 'file_8.fits']
]
test9 = (files, start_times, end_times, integration_list, threshold, expected)

deltat = [9., 8., 7., 6., 5., 4., 3., 2., 1., 0.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
integration_list = [1] * len(start_times)
threshold = 10
expected = [['file_0.fits', 'file_1.fits', 'file_2.fits', 'file_3.fits', 'file_4.fits', 'file_5.fits',
'file_6.fits', 'file_7.fits', 'file_8.fits', 'file_9.fits']
]
test10 = (files, start_times, end_times, integration_list, threshold, expected)

deltat = [9., 8., 7., 6., 5., 4., 3., 2., 1., 0.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
integration_list = [1] * len(start_times)
threshold = 11
expected = []
test11 = (files, start_times, end_times, integration_list, threshold, expected)

deltat = [40., 39., 38., 37., 24., 23., 22., 21., 1., 0.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
threshold = 6 # integrations
integration_list = [3, 3, 2, 2, 2, 1, 1, 1, 1, 1]
expected = [['file_0.fits', 'file_1.fits'],
['file_2.fits', 'file_3.fits'],
['file_4.fits', 'file_5.fits', 'file_6.fits', 'file_7.fits']
]
test12 = (files, start_times, end_times, integration_list, threshold, expected)

# In this case, the final 2 files are grouped together due to being taken close
# in time to one another. However, they do not contain enough integrations to
# reach the threshold. Since these are the final two files, we have no way of
# knowing if they are just the first two observations of a larger set that should
# be grouped. Therefore, the dark monitor ignores these final two files, under
# the assumption that they will be used the next time the monitor is run.
deltat = [50., 49., 48., 47., 34., 33., 32., 31., 20., 19.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
threshold = 6 # integrations
integration_list = [3, 3, 2, 2, 2, 1, 1, 1, 1, 1]
expected = [['file_0.fits', 'file_1.fits'],
['file_2.fits', 'file_3.fits'],
['file_4.fits', 'file_5.fits', 'file_6.fits', 'file_7.fits']
]
test13 = (files, start_times, end_times, integration_list, threshold, expected)

return [test1, test2, test3, test4, test5, test6, test7, test8, test9, test10, test11, test12, test13]


def test_find_hot_dead_pixels():
"""Test hot and dead pixel searches"""
monitor = dark_monitor.Dark()
@@ -137,6 +277,16 @@ def test_shift_to_full_frame():
assert np.all(new_coords[1] == np.array([518, 515]))


@pytest.mark.parametrize("files,start_times,end_times,integration_list,threshold,expected", generate_data_for_file_splitting_test())
def test_split_files_into_sub_lists(files, start_times, end_times, integration_list, threshold, expected):
"""Test that file lists are appropriately split into subgroups for separate monitor runs"""
d = dark_monitor.Dark()
d.instrument = 'nircam'
d.split_files_into_sub_lists(files, start_times, end_times, integration_list, threshold)

assert d.file_batches == expected
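For reference, a minimal sketch of the grouping behavior these parameterized cases encode. This is a hypothetical re-implementation for illustration, not the monitor's actual split_files_into_sub_lists; the 10-day default gap mirrors DARK_MONITOR_BETWEEN_EPOCH_THRESHOLD_TIME for most instruments.

```python
def split_into_batches(files, start_times, end_times, integrations,
                       integration_threshold, epoch_gap=10.0):
    """Hypothetical sketch: group dark files into batches for separate runs.

    A gap of more than epoch_gap days between one file's end time and the
    next file's start time marks an epoch boundary. Within an epoch, a batch
    closes once it accumulates integration_threshold integrations. A completed
    epoch keeps its trailing batch even below the threshold; the final
    (possibly still unfinished) epoch drops it, leaving those files for a
    future monitor run.
    """
    # Split file indices into epochs wherever a large time gap appears
    epochs, current = [], [0]
    for i in range(1, len(files)):
        if start_times[i] - end_times[i - 1] > epoch_gap:
            epochs.append(current)
            current = []
        current.append(i)
    epochs.append(current)

    batches = []
    for num, epoch in enumerate(epochs):
        batch, n_ints = [], 0
        for i in epoch:
            batch.append(files[i])
            n_ints += integrations[i]
            if n_ints >= integration_threshold:
                batches.append(batch)
                batch, n_ints = [], 0
        # Keep an under-threshold remainder only for completed epochs
        if batch and num < len(epochs) - 1:
            batches.append(batch)
    return batches
```

Applied to the thirteen cases above, this sketch reproduces the expected batches, including dropping an under-threshold remainder only when it falls in the final epoch.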


@pytest.mark.skipif(not has_test_db(), reason='Modifies test database.')
def test_add_bad_pix():
coord = ([1, 2, 3], [4, 5, 6])
6 changes: 4 additions & 2 deletions jwql/utils/calculations.py
@@ -17,6 +17,7 @@
"""

import numpy as np
import warnings

from astropy.modeling import fitting, models
from astropy.stats import sigma_clip
@@ -169,8 +170,9 @@ def mean_stdev(image, sigma_threshold=3):
stdev_value : float
Sigma-clipped standard deviation of image
"""

clipped, lower, upper = sigmaclip(image, low=sigma_threshold, high=sigma_threshold)
# Ignore the warning about NaNs being clipped.
warnings.filterwarnings('ignore', message='Input data contains invalid values (NaNs or infs), which were automatically clipped.*')
clipped = sigma_clip(image, sigma=sigma_threshold, masked=False)
mean_value = np.mean(clipped)
stdev_value = np.std(clipped)
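As a self-contained illustration of the new approach, the example below computes sigma-clipped statistics while suppressing only astropy's NaN-clipping warning, scoped to a context manager rather than the module-level filter used above; the sample data are made up:

```python
import warnings

import numpy as np
from astropy.stats import sigma_clip

data = np.array([1.0, 2.0, 2.5, 3.0, 100.0, np.nan])

# Silence only astropy's complaint about NaNs/infs being clipped,
# and only within this block
with warnings.catch_warnings():
    warnings.filterwarnings('ignore', message='Input data contains invalid values.*')
    clipped = sigma_clip(data, sigma=3, masked=False)

# Use nan-aware statistics, since rejected values may come back as NaN
print(np.nanmean(clipped), np.nanstd(clipped))
```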

16 changes: 16 additions & 0 deletions jwql/utils/constants.py
@@ -207,6 +207,18 @@
# Types of potential bad pixels identified by the dark current monitor
DARK_MONITOR_BADPIX_TYPES = ["hot", "dead", "noisy"]

# Minimum amount of time, in days, between epochs of dark current observations. If the
# dark monitor sees this much time, or longer, between two dark current files, it assumes
# that the two files are part of separate epochs. This means the monitor will run separately
# on these files, rather than bundling them together into a batch, where they would have
# been combined into a mean dark rate.
DARK_MONITOR_BETWEEN_EPOCH_THRESHOLD_TIME = {'nircam': 10.,
'niriss': 10.,
'miri': 0.00001, # Treat each MIRI exposure separately
'nirspec': 10.,
'fgs': 10.
}

# Maximum number of potential new bad pixels to overplot on the dark monitor
# mean dark image plot. Too many overplotted points starts to obscure the image
# itself, and are most likely not really new bad pixels
@@ -616,6 +628,10 @@
# Maximum number of records returned by MAST for a single query
MAST_QUERY_LIMIT = 550000

# Minimum number of groups per integration required to include data
# in the dark current monitor
MINIMUM_DARK_CURRENT_GROUPS = 10
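A brief, hypothetical illustration of how calling code might consult these two new constants (the instrument key and group count below are made up):

```python
from jwql.utils.constants import (DARK_MONITOR_BETWEEN_EPOCH_THRESHOLD_TIME,
                                  MINIMUM_DARK_CURRENT_GROUPS)

instrument = 'miri'  # hypothetical instrument name
epoch_gap_days = DARK_MONITOR_BETWEEN_EPOCH_THRESHOLD_TIME[instrument]

ngroups = 12  # hypothetical value read from a file header
use_file = ngroups >= MINIMUM_DARK_CURRENT_GROUPS
```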

# Expected position sensor values for MIRI. Used by the EDB monitor
# to filter out bad values. Tuple values are the expected value and
# the standard deviation associated with the value
5 changes: 5 additions & 0 deletions jwql/utils/instrument_properties.py
@@ -129,6 +129,11 @@ def amplifier_info(filename, omit_reference_pixels=True):
except KeyError:
raise KeyError('DQ extension not found.')

# If the file contains multiple frames (e.g. a rateints file),
# keep just the first.
if len(data_quality.shape) == 3:
data_quality = data_quality[0, :, :]

# Reference pixels should be flagged in the DQ array with the
# REFERENCE_PIXEL flag. Find the science pixels by looking for
# pixels that don't have that bit set.
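To make the flag check concrete, here is a small sketch of isolating science pixels via the REFERENCE_PIXEL bit. It assumes the jwst package's dqflags table; the DQ array is a stand-in:

```python
import numpy as np
from jwst.datamodels import dqflags

# Stand-in 2D DQ array (after keeping only the first integration)
data_quality = np.zeros((2048, 2048), dtype=np.uint32)
data_quality[:4, :] = dqflags.pixel['REFERENCE_PIXEL']  # flag the top rows

# Science pixels are those without the REFERENCE_PIXEL bit set
science = (data_quality & dqflags.pixel['REFERENCE_PIXEL']) == 0
ys, xs = np.where(science)
```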
7 changes: 6 additions & 1 deletion jwql/utils/monitor_utils.py
@@ -19,9 +19,9 @@
import datetime
import os
from astroquery.mast import Mast, Observations
import numpy as np
from django import setup


from jwql.database.database_interface import Monitor, engine
from jwql.utils.constants import ASIC_TEMPLATES, JWST_DATAPRODUCTS, MAST_QUERY_LIMIT
from jwql.utils.constants import ON_GITHUB_ACTIONS, ON_READTHEDOCS
@@ -154,6 +154,11 @@ def mast_query_darks(instrument, aperture, start_date, end_date, readpatt=None):
if len(query['data']) > 0:
query_results.extend(query['data'])

# Put the file entries in chronological order
expstarts = [e['expstart'] for e in query_results]
idx = np.argsort(expstarts)
query_results = list(np.array(query_results)[idx])

return query_results
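The sorting step can be shown in isolation with made-up query entries; a plain sorted() call with a key function is an equivalent pure-Python alternative that avoids the numpy round-trip:

```python
import numpy as np

# Made-up MAST-style result entries
query_results = [{'filename': 'c.fits', 'expstart': 59010.2},
                 {'filename': 'a.fits', 'expstart': 59001.7},
                 {'filename': 'b.fits', 'expstart': 59005.4}]

# Chronological order via argsort over the exposure start times
idx = np.argsort([e['expstart'] for e in query_results])
query_results = [query_results[i] for i in idx]

# Equivalent: query_results = sorted(query_results, key=lambda e: e['expstart'])
```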

