Skip to content

Commit

Permalink
Merge pull request spacetelescope#1259 from bhilbert4/dark-monitor-in…
Browse files Browse the repository at this point in the history
…teg-limits-and-dates

Dark monitor threshold and cadence updates
  • Loading branch information
mfixstsci authored Feb 21, 2024
2 parents 4d60962 + 1238e5c commit 7a4d932
Show file tree
Hide file tree
Showing 9 changed files with 734 additions and 772 deletions.
561 changes: 433 additions & 128 deletions jwql/instrument_monitors/common_monitors/dark_monitor.py

Large diffs are not rendered by default.

Large diffs are not rendered by default.

19 changes: 15 additions & 4 deletions jwql/instrument_monitors/pipeline_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,15 +193,22 @@ def get_pipeline_steps(instrument):
return required_steps


def image_stack(file_list):
"""Given a list of fits files containing 2D images, read in all data
def image_stack(file_list, skipped_initial_ints=0):
"""Given a list of fits files containing 2D or 3D images, read in all data
and place into a 3D stack
Parameters
----------
file_list : list
List of fits file names
skipped_initial_ints : int
Number of initial integrations from each file to skip over and
not include in the stack. Only works with files containing 3D
arrays (e.g. rateints files). This is primarily for MIRI, where
we want to skip the first N integrations due to dark current
instability.
Returns
-------
cube : numpy.ndarray
Expand All @@ -219,17 +226,21 @@ def image_stack(file_list):
if i == 0:
ndim_base = image.shape
if len(ndim_base) == 3:
cube = copy.deepcopy(image)
cube = copy.deepcopy(image[skipped_initial_ints:, :, :])
num_ints -= skipped_initial_ints
elif len(ndim_base) == 2:
cube = np.expand_dims(image, 0)
else:
ndim = image.shape
if ndim_base[-2:] == ndim[-2:]:
if len(ndim) == 2:
image = np.expand_dims(image, 0)
cube = np.vstack((cube, image))
elif len(ndim) == 3:
cube = np.vstack((cube, image[skipped_initial_ints:, :, :]))
num_ints -= skipped_initial_ints
elif len(ndim) > 3:
raise ValueError("4-dimensional input slope images not supported.")
cube = np.vstack((cube, image))
else:
raise ValueError("Input images are of inconsistent size in x/y dimension.")
exptimes.append([exptime] * num_ints)
Expand Down
150 changes: 150 additions & 0 deletions jwql/tests/test_dark_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,150 @@
from jwql.instrument_monitors.common_monitors import dark_monitor
from jwql.tests.resources import has_test_db
from jwql.utils.monitor_utils import mast_query_darks
from jwql.utils.constants import DARK_MONITOR_BETWEEN_EPOCH_THRESHOLD_TIME
from jwql.utils.utils import get_config
from jwql.utils.constants import ON_GITHUB_ACTIONS


def generate_data_for_file_splitting_test():
# Define data for parameterized test_split_files_into_sub_lists calls
files = [f'file_{idx}.fits' for idx in range(10)]
now = Time.now().mjd
deltat = [26., 25., 24., 23., 22., 4., 3., 2., 1., 0.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
threshold = 5. # integrations
integration_list = [3, 3, 2, 2, 2, 1, 1, 1, 1, 1]
expected = [['file_0.fits', 'file_1.fits'],
['file_2.fits', 'file_3.fits', 'file_4.fits'],
['file_5.fits', 'file_6.fits', 'file_7.fits', 'file_8.fits', 'file_9.fits']
]
test1 = (files, start_times, end_times, integration_list, threshold, expected)

# Final epoch may not be over. Not enough ints in final epoch
deltat = [26., 25., 24., 23., 22., 4., 3., 2., 1., 0.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
threshold = 6. # integrations
integration_list = [3, 3, 2, 2, 2, 1, 1, 1, 1, 1]
expected = [['file_0.fits', 'file_1.fits'],
['file_2.fits', 'file_3.fits', 'file_4.fits']
]
test2 = (files, start_times, end_times, integration_list, threshold, expected)

# Final epoch may not be over. Not enough ints in final subgroup of final epoch
deltat = [26., 25., 24., 23., 22., 4., 3., 2., 1., 0.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
threshold = 6. # integrations
integration_list = [3, 3, 2, 2, 2, 1, 3, 3, 2, 2]
expected = [['file_0.fits', 'file_1.fits'],
['file_2.fits', 'file_3.fits', 'file_4.fits'],
['file_5.fits', 'file_6.fits', 'file_7.fits']
]
test3 = (files, start_times, end_times, integration_list, threshold, expected)

deltat = [40., 39., 38., 37., 36., 18., 17., 16., 15., 0.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
threshold = 5. # integrations
integration_list = [3, 3, 2, 2, 2, 1, 1, 1, 1, 1]
expected = [['file_0.fits', 'file_1.fits'],
['file_2.fits', 'file_3.fits', 'file_4.fits'],
['file_5.fits', 'file_6.fits', 'file_7.fits', 'file_8.fits']
]
test4 = (files, start_times, end_times, integration_list, threshold, expected)

deltat = [40., 39., 38., 37., 36., 18., 17., 16., 15., 0.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
threshold = 6. # integrations
integration_list = [3, 3, 2, 2, 2, 1, 1, 1, 1, 1]
expected = [['file_0.fits', 'file_1.fits'],
['file_2.fits', 'file_3.fits', 'file_4.fits'],
['file_5.fits', 'file_6.fits', 'file_7.fits', 'file_8.fits']
]
test5 = (files, start_times, end_times, integration_list, threshold, expected)

deltat = [9., 8., 7., 6., 5., 4., 3., 2., 1., 0.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
integration_list = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
threshold = 6
expected = [['file_0.fits', 'file_1.fits', 'file_2.fits', 'file_3.fits', 'file_4.fits', 'file_5.fits']]
test6 = (files, start_times, end_times, integration_list, threshold, expected)

threshold = 9
expected = [['file_0.fits', 'file_1.fits', 'file_2.fits', 'file_3.fits', 'file_4.fits', 'file_5.fits',
'file_6.fits', 'file_7.fits', 'file_8.fits']]
test7 = (files, start_times, end_times, integration_list, threshold, expected)

integration_list = [1] * len(start_times)
threshold = 10
expected = [['file_0.fits', 'file_1.fits', 'file_2.fits', 'file_3.fits', 'file_4.fits', 'file_5.fits',
'file_6.fits', 'file_7.fits', 'file_8.fits', 'file_9.fits']
]
test8 = (files, start_times, end_times, integration_list, threshold, expected)

deltat = [23., 22., 21., 20., 19., 18., 17., 16., 15., 0.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
integration_list = [1] * len(start_times)
threshold = 10
expected = [['file_0.fits', 'file_1.fits', 'file_2.fits', 'file_3.fits', 'file_4.fits', 'file_5.fits',
'file_6.fits', 'file_7.fits', 'file_8.fits']
]
test9 = (files, start_times, end_times, integration_list, threshold, expected)

deltat = [9., 8., 7., 6., 5., 4., 3., 2., 1., 0.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
integration_list = [1] * len(start_times)
threshold = 10
expected = [['file_0.fits', 'file_1.fits', 'file_2.fits', 'file_3.fits', 'file_4.fits', 'file_5.fits',
'file_6.fits', 'file_7.fits', 'file_8.fits', 'file_9.fits']
]
test10 = (files, start_times, end_times, integration_list, threshold, expected)

deltat = [9., 8., 7., 6., 5., 4., 3., 2., 1., 0.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
integration_list = [1] * len(start_times)
threshold = 11
expected = []
test11 = (files, start_times, end_times, integration_list, threshold, expected)

deltat = [40., 39., 38., 37., 24., 23., 22., 21., 1., 0.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
threshold = 6 # integrations
integration_list = [3, 3, 2, 2, 2, 1, 1, 1, 1, 1]
expected = [['file_0.fits', 'file_1.fits'],
['file_2.fits', 'file_3.fits'],
['file_4.fits', 'file_5.fits', 'file_6.fits', 'file_7.fits']
]
test12 = (files, start_times, end_times, integration_list, threshold, expected)

# In this case, the final 2 files are grouped together due to being taken close
# in time to one another. However, they do not contain enough integrations to
# reach the threshold. Since these are the final two files, we have no way of
# knowing if they are just the first two observations of a larger set that should
# be grouped. Therefore, the dark monitor ignores these final two files, under
# the assumption that they will be used the next time the monitor is run.
deltat = [50., 49., 48., 47., 34., 33., 32., 31., 20., 19.]
start_times = [now - dt for dt in deltat]
end_times = [s + 0.1 for s in start_times]
threshold = 6 # integrations
integration_list = [3, 3, 2, 2, 2, 1, 1, 1, 1, 1]
expected = [['file_0.fits', 'file_1.fits'],
['file_2.fits', 'file_3.fits'],
['file_4.fits', 'file_5.fits', 'file_6.fits', 'file_7.fits']
]
test13 = (files, start_times, end_times, integration_list, threshold, expected)

return [test1, test2, test3, test4, test5, test6, test7, test8, test9, test10, test11, test12, test13]


def test_find_hot_dead_pixels():
"""Test hot and dead pixel searches"""
monitor = dark_monitor.Dark()
Expand Down Expand Up @@ -137,6 +277,16 @@ def test_shift_to_full_frame():
assert np.all(new_coords[1] == np.array([518, 515]))


@pytest.mark.parametrize("files,start_times,end_times,integration_list,threshold,expected", generate_data_for_file_splitting_test())
def test_split_files_into_sub_lists(files, start_times, end_times, integration_list, threshold, expected):
"""Test that file lists are appropriately split into subgroups for separate monitor runs"""
d = dark_monitor.Dark()
d.instrument = 'nircam'
d.split_files_into_sub_lists(files, start_times, end_times, integration_list, threshold)

assert d.file_batches == expected


@pytest.mark.skipif(not has_test_db(), reason='Modifies test database.')
def test_add_bad_pix():
coord = ([1, 2, 3], [4, 5, 6])
Expand Down
6 changes: 4 additions & 2 deletions jwql/utils/calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"""

import numpy as np
import warnings

from astropy.modeling import fitting, models
from astropy.stats import sigma_clip
Expand Down Expand Up @@ -169,8 +170,9 @@ def mean_stdev(image, sigma_threshold=3):
stdev_value : float
Sigma-clipped standard deviation of image
"""

clipped, lower, upper = sigmaclip(image, low=sigma_threshold, high=sigma_threshold)
# Ignore the warning about NaNs being clipped.
warnings.filterwarnings('ignore', message='Input data contains invalid values (NaNs or infs), which were automatically clipped.*')
clipped = sigma_clip(image, sigma=sigma_threshold, masked=False)
mean_value = np.mean(clipped)
stdev_value = np.std(clipped)

Expand Down
16 changes: 16 additions & 0 deletions jwql/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,18 @@
# Types of potential bad pixels identified by the dark current monitor
DARK_MONITOR_BADPIX_TYPES = ["hot", "dead", "noisy"]

# Minimum amount of time, in days, between epochs of dark current observations. If the
# dark monitor sees this much time, or longer, between two dark current files, it assumes
# that the two files are part of separate epochs. This means the monitor will run separately
# on these files, rather than bundling them together into a batch, where they would have
# been combined into a mean dark rate
DARK_MONITOR_BETWEEN_EPOCH_THRESHOLD_TIME = {'nircam': 10.,
'niriss': 10.,
'miri': 0.00001, # Treat each MIRI exposure separately
'nirspec': 10.,
'fgs': 10.
}

# Maximum number of potential new bad pixels to overplot on the dark monitor
# mean dark image plot. Too many overplotted points starts to obscure the image
# itself, and are most likely not really new bad pixels
Expand Down Expand Up @@ -616,6 +628,10 @@
# Maximum number of records returned by MAST for a single query
MAST_QUERY_LIMIT = 550000

# Minimum number of groups per integration required to include data
# in the dark current monitor
MINIMUM_DARK_CURRENT_GROUPS = 10

# Expected position sensor values for MIRI. Used by the EDB monitor
# to filter out bad values. Tuple values are the expected value and
# the standard deviation associated with the value
Expand Down
5 changes: 5 additions & 0 deletions jwql/utils/instrument_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,11 @@ def amplifier_info(filename, omit_reference_pixels=True):
except KeyError:
raise KeyError('DQ extension not found.')

# If the file contains multiple frames (e.g. rateints file)
# keep just the first
if len(data_quality.shape) == 3:
data_quality = data_quality[0, :, :]

# Reference pixels should be flagged in the DQ array with the
# REFERENCE_PIXEL flag. Find the science pixels by looping for
# pixels that don't have that bit set.
Expand Down
7 changes: 6 additions & 1 deletion jwql/utils/monitor_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
import datetime
import os
from astroquery.mast import Mast, Observations
import numpy as np
from django import setup


from jwql.database.database_interface import Monitor, engine
from jwql.utils.constants import ASIC_TEMPLATES, JWST_DATAPRODUCTS, MAST_QUERY_LIMIT
from jwql.utils.constants import ON_GITHUB_ACTIONS, ON_READTHEDOCS
Expand Down Expand Up @@ -154,6 +154,11 @@ def mast_query_darks(instrument, aperture, start_date, end_date, readpatt=None):
if len(query['data']) > 0:
query_results.extend(query['data'])

# Put the file entries in chronological order
expstarts = [e['expstart'] for e in query_results]
idx = np.argsort(expstarts)
query_results = list(np.array(query_results)[idx])

return query_results


Expand Down
Loading

0 comments on commit 7a4d932

Please sign in to comment.