Skip to content

Commit

Permalink
Merge branch 'main' into jp3770
Browse files Browse the repository at this point in the history
  • Loading branch information
penaguerrero authored Oct 3, 2024
2 parents b76e877 + b47cac8 commit 5da311a
Show file tree
Hide file tree
Showing 9 changed files with 65 additions and 65 deletions.
1 change: 1 addition & 0 deletions changes/8851.outlier_detection.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Update documentation to clarify the interaction between pipeline-level and step-level `--in_memory` flags.
1 change: 1 addition & 0 deletions changes/8853.outlier_detection.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Avoid modifying input and saving duplicate files when resample_data=False.
2 changes: 2 additions & 0 deletions docs/jwst/outlier_detection/arguments.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ that control the behavior of the processing:
Specifies whether or not to load and create all images that are used during
processing into memory. If ``False``, input files are loaded from disk when
needed and all intermediate files are stored on disk, rather than in memory.
This flag is superseded by the pipeline-level ``--in_memory`` flag, and thus
has no effect when running the full level 3 pipeline.

Step Arguments for IFU data
===========================
Expand Down
6 changes: 5 additions & 1 deletion docs/jwst/outlier_detection/outlier_detection_imaging.rst
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,10 @@ during processing includes:
Those sections are then read in one at a time to compute the median image.

These changes result in a minimum amount of memory usage during processing at the obvious
expense of reading and writing the products from disk.
expense of reading and writing the products from disk. Note that if a ModelLibrary object
is input to the step, the memory behavior of the step is read from the ``on_disk`` status
of the ModelLibrary object, and the ``in_memory`` parameter of the step is ignored.
When running ``calwebb_image3``, the ``in_memory`` flag should therefore be set at the pipeline level,
e.g., ``strun calwebb_image3 asn.json --in_memory=True``; the step-specific flag will be ignored.

.. automodapi:: jwst.outlier_detection.imaging
1 change: 0 additions & 1 deletion jwst/outlier_detection/imaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ def detect_outliers(
kernel=kernel,
fillval=fillval,
good_bits=good_bits,
in_memory=in_memory,
allowed_memory=allowed_memory,
)
median_data, median_wcs = median_with_resampling(input_models,
Expand Down
2 changes: 1 addition & 1 deletion jwst/outlier_detection/outlier_detection_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class OutlierDetectionStep(Step):
good_bits = string(default="~DO_NOT_USE") # DQ flags to allow
search_output_file = boolean(default=False)
allowed_memory = float(default=None) # Fraction of memory to use for the combined image
in_memory = boolean(default=False)
in_memory = boolean(default=False) # ignored if run within the pipeline; set at pipeline level instead
"""

def process(self, input_data):
Expand Down
1 change: 0 additions & 1 deletion jwst/outlier_detection/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ def detect_outliers(
kernel=kernel,
fillval=fillval,
good_bits=good_bits,
in_memory=in_memory,
)

median_data, median_wcs = median_with_resampling(
Expand Down
87 changes: 42 additions & 45 deletions jwst/outlier_detection/tests/test_outlier_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ def we_three_sci():
def test_outlier_step_no_outliers(we_three_sci, do_resample, tmp_cwd):
"""Test whole step, no outliers"""
container = ModelContainer(list(we_three_sci))
container[0].var_rnoise[10, 10] = 1E9
pristine = ModelContainer([m.copy() for m in container])
OutlierDetectionStep.call(container, in_memory=True, resample_data=do_resample)

Expand Down Expand Up @@ -261,7 +262,9 @@ def test_outlier_step_base(we_three_sci, tmp_cwd):
assert len(median_files) != 0


def test_outlier_step_spec(tmp_cwd, tmp_path):
@pytest.mark.parametrize('resample', [True, False])
@pytest.mark.parametrize('save_intermediate', [True, False])
def test_outlier_step_spec(tmp_cwd, tmp_path, resample, save_intermediate):
"""Test outlier step for spec data including saving intermediate results."""
output_dir = tmp_path / 'output'
output_dir.mkdir(exist_ok=True)
Expand All @@ -275,50 +278,33 @@ def test_outlier_step_spec(tmp_cwd, tmp_path):
miri_cal.meta.exposure.type = "MIR_LRS-FIXEDSLIT"

# Make a couple copies, give them unique exposure numbers and filename
container = ModelContainer([miri_cal, miri_cal.copy(), miri_cal.copy()])
container = ModelContainer([miri_cal.copy(), miri_cal.copy(), miri_cal.copy()])
for i, model in enumerate(container):
model.meta.filename = f'test_{i}_cal.fits'

# Drop a CR on the science array in the first image
container[0].data[209, 37] += 1

# Verify that intermediate files are removed when not saved
# (s2d files are expected, i2d files are not, but we'll check
# for them to make sure the imaging extension didn't creep back in)
OutlierDetectionStep.call(container, output_dir=output_dir, save_results=True)
for dirname in [output_dir, tmp_cwd]:
result_files = glob(os.path.join(dirname, '*outlierdetectionstep.fits'))
i2d_files = glob(os.path.join(dirname, '*i2d*.fits'))
s2d_files = glob(os.path.join(dirname, '*outlier_s2d.fits'))
median_files = glob(os.path.join(dirname, '*median.fits'))
blot_files = glob(os.path.join(dirname, '*blot.fits'))

# intermediate files are removed
assert len(i2d_files) == 0
assert len(s2d_files) == 0
assert len(median_files) == 0
assert len(blot_files) == 0

# result files are written to the output directory
if dirname == output_dir:
assert len(result_files) == len(container)
else:
assert len(result_files) == 0

# Call again, but save intermediate to the output path
# Call outlier detection
result = OutlierDetectionStep.call(
container, save_results=True, save_intermediate_results=True,
output_dir=output_dir
)
container, resample_data=resample,
output_dir=output_dir, save_results=True,
save_intermediate_results=save_intermediate)

# Make sure nothing changed in SCI array
for image, corrected in zip(container, result):
np.testing.assert_allclose(image.data, corrected.data)
for image in result:
nn = ~np.isnan(image.data)
np.testing.assert_allclose(image.data[nn], miri_cal.data[nn])

# Verify CR is flagged
assert np.isnan(result[0].data[209, 37])
assert result[0].dq[209, 37] == OUTLIER_DO_NOT_USE

# Verify that intermediate files are saved at the specified location
if save_intermediate:
expected_intermediate = len(container)
else:
expected_intermediate = 0
for dirname in [output_dir, tmp_cwd]:
all_files = glob(os.path.join(dirname, '*.fits'))
result_files = glob(os.path.join(dirname, '*outlierdetectionstep.fits'))
Expand All @@ -327,24 +313,35 @@ def test_outlier_step_spec(tmp_cwd, tmp_path):
median_files = glob(os.path.join(dirname, '*median.fits'))
blot_files = glob(os.path.join(dirname, '*blot.fits'))
if dirname == output_dir:
# result files are written to the output directory
# Result files are always written to the output directory
assert len(result_files) == len(container)

# s2d, median, and blot files are written to the output directory
assert len(s2d_files) == len(container)
assert len(blot_files) == len(container)
assert len(median_files) == 1

# i2d files not written
# s2d and blot files are written to the output directory
# if save_intermediate is True and resampling is set
if resample:
assert len(s2d_files) == expected_intermediate
assert len(blot_files) == expected_intermediate
else:
assert len(s2d_files) == 0
assert len(blot_files) == 0

# Only one median file is saved if save_intermediate is True,
# no matter how many input files there are
if save_intermediate:
assert len(median_files) == 1
else:
assert len(median_files) == 0

# i2d files are never written
assert len(i2d_files) == 0

# nothing else was written
assert len(all_files) == len(s2d_files) + \
len(median_files) + \
len(result_files) + \
len(blot_files)
# Nothing else was written
assert len(all_files) == (len(s2d_files)
+ len(median_files)
+ len(result_files)
+ len(blot_files))
else:
# nothing should be written to the current directory
# Nothing should be written to the current directory
assert len(result_files) == 0
assert len(s2d_files) == 0
assert len(median_files) == 0
Expand Down Expand Up @@ -674,4 +671,4 @@ def make_resamp(input_models):
asn_id="test",
allowed_memory=None,
)
return resamp
return resamp
29 changes: 13 additions & 16 deletions jwst/outlier_detection/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,24 +79,21 @@ def median_without_resampling(input_models,
for i in range(len(input_models)):

drizzled_model = input_models.borrow(i)
drizzled_model.wht = build_driz_weight(drizzled_model,
weight_type=weight_type,
good_bits=good_bits)
median_wcs = copy.deepcopy(drizzled_model.meta.wcs)
input_models.shelve(drizzled_model, i, modify=True)

if save_intermediate_results:
# write the drizzled model to file
_fileio.save_drizzled(drizzled_model, make_output_path)

drizzled_data = drizzled_model.data.copy()
weight = build_driz_weight(drizzled_model,
weight_type=weight_type,
good_bits=good_bits)
if i == 0:
input_shape = (ngroups,)+drizzled_model.data.shape
dtype = drizzled_model.data.dtype
median_wcs = copy.deepcopy(drizzled_model.meta.wcs)
input_shape = (ngroups,) + drizzled_data.shape
dtype = drizzled_data.dtype
computer = MedianComputer(input_shape, in_memory, buffer_size, dtype)

weight_threshold = compute_weight_threshold(drizzled_model.wht, maskpt)
drizzled_model.data[drizzled_model.wht < weight_threshold] = np.nan
computer.append(drizzled_model.data, i)
weight_threshold = compute_weight_threshold(weight, maskpt)
drizzled_data[weight < weight_threshold] = np.nan
computer.append(drizzled_data, i)

input_models.shelve(drizzled_model, i, modify=False)

# Perform median combination on set of drizzled mosaics
median_data = computer.evaluate()
Expand Down Expand Up @@ -154,14 +151,14 @@ def median_with_resampling(input_models,
with input_models:
for i, indices in enumerate(indices_by_group):

median_wcs = resamp.output_wcs
drizzled_model = resamp.resample_group(input_models, indices)

if save_intermediate_results:
# write the drizzled model to file
_fileio.save_drizzled(drizzled_model, make_output_path)

if i == 0:
median_wcs = resamp.output_wcs
input_shape = (ngroups,)+drizzled_model.data.shape
dtype = drizzled_model.data.dtype
computer = MedianComputer(input_shape, in_memory, buffer_size, dtype)
Expand Down

0 comments on commit 5da311a

Please sign in to comment.