From 4a33784119f32522ffe4432f4b1c390acd2f2c9e Mon Sep 17 00:00:00 2001 From: Melanie Clarke Date: Wed, 2 Oct 2024 14:13:31 -0400 Subject: [PATCH 1/5] Fix intermediate issues with non-resampled outlier methods --- .../tests/test_outlier_detection.py | 43 +++++++++++++------ jwst/outlier_detection/utils.py | 14 +++--- 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/jwst/outlier_detection/tests/test_outlier_detection.py b/jwst/outlier_detection/tests/test_outlier_detection.py index 3155824fc8..de42b4fd2a 100644 --- a/jwst/outlier_detection/tests/test_outlier_detection.py +++ b/jwst/outlier_detection/tests/test_outlier_detection.py @@ -203,6 +203,7 @@ def we_three_sci(): def test_outlier_step_no_outliers(we_three_sci, do_resample, tmp_cwd): """Test whole step, no outliers""" container = ModelContainer(list(we_three_sci)) + container[0].var_rnoise[10, 10] = 1E9 pristine = ModelContainer([m.copy() for m in container]) OutlierDetectionStep.call(container, in_memory=True, resample_data=do_resample) @@ -261,7 +262,8 @@ def test_outlier_step_base(we_three_sci, tmp_cwd): assert len(median_files) != 0 -def test_outlier_step_spec(tmp_cwd, tmp_path): +@pytest.mark.parametrize('resample', [True, False]) +def test_outlier_step_spec(tmp_cwd, tmp_path, resample): """Test outlier step for spec data including saving intermediate results.""" output_dir = tmp_path / 'output' output_dir.mkdir(exist_ok=True) @@ -274,18 +276,25 @@ def test_outlier_step_spec(tmp_cwd, tmp_path): # Make it an exposure type outlier detection expects miri_cal.meta.exposure.type = "MIR_LRS-FIXEDSLIT" - # Make a couple copies, give them unique exposure numbers and filename - container = ModelContainer([miri_cal, miri_cal.copy(), miri_cal.copy()]) - for i, model in enumerate(container): - model.meta.filename = f'test_{i}_cal.fits' + def make_input(): + # Make a couple copies, give them unique exposure numbers and filename + container = ModelContainer([miri_cal.copy(), miri_cal.copy(), miri_cal.copy()]) + for i, model in enumerate(container): + model.meta.filename = f'test_{i}_cal.fits' - # Drop a CR on the science array in the first image - container[0].data[209, 37] += 1 + # Drop a CR on the science array in the first image + container[0].data[209, 37] += 1 + + return container # Verify that intermediate files are removed when not saved - # (s2d files are expected, i2d files are not, but we'll check - # for them to make sure the imaging extension didn't creep back in) - OutlierDetectionStep.call(container, output_dir=output_dir, save_results=True) + # If resampling, s2d files are expected, i2d files are not, + # but we'll check for them to make sure the imaging extension + # didn't creep back in. + container = make_input() + OutlierDetectionStep.call(container, + output_dir=output_dir, save_results=True, + resample_data=resample) for dirname in [output_dir, tmp_cwd]: result_files = glob(os.path.join(dirname, '*outlierdetectionstep.fits')) i2d_files = glob(os.path.join(dirname, '*i2d*.fits')) @@ -306,9 +315,10 @@ def test_outlier_step_spec(tmp_cwd, tmp_path): assert len(result_files) == 0 # Call again, but save intermediate to the output path + container = make_input() result = OutlierDetectionStep.call( container, save_results=True, save_intermediate_results=True, - output_dir=output_dir + output_dir=output_dir, resample_data=resample ) # Make sure nothing changed in SCI array @@ -331,8 +341,13 @@ def test_outlier_step_spec(tmp_cwd, tmp_path): assert len(result_files) == len(container) # s2d, median, and blot files are written to the output directory - assert len(s2d_files) == len(container) - assert len(blot_files) == len(container) + if resample: + assert len(s2d_files) == len(container) + assert len(blot_files) == len(container) + else: + assert len(s2d_files) == 0 + assert len(blot_files) == 0 + assert len(median_files) == 1 # i2d files not written @@ -674,4 +689,4 @@ def make_resamp(input_models): asn_id="test", allowed_memory=None, ) - return resamp \ No newline at end of file + return resamp diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py index 1563c2565d..a552087687 100644 --- a/jwst/outlier_detection/utils.py +++ b/jwst/outlier_detection/utils.py @@ -78,16 +78,14 @@ def median_without_resampling(input_models, with input_models: for i in range(len(input_models)): - drizzled_model = input_models.borrow(i) + input_model = input_models.borrow(i) + drizzled_model = copy.deepcopy(input_model) + input_models.shelve(input_model, i, modify=False) + drizzled_model.wht = build_driz_weight(drizzled_model, - weight_type=weight_type, - good_bits=good_bits) + weight_type=weight_type, + good_bits=good_bits) median_wcs = copy.deepcopy(drizzled_model.meta.wcs) - input_models.shelve(drizzled_model, i, modify=True) - - if save_intermediate_results: - # write the drizzled model to file - _fileio.save_drizzled(drizzled_model, make_output_path) if i == 0: input_shape = (ngroups,)+drizzled_model.data.shape From 72613a3b0a1d7b1e339c83683744e6c3a4b3b0ba Mon Sep 17 00:00:00 2001 From: Melanie Clarke Date: Wed, 2 Oct 2024 14:24:42 -0400 Subject: [PATCH 2/5] Add change note --- changes/8853.outlier_detection.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/8853.outlier_detection.rst diff --git a/changes/8853.outlier_detection.rst b/changes/8853.outlier_detection.rst new file mode 100644 index 0000000000..abb775f6c8 --- /dev/null +++ b/changes/8853.outlier_detection.rst @@ -0,0 +1 @@ +Avoid modifying input and saving duplicate files when resample_data=False. From 0a1cd93dc1c9af9c800a16be412cbd58fde3acd6 Mon Sep 17 00:00:00 2001 From: Melanie Clarke Date: Wed, 2 Oct 2024 14:31:25 -0400 Subject: [PATCH 3/5] Copy only the needed data --- jwst/outlier_detection/utils.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py index a552087687..f8cea4995b 100644 --- a/jwst/outlier_detection/utils.py +++ b/jwst/outlier_detection/utils.py @@ -78,23 +78,22 @@ def median_without_resampling(input_models, with input_models: for i in range(len(input_models)): - input_model = input_models.borrow(i) - drizzled_model = copy.deepcopy(input_model) - input_models.shelve(input_model, i, modify=False) - - drizzled_model.wht = build_driz_weight(drizzled_model, - weight_type=weight_type, - good_bits=good_bits) + drizzled_model = input_models.borrow(i) + drizzled_data = drizzled_model.data.copy() + weight = build_driz_weight(drizzled_model, + weight_type=weight_type, + good_bits=good_bits) median_wcs = copy.deepcopy(drizzled_model.meta.wcs) + input_models.shelve(drizzled_model, i, modify=False) if i == 0: - input_shape = (ngroups,)+drizzled_model.data.shape - dtype = drizzled_model.data.dtype + input_shape = (ngroups,) + drizzled_data.shape + dtype = drizzled_data.dtype computer = MedianComputer(input_shape, in_memory, buffer_size, dtype) - weight_threshold = compute_weight_threshold(drizzled_model.wht, maskpt) - drizzled_model.data[drizzled_model.wht < weight_threshold] = np.nan - computer.append(drizzled_model.data, i) + weight_threshold = compute_weight_threshold(weight, maskpt) + drizzled_data[weight < weight_threshold] = np.nan + computer.append(drizzled_data, i) # Perform median combination on set of drizzled mosaics median_data = computer.evaluate() From 290b2263dbfb8b02b0f8b9d69c598849b80d80bc Mon Sep 17 00:00:00 2001 From: Melanie Clarke Date: Wed, 2 Oct 2024 14:35:03 -0400 Subject: [PATCH 4/5] Only copy the wcs for the first model --- jwst/outlier_detection/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py index f8cea4995b..ecebe8fd2b 100644 --- a/jwst/outlier_detection/utils.py +++ b/jwst/outlier_detection/utils.py @@ -83,10 +83,8 @@ def median_without_resampling(input_models, weight = build_driz_weight(drizzled_model, weight_type=weight_type, good_bits=good_bits) - median_wcs = copy.deepcopy(drizzled_model.meta.wcs) - input_models.shelve(drizzled_model, i, modify=False) - if i == 0: + median_wcs = copy.deepcopy(drizzled_model.meta.wcs) input_shape = (ngroups,) + drizzled_data.shape dtype = drizzled_data.dtype computer = MedianComputer(input_shape, in_memory, buffer_size, dtype) @@ -95,6 +93,8 @@ def median_without_resampling(input_models, drizzled_data[weight < weight_threshold] = np.nan computer.append(drizzled_data, i) + input_models.shelve(drizzled_model, i, modify=False) + # Perform median combination on set of drizzled mosaics median_data = computer.evaluate() @@ -151,7 +151,6 @@ def median_with_resampling(input_models, with input_models: for i, indices in enumerate(indices_by_group): - median_wcs = resamp.output_wcs drizzled_model = resamp.resample_group(input_models, indices) if save_intermediate_results: @@ -159,6 +158,7 @@ def median_with_resampling(input_models, _fileio.save_drizzled(drizzled_model, make_output_path) if i == 0: + median_wcs = resamp.output_wcs input_shape = (ngroups,)+drizzled_model.data.shape dtype = drizzled_model.data.dtype computer = MedianComputer(input_shape, in_memory, buffer_size, dtype) From fb3c2cd092e676609da4ee7a14b45137ed898048 Mon Sep 17 00:00:00 2001 From: Melanie Clarke Date: Wed, 2 Oct 2024 15:03:05 -0400 Subject: [PATCH 5/5] Refactor test to avoid repeated step call --- .../tests/test_outlier_detection.py | 94 ++++++++----------- 1 file changed, 38 insertions(+), 56 deletions(-) diff --git a/jwst/outlier_detection/tests/test_outlier_detection.py b/jwst/outlier_detection/tests/test_outlier_detection.py index de42b4fd2a..4c4d3cdfb3 100644 --- a/jwst/outlier_detection/tests/test_outlier_detection.py +++ b/jwst/outlier_detection/tests/test_outlier_detection.py @@ -263,7 +263,8 @@ def test_outlier_step_base(we_three_sci, tmp_cwd): @pytest.mark.parametrize('resample', [True, False]) -def test_outlier_step_spec(tmp_cwd, tmp_path, resample): +@pytest.mark.parametrize('save_intermediate', [True, False]) +def test_outlier_step_spec(tmp_cwd, tmp_path, resample, save_intermediate): """Test outlier step for spec data including saving intermediate results.""" output_dir = tmp_path / 'output' output_dir.mkdir(exist_ok=True) @@ -276,59 +277,34 @@ def test_outlier_step_spec(tmp_cwd, tmp_path, resample): # Make it an exposure type outlier detection expects miri_cal.meta.exposure.type = "MIR_LRS-FIXEDSLIT" - def make_input(): - # Make a couple copies, give them unique exposure numbers and filename - container = ModelContainer([miri_cal.copy(), miri_cal.copy(), miri_cal.copy()]) - for i, model in enumerate(container): - model.meta.filename = f'test_{i}_cal.fits' - - # Drop a CR on the science array in the first image - container[0].data[209, 37] += 1 - - return container - - # Verify that intermediate files are removed when not saved - # If resampling, s2d files are expected, i2d files are not, - # but we'll check for them to make sure the imaging extension - # didn't creep back in. - container = make_input() - OutlierDetectionStep.call(container, - output_dir=output_dir, save_results=True, - resample_data=resample) - for dirname in [output_dir, tmp_cwd]: - result_files = glob(os.path.join(dirname, '*outlierdetectionstep.fits')) - i2d_files = glob(os.path.join(dirname, '*i2d*.fits')) - s2d_files = glob(os.path.join(dirname, '*outlier_s2d.fits')) - median_files = glob(os.path.join(dirname, '*median.fits')) - blot_files = glob(os.path.join(dirname, '*blot.fits')) + # Make a couple copies, give them unique exposure numbers and filename + container = ModelContainer([miri_cal.copy(), miri_cal.copy(), miri_cal.copy()]) + for i, model in enumerate(container): + model.meta.filename = f'test_{i}_cal.fits' - # intermediate files are removed - assert len(i2d_files) == 0 - assert len(s2d_files) == 0 - assert len(median_files) == 0 - assert len(blot_files) == 0 + # Drop a CR on the science array in the first image + container[0].data[209, 37] += 1 - # result files are written to the output directory - if dirname == output_dir: - assert len(result_files) == len(container) - else: - assert len(result_files) == 0 - - # Call again, but save intermediate to the output path - container = make_input() + # Call outlier detection result = OutlierDetectionStep.call( - container, save_results=True, save_intermediate_results=True, - output_dir=output_dir, resample_data=resample - ) + container, resample_data=resample, + output_dir=output_dir, save_results=True, + save_intermediate_results=save_intermediate) # Make sure nothing changed in SCI array - for image, corrected in zip(container, result): - np.testing.assert_allclose(image.data, corrected.data) + for image in result: + nn = ~np.isnan(image.data) + np.testing.assert_allclose(image.data[nn], miri_cal.data[nn]) # Verify CR is flagged + assert np.isnan(result[0].data[209, 37]) assert result[0].dq[209, 37] == OUTLIER_DO_NOT_USE # Verify that intermediate files are saved at the specified location + if save_intermediate: + expected_intermediate = len(container) + else: + expected_intermediate = 0 for dirname in [output_dir, tmp_cwd]: all_files = glob(os.path.join(dirname, '*.fits')) result_files = glob(os.path.join(dirname, '*outlierdetectionstep.fits')) @@ -337,29 +313,35 @@ def make_input(): median_files = glob(os.path.join(dirname, '*median.fits')) blot_files = glob(os.path.join(dirname, '*blot.fits')) if dirname == output_dir: - # result files are written to the output directory + # Result files are always written to the output directory assert len(result_files) == len(container) - # s2d, median, and blot files are written to the output directory + # s2d and blot files are written to the output directory + # if save_intermediate is True and resampling is set if resample: - assert len(s2d_files) == len(container) - assert len(blot_files) == len(container) + assert len(s2d_files) == expected_intermediate + assert len(blot_files) == expected_intermediate else: assert len(s2d_files) == 0 assert len(blot_files) == 0 - assert len(median_files) == 1 + # Only one median file is saved if save_intermediate is True, + # no matter how many input files there are + if save_intermediate: + assert len(median_files) == 1 + else: + assert len(median_files) == 0 - # i2d files not written + # i2d files are never written assert len(i2d_files) == 0 - # nothing else was written - assert len(all_files) == len(s2d_files) + \ - len(median_files) + \ - len(result_files) + \ - len(blot_files) + # Nothing else was written + assert len(all_files) == (len(s2d_files) + + len(median_files) + + len(result_files) + + len(blot_files)) else: - # nothing should be written to the current directory + # Nothing should be written to the current directory assert len(result_files) == 0 assert len(s2d_files) == 0 assert len(median_files) == 0