From 4a33784119f32522ffe4432f4b1c390acd2f2c9e Mon Sep 17 00:00:00 2001
From: Melanie Clarke <mclarke@stsci.edu>
Date: Wed, 2 Oct 2024 14:13:31 -0400
Subject: [PATCH 1/5] Fix intermediate issues with non-resampled outlier
 methods

---
 .../tests/test_outlier_detection.py           | 43 +++++++++++++------
 jwst/outlier_detection/utils.py               | 14 +++---
 2 files changed, 35 insertions(+), 22 deletions(-)

diff --git a/jwst/outlier_detection/tests/test_outlier_detection.py b/jwst/outlier_detection/tests/test_outlier_detection.py
index 3155824fc8..de42b4fd2a 100644
--- a/jwst/outlier_detection/tests/test_outlier_detection.py
+++ b/jwst/outlier_detection/tests/test_outlier_detection.py
@@ -203,6 +203,7 @@ def we_three_sci():
 def test_outlier_step_no_outliers(we_three_sci, do_resample, tmp_cwd):
     """Test whole step, no outliers"""
     container = ModelContainer(list(we_three_sci))
+    container[0].var_rnoise[10, 10] = 1E9
     pristine = ModelContainer([m.copy() for m in container])
     OutlierDetectionStep.call(container, in_memory=True, resample_data=do_resample)
 
@@ -261,7 +262,8 @@ def test_outlier_step_base(we_three_sci, tmp_cwd):
     assert len(median_files) != 0
 
 
-def test_outlier_step_spec(tmp_cwd, tmp_path):
+@pytest.mark.parametrize('resample', [True, False])
+def test_outlier_step_spec(tmp_cwd, tmp_path, resample):
     """Test outlier step for spec data including saving intermediate results."""
     output_dir = tmp_path / 'output'
     output_dir.mkdir(exist_ok=True)
@@ -274,18 +276,25 @@ def test_outlier_step_spec(tmp_cwd, tmp_path):
     # Make it an exposure type outlier detection expects
     miri_cal.meta.exposure.type = "MIR_LRS-FIXEDSLIT"
 
-    # Make a couple copies, give them unique exposure numbers and filename
-    container = ModelContainer([miri_cal, miri_cal.copy(), miri_cal.copy()])
-    for i, model in enumerate(container):
-        model.meta.filename = f'test_{i}_cal.fits'
+    def make_input():
+        # Make a couple copies, give them unique exposure numbers and filename
+        container = ModelContainer([miri_cal.copy(), miri_cal.copy(), miri_cal.copy()])
+        for i, model in enumerate(container):
+            model.meta.filename = f'test_{i}_cal.fits'
 
-    # Drop a CR on the science array in the first image
-    container[0].data[209, 37] += 1
+        # Drop a CR on the science array in the first image
+        container[0].data[209, 37] += 1
+
+        return container
 
     # Verify that intermediate files are removed when not saved
-    # (s2d files are expected, i2d files are not, but we'll check
-    # for them to make sure the imaging extension didn't creep back in)
-    OutlierDetectionStep.call(container, output_dir=output_dir, save_results=True)
+    # If resampling, s2d files are expected, i2d files are not,
+    # but we'll check for them to make sure the imaging extension
+    # didn't creep back in.
+    container = make_input()
+    OutlierDetectionStep.call(container,
+                              output_dir=output_dir, save_results=True,
+                              resample_data=resample)
     for dirname in [output_dir, tmp_cwd]:
         result_files = glob(os.path.join(dirname, '*outlierdetectionstep.fits'))
         i2d_files = glob(os.path.join(dirname, '*i2d*.fits'))
@@ -306,9 +315,10 @@ def test_outlier_step_spec(tmp_cwd, tmp_path):
             assert len(result_files) == 0
 
     # Call again, but save intermediate to the output path
+    container = make_input()
     result = OutlierDetectionStep.call(
         container, save_results=True, save_intermediate_results=True,
-        output_dir=output_dir
+        output_dir=output_dir, resample_data=resample
     )
 
     # Make sure nothing changed in SCI array
@@ -331,8 +341,13 @@ def test_outlier_step_spec(tmp_cwd, tmp_path):
             assert len(result_files) == len(container)
 
             # s2d, median, and blot files are written to the output directory
-            assert len(s2d_files) == len(container)
-            assert len(blot_files) == len(container)
+            if resample:
+                assert len(s2d_files) == len(container)
+                assert len(blot_files) == len(container)
+            else:
+                assert len(s2d_files) == 0
+                assert len(blot_files) == 0
+
             assert len(median_files) == 1
 
             # i2d files not written
@@ -674,4 +689,4 @@ def make_resamp(input_models):
         asn_id="test",
         allowed_memory=None,
     )
-    return resamp
\ No newline at end of file
+    return resamp
diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py
index 1563c2565d..a552087687 100644
--- a/jwst/outlier_detection/utils.py
+++ b/jwst/outlier_detection/utils.py
@@ -78,16 +78,14 @@ def median_without_resampling(input_models,
     with input_models:
         for i in range(len(input_models)):
 
-            drizzled_model = input_models.borrow(i)
+            input_model = input_models.borrow(i)
+            drizzled_model = copy.deepcopy(input_model)
+            input_models.shelve(input_model, i, modify=False)
+
             drizzled_model.wht = build_driz_weight(drizzled_model,
-                                                    weight_type=weight_type,
-                                                    good_bits=good_bits)
+                                                   weight_type=weight_type,
+                                                   good_bits=good_bits)
             median_wcs = copy.deepcopy(drizzled_model.meta.wcs)
-            input_models.shelve(drizzled_model, i, modify=True)
-
-            if save_intermediate_results:
-                # write the drizzled model to file
-                _fileio.save_drizzled(drizzled_model, make_output_path)
 
             if i == 0:
                 input_shape = (ngroups,)+drizzled_model.data.shape

From 72613a3b0a1d7b1e339c83683744e6c3a4b3b0ba Mon Sep 17 00:00:00 2001
From: Melanie Clarke <mclarke@stsci.edu>
Date: Wed, 2 Oct 2024 14:24:42 -0400
Subject: [PATCH 2/5] Add change note

---
 changes/8853.outlier_detection.rst | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changes/8853.outlier_detection.rst

diff --git a/changes/8853.outlier_detection.rst b/changes/8853.outlier_detection.rst
new file mode 100644
index 0000000000..abb775f6c8
--- /dev/null
+++ b/changes/8853.outlier_detection.rst
@@ -0,0 +1 @@
+Avoid modifying input and saving duplicate files when resample_data=False.

From 0a1cd93dc1c9af9c800a16be412cbd58fde3acd6 Mon Sep 17 00:00:00 2001
From: Melanie Clarke <mclarke@stsci.edu>
Date: Wed, 2 Oct 2024 14:31:25 -0400
Subject: [PATCH 3/5] Copy only the needed data

---
 jwst/outlier_detection/utils.py | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py
index a552087687..f8cea4995b 100644
--- a/jwst/outlier_detection/utils.py
+++ b/jwst/outlier_detection/utils.py
@@ -78,23 +78,22 @@ def median_without_resampling(input_models,
     with input_models:
         for i in range(len(input_models)):
 
-            input_model = input_models.borrow(i)
-            drizzled_model = copy.deepcopy(input_model)
-            input_models.shelve(input_model, i, modify=False)
-
-            drizzled_model.wht = build_driz_weight(drizzled_model,
-                                                   weight_type=weight_type,
-                                                   good_bits=good_bits)
+            drizzled_model = input_models.borrow(i)
+            drizzled_data = drizzled_model.data.copy()
+            weight = build_driz_weight(drizzled_model,
+                                       weight_type=weight_type,
+                                       good_bits=good_bits)
             median_wcs = copy.deepcopy(drizzled_model.meta.wcs)
+            input_models.shelve(drizzled_model, i, modify=False)
 
             if i == 0:
-                input_shape = (ngroups,)+drizzled_model.data.shape
-                dtype = drizzled_model.data.dtype
+                input_shape = (ngroups,) + drizzled_data.shape
+                dtype = drizzled_data.dtype
                 computer = MedianComputer(input_shape, in_memory, buffer_size, dtype)
 
-            weight_threshold = compute_weight_threshold(drizzled_model.wht, maskpt)
-            drizzled_model.data[drizzled_model.wht < weight_threshold] = np.nan
-            computer.append(drizzled_model.data, i)
+            weight_threshold = compute_weight_threshold(weight, maskpt)
+            drizzled_data[weight < weight_threshold] = np.nan
+            computer.append(drizzled_data, i)
 
     # Perform median combination on set of drizzled mosaics
     median_data = computer.evaluate()

From 290b2263dbfb8b02b0f8b9d69c598849b80d80bc Mon Sep 17 00:00:00 2001
From: Melanie Clarke <mclarke@stsci.edu>
Date: Wed, 2 Oct 2024 14:35:03 -0400
Subject: [PATCH 4/5] Only copy the wcs for the first model

---
 jwst/outlier_detection/utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py
index f8cea4995b..ecebe8fd2b 100644
--- a/jwst/outlier_detection/utils.py
+++ b/jwst/outlier_detection/utils.py
@@ -83,10 +83,8 @@ def median_without_resampling(input_models,
             weight = build_driz_weight(drizzled_model,
                                        weight_type=weight_type,
                                        good_bits=good_bits)
-            median_wcs = copy.deepcopy(drizzled_model.meta.wcs)
-            input_models.shelve(drizzled_model, i, modify=False)
-
             if i == 0:
+                median_wcs = copy.deepcopy(drizzled_model.meta.wcs)
                 input_shape = (ngroups,) + drizzled_data.shape
                 dtype = drizzled_data.dtype
                 computer = MedianComputer(input_shape, in_memory, buffer_size, dtype)
@@ -95,6 +93,8 @@ def median_without_resampling(input_models,
             drizzled_data[weight < weight_threshold] = np.nan
             computer.append(drizzled_data, i)
 
+            input_models.shelve(drizzled_model, i, modify=False)
+
     # Perform median combination on set of drizzled mosaics
     median_data = computer.evaluate()
 
@@ -151,7 +151,6 @@ def median_with_resampling(input_models,
     with input_models:
         for i, indices in enumerate(indices_by_group):
 
-            median_wcs = resamp.output_wcs
             drizzled_model = resamp.resample_group(input_models, indices)
 
             if save_intermediate_results:
@@ -159,6 +158,7 @@ def median_with_resampling(input_models,
                 _fileio.save_drizzled(drizzled_model, make_output_path)
 
             if i == 0:
+                median_wcs = resamp.output_wcs
                 input_shape = (ngroups,)+drizzled_model.data.shape
                 dtype = drizzled_model.data.dtype
                 computer = MedianComputer(input_shape, in_memory, buffer_size, dtype)

From fb3c2cd092e676609da4ee7a14b45137ed898048 Mon Sep 17 00:00:00 2001
From: Melanie Clarke <mclarke@stsci.edu>
Date: Wed, 2 Oct 2024 15:03:05 -0400
Subject: [PATCH 5/5] Refactor test to avoid repeated step call

---
 .../tests/test_outlier_detection.py           | 94 ++++++++-----------
 1 file changed, 38 insertions(+), 56 deletions(-)

diff --git a/jwst/outlier_detection/tests/test_outlier_detection.py b/jwst/outlier_detection/tests/test_outlier_detection.py
index de42b4fd2a..4c4d3cdfb3 100644
--- a/jwst/outlier_detection/tests/test_outlier_detection.py
+++ b/jwst/outlier_detection/tests/test_outlier_detection.py
@@ -263,7 +263,8 @@ def test_outlier_step_base(we_three_sci, tmp_cwd):
 
 
 @pytest.mark.parametrize('resample', [True, False])
-def test_outlier_step_spec(tmp_cwd, tmp_path, resample):
+@pytest.mark.parametrize('save_intermediate', [True, False])
+def test_outlier_step_spec(tmp_cwd, tmp_path, resample, save_intermediate):
     """Test outlier step for spec data including saving intermediate results."""
     output_dir = tmp_path / 'output'
     output_dir.mkdir(exist_ok=True)
@@ -276,59 +277,34 @@ def test_outlier_step_spec(tmp_cwd, tmp_path, resample):
     # Make it an exposure type outlier detection expects
     miri_cal.meta.exposure.type = "MIR_LRS-FIXEDSLIT"
 
-    def make_input():
-        # Make a couple copies, give them unique exposure numbers and filename
-        container = ModelContainer([miri_cal.copy(), miri_cal.copy(), miri_cal.copy()])
-        for i, model in enumerate(container):
-            model.meta.filename = f'test_{i}_cal.fits'
-
-        # Drop a CR on the science array in the first image
-        container[0].data[209, 37] += 1
-
-        return container
-
-    # Verify that intermediate files are removed when not saved
-    # If resampling, s2d files are expected, i2d files are not,
-    # but we'll check for them to make sure the imaging extension
-    # didn't creep back in.
-    container = make_input()
-    OutlierDetectionStep.call(container,
-                              output_dir=output_dir, save_results=True,
-                              resample_data=resample)
-    for dirname in [output_dir, tmp_cwd]:
-        result_files = glob(os.path.join(dirname, '*outlierdetectionstep.fits'))
-        i2d_files = glob(os.path.join(dirname, '*i2d*.fits'))
-        s2d_files = glob(os.path.join(dirname, '*outlier_s2d.fits'))
-        median_files = glob(os.path.join(dirname, '*median.fits'))
-        blot_files = glob(os.path.join(dirname, '*blot.fits'))
+    # Make a couple copies, give them unique exposure numbers and filename
+    container = ModelContainer([miri_cal.copy(), miri_cal.copy(), miri_cal.copy()])
+    for i, model in enumerate(container):
+        model.meta.filename = f'test_{i}_cal.fits'
 
-        # intermediate files are removed
-        assert len(i2d_files) == 0
-        assert len(s2d_files) == 0
-        assert len(median_files) == 0
-        assert len(blot_files) == 0
+    # Drop a CR on the science array in the first image
+    container[0].data[209, 37] += 1
 
-        # result files are written to the output directory
-        if dirname == output_dir:
-            assert len(result_files) == len(container)
-        else:
-            assert len(result_files) == 0
-
-    # Call again, but save intermediate to the output path
-    container = make_input()
+    # Call outlier detection
     result = OutlierDetectionStep.call(
-        container, save_results=True, save_intermediate_results=True,
-        output_dir=output_dir, resample_data=resample
-    )
+        container, resample_data=resample,
+        output_dir=output_dir, save_results=True,
+        save_intermediate_results=save_intermediate)
 
     # Make sure nothing changed in SCI array
-    for image, corrected in zip(container, result):
-        np.testing.assert_allclose(image.data, corrected.data)
+    for image in result:
+        nn = ~np.isnan(image.data)
+        np.testing.assert_allclose(image.data[nn], miri_cal.data[nn])
 
     # Verify CR is flagged
+    assert np.isnan(result[0].data[209, 37])
     assert result[0].dq[209, 37] == OUTLIER_DO_NOT_USE
 
     # Verify that intermediate files are saved at the specified location
+    if save_intermediate:
+        expected_intermediate = len(container)
+    else:
+        expected_intermediate = 0
     for dirname in [output_dir, tmp_cwd]:
         all_files = glob(os.path.join(dirname, '*.fits'))
         result_files = glob(os.path.join(dirname, '*outlierdetectionstep.fits'))
@@ -337,29 +313,35 @@ def make_input():
         median_files = glob(os.path.join(dirname, '*median.fits'))
         blot_files = glob(os.path.join(dirname, '*blot.fits'))
         if dirname == output_dir:
-            # result files are written to the output directory
+            # Result files are always written to the output directory
             assert len(result_files) == len(container)
 
-            # s2d, median, and blot files are written to the output directory
+            # s2d and blot files are written to the output directory
+            # if save_intermediate is True and resampling is set
             if resample:
-                assert len(s2d_files) == len(container)
-                assert len(blot_files) == len(container)
+                assert len(s2d_files) == expected_intermediate
+                assert len(blot_files) == expected_intermediate
             else:
                 assert len(s2d_files) == 0
                 assert len(blot_files) == 0
 
-            assert len(median_files) == 1
+            # Only one median file is saved if save_intermediate is True,
+            # no matter how many input files there are
+            if save_intermediate:
+                assert len(median_files) == 1
+            else:
+                assert len(median_files) == 0
 
-            # i2d files not written
+            # i2d files are never written
             assert len(i2d_files) == 0
 
-            # nothing else was written
-            assert len(all_files) == len(s2d_files) + \
-                                     len(median_files) + \
-                                     len(result_files) + \
-                                     len(blot_files)
+            # Nothing else was written
+            assert len(all_files) == (len(s2d_files)
+                                      + len(median_files)
+                                      + len(result_files)
+                                      + len(blot_files))
         else:
-            # nothing should be written to the current directory
+            # Nothing should be written to the current directory
             assert len(result_files) == 0
             assert len(s2d_files) == 0
             assert len(median_files) == 0