Merge branch 'main' into jp3770

spacetelescope · Oct 3, 2024 · 5da311a · 5da311a
2 parents b76e877 + b47cac8
commit 5da311a
Show file tree

Hide file tree

Showing 9 changed files with 65 additions and 65 deletions.
diff --git a/changes/8851.outlier_detection.rst b/changes/8851.outlier_detection.rst
@@ -0,0 +1 @@
+Update documentation to clarify the interaction between pipeline-level and step-level ``--in_memory`` flags.
diff --git a/changes/8853.outlier_detection.rst b/changes/8853.outlier_detection.rst
@@ -0,0 +1 @@
+Avoid modifying input and saving duplicate files when ``resample_data=False``.
diff --git a/docs/jwst/outlier_detection/arguments.rst b/docs/jwst/outlier_detection/arguments.rst
@@ -89,6 +89,8 @@ that control the behavior of the processing:
  Specifies whether or not to load and create all images that are used during
  processing into memory. If ``False``, input files are loaded from disk when
  needed and all intermediate files are stored on disk, rather than in memory.
+ This flag is superseded by the pipeline-level ``--in_memory`` flag, and thus
+ has no effect when running the full level 3 pipeline.
 
 Step Arguments for IFU data
 ===========================

diff --git a/docs/jwst/outlier_detection/outlier_detection_imaging.rst b/docs/jwst/outlier_detection/outlier_detection_imaging.rst
@@ -171,6 +171,10 @@ during processing includes:
  Those sections are then read in one at a time to compute the median image.
 
 These changes result in a minimum amount of memory usage during processing at the obvious
-expense of reading and writing the products from disk.
+expense of reading and writing the products from disk. Note that if a ModelLibrary object
+is input to the step, the memory behavior of the step is read from the ``on_disk`` status
+of the ModelLibrary object, and the ``in_memory`` parameter of the step is ignored.
+When running ``calwebb_image3``, the ``in_memory`` flag should therefore be set at the pipeline level,
+e.g., ``strun calwebb_image3 asn.json --in_memory=True``; the step-specific flag will be ignored.
 
 .. automodapi:: jwst.outlier_detection.imaging
diff --git a/jwst/outlier_detection/imaging.py b/jwst/outlier_detection/imaging.py
@@ -65,7 +65,6 @@ def detect_outliers(
  kernel=kernel,
  fillval=fillval,
  good_bits=good_bits,
- in_memory=in_memory,
  allowed_memory=allowed_memory,
  )
  median_data, median_wcs = median_with_resampling(input_models,

diff --git a/jwst/outlier_detection/outlier_detection_step.py b/jwst/outlier_detection/outlier_detection_step.py
@@ -67,7 +67,7 @@ class OutlierDetectionStep(Step):
  good_bits = string(default="~DO_NOT_USE") # DQ flags to allow
  search_output_file = boolean(default=False)
  allowed_memory = float(default=None) # Fraction of memory to use for the combined image
- in_memory = boolean(default=False)
+ in_memory = boolean(default=False) # ignored if run within the pipeline; set at pipeline level instead
  """
 
  def process(self, input_data):

diff --git a/jwst/outlier_detection/spec.py b/jwst/outlier_detection/spec.py
@@ -66,7 +66,6 @@ def detect_outliers(
  kernel=kernel,
  fillval=fillval,
  good_bits=good_bits,
- in_memory=in_memory,
  )
 
  median_data, median_wcs = median_with_resampling(

diff --git a/jwst/outlier_detection/tests/test_outlier_detection.py b/jwst/outlier_detection/tests/test_outlier_detection.py
@@ -203,6 +203,7 @@ def we_three_sci():
 def test_outlier_step_no_outliers(we_three_sci, do_resample, tmp_cwd):
  """Test whole step, no outliers"""
  container = ModelContainer(list(we_three_sci))
+ container[0].var_rnoise[10, 10] = 1E9
  pristine = ModelContainer([m.copy() for m in container])
  OutlierDetectionStep.call(container, in_memory=True, resample_data=do_resample)
 
@@ -261,7 +262,9 @@ def test_outlier_step_base(we_three_sci, tmp_cwd):
  assert len(median_files) != 0
 
 
-def test_outlier_step_spec(tmp_cwd, tmp_path):
+@pytest.mark.parametrize('resample', [True, False])
+@pytest.mark.parametrize('save_intermediate', [True, False])
+def test_outlier_step_spec(tmp_cwd, tmp_path, resample, save_intermediate):
  """Test outlier step for spec data including saving intermediate results."""
  output_dir = tmp_path / 'output'
  output_dir.mkdir(exist_ok=True)
@@ -275,50 +278,33 @@ def test_outlier_step_spec(tmp_cwd, tmp_path):
  miri_cal.meta.exposure.type = "MIR_LRS-FIXEDSLIT"
 
  # Make a couple copies, give them unique exposure numbers and filename
- container = ModelContainer([miri_cal, miri_cal.copy(), miri_cal.copy()])
+ container = ModelContainer([miri_cal.copy(), miri_cal.copy(), miri_cal.copy()])
  for i, model in enumerate(container):
  model.meta.filename = f'test_{i}_cal.fits'
 
  # Drop a CR on the science array in the first image
  container[0].data[209, 37] += 1
 
- # Verify that intermediate files are removed when not saved
- # (s2d files are expected, i2d files are not, but we'll check
- # for them to make sure the imaging extension didn't creep back in)
- OutlierDetectionStep.call(container, output_dir=output_dir, save_results=True)
- for dirname in [output_dir, tmp_cwd]:
- result_files = glob(os.path.join(dirname, '*outlierdetectionstep.fits'))
- i2d_files = glob(os.path.join(dirname, '*i2d*.fits'))
- s2d_files = glob(os.path.join(dirname, '*outlier_s2d.fits'))
- median_files = glob(os.path.join(dirname, '*median.fits'))
- blot_files = glob(os.path.join(dirname, '*blot.fits'))
-
- # intermediate files are removed
- assert len(i2d_files) == 0
- assert len(s2d_files) == 0
- assert len(median_files) == 0
- assert len(blot_files) == 0
-
- # result files are written to the output directory
- if dirname == output_dir:
- assert len(result_files) == len(container)
- else:
- assert len(result_files) == 0
-
- # Call again, but save intermediate to the output path
+ # Call outlier detection
  result = OutlierDetectionStep.call(
- container, save_results=True, save_intermediate_results=True,
- output_dir=output_dir
- )
+ container, resample_data=resample,
+ output_dir=output_dir, save_results=True,
+  save_intermediate_results=save_intermediate)
 
  # Make sure nothing changed in SCI array
- for image, corrected in zip(container, result):
- np.testing.assert_allclose(image.data, corrected.data)
+ for image in result:
+ nn = ~np.isnan(image.data)
+ np.testing.assert_allclose(image.data[nn], miri_cal.data[nn])
 
  # Verify CR is flagged
+ assert np.isnan(result[0].data[209, 37])
  assert result[0].dq[209, 37] == OUTLIER_DO_NOT_USE
 
  # Verify that intermediate files are saved at the specified location
+ if save_intermediate:
+ expected_intermediate = len(container)
+ else:
+ expected_intermediate = 0
  for dirname in [output_dir, tmp_cwd]:
  all_files = glob(os.path.join(dirname, '*.fits'))
  result_files = glob(os.path.join(dirname, '*outlierdetectionstep.fits'))
@@ -327,24 +313,35 @@ def test_outlier_step_spec(tmp_cwd, tmp_path):
  median_files = glob(os.path.join(dirname, '*median.fits'))
  blot_files = glob(os.path.join(dirname, '*blot.fits'))
  if dirname == output_dir:
- # result files are written to the output directory
+ # Result files are always written to the output directory
  assert len(result_files) == len(container)
 
- # s2d, median, and blot files are written to the output directory
- assert len(s2d_files) == len(container)
- assert len(blot_files) == len(container)
- assert len(median_files) == 1
-
- # i2d files not written
+ # s2d and blot files are written to the output directory
+ # if save_intermediate is True and resampling is set
+ if resample:
+ assert len(s2d_files) == expected_intermediate
+ assert len(blot_files) == expected_intermediate
+ else:
+ assert len(s2d_files) == 0
+ assert len(blot_files) == 0
+
+ # Only one median file is saved if save_intermediate is True,
+ # no matter how many input files there are
+ if save_intermediate:
+ assert len(median_files) == 1
+ else:
+ assert len(median_files) == 0
+
+ # i2d files are never written
  assert len(i2d_files) == 0
 
- # nothing else was written
- assert len(all_files) == len(s2d_files) + \
- len(median_files) + \
- len(result_files) + \
- len(blot_files)
+ # Nothing else was written
+ assert len(all_files) == (len(s2d_files)
+  + len(median_files)
+  + len(result_files)
+  + len(blot_files))
  else:
- # nothing should be written to the current directory
+ # Nothing should be written to the current directory
  assert len(result_files) == 0
  assert len(s2d_files) == 0
  assert len(median_files) == 0
@@ -674,4 +671,4 @@ def make_resamp(input_models):
  asn_id="test",
  allowed_memory=None,
  )
- return resamp
+ return resamp
diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py
@@ -79,24 +79,21 @@ def median_without_resampling(input_models,
  for i in range(len(input_models)):
 
  drizzled_model = input_models.borrow(i)
- drizzled_model.wht = build_driz_weight(drizzled_model,
- weight_type=weight_type,
- good_bits=good_bits)
- median_wcs = copy.deepcopy(drizzled_model.meta.wcs)
- input_models.shelve(drizzled_model, i, modify=True)
-
- if save_intermediate_results:
- # write the drizzled model to file
- _fileio.save_drizzled(drizzled_model, make_output_path)
-
+ drizzled_data = drizzled_model.data.copy()
+ weight = build_driz_weight(drizzled_model,
+ weight_type=weight_type,
+ good_bits=good_bits)
  if i == 0:
- input_shape = (ngroups,)+drizzled_model.data.shape
- dtype = drizzled_model.data.dtype
+ median_wcs = copy.deepcopy(drizzled_model.meta.wcs)
+ input_shape = (ngroups,) + drizzled_data.shape
+ dtype = drizzled_data.dtype
  computer = MedianComputer(input_shape, in_memory, buffer_size, dtype)
 
- weight_threshold = compute_weight_threshold(drizzled_model.wht, maskpt)
- drizzled_model.data[drizzled_model.wht < weight_threshold] = np.nan
- computer.append(drizzled_model.data, i)
+ weight_threshold = compute_weight_threshold(weight, maskpt)
+ drizzled_data[weight < weight_threshold] = np.nan
+ computer.append(drizzled_data, i)
+
+ input_models.shelve(drizzled_model, i, modify=False)
 
  # Perform median combination on set of drizzled mosaics
  median_data = computer.evaluate()
@@ -154,14 +151,14 @@ def median_with_resampling(input_models,
  with input_models:
  for i, indices in enumerate(indices_by_group):
 
- median_wcs = resamp.output_wcs
  drizzled_model = resamp.resample_group(input_models, indices)
 
  if save_intermediate_results:
  # write the drizzled model to file
  _fileio.save_drizzled(drizzled_model, make_output_path)
 
  if i == 0:
+ median_wcs = resamp.output_wcs
  input_shape = (ngroups,)+drizzled_model.data.shape
  dtype = drizzled_model.data.dtype
  computer = MedianComputer(input_shape, in_memory, buffer_size, dtype)