Threshold on median error

spacetelescope · Sep 24, 2024 · 8686a40 · 8686a40
1 parent 1e6f8df
commit 8686a40
Show file tree

Hide file tree

Showing 4 changed files with 106 additions and 20 deletions.
diff --git a/jwst/outlier_detection/spec.py b/jwst/outlier_detection/spec.py
@@ -73,12 +73,12 @@ def detect_outliers(
  # for compatibility with image3 pipeline
  library = ModelLibrary(input_models, on_disk=False)
 
- median_data, median_wcs = drizzle_and_median(library,
-  resamp,
-  maskpt,
-  resample_data=resample_data,
-  save_intermediate_results=save_intermediate_results,
-  make_output_path=make_output_path)
+ median_data, median_wcs, median_err = drizzle_and_median(
+ library, resamp, maskpt,
+ resample_data=resample_data,
+ save_intermediate_results=save_intermediate_results,
+ make_output_path=make_output_path,
+ return_error=True)
 
  # Perform outlier detection using statistical comparisons between
  # each original input image and its blotted version of the median image
@@ -92,8 +92,9 @@ def detect_outliers(
  scale1,
  scale2,
  backg,
+ median_err=median_err,
  save_blot=save_intermediate_results,
- make_output_path=make_output_path
+ make_output_path=make_output_path,
  )
  else:
  flag_crs_in_models(input_models,

diff --git a/jwst/outlier_detection/tests/test_outlier_detection.py b/jwst/outlier_detection/tests/test_outlier_detection.py
@@ -83,6 +83,7 @@ def test_flag_cr(sci_blot_image_pair):
  _flag_resampled_model_crs(
  sci,
  blot.data,
+ None,
  5.0,
  4.0,
  1.2,
@@ -680,4 +681,4 @@ def make_resamp(input_models):
  asn_id="test",
  allowed_memory=None,
  )
- return resamp
+ return resamp
diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py
@@ -63,7 +63,8 @@ def drizzle_and_median(input_models,
  resample_data=False,
  save_intermediate_results=False,
  make_output_path=None,
- buffer_size=None):
+ buffer_size=None,
+ return_error=False):
  """
  Shared code between imaging and spec modes for resampling and median computation
 
@@ -92,6 +93,10 @@ def drizzle_and_median(input_models,
  The size of chunk in bytes that will be read into memory when computing the median.
  This parameter has no effect if the input library has its on_disk attribute
  set to False.
+
+ return_error : bool, optional
+ If set, an approximate median error is computed alongside the
+ median science image.
  """
 
  # validate inputs
@@ -112,7 +117,8 @@ def drizzle_and_median(input_models,
 
  if resample_data:
  median_wcs = resamp.output_wcs
- drizzled_model = resamp.resample_group(input_models, indices)
+ drizzled_model = resamp.resample_group(
+ input_models, indices, compute_error=return_error)
  else:
  # for non-dithered data, the resampled image is just the original image
  drizzled_model = input_models.borrow(i)
@@ -121,6 +127,7 @@ def drizzle_and_median(input_models,
  weight_type=resamp.weight_type,
  good_bits=resamp.good_bits)
  input_models.shelve(drizzled_model, i, modify=True)
+
  # copy for when saving median and input is a filename?
  median_wcs = copy.deepcopy(drizzled_model.meta.wcs)
 
@@ -130,11 +137,21 @@ def drizzle_and_median(input_models,
  if in_memory:
  # allocate memory for data arrays that go into median
  data_frames = np.empty(full_shape, dtype=np.float32)
+ if return_error:
+ err_frames = np.empty(full_shape, dtype=np.float32)
+ else:
+ err_frames = None
  else:
  # set up temporary storage for data arrays that go into median
  median_computer = OnDiskMedian(full_shape,
  dtype=drizzled_model.data.dtype,
  buffer_size=buffer_size)
+ if return_error:
+ median_err_computer = OnDiskMedian(full_shape,
+ dtype=drizzled_model.err.dtype,
+ buffer_size=buffer_size)
+ else:
+ median_err_computer = None
 
  if save_intermediate_results:
  # write the drizzled model to file
@@ -146,33 +163,52 @@ def drizzle_and_median(input_models,
 
  # handle the weights right away, so only data array needs to be saved
  weight_threshold = compute_weight_threshold(drizzled_model.wht, maskpt)
- drizzled_model.data[drizzled_model.wht < weight_threshold] = np.nan
+ below_threshold = drizzled_model.wht < weight_threshold
+ drizzled_model.data[below_threshold] = np.nan
+ drizzled_model.err[below_threshold] = np.nan
 
  if in_memory:
  # populate pre-allocated memory with the drizzled data
  data_frames[i] = drizzled_model.data
+ if return_error:
+ err_frames[i] = drizzled_model.err
  else:
  # distribute the drizzled data into the temporary storage
  median_computer.add_image(drizzled_model.data)
+ if return_error:
+ median_err_computer.add_image(drizzled_model.err)
 
  # Perform median combination on set of drizzled mosaics
+ median_err = None
  if in_memory:
  median_data = nanmedian3D(data_frames)
  del data_frames
+ if return_error:
+ median_err = nanmedian3D(err_frames)
+ del err_frames
  else:
  median_data = median_computer.compute_median()
  median_computer.cleanup()
+ if return_error:
+ median_err = median_err_computer.compute_median()
+ median_err_computer.cleanup()
 
  if save_intermediate_results:
  # Save median model to fits
  median_model = datamodels.ImageModel(median_data)
+ if return_error:
+ median_model.err = median_err
  median_model.update(drizzled_model)
  median_model.meta.wcs = median_wcs
  save_median(median_model, make_output_path, resamp.asn_id)
  del median_model
  del drizzled_model
 
- return median_data, median_wcs
+ if return_error:
+ return median_data, median_wcs, median_err
+ else:
+ return median_data, median_wcs
+
 
 
 class DiskAppendableArray:
@@ -406,6 +442,7 @@ def flag_resampled_model_crs(
  scale1,
  scale2,
  backg,
+ median_err=None,
  save_blot=False,
  make_output_path=None,
 ):
@@ -418,23 +455,32 @@ def flag_resampled_model_crs(
  else:
  pix_ratio = 1.0
 
- blot = gwcs_blot(median_data, median_wcs, input_model.data.shape, input_model.meta.wcs, pix_ratio)
+ blot = gwcs_blot(median_data, median_wcs, input_model.data.shape,
+ input_model.meta.wcs, pix_ratio) #, fillval=np.nan)
+ if median_err is not None:
+ blot_err = gwcs_blot(median_err, median_wcs, input_model.data.shape,
+ input_model.meta.wcs, pix_ratio) #, fillval=np.nan)
+ else:
+ blot_err = None
  if save_blot:
  if make_output_path is None:
  raise ValueError("make_output_path must be provided if save_blot is True")
  model_path = make_output_path(input_model.meta.filename, suffix='blot')
  blot_model = _make_blot_model(input_model, blot)
+ if median_err is not None:
+ blot_model.err = blot_err
  blot_model.meta.filename = model_path
  blot_model.save(model_path)
  log.info(f"Saved model in {model_path}")
  del blot_model
  # dq flags will be updated in-place
- _flag_resampled_model_crs(input_model, blot, snr1, snr2, scale1, scale2, backg)
+ _flag_resampled_model_crs(input_model, blot, blot_err, snr1, snr2, scale1, scale2, backg)
 
 
 def _flag_resampled_model_crs(
  input_model,
  blot,
+ blot_err,
  snr1,
  snr2,
  scale1,
@@ -451,7 +497,11 @@ def _flag_resampled_model_crs(
  backg = input_model.meta.background.level
  log.debug(f"Adding background level {backg} to blotted image")
 
- cr_mask = flag_resampled_crs(input_model.data, input_model.err, blot, snr1, snr2, scale1, scale2, backg)
+ if blot_err is not None:
+ err_to_use = blot_err
+ else:
+ err_to_use = input_model.err
+ cr_mask = flag_resampled_crs(input_model.data, err_to_use, blot, snr1, snr2, scale1, scale2, backg)
 
  # update the dq flags in-place
  input_model.dq |= cr_mask * np.uint32(DO_NOT_USE | OUTLIER)
@@ -471,6 +521,7 @@ def flag_crs_in_models_with_resampling(
  scale1,
  scale2,
  backg,
+ median_err=None,
  save_blot=False,
  make_output_path=None,
 ):
@@ -483,6 +534,7 @@ def flag_crs_in_models_with_resampling(
  scale1,
  scale2,
  backg,
+ median_err=median_err,
  save_blot=save_blot,
  make_output_path=make_output_path)
 
@@ -504,4 +556,4 @@ def _make_blot_model(input_model, blot):
  blot_model = type(input_model)()
  blot_model.data = blot
  blot_model.update(input_model)
- return blot_model
+ return blot_model
diff --git a/jwst/resample/resample.py b/jwst/resample/resample.py
@@ -252,14 +252,18 @@ def _get_intensity_scale(self, img):
  iscale = 1.0
  return iscale
 
- def resample_group(self, input_models, indices):
+ def resample_group(self, input_models, indices, compute_error=False):
  """Apply resample_many_to_many for one group
  
  Parameters
  ----------
  input_models : ModelLibrary
 
  indices : list
+
+ compute_error : bool, optional
+ If set, an approximate error image will be resampled
+ alongside the science image.
  """
  output_model = self.blank_output.copy()
 
@@ -328,6 +332,33 @@ def resample_group(self, input_models, indices):
  ymax=ymax
  )
  del data
+
+ # make an approximate error image by drizzling it
+ # in the same way the image is handled
+ if compute_error:
+ resampled_error = np.zeros_like(output_model.data)
+ outwht = np.zeros_like(output_model.data)
+ outcon = np.zeros_like(output_model.con)
+ self.drizzle_arrays(
+ img.err,
+ inwht,
+ img.meta.wcs,
+ output_model.meta.wcs,
+ resampled_error,
+ outwht,
+ outcon,
+ iscale=iscale,
+ pixfrac=self.pixfrac,
+ kernel=self.kernel,
+ fillval='NAN',
+ xmin=xmin,
+ xmax=xmax,
+ ymin=ymin,
+ ymax=ymax
+ )
+ output_model.err = resampled_error
+ del outwht, outcon
+
  input_models.shelve(img, index, modify=False)
  del img
 
@@ -356,14 +387,15 @@ def resample_many_to_many(self, input_models):
  log.info(f"Saved model in {output_name}")
  output_models.append(output_name)
  else:
- output_models.append(output_model.data)
+ output_models.append(output_model)
 
  if not self.in_memory:
  # build ModelLibrary as an association from the output files
  # this saves memory if there are multiple groups
  asn = asn_from_list(output_models, product_name='outlier_i2d')
  asn_dict = json.loads(asn.dump()[1]) # serializes the asn and converts to dict
  return ModelLibrary(asn_dict, on_disk=True)
+
  # otherwise just build it as a list of in-memory models
  return ModelLibrary(output_models, on_disk=False)
 
@@ -440,7 +472,7 @@ def resample_many_to_one(self, input_models):
  output_model.var_poisson,
  output_model.var_flat
  ]
- output_model.err = np.sqrt(np.nansum(var_components,axis=0))
+ output_model.err = np.sqrt(np.nansum(var_components, axis=0))
 
  # nansum returns zero for input that is all NaN -
  # set those values to NaN instead
@@ -608,7 +640,7 @@ def _resample_one_variance_array(self, name, input_model, output_model):
  iscale=iscale,
  pixfrac=self.pixfrac,
  kernel=self.kernel,
- fillval=np.nan,
+ fillval='NAN',
  xmin=xmin,
  xmax=xmax,
  ymin=ymin,