Merge pull request #93 from transientskp/fix_flaw_in_bg_calculation

Fix flaw in bg calculation
transientskp · Nov 7, 2024 · 8e911d7 · 8e911d7
2 parents c8e2bd2 + bb439af
commit 8e911d7
Show file tree

Hide file tree

Showing 2 changed files with 82 additions and 52 deletions.
diff --git a/sourcefinder/image.py b/sourcefinder/image.py
@@ -263,28 +263,26 @@ def __grids(self):
         useful_chunk = ndimage.find_objects(np.where(self.data.mask, 0, 1))
         assert (len(useful_chunk) == 1)
         y_dim = self.data[useful_chunk[0]].data.shape[1]
-        useful_data = da.from_array(self.data[useful_chunk[0]], chunks=(self.back_size_x, y_dim))
+        useful_data = da.from_array(self.data[useful_chunk[0]],
+                                    chunks=(self.back_size_x, y_dim))
 
-        mode_and_rms = useful_data.map_blocks(ImageData.compute_mode_and_rms_of_row_of_subimages,
-                                              y_dim,  self.back_size_y,
-                                              dtype=np.complex64,
-                                              chunks=(1, 1)).compute()
+        mode_and_rms = useful_data.map_blocks(
+            ImageData.compute_mode_and_rms_of_row_of_subimages, y_dim,
+            self.back_size_y, dtype=np.complex64, chunks=(1, 1)).compute()
 
-        # See also similar comment below. This solution was chosen because map_blocks does not seem to be able to
-        # output multiple arrays. One can however output to a complex array and take real and imaginary
-        # parts afterward. Not a very clean solution, I admit.
-        mode_grid = mode_and_rms.real
-        rms_grid = mode_and_rms.imag
+        # See also similar comment below. This solution was chosen because
+        # map_blocks does not seem to be able to output multiple arrays. One can
+        # however output to a complex array and take real and imaginary parts
+        # afterward. Not a very clean solution, I admit.
 
-        rms_grid = np.ma.array(
-            rms_grid, mask=np.where(rms_grid == 0, 1, 0), dtype=np.float32)
-        # A rms of zero is not physical, since any instrument has system noise, so I use that as criterion
-        # to mask values. A zero background mode is physically possible, but also highly unlikely, given the way
-        # we determine it.
-        mode_grid = np.ma.array(
-            mode_grid, mask=np.where(rms_grid == 0, 1, 0), dtype=np.float32)
+        # Fill in the zeroes with nearest neighbours.
+        # In this way we do not have to make a MaskedArray, which
+        # scipy.interpolate.interp1d cannot handle adequately.
+        # utils.nearest_nonzero modifies in-place.
+        mode_grid = utils.nearest_nonzero(mode_and_rms.real)
+        rms_grid = utils.nearest_nonzero(mode_and_rms.imag)
 
-        return { 'bg': mode_grid, 'rms': rms_grid,}
+        return {'bg': mode_grid, 'rms': rms_grid}
 
     @staticmethod
     def compute_mode_and_rms_of_row_of_subimages(row_of_subimages, y_dim, back_size_y):
@@ -296,7 +294,7 @@ def compute_mode_and_rms_of_row_of_subimages(row_of_subimages, y_dim, back_size_
 
         for starty in range(0, y_dim, back_size_y):
             chunk = row_of_subimages[:, starty:starty+back_size_y]
-            if not chunk.any():
+            if np.ma.is_masked(chunk) or not chunk.any():
                 # In the original code we had rmsrow.append(False), but now we work with an array instead of a list,
                 # so I'll set these values to zero instead and use these zeroes to create the mask.
                 rms = 0
@@ -320,12 +318,12 @@ def compute_mode_and_rms_of_row_of_subimages(row_of_subimages, y_dim, back_size_
                     if np.fabs(mean - median) / sigma >= 0.3:
                         sigmaclip_logger.debug(
                             'bg skewed, %f clipping iterations', num_clip_its)
-                        mode=median
+                        mode = median
                     else:
                         sigmaclip_logger.debug(
                             'bg not skewed, %f clipping iterations',
                             num_clip_its)
-                        mode=2.5 * median - 1.5 * mean
+                        mode = 2.5 * median - 1.5 * mean
             row_of_complex_values = np.append(row_of_complex_values,  np.array(mode + 1j*rms))[None]
         # This solution is a bit dirty. I would like dask.array.map_blocks to output two arrays,
         # but presently that module does not seem to provide for that. But I can, however, output to a
@@ -375,7 +373,7 @@ def _interpolate(self, grid, roundup=False):
         yratio = float(my_ydim) / self.back_size_y
 
         my_map = np.ma.MaskedArray(np.zeros(self.data.shape),
-                                      mask=self.data.mask, dtype=np.float32)
+                                   mask=self.data.mask, dtype=np.float32)
 
         # Remove the MaskedArrayFutureWarning warning and keep old numpy < 1.11
         # behavior
@@ -392,7 +390,7 @@ def _interpolate(self, grid, roundup=False):
         # with "ValueError: x and y arrays must have at least 2 entries". So in that case
         # map_coordinates should be used.
 
-        if INTERPOLATE_ORDER == 1 and grid.shape[0]>1 and grid.shape[1]>1:
+        if INTERPOLATE_ORDER == 1 and grid.shape[0] > 1 and grid.shape[1] > 1:
             x_initial = np.linspace(0., grid.shape[0]-1, grid.shape[0],
                                     endpoint=True, dtype=np.float32)
             y_initial = np.linspace(0., grid.shape[1]-1, grid.shape[1],
@@ -1157,10 +1155,10 @@ def _pyse(
 
             for count, label in enumerate(labels):
                 chunk = slices[label - 1]
-
+            
                 param = extract.ParamSet()
                 param["sig"] = maxis[count] / self.rmsmap.data[tuple(maxposs[count])]
-
+            
                 param["peak"] = Uncertain(moments_of_sources[count, 0, 0], moments_of_sources[count, 1, 0])
                 param["flux"] = Uncertain(moments_of_sources[count, 0, 1], moments_of_sources[count, 1, 1])
                 param["xbar"] = Uncertain(moments_of_sources[count, 0, 2], moments_of_sources[count, 1, 2])
@@ -1171,7 +1169,7 @@ def _pyse(
                 param["semimaj_deconv"] = Uncertain(moments_of_sources[count, 0, 7], moments_of_sources[count, 1, 7])
                 param["semimin_deconv"] = Uncertain(moments_of_sources[count, 0, 8], moments_of_sources[count, 1, 8])
                 param["theta_deconv"] = Uncertain(moments_of_sources[count, 0, 9], moments_of_sources[count, 1, 9])
-
+            
                 det = extract.Detection(param, self, chunk=chunk)
                 results.append(det)
 

diff --git a/sourcefinder/utils.py b/sourcefinder/utils.py
@@ -4,8 +4,9 @@
 
 import math
 
-import numpy
+import numpy as np
 import scipy.integrate
+from scipy.ndimage import distance_transform_edt
 
 from sourcefinder.gaussian import gaussian
 from sourcefinder.utility import coordinates
@@ -21,11 +22,11 @@ def generate_subthresholds(min_value, max_value, num_thresholds):
     # greater than the difference between max and min.
     # We subtract 1 from this to get the range between 0 and (max-min).
     # We add min to that to get the range between min and max.
-    subthrrange = numpy.logspace(
+    subthrrange = np.logspace(
         0.0,
-        numpy.log(max_value + 1 - min_value),
+        np.log(max_value + 1 - min_value),
         num=num_thresholds + 1,  # first value == min_value
-        base=numpy.e,
+        base=np.e,
         endpoint=False  # do not include max_value
     )[1:]
     subthrrange += (min_value - 1)
@@ -60,7 +61,7 @@ def get_error_radius(wcs, x_value, x_error, y_value, y_error):
             )
     except RuntimeError:
         # We get a runtime error from wcs.p2s if the errors place the
-        # limits outside of the image, in which case we set the angular
+        # limits outside the image, in which case we set the angular
         # uncertainty to infinity.
         error_radius = float('inf')
     return error_radius
@@ -72,7 +73,7 @@ def circular_mask(xdim, ydim, radius):
     the centre are set to 0; outside that region, they are set to 1.
     """
     centre_x, centre_y = (xdim - 1) / 2.0, (ydim - 1) / 2.0
-    x, y = numpy.ogrid[-centre_x:xdim - centre_x, -centre_y:ydim - centre_y]
+    x, y = np.ogrid[-centre_x:xdim - centre_x, -centre_y:ydim - centre_y]
     return x * x + y * y >= radius * radius
 
 
@@ -83,8 +84,8 @@ def generate_result_maps(data, sourcelist):
     showing the sources themselves and the other the residual after the
     sources have been removed from the input data.
     """
-    residual_map = numpy.array(data)  # array constructor copies by default
-    gaussian_map = numpy.zeros(residual_map.shape)
+    residual_map = np.array(data)  # array constructor copies by default
+    gaussian_map = np.zeros(residual_map.shape)
     for src in sourcelist:
         # Include everything with 6 times the std deviation along the major
         # axis. Should be very very close to 100% of the flux.
@@ -105,9 +106,9 @@ def generate_result_maps(data, sourcelist):
             src.smin.value,
             src.theta.value
         )(
-            numpy.indices(residual_map.shape)[0, lower_bound_x:upper_bound_x,
+            np.indices(residual_map.shape)[0, lower_bound_x:upper_bound_x,
                                               lower_bound_y:upper_bound_y],
-            numpy.indices(residual_map.shape)[1, lower_bound_x:upper_bound_x,
+            np.indices(residual_map.shape)[1, lower_bound_x:upper_bound_x,
                                               lower_bound_y:upper_bound_y]
         )
 
@@ -144,7 +145,7 @@ def calculate_correlation_lengths(semimajor, semiminor):
 def calculate_beamsize(semimajor, semiminor):
     """Calculate the beamsize based on the semi major and minor axes"""
 
-    return numpy.pi * semimajor * semiminor
+    return np.pi * semimajor * semiminor
 
 
 def fudge_max_pix(semimajor, semiminor, theta):
@@ -176,14 +177,14 @@ def fudge_max_pix(semimajor, semiminor, theta):
     #   Return the double (definite) integral of f1(y,x) from x=a..b
     #   and y=f2(x)..f3(x).
 
-    log20 = numpy.log(2.0)
-    cos_theta = numpy.cos(theta)
-    sin_theta = numpy.sin(theta)
+    log20 = np.log(2.0)
+    cos_theta = np.cos(theta)
+    sin_theta = np.sin(theta)
 
     def landscape(y, x):
         up = math.pow(((cos_theta * x + sin_theta * y) / semiminor), 2)
         down = math.pow(((cos_theta * y - sin_theta * x) / semimajor), 2)
-        return numpy.exp(log20 * (up + down))
+        return np.exp(log20 * (up + down))
 
     (correction, abserr) = scipy.integrate.dblquad(landscape, -0.5, 0.5,
                                                    lambda ymin: -0.5,
@@ -214,19 +215,16 @@ def maximum_pixel_method_variance(semimajor, semiminor, theta):
     #   Return the double (definite) integral of f1(y,x) from x=a..b
     #   and y=f2(x)..f3(x).
 
-    log20 = numpy.log(2.0)
-    cos_theta = numpy.cos(theta)
-    sin_theta = numpy.sin(theta)
+    log20 = np.log(2.0)
+    cos_theta = np.cos(theta)
+    sin_theta = np.sin(theta)
 
     def landscape(y, x):
-        return numpy.exp(2.0 * log20 *
-                         (
-                         math.pow(((cos_theta * x + sin_theta * y) / semiminor),
+        return np.exp(2.0 * log20 * (
+                      math.pow(((cos_theta * x + sin_theta * y) / semiminor),
                                   2) +
-                         math.pow(((cos_theta * y - sin_theta * x) / semimajor),
-                                  2)
-                         )
-                         )
+                      math.pow(((cos_theta * y - sin_theta * x) / semimajor),
+                                  2)))
 
     (result, abserr) = scipy.integrate.dblquad(landscape, -0.5, 0.5,
                                                lambda ymin: -0.5,
@@ -249,8 +247,42 @@ def flatten(nested_list):
         flattened = list(flatten(nested)).
     """
     for elem in nested_list:
-        if isinstance(elem, (tuple, list, numpy.ndarray)):
+        if isinstance(elem, (tuple, list, np.ndarray)):
             for i in flatten(elem):
                 yield i
         else:
             yield elem
+
+
+# “The nearest_nonzero function has been generated using ChatGPT 4.0.
+# Its AI-output has been verified for correctness, accuracy and
+# completeness, adapted where needed, and approved by the author.”
+def nearest_nonzero(test_array):
+    """
+    Replace zeros in test_array with the nearest non-zero values based on
+    distance.
+
+    Parameters
+    ----------
+    test_array : np.ndarray
+        A 2D array where zeros will be replaced by the nearest non-zero values.
+
+    Returns
+    -------
+    np.ndarray
+        A copy of test_array with zeros replaced by nearest non-zero values.
+    """
+    # Create a mask for non-zero values
+    non_zero_mask = test_array != 0
+
+    # Calculate the distance transform and nearest non-zero indices
+    distances, nearest_indices = distance_transform_edt(~non_zero_mask,
+                                                        return_indices=True)
+
+    # Use nearest indices to fill in zero positions with the nearest non-zero
+    # values
+    nearest_values = test_array[nearest_indices[0], nearest_indices[1]]
+    result = test_array.copy()
+    result[~non_zero_mask] = nearest_values[~non_zero_mask]
+
+    return result