Skip to content

Commit

Permalink
Merge pull request #93 from transientskp/fix_flaw_in_bg_calculation
Browse files Browse the repository at this point in the history
Fix flaw in bg calculation
  • Loading branch information
HannoSpreeuw authored Nov 7, 2024
2 parents c8e2bd2 + bb439af commit 8e911d7
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 52 deletions.
50 changes: 24 additions & 26 deletions sourcefinder/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,28 +263,26 @@ def __grids(self):
useful_chunk = ndimage.find_objects(np.where(self.data.mask, 0, 1))
assert (len(useful_chunk) == 1)
y_dim = self.data[useful_chunk[0]].data.shape[1]
useful_data = da.from_array(self.data[useful_chunk[0]], chunks=(self.back_size_x, y_dim))
useful_data = da.from_array(self.data[useful_chunk[0]],
chunks=(self.back_size_x, y_dim))

mode_and_rms = useful_data.map_blocks(ImageData.compute_mode_and_rms_of_row_of_subimages,
y_dim, self.back_size_y,
dtype=np.complex64,
chunks=(1, 1)).compute()
mode_and_rms = useful_data.map_blocks(
ImageData.compute_mode_and_rms_of_row_of_subimages, y_dim,
self.back_size_y, dtype=np.complex64, chunks=(1, 1)).compute()

# See also similar comment below. This solution was chosen because map_blocks does not seem to be able to
# output multiple arrays. One can however output to a complex array and take real and imaginary
# parts afterward. Not a very clean solution, I admit.
mode_grid = mode_and_rms.real
rms_grid = mode_and_rms.imag
# See also similar comment below. This solution was chosen because
# map_blocks does not seem to be able to output multiple arrays. One can
# however output to a complex array and take real and imaginary parts
# afterward. Not a very clean solution, I admit.

rms_grid = np.ma.array(
rms_grid, mask=np.where(rms_grid == 0, 1, 0), dtype=np.float32)
# A rms of zero is not physical, since any instrument has system noise, so I use that as criterion
# to mask values. A zero background mode is physically possible, but also highly unlikely, given the way
# we determine it.
mode_grid = np.ma.array(
mode_grid, mask=np.where(rms_grid == 0, 1, 0), dtype=np.float32)
# Fill in the zeroes with nearest neighbours.
# In this way we do not have to make a MaskedArray, which
# scipy.interpolate.interp1d cannot handle adequately.
# utils.nearest_nonzero modifies in-place.
mode_grid = utils.nearest_nonzero(mode_and_rms.real)
rms_grid = utils.nearest_nonzero(mode_and_rms.imag)

return { 'bg': mode_grid, 'rms': rms_grid,}
return {'bg': mode_grid, 'rms': rms_grid}

@staticmethod
def compute_mode_and_rms_of_row_of_subimages(row_of_subimages, y_dim, back_size_y):
Expand All @@ -296,7 +294,7 @@ def compute_mode_and_rms_of_row_of_subimages(row_of_subimages, y_dim, back_size_

for starty in range(0, y_dim, back_size_y):
chunk = row_of_subimages[:, starty:starty+back_size_y]
if not chunk.any():
if np.ma.is_masked(chunk) or not chunk.any():
# In the original code we had rmsrow.append(False), but now we work with an array instead of a list,
# so I'll set these values to zero instead and use these zeroes to create the mask.
rms = 0
Expand All @@ -320,12 +318,12 @@ def compute_mode_and_rms_of_row_of_subimages(row_of_subimages, y_dim, back_size_
if np.fabs(mean - median) / sigma >= 0.3:
sigmaclip_logger.debug(
'bg skewed, %f clipping iterations', num_clip_its)
mode=median
mode = median
else:
sigmaclip_logger.debug(
'bg not skewed, %f clipping iterations',
num_clip_its)
mode=2.5 * median - 1.5 * mean
mode = 2.5 * median - 1.5 * mean
row_of_complex_values = np.append(row_of_complex_values, np.array(mode + 1j*rms))[None]
# This solution is a bit dirty. I would like dask.array.map_blocks to output two arrays,
# but presently that module does not seem to provide for that. But I can, however, output to a
Expand Down Expand Up @@ -375,7 +373,7 @@ def _interpolate(self, grid, roundup=False):
yratio = float(my_ydim) / self.back_size_y

my_map = np.ma.MaskedArray(np.zeros(self.data.shape),
mask=self.data.mask, dtype=np.float32)
mask=self.data.mask, dtype=np.float32)

# Remove the MaskedArrayFutureWarning warning and keep old numpy < 1.11
# behavior
Expand All @@ -392,7 +390,7 @@ def _interpolate(self, grid, roundup=False):
# with "ValueError: x and y arrays must have at least 2 entries". So in that case
# map_coordinates should be used.

if INTERPOLATE_ORDER == 1 and grid.shape[0]>1 and grid.shape[1]>1:
if INTERPOLATE_ORDER == 1 and grid.shape[0] > 1 and grid.shape[1] > 1:
x_initial = np.linspace(0., grid.shape[0]-1, grid.shape[0],
endpoint=True, dtype=np.float32)
y_initial = np.linspace(0., grid.shape[1]-1, grid.shape[1],
Expand Down Expand Up @@ -1157,10 +1155,10 @@ def _pyse(

for count, label in enumerate(labels):
chunk = slices[label - 1]

param = extract.ParamSet()
param["sig"] = maxis[count] / self.rmsmap.data[tuple(maxposs[count])]

param["peak"] = Uncertain(moments_of_sources[count, 0, 0], moments_of_sources[count, 1, 0])
param["flux"] = Uncertain(moments_of_sources[count, 0, 1], moments_of_sources[count, 1, 1])
param["xbar"] = Uncertain(moments_of_sources[count, 0, 2], moments_of_sources[count, 1, 2])
Expand All @@ -1171,7 +1169,7 @@ def _pyse(
param["semimaj_deconv"] = Uncertain(moments_of_sources[count, 0, 7], moments_of_sources[count, 1, 7])
param["semimin_deconv"] = Uncertain(moments_of_sources[count, 0, 8], moments_of_sources[count, 1, 8])
param["theta_deconv"] = Uncertain(moments_of_sources[count, 0, 9], moments_of_sources[count, 1, 9])

det = extract.Detection(param, self, chunk=chunk)
results.append(det)

Expand Down
84 changes: 58 additions & 26 deletions sourcefinder/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

import math

import numpy
import numpy as np
import scipy.integrate
from scipy.ndimage import distance_transform_edt

from sourcefinder.gaussian import gaussian
from sourcefinder.utility import coordinates
Expand All @@ -21,11 +22,11 @@ def generate_subthresholds(min_value, max_value, num_thresholds):
# greater than the difference between max and min.
# We subtract 1 from this to get the range between 0 and (max-min).
# We add min to that to get the range between min and max.
subthrrange = numpy.logspace(
subthrrange = np.logspace(
0.0,
numpy.log(max_value + 1 - min_value),
np.log(max_value + 1 - min_value),
num=num_thresholds + 1, # first value == min_value
base=numpy.e,
base=np.e,
endpoint=False # do not include max_value
)[1:]
subthrrange += (min_value - 1)
Expand Down Expand Up @@ -60,7 +61,7 @@ def get_error_radius(wcs, x_value, x_error, y_value, y_error):
)
except RuntimeError:
# We get a runtime error from wcs.p2s if the errors place the
# limits outside of the image, in which case we set the angular
# limits outside the image, in which case we set the angular
# uncertainty to infinity.
error_radius = float('inf')
return error_radius
Expand All @@ -72,7 +73,7 @@ def circular_mask(xdim, ydim, radius):
the centre are set to 0; outside that region, they are set to 1.
"""
centre_x, centre_y = (xdim - 1) / 2.0, (ydim - 1) / 2.0
x, y = numpy.ogrid[-centre_x:xdim - centre_x, -centre_y:ydim - centre_y]
x, y = np.ogrid[-centre_x:xdim - centre_x, -centre_y:ydim - centre_y]
return x * x + y * y >= radius * radius


Expand All @@ -83,8 +84,8 @@ def generate_result_maps(data, sourcelist):
showing the sources themselves and the other the residual after the
sources have been removed from the input data.
"""
residual_map = numpy.array(data) # array constructor copies by default
gaussian_map = numpy.zeros(residual_map.shape)
residual_map = np.array(data) # array constructor copies by default
gaussian_map = np.zeros(residual_map.shape)
for src in sourcelist:
# Include everything with 6 times the std deviation along the major
# axis. Should be very very close to 100% of the flux.
Expand All @@ -105,9 +106,9 @@ def generate_result_maps(data, sourcelist):
src.smin.value,
src.theta.value
)(
numpy.indices(residual_map.shape)[0, lower_bound_x:upper_bound_x,
np.indices(residual_map.shape)[0, lower_bound_x:upper_bound_x,
lower_bound_y:upper_bound_y],
numpy.indices(residual_map.shape)[1, lower_bound_x:upper_bound_x,
np.indices(residual_map.shape)[1, lower_bound_x:upper_bound_x,
lower_bound_y:upper_bound_y]
)

Expand Down Expand Up @@ -144,7 +145,7 @@ def calculate_correlation_lengths(semimajor, semiminor):
def calculate_beamsize(semimajor, semiminor):
"""Calculate the beamsize based on the semi major and minor axes"""

return numpy.pi * semimajor * semiminor
return np.pi * semimajor * semiminor


def fudge_max_pix(semimajor, semiminor, theta):
Expand Down Expand Up @@ -176,14 +177,14 @@ def fudge_max_pix(semimajor, semiminor, theta):
# Return the double (definite) integral of f1(y,x) from x=a..b
# and y=f2(x)..f3(x).

log20 = numpy.log(2.0)
cos_theta = numpy.cos(theta)
sin_theta = numpy.sin(theta)
log20 = np.log(2.0)
cos_theta = np.cos(theta)
sin_theta = np.sin(theta)

def landscape(y, x):
up = math.pow(((cos_theta * x + sin_theta * y) / semiminor), 2)
down = math.pow(((cos_theta * y - sin_theta * x) / semimajor), 2)
return numpy.exp(log20 * (up + down))
return np.exp(log20 * (up + down))

(correction, abserr) = scipy.integrate.dblquad(landscape, -0.5, 0.5,
lambda ymin: -0.5,
Expand Down Expand Up @@ -214,19 +215,16 @@ def maximum_pixel_method_variance(semimajor, semiminor, theta):
# Return the double (definite) integral of f1(y,x) from x=a..b
# and y=f2(x)..f3(x).

log20 = numpy.log(2.0)
cos_theta = numpy.cos(theta)
sin_theta = numpy.sin(theta)
log20 = np.log(2.0)
cos_theta = np.cos(theta)
sin_theta = np.sin(theta)

def landscape(y, x):
return numpy.exp(2.0 * log20 *
(
math.pow(((cos_theta * x + sin_theta * y) / semiminor),
return np.exp(2.0 * log20 * (
math.pow(((cos_theta * x + sin_theta * y) / semiminor),
2) +
math.pow(((cos_theta * y - sin_theta * x) / semimajor),
2)
)
)
math.pow(((cos_theta * y - sin_theta * x) / semimajor),
2)))

(result, abserr) = scipy.integrate.dblquad(landscape, -0.5, 0.5,
lambda ymin: -0.5,
Expand All @@ -249,8 +247,42 @@ def flatten(nested_list):
flattened = list(flatten(nested)).
"""
for elem in nested_list:
if isinstance(elem, (tuple, list, numpy.ndarray)):
if isinstance(elem, (tuple, list, np.ndarray)):
for i in flatten(elem):
yield i
else:
yield elem


# “The nearest_nonzero function has been generated using ChatGPT 4.0.
# Its AI-output has been verified for correctness, accuracy and
# completeness, adapted where needed, and approved by the author.”
def nearest_nonzero(test_array):
"""
Replace zeros in test_array with the nearest non-zero values based on
distance.
Parameters
----------
test_array : np.ndarray
A 2D array where zeros will be replaced by the nearest non-zero values.
Returns
-------
np.ndarray
A copy of test_array with zeros replaced by nearest non-zero values.
"""
# Create a mask for non-zero values
non_zero_mask = test_array != 0

# Calculate the distance transform and nearest non-zero indices
distances, nearest_indices = distance_transform_edt(~non_zero_mask,
return_indices=True)

# Use nearest indices to fill in zero positions with the nearest non-zero
# values
nearest_values = test_array[nearest_indices[0], nearest_indices[1]]
result = test_array.copy()
result[~non_zero_mask] = nearest_values[~non_zero_mask]

return result

0 comments on commit 8e911d7

Please sign in to comment.