From b6ef1afd4da8212c0fc1188965b343868a1a6caf Mon Sep 17 00:00:00 2001 From: Simon Torres Date: Wed, 10 Apr 2024 12:47:37 -0400 Subject: [PATCH 1/3] Improve background subtraction for low-snr data --- CHANGES.rst | 10 ++++- goodman_focus/goodman_focus.py | 72 ++++++++++++++++++++++++++++------ tox.ini | 4 +- 3 files changed, 70 insertions(+), 16 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index f0ab21b..a6d30ed 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,11 @@ +.. _v2.0.3 + +2.0.3 10-04-2022 +================ + +- Improved background fitting for low signal-to-noise ratio data. + + .. _v2.0.2: 2.0.2 08-04-2022 @@ -131,4 +139,4 @@ - Fixed some issues with documentation - Added .readthedocs.yml file for RTD builds - Added ``install_requires`` field in ``setup()`` -- Removed python 3.5 from the supported versions. \ No newline at end of file +- Removed python 3.5 from the supported versions. diff --git a/goodman_focus/goodman_focus.py b/goodman_focus/goodman_focus.py index d8558c3..1594c6f 100644 --- a/goodman_focus/goodman_focus.py +++ b/goodman_focus/goodman_focus.py @@ -73,7 +73,31 @@ def get_args(arguments=None): return args -def get_peaks(ccd, file_name='', plots=False): +def clean_clipped_profile(clipped_profile: np.ma.MaskedArray): + """Helper function to reduce code duplicity + + Creates a new x-axis for the values not masked, and also it creates the profile without the + masked values. + This information is used for fitting the background model for later background subtraction. + + Args: + clipped_profile (MaskedArray): Masked profile to clean. + + Returns: + clipped_x_axis and cleaned_profile. + + """ + clipped_x_axis = np.array([i for i in range(len(clipped_profile)) if not clipped_profile.mask[i]]) + cleaned_profile = clipped_profile[~clipped_profile.mask] + + return clipped_x_axis, cleaned_profile + + +def get_peaks(ccd: CCDData, + file_name: str = '', + split_size_for_low_snr_data: int = 10, + threshold_for_selecting_peaks: float = 2, + plots: bool = False): """Identify peaks in an image For Imaging and Spectroscopy the images obtained for focusing have lines @@ -82,11 +106,15 @@ def get_peaks(ccd, file_name='', plots=False): the detector. Args: - ccd (object): Image to get peaks from + ccd (CCDData): Image to get peaks from file_name (str): Name of the file used. This is optional and is used - only for debugging purposes. + only for debugging purposes. + split_size_for_low_snr_data (int): When the data has low signal-to-noise ratio is required + to split the spectral profile in this number of parts. Default: 10 + threshold_for_selecting_peaks (float): Factor of spectral profile's standard deviation to + discriminate peaks. plots (bool): Show plots of the profile, background and - background-subtracted profile + background-subtracted profile Returns: A list of peak values, peak intensities as well as the x-axis and the @@ -103,9 +131,19 @@ def get_peaks(ccd, file_name='', plots=False): x_axis = np.array(range(len(raw_profile))) clipped_profile = sigma_clip(raw_profile, sigma=1, maxiters=5) - - clipped_x_axis = [i for i in range(len(clipped_profile)) if not clipped_profile.mask[i]] - cleaned_profile = clipped_profile[~clipped_profile.mask] + clipped_x_axis, cleaned_profile = clean_clipped_profile(clipped_profile=clipped_profile) + + percent_of_remaining_profile_points = (len(cleaned_profile) * 100) / len(raw_profile) + if percent_of_remaining_profile_points < 20.: + log.warning(f"The percentage of remaining points after cleaning. " + f"{percent_of_remaining_profile_points:.2f}% suggests that " + f"this data has low signal to noise ratio.") + segmented_clipping = [] + for sub_section in np.array_split(raw_profile, split_size_for_low_snr_data): + clipped_sub_section = sigma_clip(sub_section, sigma_upper=1, maxiters=5) + segmented_clipping.append(clipped_sub_section) + clipped_profile = np.ma.concatenate(segmented_clipping, axis=0) + clipped_x_axis, cleaned_profile = clean_clipped_profile(clipped_profile=clipped_profile) background_model = models.Linear1D(slope=0, intercept=np.mean(clipped_profile)) @@ -125,22 +163,30 @@ def get_peaks(ccd, file_name='', plots=False): [0 if it is None else it for it in filtered_data]) peaks = signal.argrelmax(filtered_data, axis=0, order=5)[0] + log.debug(f"Found {len(peaks)} peaks in file") - if len(peaks) == 1: - log.debug(f"Found {len(peaks)} peak in file") - else: - log.debug(f"Found {len(peaks)} peaks in file") + cleaned_profile_stddev = np.std(profile) + log.debug(f"Standard deviation of spectral profile after subtracting " + f"background is: {cleaned_profile_stddev}") + + peaks = [i for i in peaks if profile[int(i)] > threshold_for_selecting_peaks * cleaned_profile_stddev] + log.debug(f"Peaks below {threshold_for_selecting_peaks * cleaned_profile_stddev} rejected. " + f"Update threshold to updated, current value is " + f"{threshold_for_selecting_peaks} standard deviation.") + log.debug(f"{len(peaks)} peaks remaining after cleaning.") values = np.array([profile[int(index)] for index in peaks]) if plots: # pragma: no cover plt.title(f"{file_name} {np.mean(clipped_profile)}") plt.axhline(0, color='k', label='Zero') + plt.axhline(threshold_for_selecting_peaks * cleaned_profile_stddev, color='g', + label=f"{threshold_for_selecting_peaks} Cleaned Profile STD") plt.plot(x_axis, raw_profile, label='Raw Profile') plt.plot(x_axis, clipped_profile, label='Clipped Profile') plt.plot(x_axis, background_model(x_axis), - label='Background Model') - plt.plot(x_axis, fitted_background(x_axis), label='Background Level') + label='Initial Background Model') + plt.plot(x_axis, fitted_background(x_axis), label='Fitted Background Level') plt.plot(x_axis, profile, label='Background Subtracted Profile') # plt.plot(x_axis, filtered_data, label="Filtered Data") for _peak in peaks: diff --git a/tox.ini b/tox.ini index 0a18bb0..3688b58 100644 --- a/tox.ini +++ b/tox.ini @@ -24,7 +24,7 @@ setenv = NPY_LAPACK_ORDER= # Pass through the following environment variables which may be needed for the CI -passenv = HOME WINDIR LC_ALL LC_CTYPE CC CI TRAVIS +passenv = HOME,WINDIR,LC_ALL,LC_CTYPE,CC,CI,TRAVIS # Run the tests in a temporary directory to make sure that we don't import # this package from the source tree @@ -100,4 +100,4 @@ skip_install = true changedir = . description = check code style, e.g. with flake8 deps = flake8 -commands = flake8 specutils --count --max-line-length=100 --select=E101,W191,W291,W292,W293,W391,E111,E112,E113,E502,E722,E901,E902 \ No newline at end of file +commands = flake8 specutils --count --max-line-length=100 --select=E101,W191,W291,W292,W293,W391,E111,E112,E113,E502,E722,E901,E902 From 45b2c03769ca34e89055d9d41de53dca7dfd3732 Mon Sep 17 00:00:00 2001 From: Simon Torres Date: Wed, 10 Apr 2024 12:51:30 -0400 Subject: [PATCH 2/3] bumped version to 2.0.3 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index e2f5736..330f01c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -32,4 +32,4 @@ install_requires = sphinx # version should be PEP440 compatible (http://www.python.org/dev/peps/pep-0440) -version = 2.0.2 +version = 2.0.3 From c01330115649a14c3fb9a7113d45439d4b96a836 Mon Sep 17 00:00:00 2001 From: Simon Torres Date: Wed, 10 Apr 2024 13:03:22 -0400 Subject: [PATCH 3/3] add --selection-threshold argument for limiting the number of peaks based on a factor of the standard deviation of the background-subtracted spectral profile. --- goodman_focus/goodman_focus.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/goodman_focus/goodman_focus.py b/goodman_focus/goodman_focus.py index 1594c6f..aa50355 100644 --- a/goodman_focus/goodman_focus.py +++ b/goodman_focus/goodman_focus.py @@ -63,6 +63,12 @@ def get_args(arguments=None): help='Show a plot when it finishes the focus ' 'calculation') + parser.add_argument('--selection-threshold', + action='store', + dest='selection_threshold', + help='Threshold as a factor of spectral profile standard deviation ' + 'after background subtraction.') + parser.add_argument('--debug', action='store_true', dest='debug', @@ -305,6 +311,7 @@ def __init__(self, file_pattern="*.fits", obstype="FOCUS", features_model='gaussian', + selection_threshold=2, plot_results=False, debug=False): @@ -312,6 +319,7 @@ def __init__(self, self.file_pattern = file_pattern self.obstype = obstype self.features_model = features_model + self.selection_threshold = selection_threshold self.plot_results = plot_results self.debug = debug @@ -602,6 +610,7 @@ def get_focus_data(self, group): peaks, values, x_axis, profile = get_peaks( ccd=self.__ccd, file_name=self.file_name, + threshold_for_selecting_peaks=self.selection_threshold, plots=self.debug) self.fwhm = get_fwhm(peaks=peaks,