lifewatch · caplinje-NOAA · Nov 10, 2024 · Nov 13, 2024 · Nov 14, 2024 · Nov 14, 2024
diff --git a/.gitignore b/.gitignore
@@ -64,4 +64,7 @@ docs/source/generated/
 .DS_Store
 
 tests/test_data/data_exploration
+tests/test_data/impulsive_data/kurtosis_result.nc
+tests/test_data/impulsive_data/pileDriving_results_pypam.csv
+
 pypam.egg-info/
diff --git a/pypam/_event.py b/pypam/_event.py
@@ -40,18 +40,40 @@ def cut(self, start=0, end=None):
         self.end += end
         self.signal = self.signal[start:end]
 
-    def analyze(self):
+    def analyze(self, impulsive=False, energy_window=0.9):
         """
         Perform all necessary calculations for a single event
 
+        Parameters
+        ----------
+        impulsive : bool
+            whether or not the analysis should perform impulsive metrics
+        energy_window: float
+            If provided, calculate relevant metrics over the given energy window (e.g. RMS_90
+            for energy_window= .9).
         Returns
         -------
-        rms, sel, peak
+        dictionary of metrics: rms, sel, peak, kurtosis, tau
         """
-        rms = self.rms()
-        sel = self.sel()
+
+        if impulsive:
+            windowStr = str(int(energy_window*100))
+            rms = self.rms(energy_window=energy_window)
+            sel = super(Event, self).sel()
+            tau = self.pulse_width(energy_window)
+
+        else:
+            windowStr = ''
+            rms = self.rms()
+            sel = self.sel()
+            tau = (self.end-self.start)/self.fs
+
         peak = self.peak()
-        return rms, sel, peak
+        kurtosis = self.kurtosis()
+        start_time = self.start / self.fs
+
+        out = {'startTime':start_time,'peak':peak,f'rms{windowStr}':rms,'sel':sel,'tau':tau,'kurtosis':kurtosis}
+        return out
 
     def sel(self, high_noise=False):
         """

diff --git a/pypam/acoustic_file.py b/pypam/acoustic_file.py
@@ -137,6 +137,8 @@ def _bins(self, binsize=None, bin_overlap=0):
         Where i is the index, time_bin is the datetime of the beginning of the block and signal is the signal object
         of the bin
         """
+        if bin_overlap>1:
+            raise ValueError(f'bin_overlap must be fractional.')
         if binsize is None:
             blocksize = self.file.frames - self._start_frame
         else:
@@ -145,7 +147,7 @@ def _bins(self, binsize=None, bin_overlap=0):
         n_blocks = self._n_blocks(blocksize, noverlap=noverlap)
         time_array, _, _ = self._time_array(binsize, bin_overlap=bin_overlap)
         for i, block in tqdm(enumerate(sf.blocks(self.file_path, blocksize=blocksize, start=self._start_frame,
-                                                 overlap=bin_overlap, always_2d=True, fill_value=0.0)),
+                                                 overlap=noverlap, always_2d=True, fill_value=0.0)),
                              total=n_blocks, leave=False, position=0):
             # Select the desired channel
             block = block[:, self.channel]
@@ -155,8 +157,9 @@ def _bins(self, binsize=None, bin_overlap=0):
             signal = sig.Signal(signal=signal_upa, fs=self.fs, channel=self.channel)
             if self.dc_subtract:
                 signal.remove_dc()
-            start_sample = i * blocksize + self._start_frame
-            end_sample = start_sample + len(signal_upa)
+            step = blocksize - noverlap
+            start_sample = i * step + self._start_frame
+            end_sample = start_sample + blocksize
             yield i, time_bin, signal, start_sample, end_sample
         self.file.seek(0)
 
@@ -544,7 +547,7 @@ def _apply(self, method_name, binsize=None, db=True, band_list=None, bin_overlap
     def rms(self, binsize=None, bin_overlap=0, db=True):
         """
         Calculation of root mean squared value (rms) of the signal in upa for each bin
-        Returns Dataframe with 'datetime' as index and 'rms' value as a column
+        Returns Dataset with 'datetime' as coordinate and 'rms' value as a variable
 
         Parameters
         ----------
@@ -558,10 +561,25 @@ def rms(self, binsize=None, bin_overlap=0, db=True):
         rms_ds = self._apply(method_name='rms', binsize=binsize, bin_overlap=bin_overlap, db=db)
         return rms_ds
 
+    def kurtosis(self, binsize=None, bin_overlap=0):
+        """
+        Calculation of kurtosis value of the signal for each bin
+        Returns Dataset with 'datetime' as coordinate and 'kurtosis' value as a variable
+
+        Parameters
+        ----------
+        binsize : float, in sec
+            Time window considered. If set to None, only one value is returned
+        bin_overlap : float [0 to 1]
+            Percentage to overlap the bin windows
+        """
+        kurtosis_ds = self._apply(method_name='kurtosis', binsize=binsize, bin_overlap=bin_overlap, db=False)
+        return kurtosis_ds
+
     def aci(self, binsize=None, bin_overlap=0, nfft=1024, fft_overlap=0.5):
         """
         Calculation of root mean squared value (rms) of the signal in upa for each bin
-        Returns Dataframe with 'datetime' as index and 'rms' value as a column
+        Returns Dataset with 'datetime' as coordinate and 'aci' value as a variable
 
         Parameters
         ----------
@@ -581,7 +599,7 @@ def aci(self, binsize=None, bin_overlap=0, nfft=1024, fft_overlap=0.5):
     def dynamic_range(self, binsize=None, bin_overlap=0, db=True):
         """
         Compute the dynamic range of each bin
-        Returns a dataframe with datetime as index and dr as column
+        Returns a Dataset with 'datetime' as coordinate and 'dr' as variable
 
         Parameters
         ----------

diff --git a/pypam/signal.py b/pypam/signal.py
@@ -289,21 +289,50 @@ def window_method(self, method_name, window, **kwargs):
             time.append(block.time)
         return time, output
 
-    def rms(self, db=True, **kwargs):
+    def rms(self, db=True, energy_window = None, **kwargs):
         """
         Calculation of root mean squared value (rms) of the signal in uPa
 
         Parameters
         ----------
         db : bool
             If set to True the result will be given in db, otherwise in uPa
+        energy_window: float
+            If provided, calculate the rms over the given energy window (e.g. RMS_90
+            for energy_window= .9).
         """
-        rms_val = utils.rms(self.signal)
+        if energy_window:
+            [start, end] = utils.energy_window(self.signal, energy_window)
+            rms_val = utils.rms(self.signal[start:end])
+        else:
+            rms_val = utils.rms(self.signal)
         # Convert it to db if applicable
         if db:
             rms_val = utils.to_db(rms_val, ref=1.0, square=True)
         return rms_val
 
+    def pulse_width(self, energy_window, **kwargs):
+        """
+        Returns the pulse width of an impulsive signal
+        according to a fractional energy window
+
+        Parameters
+        ----------
+        energy_window : float [0,1]
+            given energy window to calculate pulse width
+        **kwargs : TYPE
+            DESCRIPTION.
+
+        Returns
+        -------
+        tau: float
+        energy_window pulse width in seconds
+
+        """
+        [start, end] = utils.energy_window(self.signal, energy_window)
+
+        return (end - start) / self.fs
+
     def dynamic_range(self, db=True, **kwargs):
         """
         Compute the dynamic range of each bin
@@ -323,8 +352,19 @@ def dynamic_range(self, db=True, **kwargs):
     def sel(self, db=True, **kwargs):
         """
         Calculate the sound exposure level of an event
+
+        Parameters
+        ----------
+        db : bool
+            If set to True the result will be given in db, otherwise in uPa
+
+        Returns
+        -------
+        sel: float
+        sound exposure level
         """
         y = utils.sel(self.signal, self.fs)
+
         if db:
             y = utils.to_db(y, square=False)
         return y
@@ -339,6 +379,16 @@ def peak(self, db=True, **kwargs):
             y = utils.to_db(y, square=True)
         return y
 
+    def kurtosis(self, **kwargs):
+        """
+        Calculation of kurtosis of the signal
+
+        Parameters
+        ----------
+
+        """
+        return utils.kurtosis(self.signal)
+
     def third_octave_levels(self, db=True, **kwargs):
         """
         Calculation of calibrated 1/3-octave band levels

diff --git a/pypam/units.py b/pypam/units.py
@@ -17,7 +17,8 @@
                 'aei': ('AEI', 'unitless'),
                 'adi': ('ADI', 'unitless'),
                 'zcr': ('zcr', 'unitless'),
-                'zcr_avg': ('zcr_avg', 'unitless')
+                'zcr_avg': ('zcr_avg', 'unitless'),
+                'kurtosis': ('Kurtosis', 'unitless'),
                 }
 
 

diff --git a/pypam/utils.py b/pypam/utils.py
@@ -120,7 +120,7 @@ def sel(signal, fs):
     Parameters
     ----------
     signal : numpy array
-        Signal to compute the dynamic range
+        Signal to compute the SEL
     fs : int
         Sampling frequency
     """
@@ -135,11 +135,53 @@ def peak(signal):
     Parameters
     ----------
     signal : numpy array
-        Signal to compute the dynamic range
+        Signal to compute the peak value
     """
     return np.max(np.abs(signal))
 
 
+@nb.njit
+def kurtosis(signal):
+    """
+    Return the kurtosis of the signal according to Muller et al. 2020
+    Parameters
+    ----------
+    signal : numpy array
+        Signal to compute the kurtosis
+    """
+    n = len(signal)
+    var = (signal - np.mean(signal)) ** 2
+    mu4 = np.sum(var ** 2) / n
+    mu2 = np.sum(var) / (n-1)
+    return mu4/mu2 ** 2
+
+@nb.njit
+def energy_window(signal, percentage):
+    """
+    Return sample window [start, end] which contains a given percentage of the
+    signals total energy. See Madsen 2005 for more details.
+    Parameters
+    ----------
+    signal : numpy array
+        Signal to compute the sel
+    percentage : float between [0,1]
+        percentage of total energy contained in output window
+    """
+    # calculate beginning and ending percentage window (e.g., for x=90%, window = [5%,95%])
+    percent_start = .50 - percentage / 2
+    percent_end = .50 + percentage / 2
+
+    # calculate normalized cumulative energy distribution
+    Ep_cs = np.cumsum(signal ** 2)
+    Ep_cs_norm = Ep_cs / np.max(Ep_cs)
+
+    # find corresponding indices
+    iStartPercent = np.argmin(np.abs(Ep_cs_norm - percent_start))
+    iEndPercent = np.argmin(np.abs(Ep_cs_norm - percent_end))
+
+    window = [iStartPercent, iEndPercent]
+    return window
+
 @nb.njit
 def set_gain(wave, gain):
     """

diff --git a/tests/test_acoustic_file.py b/tests/test_acoustic_file.py
@@ -4,23 +4,26 @@
 from tests import skip_unless_with_plots
 import pathlib
 import matplotlib.pyplot as plt
-
+import numpy as np
+import os
 plt.rcParams.update(plt.rcParamsDefault)
+# get relative path
+test_dir = os.path.dirname(__file__)
 
 # Hydrophone Setup
 # If Vpp is 2.0 then it means the wav is -1 to 1 directly related to V
 model = 'ST300HF'
 name = 'SoundTrap'
 serial_number = 67416073
-calibration_file = pathlib.Path("tests/test_data/calibration_data.xlsx")
+calibration_file = pathlib.Path(f"{test_dir}/test_data/calibration_data.xlsx")
 soundtrap = pyhy.soundtrap.SoundTrap(name=name, model=model, serial_number=serial_number,
                                      calibration_file=calibration_file, val='sensitivity', freq_col_id=1,
                                      val_col_id=29, start_data_id=6)
 
 
 class TestAcuFile(unittest.TestCase):
     def setUp(self) -> None:
-        self.acu_file = AcuFile('tests/test_data/67416073.210610033655.wav', soundtrap, 1)
+        self.acu_file = AcuFile(f'{test_dir}/test_data/67416073.210610033655.wav', soundtrap, 1)
 
     @skip_unless_with_plots()
     def test_plots(self):
@@ -39,3 +42,23 @@ def test_update_freq_cal(self):
         ds_psd_updated = self.acu_file.update_freq_cal(ds=ds_psd, data_var='band_density')
         print(ds_psd['band_density'].values)
         print(ds_psd_updated['band_density'].values)
+
+    @skip_unless_with_plots()
+    def test_overlapping_bins(self):
+        binsize, bin_overlap = 1, 0.2
+        ds_rms_overlap = self.acu_file.rms(binsize, bin_overlap)
+        ds_rms = self.acu_file.rms(binsize, 0)
+        fig,ax = plt.subplots()
+        ax.plot(ds_rms_overlap.datetime[0:100], ds_rms_overlap.rms[0:100], label='.50 overlap')
+        ax.plot(ds_rms.datetime[0:100], ds_rms.rms[0:100], label='no overlap')
+        ax.legend()
+        fig.show()
+        # compare output time step (dt) to expected time step
+        step = np.mean(np.diff(ds_rms_overlap.start_sample))
+        dt = step/self.acu_file.fs
+        expected_dt = binsize*(1-bin_overlap)
+        error = np.abs(dt-expected_dt)
+        # error should be less than soundfile dt (if rounded)
+        assert error<=(1/self.acu_file.fs)
+
+
diff --git a/tests/test_acoustic_indices.py b/tests/test_acoustic_indices.py
@@ -4,15 +4,18 @@
 import pypam.acoustic_indices
 import pyhydrophone as pyhy
 import numpy as np
+import os
 
+# get relative path
+test_dir = os.path.dirname(__file__)
 
 # Hydrophone Setup
 # If Vpp is 2.0 then it means the wav is -1 to 1 directly related to V
 model = 'ST300HF'
 name = 'SoundTrap'
 serial_number = 67416073
 soundtrap = pyhy.soundtrap.SoundTrap(name=name, model=model, serial_number=serial_number)
-acu_file = AcuFile(sfile='tests/test_data/67416073.210610033655.wav', hydrophone=soundtrap, p_ref=1)
+acu_file = AcuFile(sfile=f'{test_dir}/test_data/67416073.210610033655.wav', hydrophone=soundtrap, p_ref=1)
 
 
 class TestAcousticIndices(unittest.TestCase):

diff --git a/tests/test_acoustic_survey.py b/tests/test_acoustic_survey.py
@@ -2,22 +2,26 @@
 import pathlib
 import matplotlib.pyplot as plt
 import numpy as np
+import os
 
 from pypam.acoustic_survey import ASA
 import pyhydrophone as pyhy
 from tests import skip_unless_with_plots, with_plots
 
 plt.rcParams.update(plt.rcParamsDefault)
 
+# get relative path
+test_dir = os.path.dirname(__file__)
+
 # Data information
-folder_path = pathlib.Path('tests/test_data')
+folder_path = pathlib.Path(f'{test_dir}/test_data')
 
 # Hydrophone Setup
 # If Vpp is 2.0 then it means the wav is -1 to 1 directly related to V
 model = 'ST300HF'
 name = 'SoundTrap'
 serial_number = 67416073
-calibration_file = pathlib.Path("tests/test_data/calibration_data.xlsx")
+calibration_file = pathlib.Path(f"{test_dir}/test_data/calibration_data.xlsx")
 soundtrap = pyhy.soundtrap.SoundTrap(name=name, model=model, serial_number=serial_number,
                                      calibration_file=calibration_file, val='sensitivity', freq_col_id=1,
                                      val_col_id=29, start_data_id=6)