From 2aed3e7ee0033152170515d50becf6456ef02a89 Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Mon, 26 Feb 2024 03:57:09 -0500 Subject: [PATCH 01/22] cleaning and formating files form the ICEsat2_SI_tools folder --- .../ICEsat2_SI_tools/angle_optimizer.py | 260 ++-- .../ICEsat2_SI_tools/beam_stats.py | 5 - .../ICEsat2_SI_tools/convert_GPS_time.py | 179 +-- .../ICEsat2_SI_tools/convert_julian.py | 110 +- .../ICEsat2_SI_tools/filter_regrid.py | 393 +++--- .../ICEsat2_SI_tools/iotools.py | 19 +- .../ICEsat2_SI_tools/lanczos.py | 75 +- .../ICEsat2_SI_tools/read_ground_tracks.py | 112 +- .../sliderule_converter_tools.py | 1 - .../ICEsat2_SI_tools/spectral_estimates.py | 1152 ++++++++++------- .../ICEsat2_SI_tools/spicke_remover.py | 102 +- .../ICEsat2_SI_tools/wave_tools.py | 32 +- 12 files changed, 1325 insertions(+), 1115 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py b/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py index 7ec54aa2..e3e28b53 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py @@ -1,13 +1,15 @@ """ This library contains method, and classes used to search for the best angle given x,y data using single frequecy fits. """ + from numba import jit import numpy as np numba_parallel = False -def get_wavenumbers_polar( amp, angle_rad): + +def get_wavenumbers_polar(amp, angle_rad): """ inputs: @@ -18,108 +20,126 @@ def get_wavenumbers_polar( amp, angle_rad): wavenumber k,l """ import numpy as np + k0 = amp * np.cos(angle_rad) l0 = amp * np.sin(angle_rad) return k0, l0 -def wavemodel(XX, YY, ks, ls, amps, group_phase = 0): - import numpy as np +def wavemodel(XX, YY, ks, ls, amps, group_phase=0): - G = np.vstack([ np.cos(np.outer(XX, ks) + np.outer(YY, ls) ).T , np.sin(np.outer(XX, ks) + np.outer(YY, ls) ).T ] ).T + import numpy as np - #phase1 = np.random.rand(1, amp_list.size) * np.pi*2 - #phase = np.arange(0, amp_list.size) * np.pi/2 + G = np.vstack( + [ + np.cos(np.outer(XX, ks) + np.outer(YY, ls)).T, + np.sin(np.outer(XX, ks) + np.outer(YY, ls)).T, + ] + ).T - b = np.hstack([ np.cos(group_phase) * amps , np.sin(group_phase) * amps ]).squeeze() - z_model = (G @ b) + b = np.hstack([np.cos(group_phase) * amps, np.sin(group_phase) * amps]).squeeze() + z_model = G @ b return z_model -@jit(nopython=True, parallel= numba_parallel) -def wavemodel_single_wave(XX, YY, ks, ls, amps, group_phase = 0): - z_model = amps * np.cos(XX * ks + YY * ls + group_phase ) - return z_model#.squeeze() + +@jit(nopython=True, parallel=numba_parallel) +def wavemodel_single_wave(XX, YY, ks, ls, amps, group_phase=0): + z_model = amps * np.cos(XX * ks + YY * ls + group_phase) + return z_model -def get_z_model(x_positions, y_position, K_prime, K_amp, alpha_rad, group_phase): +def get_z_model(x_positions, y_position, K_prime, K_amp, alpha_rad, group_phase): - K_abs = K_prime / np.cos(alpha_rad) + K_abs = K_prime / np.cos(alpha_rad) k = K_abs * np.cos(alpha_rad) l = K_abs * np.sin(alpha_rad) - return wavemodel( x_positions,y_position, k, l, np.array(K_amp ), group_phase= group_phase) + return wavemodel( + x_positions, y_position, k, l, np.array(K_amp), group_phase=group_phase + ) + -@jit(nopython=True, parallel= False) -def get_z_model_single_wave(x_positions, y_position, K_prime, K_amp, alpha_rad, group_phase): +@jit(nopython=True, parallel=False) +def get_z_model_single_wave( + x_positions, y_position, K_prime, K_amp, alpha_rad, group_phase +): - K_abs = K_prime / np.cos(alpha_rad) + K_abs = K_prime 
/ np.cos(alpha_rad) k = K_abs * np.cos(alpha_rad) l = K_abs * np.sin(alpha_rad) - return wavemodel_single_wave( x_positions,y_position, k, l, K_amp, group_phase= group_phase) - + return wavemodel_single_wave( + x_positions, y_position, k, l, K_amp, group_phase=group_phase + ) -def objective_func(pars, x, y, z, test_flag= False , prior= None, prior_weight = 2 ): +def objective_func(pars, x, y, z, test_flag=False, prior=None, prior_weight=2): "objective function that returns the residual array" - z_model = get_z_model_single_wave(x, y, pars['K_prime'].value, pars['K_amp'].value, pars['alpha'].value,pars['phase'].value) + z_model = get_z_model_single_wave( + x, + y, + pars["K_prime"].value, + pars["K_amp"].value, + pars["alpha"].value, + pars["phase"].value, + ) if prior is not None: - a_0, a_std = prior['alpha'] - penalties = np.array([ (abs(a_0 - pars['alpha'] )**2 / a_std**2) ]) + a_0, a_std = prior["alpha"] + penalties = np.array([(abs(a_0 - pars["alpha"]) ** 2 / a_std**2)]) else: - penalties = np.array([0]) + penalties = np.array([0]) - cost =( abs(z - z_model) )**2 /z.std()**2 + cost = (abs(z - z_model)) ** 2 / z.std() ** 2 if test_flag: return z_model else: - return np.concatenate([cost , prior_weight * penalties]) + return np.concatenate([cost, prior_weight * penalties]) -# def gaussian_prior(x, x0, sigma): -# return np.exp(-np.power((x - x0)/sigma, 2.)/2.) - -def likelyhood_func(pars, x, y, z, z_error= None, test_flag= False , prior= None , prior_weight = 2): +def likelyhood_func( + pars, x, y, z, z_error=None, test_flag=False, prior=None, prior_weight=2 +): """ ---not well testet --- likelyhood function using log(p) """ # get model - z_model = get_z_model(x, y, pars['K_prime'], pars['K_amp'], pars['alpha'],pars['phase']) + z_model = get_z_model( + x, y, pars["K_prime"], pars["K_amp"], pars["alpha"], pars["phase"] + ) # define cost - cost_sqrt =( abs(z - z_model) )**2 + cost_sqrt = (abs(z - z_model)) ** 2 # estimate total variance if z_error is None: - tot_var = z.std()**2* 0.01 + z_model**2 + tot_var = z.std() ** 2 * 0.01 + z_model**2 else: - tot_var = z_error**2 + z_model**2 + tot_var = z_error**2 + z_model**2 - #cost_sqrt.sum()/tot_var.sum() - # (cost_sqrt/tot_var).sum() - # - # np.log(tot_var).sum() def simple_log_panelty(x, x0, sigma): - return -np.power((x - x0)/sigma, 2.)/2. 
+ return -np.power((x - x0) / sigma, 2.0) / 2.0 # try ot get prior if prior is not None: - a_0, a_std = prior['alpha'] + a_0, a_std = prior["alpha"] # this corresponds to the the panelty log( p(alpha) ) - penalties = simple_log_panelty(pars['alpha'], a_0,a_std ) + penalties = simple_log_panelty(pars["alpha"], a_0, a_std) else: - penalties = np.array([0]) + penalties = np.array([0]) if test_flag: return z_model else: - return - 0.5 * (cost_sqrt/tot_var + np.log(tot_var) ).sum() + prior_weight * penalties + return ( + -0.5 * (cost_sqrt / tot_var + np.log(tot_var)).sum() + + prior_weight * penalties + ) class sample_with_mcmc: @@ -156,6 +176,7 @@ class sample_with_mcmc: def __init__(self, params): import lmfit as LM + self.LM = LM self.set_parameters(params) @@ -165,14 +186,7 @@ def __init__(self, params): def set_objective_func(self, ofunc): self.objective_func = ofunc - # def objective_func(self, ): - # sn2 = 0.1**2 - # return - cost(params['x'], params['y']) + np.log(sn2) - - # def test_ojective_func(self, model_func): - # return self.objective_func(self.params, self.data, model_func, self.freq) - - def set_parameters(self, par_dict, verbose= False): + def set_parameters(self, par_dict, verbose=False): """ defines params object at inital seed for mcmc par_dict should contain: var_name : [min, max, nseed] @@ -181,97 +195,129 @@ def set_parameters(self, par_dict, verbose= False): params = self.LM.Parameters() var_seeds = list() - for k,I in par_dict.items(): - params.add(k, (I[0]+ I[1])/2, vary=True , min=I[0], max=I[1]) - - var_seeds.append( np.linspace(I[0],I[1], I[2])) + for k, I in par_dict.items(): + params.add(k, (I[0] + I[1]) / 2, vary=True, min=I[0], max=I[1]) + var_seeds.append(np.linspace(I[0], I[1], I[2])) if len(var_seeds) > 2: - raise ValueError('nor proframmed for 3d') + raise ValueError("nor proframmed for 3d") - self.nwalkers= int(var_seeds[0].size * var_seeds[1].size) + self.nwalkers = int(var_seeds[0].size * var_seeds[1].size) pxx, pyy = np.meshgrid(var_seeds[0], var_seeds[1]) - self.seeds = np.vstack([pxx.flatten(), pyy.flatten() ]).T + self.seeds = np.vstack([pxx.flatten(), pyy.flatten()]).T self.params = params if verbose: - print('Nwalker: ', self.nwalkers) - print('Seeds: ', self.seeds.shape) + print("Nwalker: ", self.nwalkers) + print("Seeds: ", self.seeds.shape) print(self.params) def test_objective_func(self): - return self.objective_func(self.params, *self.fitting_args, **self.fitting_kargs) + return self.objective_func( + self.params, *self.fitting_args, **self.fitting_kargs + ) - def sample(self, fitting_args= None , method='emcee', steps=100, verbose= True, **kargs): + def sample( + self, fitting_args=None, method="emcee", steps=100, verbose=True, **kargs + ): - fitting_args, fitting_kargs = self.fitting_args, self.fitting_kargs - # TODO: this funciton throws an error in CI. The nan_policy='omit' policiy was added to avoid this issue + fitting_args, fitting_kargs = self.fitting_args, self.fitting_kargs + # TODO: this funciton throws an error in CI. 
The nan_policy='omit' policiy was added to avoid this issue # according to the guidelines in https://lmfit.github.io/lmfit-py/faq.html#i-get-errors-from-nan-in-my-fit-what-can-i-do - self.fitter = self.LM.minimize(self.objective_func, self.params, method=method, - args=fitting_args, kws=fitting_kargs , - nwalkers=self.nwalkers, steps=steps, pos= self.seeds,nan_policy='omit' , **kargs) + self.fitter = self.LM.minimize( + self.objective_func, + self.params, + method=method, + args=fitting_args, + kws=fitting_kargs, + nwalkers=self.nwalkers, + steps=steps, + pos=self.seeds, + nan_policy="omit", + **kargs, + ) if verbose: print(self.LM.report_fit(self.fitter)) - print('results at self.fitter') + print("results at self.fitter") - def plot_sample(self, **kargs ): + def plot_sample(self, **kargs): import matplotlib.pyplot as plt + chain = self.chain() nwalkers = self.nwalkers for n in np.arange(nwalkers): - plt.plot(chain[:,n,1], chain[:,n,0] , '-', **kargs) - plt.plot(chain[:,n,1], chain[:,n,0] , '.', **kargs) + plt.plot(chain[:, n, 1], chain[:, n, 0], "-", **kargs) + plt.plot(chain[:, n, 1], chain[:, n, 0], ".", **kargs) + def optimize(self, fitting_args=None, method="dual_annealing", verbose=True): - def optimize(self, fitting_args= None , method='dual_annealing', verbose= True): + fitting_args = self.fitting_args + fitting_kargs = self.fitting_kargs - fitting_args = self.fitting_args - fitting_kargs = self.fitting_kargs - - self.fitter_optimize = self.LM.minimize(self.objective_func, self.params, method=method, - args=fitting_args, kws=fitting_kargs ) + self.fitter_optimize = self.LM.minimize( + self.objective_func, + self.params, + method=method, + args=fitting_args, + kws=fitting_kargs, + ) if verbose: print(self.LM.report_fit(self.fitter_optimize)) - print('results at self.fitter_optimize') + print("results at self.fitter_optimize") def plot_optimze(self, **kargs): import matplotlib.pyplot as plt - plt.plot(self.fitter_optimize.params['phase'].value, self.fitter_optimize.params['alpha'].value, '.', **kargs) + plt.plot( + self.fitter_optimize.params["phase"].value, + self.fitter_optimize.params["alpha"].value, + ".", + **kargs, + ) - def brute(self, fitting_args= None , method='brute', verbose= True, N_grid = 30): + def brute(self, fitting_args=None, method="brute", verbose=True, N_grid=30): - fitting_args = self.fitting_args - fitting_kargs = self.fitting_kargs + fitting_args = self.fitting_args + fitting_kargs = self.fitting_kargs - self.fitter_brute = self.LM.minimize(self.objective_func, self.params, method=method, - args=fitting_args, kws=fitting_kargs, Ns=N_grid ) + self.fitter_brute = self.LM.minimize( + self.objective_func, + self.params, + method=method, + args=fitting_args, + kws=fitting_kargs, + Ns=N_grid, + ) if verbose: print(self.LM.report_fit(self.fitter_brute)) - print('results at self.fitter_brute') - + print("results at self.fitter_brute") - def plot_brute(self, clevel = np.linspace(-3.2, 3.2, 30), **kargs): + def plot_brute(self, clevel=np.linspace(-3.2, 3.2, 30), **kargs): import matplotlib.pyplot as plt - fitter_brute =self.fitter_brute - - dd = (fitter_brute.brute_Jout- fitter_brute.brute_Jout.mean())/fitter_brute.brute_Jout.std() - plt.contourf(fitter_brute.brute_grid[1,:,:], fitter_brute.brute_grid[0,:,:], dd , clevel, cmap= plt.cm.YlGnBu_r ) - #plt.scatter(fitter_brute.brute_grid[1,:,:], fitter_brute.brute_grid[0,:,:], s=0.2, alpha= 0.4, color='black') - #plt.plot(fitter_brute.brute_x0[1], fitter_brute.brute_x0[0], **kargs) - x_name, y_name = 
list(fitter_brute.params.keys())[1], list(fitter_brute.params.keys())[0] + fitter_brute = self.fitter_brute + + dd = ( + fitter_brute.brute_Jout - fitter_brute.brute_Jout.mean() + ) / fitter_brute.brute_Jout.std() + plt.contourf( + fitter_brute.brute_grid[1, :, :], + fitter_brute.brute_grid[0, :, :], + dd, + clevel, + cmap=plt.cm.YlGnBu_r, + ) + + x_name, y_name = ( + list(fitter_brute.params.keys())[1], + list(fitter_brute.params.keys())[0], + ) plt.xlabel(x_name) plt.ylabel(y_name) - # plt.xlabel('Phase (rad)') - # plt.ylabel('Angle (rad)') - - - def chain(self, burn=None): "return results as nparray contains walk of each walker" if burn is not None: @@ -286,8 +332,9 @@ def flatchain(self, burn=None): else: return self.fitter.flatchain - def get_marginal_dist(self, var, var_dx, burn = None, plot_flag= False, normalize = True): - + def get_marginal_dist( + self, var, var_dx, burn=None, plot_flag=False, normalize=True + ): """ retrurn the marginal distribution from self.params object @@ -300,18 +347,17 @@ def get_marginal_dist(self, var, var_dx, burn = None, plot_flag= False, normaliz otherwise it returns the rar histogram """ - data = self.flatchain(burn) - #fitter.flatchain.loc[100:][var] - bins = np.arange(self.params[var].min,self.params[var].max+ var_dx,var_dx) + bins = np.arange(self.params[var].min, self.params[var].max + var_dx, var_dx) y_hist, _ = np.histogram(self.fitter.flatchain.loc[burn:][var], bins) - bins_pos = (bins[0:-1] + np.diff(bins)/2) + bins_pos = bins[0:-1] + np.diff(bins) / 2 if normalize: - y_hist = y_hist/var_dx/y_hist.sum() + y_hist = y_hist / var_dx / y_hist.sum() if plot_flag: import matplotlib.pyplot as plt + plt.stairs(y_hist, bins) return y_hist, bins, bins_pos diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py b/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py index 3f815e15..90473ffa 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py @@ -145,9 +145,6 @@ def plot_beam_statistics(D, high_beams, low_beams, col_dict, track_name=None): plt.title("low beams N", loc="left") plt.xlabel("along track distance (km)") - # plt.ylabel('Point density (m)') - - ax5 = plt.subplot(gs[0:2, 2]) lat_shift = 0 for k in low_beams: @@ -184,8 +181,6 @@ def plot_beam_statistics(D, high_beams, low_beams, col_dict, track_name=None): ## plot track stats basics for sliderules ATL06 output - - def plot_ATL06_track_data(G2, cdict): """ Plots the beam statistics in a 3 x 3 plot diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/convert_GPS_time.py b/src/icesat2_tracks/ICEsat2_SI_tools/convert_GPS_time.py index a1e491c8..7a2b7bc8 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/convert_GPS_time.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/convert_GPS_time.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -u""" +""" convert_GPS_time.py (10/2017) Return the calendar date and time for given GPS time. 
Based on Tiffany Summerscales's PHP conversion algorithm @@ -33,88 +33,109 @@ from .convert_julian import convert_julian import pdb -#-- PURPOSE: Define GPS leap seconds + +# -- PURPOSE: Define GPS leap seconds def get_leaps(): - leaps = [46828800, 78364801, 109900802, 173059203, 252028804, 315187205, - 346723206, 393984007, 425520008, 457056009, 504489610, 551750411, - 599184012, 820108813, 914803214, 1025136015, 1119744016, 1167264017] - return leaps + leaps = [ + 46828800, + 78364801, + 109900802, + 173059203, + 252028804, + 315187205, + 346723206, + 393984007, + 425520008, + 457056009, + 504489610, + 551750411, + 599184012, + 820108813, + 914803214, + 1025136015, + 1119744016, + 1167264017, + ] + return leaps + -#-- PURPOSE: Test to see if any GPS seconds are leap seconds +# -- PURPOSE: Test to see if any GPS seconds are leap seconds def is_leap(GPS_Time): - leaps = get_leaps() - Flag = np.zeros_like(GPS_Time, dtype=np.bool_) - for leap in leaps: - count = np.count_nonzero(np.floor(GPS_Time) == leap) - if (count > 0): - indices, = np.nonzero(np.floor(GPS_Time) == leap) - Flag[indices] = True - return Flag - -#-- PURPOSE: Count number of leap seconds that have passed for each GPS time + leaps = get_leaps() + Flag = np.zeros_like(GPS_Time, dtype=np.bool_) + for leap in leaps: + count = np.count_nonzero(np.floor(GPS_Time) == leap) + if count > 0: + (indices,) = np.nonzero(np.floor(GPS_Time) == leap) + Flag[indices] = True + return Flag + + +# -- PURPOSE: Count number of leap seconds that have passed for each GPS time def count_leaps(GPS_Time): - leaps = get_leaps() - #-- number of leap seconds prior to GPS_Time - n_leaps = np.zeros_like(GPS_Time, dtype=np.uint) - for i,leap in enumerate(leaps): - count = np.count_nonzero(GPS_Time >= leap) - if (count > 0): - indices, = np.nonzero(GPS_Time >= leap) - # print(indices) - # pdb.set_trace() - n_leaps[indices] += 1 - return n_leaps - -#-- PURPOSE: Convert UNIX Time to GPS Time + leaps = get_leaps() + # -- number of leap seconds prior to GPS_Time + n_leaps = np.zeros_like(GPS_Time, dtype=np.uint) + for i, leap in enumerate(leaps): + count = np.count_nonzero(GPS_Time >= leap) + if count > 0: + (indices,) = np.nonzero(GPS_Time >= leap) + n_leaps[indices] += 1 + return n_leaps + + +# -- PURPOSE: Convert UNIX Time to GPS Time def convert_UNIX_to_GPS(UNIX_Time): - #-- calculate offsets for UNIX times that occur during leap seconds - offset = np.zeros_like(UNIX_Time) - count = np.count_nonzero((UNIX_Time % 1) != 0) - if (count > 0): - indices, = np.nonzero((UNIX_Time % 1) != 0) - UNIX_Time[indices] -= 0.5 - offset[indices] = 1.0 - #-- convert UNIX_Time to GPS without taking into account leap seconds - #-- (UNIX epoch: Jan 1, 1970 00:00:00, GPS epoch: Jan 6, 1980 00:00:00) - GPS_Time = UNIX_Time - 315964800 - leaps = get_leaps() - #-- calculate number of leap seconds prior to GPS_Time - n_leaps = np.zeros_like(GPS_Time, dtype=np.uint) - for i,leap in enumerate(leaps): - count = np.count_nonzero(GPS_Time >= (leap - i)) - if (count > 0): - indices, = np.nonzero(GPS_Time >= (leap - i)) - n_leaps[indices] += 1 - #-- take into account leap seconds and offsets - GPS_Time += n_leaps + offset - return GPS_Time - -#-- PURPOSE: Convert GPS Time to UNIX Time + # -- calculate offsets for UNIX times that occur during leap seconds + offset = np.zeros_like(UNIX_Time) + count = np.count_nonzero((UNIX_Time % 1) != 0) + if count > 0: + (indices,) = np.nonzero((UNIX_Time % 1) != 0) + UNIX_Time[indices] -= 0.5 + offset[indices] = 1.0 + # -- convert UNIX_Time to GPS 
without taking into account leap seconds + # -- (UNIX epoch: Jan 1, 1970 00:00:00, GPS epoch: Jan 6, 1980 00:00:00) + GPS_Time = UNIX_Time - 315964800 + leaps = get_leaps() + # -- calculate number of leap seconds prior to GPS_Time + n_leaps = np.zeros_like(GPS_Time, dtype=np.uint) + for i, leap in enumerate(leaps): + count = np.count_nonzero(GPS_Time >= (leap - i)) + if count > 0: + (indices,) = np.nonzero(GPS_Time >= (leap - i)) + n_leaps[indices] += 1 + # -- take into account leap seconds and offsets + GPS_Time += n_leaps + offset + return GPS_Time + + +# -- PURPOSE: Convert GPS Time to UNIX Time def convert_GPS_to_UNIX(GPS_Time): - #-- convert GPS_Time to UNIX without taking into account leap seconds - #-- (UNIX epoch: Jan 1, 1970 00:00:00, GPS epoch: Jan 6, 1980 00:00:00) - UNIX_Time = GPS_Time + 315964800 - #-- number of leap seconds prior to GPS_Time - n_leaps = count_leaps(GPS_Time) - UNIX_Time -= n_leaps - #-- check if GPS Time is leap second - Flag = is_leap(GPS_Time) - if Flag.any(): - #-- for leap seconds: add a half second offset - indices, = np.nonzero(Flag) - UNIX_Time[indices] += 0.5 - return UNIX_Time - -#-- PURPOSE: convert from GPS time to calendar dates + # -- convert GPS_Time to UNIX without taking into account leap seconds + # -- (UNIX epoch: Jan 1, 1970 00:00:00, GPS epoch: Jan 6, 1980 00:00:00) + UNIX_Time = GPS_Time + 315964800 + # -- number of leap seconds prior to GPS_Time + n_leaps = count_leaps(GPS_Time) + UNIX_Time -= n_leaps + # -- check if GPS Time is leap second + Flag = is_leap(GPS_Time) + if Flag.any(): + # -- for leap seconds: add a half second offset + (indices,) = np.nonzero(Flag) + UNIX_Time[indices] += 0.5 + return UNIX_Time + + +# -- PURPOSE: convert from GPS time to calendar dates def convert_GPS_time(GPS_Time, OFFSET=0.0): - #-- convert from standard GPS time to UNIX time accounting for leap seconds - #-- and adding the specified offset to GPS_Time - UNIX_Time = convert_GPS_to_UNIX(np.array(GPS_Time) + OFFSET) - #-- calculate Julian date from UNIX time and convert into calendar dates - #-- UNIX time: seconds from 1970-01-01 00:00:00 UTC - julian_date = (UNIX_Time/86400.0) + 2440587.500000 - cal_date = convert_julian(julian_date) - #-- include UNIX times in output - cal_date['UNIX'] = UNIX_Time - #-- return the calendar dates and UNIX time - return cal_date + # -- convert from standard GPS time to UNIX time accounting for leap seconds + # -- and adding the specified offset to GPS_Time + UNIX_Time = convert_GPS_to_UNIX(np.array(GPS_Time) + OFFSET) + # -- calculate Julian date from UNIX time and convert into calendar dates + # -- UNIX time: seconds from 1970-01-01 00:00:00 UTC + julian_date = (UNIX_Time / 86400.0) + 2440587.500000 + cal_date = convert_julian(julian_date) + # -- include UNIX times in output + cal_date["UNIX"] = UNIX_Time + # -- return the calendar dates and UNIX time + return cal_date diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/convert_julian.py b/src/icesat2_tracks/ICEsat2_SI_tools/convert_julian.py index 74b3fd81..b5d6c7bc 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/convert_julian.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/convert_julian.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -u""" +""" convert_julian.py Written by Tyler Sutterley (10/2017) @@ -50,60 +50,62 @@ """ import numpy as np -def convert_julian(JD, ASTYPE=None, FORMAT='dict'): - #-- convert to array if only a single value was imported - if (np.ndim(JD) == 0): - JD = np.array([JD]) - SINGLE_VALUE = True - else: - SINGLE_VALUE = False - JDO = np.floor(JD + 0.5) - 
C = np.zeros_like(JD) - #-- calculate C for dates before and after the switch to Gregorian - IGREG = 2299161.0 - ind1, = np.nonzero(JDO < IGREG) - C[ind1] = JDO[ind1] + 1524.0 - ind2, = np.nonzero(JDO >= IGREG) - B = np.floor((JDO[ind2] - 1867216.25)/36524.25) - C[ind2] = JDO[ind2] + B - np.floor(B/4.0) + 1525.0 - #-- calculate coefficients for date conversion - D = np.floor((C - 122.1)/365.25) - E = np.floor((365.0 * D) + np.floor(D/4.0)) - F = np.floor((C - E)/30.6001) - #-- calculate day, month, year and hour - DAY = np.floor(C - E + 0.5) - np.floor(30.6001*F) - MONTH = F - 1.0 - 12.0*np.floor(F/14.0) - YEAR = D - 4715.0 - np.floor((7.0+MONTH)/10.0) - HOUR = np.floor(24.0*(JD + 0.5 - JDO)) - #-- calculate minute and second - G = (JD + 0.5 - JDO) - HOUR/24.0 - MINUTE = np.floor(G*1440.0) - SECOND = (G - MINUTE/1440.0) * 86400.0 +def convert_julian(JD, ASTYPE=None, FORMAT="dict"): + # -- convert to array if only a single value was imported + if np.ndim(JD) == 0: + JD = np.array([JD]) + SINGLE_VALUE = True + else: + SINGLE_VALUE = False + + JDO = np.floor(JD + 0.5) + C = np.zeros_like(JD) + # -- calculate C for dates before and after the switch to Gregorian + IGREG = 2299161.0 + (ind1,) = np.nonzero(JDO < IGREG) + C[ind1] = JDO[ind1] + 1524.0 + (ind2,) = np.nonzero(JDO >= IGREG) + B = np.floor((JDO[ind2] - 1867216.25) / 36524.25) + C[ind2] = JDO[ind2] + B - np.floor(B / 4.0) + 1525.0 + # -- calculate coefficients for date conversion + D = np.floor((C - 122.1) / 365.25) + E = np.floor((365.0 * D) + np.floor(D / 4.0)) + F = np.floor((C - E) / 30.6001) + # -- calculate day, month, year and hour + DAY = np.floor(C - E + 0.5) - np.floor(30.6001 * F) + MONTH = F - 1.0 - 12.0 * np.floor(F / 14.0) + YEAR = D - 4715.0 - np.floor((7.0 + MONTH) / 10.0) + HOUR = np.floor(24.0 * (JD + 0.5 - JDO)) + # -- calculate minute and second + G = (JD + 0.5 - JDO) - HOUR / 24.0 + MINUTE = np.floor(G * 1440.0) + SECOND = (G - MINUTE / 1440.0) * 86400.0 - #-- convert all variables to output type (from float) - if ASTYPE is not None: - YEAR = YEAR.astype(ASTYPE) - MONTH = MONTH.astype(ASTYPE) - DAY = DAY.astype(ASTYPE) - HOUR = HOUR.astype(ASTYPE) - MINUTE = MINUTE.astype(ASTYPE) - SECOND = SECOND.astype(ASTYPE) + # -- convert all variables to output type (from float) + if ASTYPE is not None: + YEAR = YEAR.astype(ASTYPE) + MONTH = MONTH.astype(ASTYPE) + DAY = DAY.astype(ASTYPE) + HOUR = HOUR.astype(ASTYPE) + MINUTE = MINUTE.astype(ASTYPE) + SECOND = SECOND.astype(ASTYPE) - #-- if only a single value was imported initially: remove singleton dims - if SINGLE_VALUE: - YEAR = YEAR.item(0) - MONTH = MONTH.item(0) - DAY = DAY.item(0) - HOUR = HOUR.item(0) - MINUTE = MINUTE.item(0) - SECOND = SECOND.item(0) + # -- if only a single value was imported initially: remove singleton dims + if SINGLE_VALUE: + YEAR = YEAR.item(0) + MONTH = MONTH.item(0) + DAY = DAY.item(0) + HOUR = HOUR.item(0) + MINUTE = MINUTE.item(0) + SECOND = SECOND.item(0) - #-- return date variables in output format (default python dictionary) - if (FORMAT == 'dict'): - return dict(year=YEAR, month=MONTH, day=DAY, - hour=HOUR, minute=MINUTE, second=SECOND) - elif (FORMAT == 'tuple'): - return (YEAR, MONTH, DAY, HOUR, MINUTE, SECOND) - elif (FORMAT == 'zip'): - return zip(YEAR, MONTH, DAY, HOUR, MINUTE, SECOND) + # -- return date variables in output format (default python dictionary) + if FORMAT == "dict": + return dict( + year=YEAR, month=MONTH, day=DAY, hour=HOUR, minute=MINUTE, second=SECOND + ) + elif FORMAT == "tuple": + return (YEAR, MONTH, DAY, HOUR, 
MINUTE, SECOND) + elif FORMAT == "zip": + return zip(YEAR, MONTH, DAY, HOUR, MINUTE, SECOND) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/filter_regrid.py b/src/icesat2_tracks/ICEsat2_SI_tools/filter_regrid.py index 1c276853..77766490 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/filter_regrid.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/filter_regrid.py @@ -1,55 +1,48 @@ - import numpy as np from numba import jit -def correct_heights(T03, T03c, coord = 'delta_time'): +def correct_heights(T03, T03c, coord="delta_time"): """ returns the corrected photon heigts in T03 given SSSH approxiamtion 'dem_h' in T03c """ - T03['heights_c']= T03['heights'] - np.interp( T03[coord],T03c[coord], T03c['dem_h'] ) + T03["heights_c"] = T03["heights"] - np.interp( + T03[coord], T03c[coord], T03c["dem_h"] + ) return T03 -# def track_type_beam(hdf5_file): -# """ -# Returns True if track is acending -# hdf5_file is a hdf5 beam file -# -# sc_orient - spacecraft orientation -# This parameter tracks the spacecraft orientation between ‘forward’ and ‘backward’ orientations, to allow mapping between ATLAS hardware and the beam orientation on the ground. Forward == 1; backward == 0; transition ==2. -# -# -# """ -# return hdf5_file['orbit_info/sc_orient'][:][0] ==0 - -def track_pole_ward_file(hdf5_file, product='ALT03'): +def track_pole_ward_file(hdf5_file, product="ALT03"): """ Returns true if track goes poleward hdf5_file is a an HFD5 object in read mode """ - if product == 'ALT03': - T_lat = hdf5_file['gt1r/geolocation/reference_photon_lat'][:] - T_time = hdf5_file['gt1r/geolocation/delta_time'][:] - elif product == 'ALT10': - T_lat = hdf5_file['gt1r/freeboard_beam_segment/latitude'][:] - T_time = hdf5_file['gt1r/freeboard_beam_segment/delta_time'][:] - #return ( T_lat[T_time.argmax()] - T_lat[T_time.argmin()] ) < 0 - print('1st lat =' + str(abs(T_lat[T_time.argmin()])) , ';last lat =' + str(abs(T_lat[T_time.argmax()])) ) + if product == "ALT03": + T_lat = hdf5_file["gt1r/geolocation/reference_photon_lat"][:] + T_time = hdf5_file["gt1r/geolocation/delta_time"][:] + elif product == "ALT10": + T_lat = hdf5_file["gt1r/freeboard_beam_segment/latitude"][:] + T_time = hdf5_file["gt1r/freeboard_beam_segment/delta_time"][:] + + print( + "1st lat =" + str(abs(T_lat[T_time.argmin()])), + ";last lat =" + str(abs(T_lat[T_time.argmax()])), + ) return abs(T_lat[T_time.argmax()]) > abs(T_lat[T_time.argmin()]) - def track_type(T): """ Returns if track acending or desending T is a pandas table """ - #T = B[k] - #T = B[beams_list[0]] - return (T['lats'].iloc[T['delta_time'].argmax()] - T['lats'].iloc[T['delta_time'].argmin()] ) < 0 + return ( + T["lats"].iloc[T["delta_time"].argmax()] + - T["lats"].iloc[T["delta_time"].argmin()] + ) < 0 + def lat_min_max_extended(B, beams_list, accent=None): """ @@ -62,57 +55,63 @@ def lat_min_max_extended(B, beams_list, accent=None): returns: min_lat, max_lat, accent min and max latitudes of the beams, (True/False) True if the track is accending """ - #B, beams_list = B , high_beams - accent = regrid.track_type( B[beams_list[0]] ) if accent is None else accent - if B[beams_list[0]]['lats'].iloc[0] < 0: - hemis = 'SH' + accent = regrid.track_type(B[beams_list[0]]) if accent is None else accent + + if B[beams_list[0]]["lats"].iloc[0] < 0: + hemis = "SH" else: - hemis = 'NH' + hemis = "NH" - track_pos_start, track_pos_end= list(), list() + track_pos_start, track_pos_end = list(), list() for k in beams_list: - if (hemis == 'SH'): - track_pos_start.append( B[k].loc[B[k]['lats'].argmax()][ ['lats', 
'lons']] ) - track_pos_end.append( B[k].loc[B[k]['lats'].argmin()][ ['lats', 'lons']] ) + if hemis == "SH": + track_pos_start.append(B[k].loc[B[k]["lats"].argmax()][["lats", "lons"]]) + track_pos_end.append(B[k].loc[B[k]["lats"].argmin()][["lats", "lons"]]) else: - track_pos_start.append( B[k].loc[B[k]['lats'].argmin()][ ['lats', 'lons']] ) - track_pos_end.append( B[k].loc[B[k]['lats'].argmax()][ ['lats', 'lons']] ) - + track_pos_start.append(B[k].loc[B[k]["lats"].argmin()][["lats", "lons"]]) + track_pos_end.append(B[k].loc[B[k]["lats"].argmax()][["lats", "lons"]]) track_lat_start, track_lat_end = list(), list() track_lon_start, track_lon_end = list(), list() for ll in track_pos_start: - track_lat_start.append(ll['lats']) - track_lon_start.append(ll['lons']) - + track_lat_start.append(ll["lats"]) + track_lon_start.append(ll["lons"]) for ll in track_pos_end: - track_lat_end.append(ll['lats']) - track_lon_end.append(ll['lons']) - - # track_lat_start.append( B[k]['lats'].min() ) - # track_lat_end.append( B[k]['lats'].max() ) - # - # track_lon_left.append(B[k]['lons'].min()) - # track_lon_right.append(B[k]['lons'].max()) + track_lat_end.append(ll["lats"]) + track_lon_end.append(ll["lons"]) if accent: track_lon_start - #track_lat_start.min(), track_lon_right.max() - - if (hemis == 'SH') & accent: - return [max(track_lat_start) , min(track_lat_end)], [max(track_lon_start), min(track_lon_end)], accent # accenting SH mean start is in the top right - elif (hemis == 'SH') & ~accent: - return [max(track_lat_start) , min(track_lat_end)], [min(track_lon_start), max(track_lon_end)], accent # decent SH mean start is in the top left - elif (hemis == 'NH') & accent: - return [min(track_lat_start) , max(track_lat_end)], [min(track_lon_start), max(track_lon_end)], accent # accent NH mean start is in the lower left - elif (hemis == 'NH') & ~accent: - return [min(track_lat_start) , max(track_lat_end)], [max(track_lon_start), min(track_lon_end)], accent # decent NH mean start is in the lower right - else: - raise ValueError('some defintions went wrong') + if (hemis == "SH") & accent: + return ( + [max(track_lat_start), min(track_lat_end)], + [max(track_lon_start), min(track_lon_end)], + accent, + ) # accenting SH mean start is in the top right + elif (hemis == "SH") & ~accent: + return ( + [max(track_lat_start), min(track_lat_end)], + [min(track_lon_start), max(track_lon_end)], + accent, + ) # decent SH mean start is in the top left + elif (hemis == "NH") & accent: + return ( + [min(track_lat_start), max(track_lat_end)], + [min(track_lon_start), max(track_lon_end)], + accent, + ) # accent NH mean start is in the lower left + elif (hemis == "NH") & ~accent: + return ( + [min(track_lat_start), max(track_lat_end)], + [max(track_lon_start), min(track_lon_end)], + accent, + ) # decent NH mean start is in the lower right + else: + raise ValueError("some defintions went wrong") def lat_min_max(B, beams_list, accent=None): @@ -126,25 +125,26 @@ def lat_min_max(B, beams_list, accent=None): returns: min_lat, max_lat, accent min and max latitudes of the beams, (True/False) True if the track is accending """ - #B, beams_list = B , high_beams - accent = track_type( B[beams_list[0]] ) if accent is None else accent - if B[beams_list[0]]['lats'].iloc[0] < 0: - hemis = 'SH' + accent = track_type(B[beams_list[0]]) if accent is None else accent + + if B[beams_list[0]]["lats"].iloc[0] < 0: + hemis = "SH" else: - hemis = 'NH' + hemis = "NH" - track_lat_mins, track_lat_maxs= list(), list() + track_lat_mins, track_lat_maxs = list(), 
list() for k in beams_list: - track_lat_mins.append( B[k]['lats'].min() ) - track_lat_maxs.append( B[k]['lats'].max() ) + track_lat_mins.append(B[k]["lats"].min()) + track_lat_maxs.append(B[k]["lats"].max()) - if hemis == 'SH': - return max(track_lat_maxs) , min(track_lat_mins), accent + if hemis == "SH": + return max(track_lat_maxs), min(track_lat_mins), accent else: return min(track_lat_mins), max(track_lat_maxs), accent -def derive_axis(TT, lat_lims = None): + +def derive_axis(TT, lat_lims=None): """ returns TT distance along track 'dist' in meters input: @@ -153,43 +153,38 @@ def derive_axis(TT, lat_lims = None): returns: TT with x,y,dist and order by dist """ - #TT, lat_lims = B[key], lat_lims_high + # derive distances in meters - r_e= 6.3710E+6 - dy= r_e*2*np.pi/360.0 - #deglon_in_m= np.cos(T2['lats']*np.pi/180.0)*dy + r_e = 6.3710e6 + dy = r_e * 2 * np.pi / 360.0 # either use position of the 1st photon or use defined start latitude if lat_lims is None: - TT['y']=(TT['lats'].max() - TT['lats']) *dy + TT["y"] = (TT["lats"].max() - TT["lats"]) * dy else: - TT['y']=(lat_lims[0] - TT['lats']) *dy + TT["y"] = (lat_lims[0] - TT["lats"]) * dy - #TT['y'] = (TT['lats']) *dy - - - if (lat_lims[2] == True): + if lat_lims[2] == True: # accending track - lon_min = TT['lons'].max() + lon_min = TT["lons"].max() else: # decending track - lon_min = TT['lons'].min() + lon_min = TT["lons"].min() - #print(lon_min) - TT['x'] = (TT['lons'] - lon_min) * np.cos( TT['lats']*np.pi/180.0 ) * dy - #TT['x'] = (TT['lons'] ) * np.cos( TT['lats']*np.pi/180.0 ) * dy - TT['dist'] = np.sqrt(TT['x']**2 + TT['y']**2) + TT["x"] = (TT["lons"] - lon_min) * np.cos(TT["lats"] * np.pi / 180.0) * dy + TT["dist"] = np.sqrt(TT["x"] ** 2 + TT["y"] ** 2) # set 1st dist to 0, not used if global limits are used if lat_lims is None: - TT['dist']= TT['dist']- TT['dist'].min() + TT["dist"] = TT["dist"] - TT["dist"].min() else: - TT['dist']= TT['dist']#- lat_lims[0] + TT["dist"] = TT["dist"] # - lat_lims[0] - TT=TT.sort_values(by='dist') + TT = TT.sort_values(by="dist") return TT -def reduce_to_height_distance(TT, key, dx=1, lat_lims = None): + +def reduce_to_height_distance(TT, key, dx=1, lat_lims=None): """ interpolates key (photos heights) to regular grid using 'dist' in pandas table TT. 
dx is the interpolation interval @@ -200,17 +195,18 @@ def reduce_to_height_distance(TT, key, dx=1, lat_lims = None): x1, y1 position, height """ from scipy.interpolate import interp1d + if type(dx) is np.ndarray: x1 = dx else: - x1 = np.arange(0,TT['dist'].max(), dx) - y1 = np.interp(x1, TT['dist'], TT[key] ) + x1 = np.arange(0, TT["dist"].max(), dx) + y1 = np.interp(x1, TT["dist"], TT[key]) return x1, y1 + # this is not need anymore def poly_correct(x, y, poly_order=7, plot_flag=False): - """ subtracts a fitted polynom to y inputs: @@ -220,38 +216,58 @@ def poly_correct(x, y, poly_order=7, plot_flag=False): returns y' y - polynom fit """ - z = np.polyfit(x , y , poly_order) + z = np.polyfit(x, y, poly_order) p = np.poly1d(z) if plot_flag: - plt.plot(x,y, '.', markersize=0.2,) - plt.plot(x, p(x), '-', markersize=0.2,) - #return z + plt.plot( + x, + y, + ".", + markersize=0.2, + ) + plt.plot( + x, + p(x), + "-", + markersize=0.2, + ) + return y - p(x) ### regridding -#@jit(nopython=True) -def get_mode(y, bins = np.arange(-5,5, 0.1)): + +# @jit(nopython=True) +def get_mode(y, bins=np.arange(-5, 5, 0.1)): "returns modes of histogram of y defined by bins" - hist, xbin = np.histogram(y, bins = bins ) + hist, xbin = np.histogram(y, bins=bins) return xbin[hist.argmax()] -@jit(nopython=True, parallel= False) + +@jit(nopython=True, parallel=False) def weighted_mean(x_rel, y): "returns the gaussian weighted mean for stencil" - #@jit(nopython=True, parallel= False) + # @jit(nopython=True, parallel= False) def weight_fnk(x): "returns gaussian weight given the distance to the center x" - return np.exp(- (x/.5)**2 ) + return np.exp(-((x / 0.5) ** 2)) w = weight_fnk(x_rel) - return np.sum(w*y)/np.sum(w) + return np.sum(w * y) / np.sum(w) -# this function is applied to beam: -def get_stencil_stats_shift( T2, stencil_iter, key_var , key_x_coord, stancil_width , Nphoton_min = 5, plot_flag= False): +# this function is applied to beam: +def get_stencil_stats_shift( + T2, + stencil_iter, + key_var, + key_x_coord, + stancil_width, + Nphoton_min=5, + plot_flag=False, +): """ T2 pd.Dataframe with beam data needs at least 'dist' and key stencil_iter np.array that constains the stancil boundaries and center [left boundary, center, right boundary] @@ -272,42 +288,35 @@ def get_stencil_stats_shift( T2, stencil_iter, key_var , key_x_coord, stancil_w """ import pandas as pd - stencil_1 = stencil_iter[:, ::2] - stencil_1half = stencil_iter[:, 1::2] - def calc_stencil_stats(group, key, key_x_coord, stancil_width, stancils): + stencil_1 = stencil_iter[:, ::2] + stencil_1half = stencil_iter[:, 1::2] - "returns stats per stencil" - #import time - #tstart = time.time() - Nphoton = group.shape[0] - istancil = group['x_bins'].iloc[int(Nphoton/2)] - stencil_center = stancils[1, istancil-1] + def calc_stencil_stats(group, key, key_x_coord, stancil_width, stancils): + "returns stats per stencil" + Nphoton = group.shape[0] + istancil = group["x_bins"].iloc[int(Nphoton / 2)] + stencil_center = stancils[1, istancil - 1] if Nphoton > Nphoton_min: - x_rel = (group[key_x_coord] - stencil_center)/ stancil_width - y = group[key] - - #Tmedian[key+ '_weighted_mean'] + x_rel = (group[key_x_coord] - stencil_center) / stancil_width + y = group[key] key_weighted_mean = weighted_mean(np.array(x_rel), np.array(y)) - key_std = y.std() - key_mode = get_mode(y) + key_std = y.std() + key_mode = get_mode(y) else: - - #Nphoton = 0 key_weighted_mean = np.nan - #Tmedian[key+ '_mode'] = np.nan - key_std = np.nan - key_mode = np.nan - - #Tweight = 
pd.DataFrame([key_weighted_mean, key_std, Nphoton], index= [key+ '_weighted_mean', key+ '_std', 'N_photos' ]) - Tweight = pd.Series([key_weighted_mean, key_std, Nphoton, key_mode], index= [key+ '_weighted_mean', key+ '_std', 'N_photos', key+ '_mode' ]) + key_std = np.nan + key_mode = np.nan + Tweight = pd.Series( + [key_weighted_mean, key_std, Nphoton, key_mode], + index=[key + "_weighted_mean", key + "_std", "N_photos", key + "_mode"], + ) - #print ( str( istancil) + ' s' + str(time.time() - tstart)) return Tweight.T T_sets = list() @@ -315,55 +324,56 @@ def calc_stencil_stats(group, key, key_x_coord, stancil_width, stancils): for stancil_set in [stencil_1, stencil_1half]: # select photons that are in bins - Ti_sel = T2[ (stancil_set[0,0] < T2['x']) & (T2['x'] < stancil_set[2,-1]) ] + Ti_sel = T2[(stancil_set[0, 0] < T2["x"]) & (T2["x"] < stancil_set[2, -1])] # put each photon in a bin - bin_labels = np.searchsorted(stancil_set[0,:], Ti_sel['x']) - #bin_labels2 = np.digitize( Ti_sel['x'], stancil_set[0,:], right = True ) + bin_labels = np.searchsorted(stancil_set[0, :], Ti_sel["x"]) - Ti_sel['x_bins'] =bin_labels + Ti_sel["x_bins"] = bin_labels # group data by this bin - Ti_g = Ti_sel.groupby(Ti_sel['x_bins'], dropna= False , as_index = True )#.median() - + Ti_g = Ti_sel.groupby(Ti_sel["x_bins"], dropna=False, as_index=True) # take median of the data Ti_median = Ti_g.median() # apply weighted mean and count photons - args = [ key_var, key_x_coord, stancil_width, stancil_set] + args = [key_var, key_x_coord, stancil_width, stancil_set] - #%timeit -r 1 -n 1 Ti_weight = Ti_g.apply(calc_stencil_stats, *args) - Ti_weight = Ti_g.apply(calc_stencil_stats, *args) + Ti_weight = Ti_g.apply(calc_stencil_stats, *args) - #merge both datasets - T_merged = pd.concat( [Ti_median, Ti_weight], axis= 1) + # merge both datasets + T_merged = pd.concat([Ti_median, Ti_weight], axis=1) # rename columns - T_merged = T_merged.rename(columns={key_var: key_var+'_median', key_x_coord: key_x_coord+ '_median'}) - T_merged[ key_var+ '_median'][ np.isnan(T_merged[key_var+ '_std']) ] = np.nan # replace median calculation with nans + T_merged = T_merged.rename( + columns={key_var: key_var + "_median", key_x_coord: key_x_coord + "_median"} + ) + T_merged[key_var + "_median"][ + np.isnan(T_merged[key_var + "_std"]) + ] = np.nan # replace median calculation with nans # set stancil center an new x-coodinate - T_merged['x'] = stancil_set[1, T_merged.index-1] + T_merged["x"] = stancil_set[1, T_merged.index - 1] T_sets.append(T_merged) # mergeboth stancils - T3 = pd.concat(T_sets ).sort_values(by= 'x').reset_index() + T3 = pd.concat(T_sets).sort_values(by="x").reset_index() if plot_flag: - Ti_1, Ti_1half = T_sets - - plt.plot( Ti_1half.iloc[0:60].x, Ti_1half.iloc[0:60]['heights_c_median'], '.' ) - plt.plot( Ti_1.iloc[0:60].x, Ti_1.iloc[0:60]['heights_c_median'], '.' 
) - plt.plot( T3.iloc[0:120].x, T3.iloc[0:120]['heights_c_median'], '-' ) + Ti_1, Ti_1half = T_sets + plt.plot(Ti_1half.iloc[0:60].x, Ti_1half.iloc[0:60]["heights_c_median"], ".") + plt.plot(Ti_1.iloc[0:60].x, Ti_1.iloc[0:60]["heights_c_median"], ".") + plt.plot(T3.iloc[0:120].x, T3.iloc[0:120]["heights_c_median"], "-") return T3 # this function is applied to beam: -#old version -def get_stencil_stats(T2, stencil_iter, key , key_x_coord, stancil_width , Nphoton_min = 5, map_func=None): - +# old version +def get_stencil_stats( + T2, stencil_iter, key, key_x_coord, stancil_width, Nphoton_min=5, map_func=None +): """ T2 pd.DAtaframe with beam data needs at least 'dist' and key stencil_iter iterable that constains the stancil boundaries and center [left boundary, center, right boundary] @@ -396,67 +406,64 @@ def calc_stencil_stats(istencil): "returns stats per stencil" tstart = time.time() - i_mask = (x_data >= istencil[0]) & (x_data < istencil[2]) - Nphoton = sum(i_mask) + i_mask = (x_data >= istencil[0]) & (x_data < istencil[2]) + Nphoton = sum(i_mask) if Nphoton < Nphoton_min: Tmedian = T2[i_mask].median() - Tmedian[key+ '_weighted_mean'] = np.nan - Tmedian[key+ '_mode'] = np.nan - Tmedian['N_photos'] = Nphoton - Tmedian[key+ '_std'] = np.nan + Tmedian[key + "_weighted_mean"] = np.nan + Tmedian[key + "_mode"] = np.nan + Tmedian["N_photos"] = Nphoton + Tmedian[key + "_std"] = np.nan return istencil[1], Tmedian + x_rel = (x_data[i_mask] - istencil[1]) / stancil_width + y = y_data[i_mask] - x_rel = (x_data[i_mask] - istencil[1])/ stancil_width - y = y_data[i_mask] + Tmedian = T2[i_mask].median() + Tmedian[key + "_weighted_mean"] = weighted_mean(x_rel, y) + Tmedian[key + "_mode"] = get_mode(y) + Tmedian["N_photos"] = Nphoton + Tmedian[key + "_std"] = y.std() - Tmedian = T2[i_mask].median() - Tmedian[key+ '_weighted_mean'] = weighted_mean(x_rel, y) - Tmedian[key+ '_mode'] = get_mode(y) - Tmedian['N_photos'] = Nphoton - Tmedian[key+ '_std'] = y.std() - #Tmedian[key+ '_median'][ np.isnan(Tmedian[key+ 'std']) ]= np.nan # replace median calculation with nans - print ( str( istencil[1]) + ' s' + str(time.time() - tstart)) + print(str(istencil[1]) + " s" + str(time.time() - tstart)) return istencil[1], Tmedian # apply func to all stancils map_func = map if map_func is None else map_func - D_filt = dict(map_func(calc_stencil_stats, stencil_iter)) - - DF_filt = pd.DataFrame.from_dict(D_filt, orient='index') - DF_filt = DF_filt.rename(columns={key: key+'_median', key_x_coord: 'median_'+key_x_coord}) - DF_filt[ key+ '_median'][ np.isnan(DF_filt[key+ '_std']) ] = np.nan # replace median calculation with nans + D_filt = dict(map_func(calc_stencil_stats, stencil_iter)) + + DF_filt = pd.DataFrame.from_dict(D_filt, orient="index") + DF_filt = DF_filt.rename( + columns={key: key + "_median", key_x_coord: "median_" + key_x_coord} + ) + DF_filt[key + "_median"][ + np.isnan(DF_filt[key + "_std"]) + ] = np.nan # replace median calculation with nans DF_filt[key_x_coord] = DF_filt.index - DF_filt = DF_filt.reset_index() + DF_filt = DF_filt.reset_index() return DF_filt -# %% old version -# define common dist_grid: -#dx= 5 # 2 * resolution in meters, datapoint +-dx are used to take the mean -#dist_grid = np.arange( np.nanmin(dist_list[:, 0], 0) , np.nanmax(dist_list[:, 1], 0), dx ) # derive bin means def bin_means(T2, dist_grid): - dF_mean = pd.DataFrame(index =T2.columns) - ilim = int(len(dist_grid)) - N_i = list() + dF_mean = pd.DataFrame(index=T2.columns) + ilim = int(len(dist_grid)) + N_i = list() - for i in 
np.arange(1,ilim-1, 1): - if i % 5000 ==0: + for i in np.arange(1, ilim - 1, 1): + if i % 5000 == 0: print(i) - i_mask=(T2['dist'] >= dist_grid[i-1]) & (T2['dist'] < dist_grid[i+1]) - #if ( (T2['dist'] >= dist_grid[i-1]) & (T2['dist'] < dist_grid[i+1]) ).sum() > 0: + i_mask = (T2["dist"] >= dist_grid[i - 1]) & (T2["dist"] < dist_grid[i + 1]) dF_mean[i] = T2[i_mask].mean() - #dF_median[i] = T2[i_mask].median() N_i.append(i_mask.sum()) - dF_mean = dF_mean.T - dF_mean['N_photos'] = N_i - dF_mean['dist'] = dist_grid[np.arange(1,ilim-1, 1)] + dF_mean = dF_mean.T + dF_mean["N_photos"] = N_i + dF_mean["dist"] = dist_grid[np.arange(1, ilim - 1, 1)] return dF_mean diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py index 45444490..3fb63f35 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py @@ -37,9 +37,6 @@ def init_from_input(arguments): else: track_name = arguments[1] batch_key = arguments[2] - # $(hemisphere) $(coords) $(config) - - # print("read vars from file: " + str(arguments[1])) if len(arguments) >= 4: if arguments[3] == "True": @@ -49,16 +46,9 @@ def init_from_input(arguments): else: test_flag = arguments[3] - # print("test_flag found, test_flag= " + str(test_flag)) else: test_flag = False - # TODO: print statements to be handled with logger - # # print(track_name) - - # print("----- batch =" + batch_key) - # print("----- test_flag: " + str(test_flag)) - return track_name, batch_key, test_flag @@ -193,7 +183,7 @@ def set_ATL03_trackname(self): def set_ATL10_trackname(self): block1 = (self.YY, self.MM, self.DD) block1b = (self.HH, self.MN, self.SS) - block2 = (self.TRK, self.CYC, "01") # granule is alwasy '01' for ATL10 + block2 = (self.TRK, self.CYC, "01") if self.RL is "": raise ValueError("RL not set") if self.VRS is "": @@ -499,7 +489,7 @@ def getATL03_beam(fileT, numpy=False, beam="gt1l", maxElev=1e6): "signal_confidence": signal_confidence, "mask_seaice": mask_seaice, "delta_time": delta_time, - "along_track_distance": along_track_distance, #'delta_time_granule':delta_time_granule, + "along_track_distance": along_track_distance, "across_track_distance": across_track_distance, "ph_id_count": ph_id_count, } @@ -520,11 +510,6 @@ def getATL03_beam(fileT, numpy=False, beam="gt1l", maxElev=1e6): print("df shape ", dF.shape) dF = dF[mask_total] - # dF_seg = dF_seg[mask_total] - # print('df[mask] shape ',dF.shape) - - # Reset row indexing - # dF=dF#.reset_index(drop=True) return dF, dF_seg diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/lanczos.py b/src/icesat2_tracks/ICEsat2_SI_tools/lanczos.py index 3396b57f..f8ade32a 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/lanczos.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/lanczos.py @@ -1,6 +1,7 @@ import numpy as np -def lanczos_1d(width, dx, a=2): + +def lanczos_1d(width, dx, a=2): """ This is a 1D lanczos Filter for time series analysis. it generates the Filter to be convolved with the timeseries @@ -18,25 +19,26 @@ def lanczos_1d(width, dx, a=2): # a= 1 # Lanczos parameter. 
the length of the filter is a*width # dx= .1 # deltax of the to be filtered timeseries - r=width/2.0 - xl=a*r - x= np.arange(-xl, xl, dx) - xprime=x/r + r = width / 2.0 + xl = a * r + x = np.arange(-xl, xl, dx) + xprime = x / r # define the filter - L = np.sinc(xprime) * np.sinc(xprime/a) - L = np.where((xprime > -a) & (xprime < a),L, 0) + L = np.sinc(xprime) * np.sinc(xprime / a) + L = np.where((xprime > -a) & (xprime < a), L, 0) - return x, L/L.sum() + return x, L / L.sum() -def lanczos_2d(width , dx, a=2): - x , L =lanczos_1d(width, dx, a=a) - L2d =np.outer(L ,L.T) +def lanczos_2d(width, dx, a=2): + x, L = lanczos_1d(width, dx, a=a) + L2d = np.outer(L, L.T) return x, L2d -def lanczos_filter_1d(x, data, width, a=2 , mode='same', method='direct'): + +def lanczos_filter_1d(x, data, width, a=2, mode="same", method="direct"): """ colvolves the lanzcos filter with data. inputs @@ -52,15 +54,16 @@ def lanczos_filter_1d(x, data, width, a=2 , mode='same', method='direct'): data_lp low-passed data, same size as before. """ import scipy.signal as signal - dx = np.diff(x).mean() - x , L = lanczos_1d(width, dx, a=a) + dx = np.diff(x).mean() + x, L = lanczos_1d(width, dx, a=a) - data_lp= signal.convolve(data, L, mode=mode, method=method)#* + data_lp = signal.convolve(data, L, mode=mode, method=method) # * return data_lp -def lanczos_filter_1d_wrapping(x, data, width, a=2 , mode='wrap'): + +def lanczos_filter_1d_wrapping(x, data, width, a=2, mode="wrap"): """ colvolves the lanzcos filter with data. same as lanczos_filter_1d but can wrap around @@ -78,16 +81,16 @@ def lanczos_filter_1d_wrapping(x, data, width, a=2 , mode='wrap'): data_lp low-passed data, same size as before. """ import scipy.ndimage - dx = np.diff(x).mean() - x , L = lanczos_1d(width, dx, a=a) - data_lp= scipy.ndimage.convolve(data, L, mode=mode)#* + dx = np.diff(x).mean() + x, L = lanczos_1d(width, dx, a=a) - return data_lp + data_lp = scipy.ndimage.convolve(data, L, mode=mode) # * + return data_lp -def lanczos_filter_2d(x, data, width, a=2 , mode='same'): +def lanczos_filter_2d(x, data, width, a=2, mode="same"): """ colvolves the lanzcos filter with data in 3 dimensions. inputs @@ -104,27 +107,27 @@ def lanczos_filter_2d(x, data, width, a=2 , mode='same'): """ import scipy.ndimage.filters as signal - #import scipy.ndimage.filters as signal - dx = abs(np.diff(x).mean()) - x , L2d = lanczos_2d(width, dx, a=a) + dx = abs(np.diff(x).mean()) + x, L2d = lanczos_2d(width, dx, a=a) - - data_lp= signal.convolve(data, L2d, mode=mode)#* + data_lp = signal.convolve(data, L2d, mode=mode) # * return data_lp -def lanczos_filter_2d_apply(data, x, width, a=2 , mode='same'): - return lanczos_filter_2d(x, data, width, a=a , mode=mode) -def lanczos_3d(width , dx, a=2): - x , L =lanczos_1d(width, dx, a=a) - L2d =np.outer(L ,L.T) +def lanczos_filter_2d_apply(data, x, width, a=2, mode="same"): + return lanczos_filter_2d(x, data, width, a=a, mode=mode) + + +def lanczos_3d(width, dx, a=2): + x, L = lanczos_1d(width, dx, a=a) + L2d = np.outer(L, L.T) L3d = np.multiply.outer(L2d, L.T) return x, L3d -def lanczos_filter_3d(x, data, width, a=2 , mode='same'): +def lanczos_filter_3d(x, data, width, a=2, mode="same"): """ colvolves the lanzcos filter with data in 3 dimensions. 
inputs @@ -141,11 +144,9 @@ def lanczos_filter_3d(x, data, width, a=2 , mode='same'): """ import scipy.ndimage.filters as signal - #import scipy.ndimage.filters as signal - dx = abs(np.diff(x).mean()) - x , L3d = lanczos_3d(width, dx, a=a) - + dx = abs(np.diff(x).mean()) + x, L3d = lanczos_3d(width, dx, a=a) - data_lp= signal.convolve(data, L3d, mode=mode)#* + data_lp = signal.convolve(data, L3d, mode=mode) # * return data_lp diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/read_ground_tracks.py b/src/icesat2_tracks/ICEsat2_SI_tools/read_ground_tracks.py index e449fef6..1777d1f6 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/read_ground_tracks.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/read_ground_tracks.py @@ -8,130 +8,146 @@ import osgeo.ogr import matplotlib.pyplot as plt + # PURPOSE: read ICESat-2 ground tracks for TRACK and BEAM def read_ICESat2_groundtrack(input_file): # decompress and parse KMZ file input_file = pathlib.Path(input_file).expanduser().absolute() - kmls = zipfile.ZipFile(str(input_file), 'r') + kmls = zipfile.ZipFile(str(input_file), "r") parser = lxml.etree.XMLParser(recover=True, remove_blank_text=True) # list of geodataframes for all RGTs RGTS = [] # for each kml in the zipfile (per RGT) for kml in kmls.filelist: - tree = lxml.etree.parse(kmls.open(kml, 'r'), parser) + tree = lxml.etree.parse(kmls.open(kml, "r"), parser) root = tree.getroot() # create list of rows rows = [] # find documents within kmz file - for document in root.iterfind('.//kml:Document//Folder//Placemark', root.nsmap): + for document in root.iterfind(".//kml:Document//Folder//Placemark", root.nsmap): # extract laser name, satellite track and coordinates of line strings - description = document.find('description', root.nsmap).text + description = document.find("description", root.nsmap).text columns = {} - columns['RGT'], = re.findall(r'RGT\s(\d+)',description) + (columns["RGT"],) = re.findall(r"RGT\s(\d+)", description) # get date and time - date = re.findall(r'\d{2}\-\w{3}\-\d{4}\s\d{2}\:\d{2}\:\d{2}', description).pop() - columns['date'] = geopandas.pd.to_datetime(date, format='%d-%b-%Y %H:%M:%S') - columns['DOY'], = re.findall(r'DOY\s(\d+)', description) - columns['cycle'], = re.findall(r'Cycle\s(\d+)', description) - coords = document.findall('Point/coordinates',root.nsmap) + date = re.findall( + r"\d{2}\-\w{3}\-\d{4}\s\d{2}\:\d{2}\:\d{2}", description + ).pop() + columns["date"] = geopandas.pd.to_datetime(date, format="%d-%b-%Y %H:%M:%S") + (columns["DOY"],) = re.findall(r"DOY\s(\d+)", description) + (columns["cycle"],) = re.findall(r"Cycle\s(\d+)", description) + coords = document.findall("Point/coordinates", root.nsmap) # for each set of coordinates - for i,c in enumerate(coords): - points = np.array([x.split(',')[:2] for x in c.text.split()],dtype='f8') - columns['geometry'], = geopandas.points_from_xy(points[:,0],points[:,1]) + for i, c in enumerate(coords): + points = np.array( + [x.split(",")[:2] for x in c.text.split()], dtype="f8" + ) + (columns["geometry"],) = geopandas.points_from_xy( + points[:, 0], points[:, 1] + ) rows.append(columns) # create geopandas geodataframe for points gdf = geopandas.GeoDataFrame(rows) RGTS.append(gdf) # return the concatenated geodataframe - return geopandas.pd.concat(RGTS).set_index('date') + return geopandas.pd.concat(RGTS).set_index("date") # PURPOSE: read ICESat-2 ground tracks def ICESat2_mission_groundtrack(input_file): # decompress and parse KMZ file input_file = pathlib.Path(input_file).expanduser().absolute() - kmzs = 
zipfile.ZipFile(str(input_file), 'r') + kmzs = zipfile.ZipFile(str(input_file), "r") parser = lxml.etree.XMLParser(recover=True, remove_blank_text=True) # for each kml in the zipfile (per GT) GTs = [] for kmz in kmzs.filelist: - kmls = zipfile.ZipFile(kmzs.open(kmz, 'r')) + kmls = zipfile.ZipFile(kmzs.open(kmz, "r")) for kml in kmls.filelist: - tree = lxml.etree.parse(kmls.open(kml, 'r'), parser) + tree = lxml.etree.parse(kmls.open(kml, "r"), parser) root = tree.getroot() # find documents within kmz file - for document in root.iterfind('.//kml:Document', root.nsmap): + for document in root.iterfind(".//kml:Document", root.nsmap): # extract laser name, satellite track and coordinates of line strings - name = document.find('name',root.nsmap).text - placemarks = document.findall('Placemark/name',root.nsmap) - coords = document.findall('Placemark/LineString/coordinates',root.nsmap) + name = document.find("name", root.nsmap).text + placemarks = document.findall("Placemark/name", root.nsmap) + coords = document.findall( + "Placemark/LineString/coordinates", root.nsmap + ) # create list of rows rows = [] wkt = [] # for each set of coordinates - for i,c in enumerate(coords): + for i, c in enumerate(coords): columns = {} - columns['Laser'], = re.findall(r'laser(\d+)', name) - columns['GT'], = re.findall(r'GT\d[LR]?', kmz.filename) - columns['RGT'] = int(placemarks[i].text) - coords = document.findall('Point/coordinates',root.nsmap) + (columns["Laser"],) = re.findall(r"laser(\d+)", name) + (columns["GT"],) = re.findall(r"GT\d[LR]?", kmz.filename) + columns["RGT"] = int(placemarks[i].text) + coords = document.findall("Point/coordinates", root.nsmap) # create LineString object linestring = osgeo.ogr.Geometry(osgeo.ogr.wkbLineString) - line = np.array([x.split(',')[:2] for x in c.text.split()],dtype='f8') - for ln,lt in zip(line[:,0],line[:,1]): + line = np.array( + [x.split(",")[:2] for x in c.text.split()], dtype="f8" + ) + for ln, lt in zip(line[:, 0], line[:, 1]): linestring.AddPoint(ln, lt) # convert to wkt and then add to geometry wkt.append(linestring.ExportToWkt()) rows.append(columns) # create geopandas geodataframe for points - gdf = geopandas.GeoDataFrame(rows, - geometry=geopandas.GeoSeries.from_wkt(wkt) + gdf = geopandas.GeoDataFrame( + rows, geometry=geopandas.GeoSeries.from_wkt(wkt) ) GTs.append(gdf) # return the concatenated geodataframe return geopandas.pd.concat(GTs) + # PURPOSE: read ICESat-2 mission ground tracks as points def ICESat2_mission_points(input_file): # decompress and parse KMZ file input_file = pathlib.Path(input_file).expanduser().absolute() - kmzs = zipfile.ZipFile(str(input_file), 'r') + kmzs = zipfile.ZipFile(str(input_file), "r") parser = lxml.etree.XMLParser(recover=True, remove_blank_text=True) # for each kml in the zipfile (per GT) GTs = [] for kmz in kmzs.filelist: - kmls = zipfile.ZipFile(kmzs.open(kmz, 'r')) + kmls = zipfile.ZipFile(kmzs.open(kmz, "r")) for kml in kmls.filelist: - tree = lxml.etree.parse(kmls.open(kml, 'r'), parser) + tree = lxml.etree.parse(kmls.open(kml, "r"), parser) root = tree.getroot() # find documents within kmz file - for document in root.iterfind('.//kml:Document', root.nsmap): + for document in root.iterfind(".//kml:Document", root.nsmap): # extract laser name, satellite track and coordinates of line strings - name = document.find('name', root.nsmap).text - placemarks = document.findall('Placemark/name', root.nsmap) - coords = document.findall('Placemark/LineString/coordinates', root.nsmap) + name = document.find("name", 
root.nsmap).text + placemarks = document.findall("Placemark/name", root.nsmap) + coords = document.findall( + "Placemark/LineString/coordinates", root.nsmap + ) # create list of rows rows = [] - x = []; y = [] + x = [] + y = [] # for each set of coordinates - for i,c in enumerate(coords): + for i, c in enumerate(coords): # create a line string of coordinates - line = np.array([x.split(',')[:2] for x in c.text.split()], dtype='f8') - for ln,lt in zip(line[:,0], line[:,1]): + line = np.array( + [x.split(",")[:2] for x in c.text.split()], dtype="f8" + ) + for ln, lt in zip(line[:, 0], line[:, 1]): columns = {} - columns['Laser'], = re.findall(r'laser(\d+)', name) - columns['GT'], = re.findall(r'GT\d[LR]?', kmz.filename) - columns['RGT'] = int(placemarks[i].text) + (columns["Laser"],) = re.findall(r"laser(\d+)", name) + (columns["GT"],) = re.findall(r"GT\d[LR]?", kmz.filename) + columns["RGT"] = int(placemarks[i].text) rows.append(columns) x.append(ln) y.append(lt) # create geopandas geodataframe for points - gdf = geopandas.GeoDataFrame(rows, - geometry=geopandas.points_from_xy(x,y) + gdf = geopandas.GeoDataFrame( + rows, geometry=geopandas.points_from_xy(x, y) ) GTs.append(gdf) # return the concatenated and georefernced geodataframe G = geopandas.pd.concat(GTs) - G.geometry.crs = {'init': 'epsg:4326'} + G.geometry.crs = {"init": "epsg:4326"} return G - diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/sliderule_converter_tools.py b/src/icesat2_tracks/ICEsat2_SI_tools/sliderule_converter_tools.py index a370c3cc..f084db97 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/sliderule_converter_tools.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/sliderule_converter_tools.py @@ -406,7 +406,6 @@ def define_x_coordinate_in_polygon(table_data, polygon, round=True): if ascending_test(table_data): table_data["x"] = table_data["x_atc"] - min_eq_dist else: - # print('descending') table_data["x"] = ((np.pi * 6371 * 1e3) - min_eq_dist) - table_data["x_atc"] return table_data diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py index d46d9ea2..bcc8afde 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py @@ -2,7 +2,7 @@ # basic functions -def create_chunk_boundaries(L, dsize, ov= None, iter_flag=True): +def create_chunk_boundaries(L, dsize, ov=None, iter_flag=True): """ returns all need chunk boudaries and center position given L, and ov inputs: @@ -13,17 +13,18 @@ def create_chunk_boundaries(L, dsize, ov= None, iter_flag=True): if iter_flag True returns iter else it returns an ndarray """ - ov=int(np.round(L/2)) if ov is None else ov + ov = int(np.round(L / 2)) if ov is None else ov - - xleft = np.arange(0,dsize-int(L-ov),int(L-ov)) - xright = np.arange(int(L-ov)*2,dsize+1,int(L-ov)) - xcenter_pos = np.arange(int(L-ov),dsize-int(L-ov)+1,int(L-ov)) - max_size = min([xleft.size , xcenter_pos.size, xright.size]) + xleft = np.arange(0, dsize - int(L - ov), int(L - ov)) + xright = np.arange(int(L - ov) * 2, dsize + 1, int(L - ov)) + xcenter_pos = np.arange(int(L - ov), dsize - int(L - ov) + 1, int(L - ov)) + max_size = min([xleft.size, xcenter_pos.size, xright.size]) # if xright[max_size-1] < dsize: # print('left out last ' + str(dsize- xright[max_size-1]) + ' data points' ) - #print([xleft[0:max_size], xcenter_pos[0:max_size], xright[0:max_size]]) - position_stancil = np.vstack([xleft[0:max_size], xcenter_pos[0:max_size], xright[0:max_size]]) + # 
print([xleft[0:max_size], xcenter_pos[0:max_size], xright[0:max_size]]) + position_stancil = np.vstack( + [xleft[0:max_size], xcenter_pos[0:max_size], xright[0:max_size]] + ) if iter_flag is True: return iter(position_stancil.T.tolist()) @@ -31,7 +32,7 @@ def create_chunk_boundaries(L, dsize, ov= None, iter_flag=True): return position_stancil -def create_chunk_boundaries_unit_lengths(L_unit, data_limits, ov= None, iter_flag=True): +def create_chunk_boundaries_unit_lengths(L_unit, data_limits, ov=None, iter_flag=True): """ returns all need chunk boudaries and center position given L, and ov inputs: @@ -42,74 +43,78 @@ def create_chunk_boundaries_unit_lengths(L_unit, data_limits, ov= None, iter_fl if iter_flag True returns iter else it returns an ndarray """ - L= L_unit - ov=np.round(L/2) if ov is None else ov - #print(ov) - dl = (L-ov) - xleft = np.arange(data_limits[0] , data_limits[1]-dl, dl ) - xcenter_pos = np.arange(data_limits[0]+ L/2 , data_limits[1]-dl+1, dl ) - xright = np.arange(data_limits[0] + L , data_limits[1]+1, dl ) - - - max_size = min([xleft.size , xcenter_pos.size, xright.size]) + L = L_unit + ov = np.round(L / 2) if ov is None else ov + # print(ov) + dl = L - ov + xleft = np.arange(data_limits[0], data_limits[1] - dl, dl) + xcenter_pos = np.arange(data_limits[0] + L / 2, data_limits[1] - dl + 1, dl) + xright = np.arange(data_limits[0] + L, data_limits[1] + 1, dl) + + max_size = min([xleft.size, xcenter_pos.size, xright.size]) # if xright[max_size-1] < data_limits[1]: # print('left out last ' + str(data_limits[1]- xright[max_size-1]) + ' data points' ) - #print([xleft[0:max_size], xcenter_pos[0:max_size], xright[0:max_size]]) - position_stancil = np.vstack([xleft[0:max_size], xcenter_pos[0:max_size], xright[0:max_size]]) + # print([xleft[0:max_size], xcenter_pos[0:max_size], xright[0:max_size]]) + position_stancil = np.vstack( + [xleft[0:max_size], xcenter_pos[0:max_size], xright[0:max_size]] + ) if iter_flag is True: return iter(position_stancil.T.tolist()) else: return position_stancil + def Z_to_power(Z, df, N): - """ compute the 1d spectrum of a field phi + """compute the 1d spectrum of a field phi inputs: Z complex fourier coefficients df frequency / or wavenumber step N length of data vector in real space (= L) """ - spec = 2.*(Z*Z.conj()).real / df /N**2 - neven = True if (N%2) else False + spec = 2.0 * (Z * Z.conj()).real / df / N**2 + neven = True if (N % 2) else False # the zeroth frequency should be counted only once - spec[0] = spec[0]/2. + spec[0] = spec[0] / 2.0 if neven: - spec[-1] = spec[-1]/2. + spec[-1] = spec[-1] / 2.0 return spec + # 2nd cal spectra def calc_spectrum_fft(phi, df, N): - """ compute the 1d spectrum of a field phi + """compute the 1d spectrum of a field phi inputs: df frequency / or wavenumber step N length of data vector in real space (= L) neven bool, if True """ - neven = True if (N%2) else False + neven = True if (N % 2) else False phih = np.fft.rfft(phi) # the factor of 2 comes from the symmetry of the Fourier coeffs - spec = 2.*(phih*phih.conj()).real / df /N**2 + spec = 2.0 * (phih * phih.conj()).real / df / N**2 # the zeroth frequency should be counted only once - spec[0] = spec[0]/2. + spec[0] = spec[0] / 2.0 if neven: - spec[-1] = spec[-1]/2. 
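+        # note: for an even-length record the Nyquist bin, like the zeroth
+        # bin, has no conjugate partner in the rfft output, so the factor of
+        # 2 applied above is undone here to avoid double counting its power.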
+ spec[-1] = spec[-1] / 2.0 return spec -def LS_power_to_PSD( ls_power, L , dff): +def LS_power_to_PSD(ls_power, L, dff): """ returns Power spectral density (unit^2/dfreq) ls_power output of astropy.timeseries.LombScargle.power with normalization='psd' """ - return 2 * ls_power / L /dff + return 2 * ls_power / L / dff -def calc_spectrum_LS( x, y, k, err=None, LS= None, dk =None): + +def calc_spectrum_LS(x, y, k, err=None, LS=None, dk=None): """ returns: Power spectral density of y given postitions x, for wanumbers k @@ -119,16 +124,18 @@ def calc_spectrum_LS( x, y, k, err=None, LS= None, dk =None): """ if LS is None: from astropy.timeseries import LombScargle - LS = LombScargle(x , y, dy = err, fit_mean=False, center_data=True) + + LS = LombScargle(x, y, dy=err, fit_mean=False, center_data=True) else: LS.t = x LS.y = y - LS.dy= err + LS.dy = err + + ls_power = LS.power(k, normalization="psd", assume_regular_frequency="False") - ls_power = LS.power(k, normalization='psd', assume_regular_frequency='False') + dk = np.diff(k).mean() if dk is None else dk + return 2 * ls_power / y.size / dk, LS - dk = np.diff(k).mean() if dk is None else dk - return 2 * ls_power / y.size / dk , LS def reconstruct_data_from_LS(LS, x_real_axis, freq): """ @@ -141,16 +148,18 @@ def reconstruct_data_from_LS(LS, x_real_axis, freq): y_reconstruct = LS.offset() * np.ones(len(x_real_axis)) freq_seq = freq[1:] if freq[0] == 0 else freq - freq_mask= freq <= 1/100 + freq_mask = freq <= 1 / 100 - #print(freq_seq) + # print(freq_seq) for fi in freq_seq: try: theta = LS.model_parameters(fi) except: - theta = [0,0] - y_reconstruct += theta[0] * np.sin(x_real_axis * 2 * np.pi *fi ) + theta[1]* np.cos(x_real_axis * 2 * np.pi *fi) - #y_reconstruct += LS.model(x_real_axis, fi) + theta = [0, 0] + y_reconstruct += theta[0] * np.sin(x_real_axis * 2 * np.pi * fi) + theta[ + 1 + ] * np.cos(x_real_axis * 2 * np.pi * fi) + # y_reconstruct += LS.model(x_real_axis, fi) # for fi in freq_seq[:]: # theta = LS.model_parameters(fi) @@ -159,20 +168,22 @@ def reconstruct_data_from_LS(LS, x_real_axis, freq): return y_reconstruct + def calc_freq_fft(x_grid, N): - """ calculate array of spectral variable (frequency or - wavenumber) in cycles per unit of L """ + """calculate array of spectral variable (frequency or + wavenumber) in cycles per unit of L""" - neven = True if (N%2) else False - #dx=np.diff(x_grid).mean() - dx = np.round(np.median(np.diff(x_grid) ), 1) - df = 1./((N-1)*dx) + neven = True if (N % 2) else False + # dx=np.diff(x_grid).mean() + dx = np.round(np.median(np.diff(x_grid)), 1) + df = 1.0 / ((N - 1) * dx) if neven: - f = df*np.arange(N/2+1) + f = df * np.arange(N / 2 + 1) else: - f = df*np.arange( (N-1)/2. + 1 ) - return f,df + f = df * np.arange((N - 1) / 2.0 + 1) + return f, df + def calc_freq_fft_given_dx(dx, N): """ @@ -182,17 +193,25 @@ def calc_freq_fft_given_dx(dx, N): N number of datapoints used in window """ - neven = True if (N%2) else False - df = 1./((N-1)*dx) + neven = True if (N % 2) else False + df = 1.0 / ((N - 1) * dx) if neven: - f = df*np.arange(N/2+1) + f = df * np.arange(N / 2 + 1) else: - f = df*np.arange( (N-1)/2. 
+ 1 ) - return f,df - - -def calc_freq_LS(x, N, method='fftX2', dx=None, minimum_frequency=None, maximum_frequency=None, samples_per_peak=0.01): + f = df * np.arange((N - 1) / 2.0 + 1) + return f, df + + +def calc_freq_LS( + x, + N, + method="fftX2", + dx=None, + minimum_frequency=None, + maximum_frequency=None, + samples_per_peak=0.01, +): """ calculate array of spectral variable (frequency or wavenumber) in cycles per unit of N (window length in number of data points) @@ -204,42 +223,48 @@ def calc_freq_LS(x, N, method='fftX2', dx=None, minimum_frequency=None, maximum_ minimum_frequency, maximum_frequency only used for LS_auto """ - if method is 'fftX2': - neven = True if (N%2) else False + if method is "fftX2": + neven = True if (N % 2) else False dx = np.diff(x).mean() if dx is None else dx - df = 1./((N-1)*dx) /2 + df = 1.0 / ((N - 1) * dx) / 2 if neven: - f = df*np.arange(df, N+1) + f = df * np.arange(df, N + 1) else: - f = df* np.arange(df, (N-1) + 1 ) + f = df * np.arange(df, (N - 1) + 1) - elif method is 'fft': - neven = True if (N%2) else False + elif method is "fft": + neven = True if (N % 2) else False dx = np.diff(x).mean() if dx is None else dx - df = 1./((N-1)*dx) + df = 1.0 / ((N - 1) * dx) if neven: - f = df*np.arange(N/2+1) + f = df * np.arange(N / 2 + 1) else: - f = df*np.arange( (N-1)/2. + 1 ) + f = df * np.arange((N - 1) / 2.0 + 1) - elif method is 'LS_auto': + elif method is "LS_auto": from astropy.timeseries import LombScargle - f = LombScargle(x , np.random.randn(len(x)), fit_mean=True).autofrequency(minimum_frequency=minimum_frequency, maximum_frequency=maximum_frequency, samples_per_peak=samples_per_peak)##0.1) + + f = LombScargle(x, np.random.randn(len(x)), fit_mean=True).autofrequency( + minimum_frequency=minimum_frequency, + maximum_frequency=maximum_frequency, + samples_per_peak=samples_per_peak, + ) ##0.1) df = np.diff(f).mean() df = np.round(df, 5) - elif method is 'fixed_ratio': + elif method is "fixed_ratio": - neven = True if (N%2) else False + neven = True if (N % 2) else False dx = np.diff(x).mean() if dx is None else dx df = dx / 50 if neven: - f = df*np.arange(df, N +1) + f = df * np.arange(df, N + 1) else: - f = df* np.arange(df, N ) + f = df * np.arange(df, N) + + return f, df - return f ,df def create_window(L, window=None): """ @@ -247,65 +272,66 @@ def create_window(L, window=None): if window is not None it show have a length of N """ if window is None: - win=np.hanning(L) + win = np.hanning(L) else: - win=window + win = window - factor=np.sqrt(L/(win**2).sum()) - win*=factor + factor = np.sqrt(L / (win**2).sum()) + win *= factor return win + def create_weighted_window(data, window=None): """ define window function and weight it to conserve variance if window is not None it show have a length of N """ import scipy.signal.windows as WINDOWS + L = data.size if window is None: - #win=np.hanning(L) - win = WINDOWS.tukey(L, alpha=0.1, sym=True) + # win=np.hanning(L) + win = WINDOWS.tukey(L, alpha=0.1, sym=True) else: - win=window + win = window - factor = np.sqrt( np.var(data) / np.var(( data* win) ) ) - #factor=np.sqrt( L/(win**2).sum()) - win *= factor + factor = np.sqrt(np.var(data) / np.var((data * win))) + # factor=np.sqrt( L/(win**2).sum()) + win *= factor return win -def spec_error(E,sn,ci=.95): - - """ Computes confidence interval for one-dimensional spectral - estimate E (the power spectra). 
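+
+    Example (illustrative sketch only; E_mean and n_chunks are hypothetical
+    names for a mean spectrum and the number of realizations it averages):
+        El, Eu = spec_error(E_mean, n_chunks, ci=0.95)
+    El and Eu have the same shape as the input spectrum and bracket it.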
- Parameters - =========== - - sn is the number of spectral realizations; - it can be either an scalar or an array of size(E) - - ci = .95 for 95 % confidence interval +def spec_error(E, sn, ci=0.95): + """Computes confidence interval for one-dimensional spectral + estimate E (the power spectra). - Output - ========== - lower (El) and upper (Eu) bounds on E """ + Parameters + =========== + - sn is the number of spectral realizations; + it can be either an scalar or an array of size(E) + - ci = .95 for 95 % confidence interval + Output + ========== + lower (El) and upper (Eu) bounds on E""" - - def yNlu(sn,yN,ci): - """ compute yN[l] yN[u], that is, the lower and - upper limit of yN """ + def yNlu(sn, yN, ci): + """compute yN[l] yN[u], that is, the lower and + upper limit of yN""" from scipy.special import gammainc + # cdf of chi^2 dist. with 2*sn DOF - cdf = gammainc(sn,sn*yN) + cdf = gammainc(sn, sn * yN) # indices that delimit the wedge of the conf. interval fl = np.abs(cdf - ci).argmin() - fu = np.abs(cdf - 1. + ci).argmin() + fu = np.abs(cdf - 1.0 + ci).argmin() - return yN[fl],yN[fu] + return yN[fl], yN[fu] - dbin = .005 - yN = np.arange(0,2.+dbin,dbin) + dbin = 0.005 + yN = np.arange(0, 2.0 + dbin, dbin) El, Eu = np.empty_like(E), np.empty_like(E) @@ -319,19 +345,19 @@ def yNlu(sn,yN,ci): assert n == E.size, " *** sn has different size than E " for i in range(n): - yNl,yNu = yNlu(sn[i],yN=yN,ci=ci) - El[i] = E[i]/yNl - Eu[i] = E[i]/yNu + yNl, yNu = yNlu(sn[i], yN=yN, ci=ci) + El[i] = E[i] / yNl + Eu[i] = E[i] / yNu else: - yNl,yNu = yNlu(sn,yN=yN,ci=ci) - El = E/yNl - Eu = E/yNu + yNl, yNu = yNlu(sn, yN=yN, ci=ci) + El = E / yNl + Eu = E / yNu return El, Eu -def linear_gap_fill(F, key_lead, key_int): +def linear_gap_fill(F, key_lead, key_int): """ F pd.DataFrame key_lead key in F that determined the independent coordindate @@ -339,13 +365,13 @@ def linear_gap_fill(F, key_lead, key_int): """ y_g = np.array(F[key_int]) - nans, x2= np.isnan(y_g), lambda z: z.nonzero()[0] - y_g[nans]= np.interp(x2(nans), x2(~nans), y_g[~nans]) + nans, x2 = np.isnan(y_g), lambda z: z.nonzero()[0] + y_g[nans] = np.interp(x2(nans), x2(~nans), y_g[~nans]) return y_g -def sub_sample_coords(X, lons, lats, stancils, map_func =None): +def sub_sample_coords(X, lons, lats, stancils, map_func=None): """ X non-nan array of coodinate along beam lons, lats arrays of postion data that should be mapped. must have same size as X @@ -357,20 +383,21 @@ def sub_sample_coords(X, lons, lats, stancils, map_func =None): """ import copy + def get_lon_lat_coords(stancil): - x_mask= (stancil[0] <= X) & (X <= stancil[-1]) - #print(stancil[1]) - if sum(x_mask) ==0: # if there are not enough photos set results to nan + x_mask = (stancil[0] <= X) & (X <= stancil[-1]) + # print(stancil[1]) + if sum(x_mask) == 0: # if there are not enough photos set results to nan return np.array([stancil[1], np.nan, np.nan]) lon_bin, lat_bin = lons[x_mask].mean(), lats[x_mask].mean() - return np.array([stancil[1],lon_bin, lat_bin]) + return np.array([stancil[1], lon_bin, lat_bin]) map_func = map if map_func is None else map_func - #print(map_func) - coord_positions = list(map_func( get_lon_lat_coords, copy.copy(stancils) )) + # print(map_func) + coord_positions = list(map_func(get_lon_lat_coords, copy.copy(stancils))) coord_positions = np.vstack(coord_positions) return coord_positions @@ -394,23 +421,26 @@ def __init__(self, x_grid, data, Lpoints, ov=None, window=None): other arributes are in the .attr dict. 
""" - self.Lpoints = Lpoints - self.ov = int(Lpoints/2) if ov is None else ov #when not defined in create_chunk_boundaries then L/2 + self.Lpoints = Lpoints + self.ov = ( + int(Lpoints / 2) if ov is None else ov + ) # when not defined in create_chunk_boundaries then L/2 - self.data = data + self.data = data # create subsample k - self.k, self.dk = calc_freq_fft(x_grid, Lpoints) # return 1/ unit of frid points + self.k, self.dk = calc_freq_fft( + x_grid, Lpoints + ) # return 1/ unit of frid points # to get the waveumber units (2 pi/ lambda), multiply by 2 pi - self.k, self.dk = self.k * 2 * np.pi, self.dk * 2 * np.pi + self.k, self.dk = self.k * 2 * np.pi, self.dk * 2 * np.pi # print(self.k[0], self.k[-1]) # print(self.dk) # create window - self.win = create_window(Lpoints) + self.win = create_window(Lpoints) def cal_spectrogram(self, data=None, name=None): - """ defines apply function and calculated all sub-sample sprectra using map """ @@ -418,102 +448,109 @@ def cal_spectrogram(self, data=None, name=None): DATA = self.data if data is None else data Lpoints, dk = self.Lpoints, self.dk - win =self.win + win = self.win def calc_spectrum_apply(stancil): "returns spectrum per stencil, detrends and windows the data" from scipy.signal import detrend - idata = DATA[stancil[0]:stancil[-1]] + idata = DATA[stancil[0] : stancil[-1]] idata = detrend(idata) * win - return stancil[1], calc_spectrum_fft(idata , dk, Lpoints) + return stancil[1], calc_spectrum_fft(idata, dk, Lpoints) # def test_func(i_stancil): # return i_stancil[1], yy[i_stancil[0]:i_stancil[-1]].shape # %% derive L2 stancil - stancil_iter = create_chunk_boundaries(Lpoints, DATA.size, ov= self.ov) + stancil_iter = create_chunk_boundaries(Lpoints, DATA.size, ov=self.ov) # apply func to all stancils - D_specs = dict(map(calc_spectrum_apply,stancil_iter)) + D_specs = dict(map(calc_spectrum_apply, stancil_iter)) chunk_positions = np.array(list(D_specs.keys())) - self.N_stancils = len(chunk_positions) # number of spectal relazations + self.N_stancils = len(chunk_positions) # number of spectal relazations # repack data, create xarray - self.spec_name = 'power_spec' if name is None else name - G =dict() - for xi,I in D_specs.items(): - G[xi] = xr.DataArray(I, dims=['k'], coords={'k': self.k, 'x': xi } , name=self.spec_name) - - self.G = xr.concat(G.values(), dim='x').T#.to_dataset() + self.spec_name = "power_spec" if name is None else name + G = dict() + for xi, I in D_specs.items(): + G[xi] = xr.DataArray( + I, dims=["k"], coords={"k": self.k, "x": xi}, name=self.spec_name + ) + + self.G = xr.concat(G.values(), dim="x").T # .to_dataset() if self.G.k[0] == 0: self.G = self.G[1:, :] - self.G.attrs['ov'] = self.ov - self.G.attrs['L'] = self.Lpoints + self.G.attrs["ov"] = self.ov + self.G.attrs["L"] = self.Lpoints return self.G # cal variance def calc_var(self): - """ Compute total variance from spectragram """ - return self.dk*self.G.mean('x').sum().data # do not consider zeroth frequency + """Compute total variance from spectragram""" + return self.dk * self.G.mean("x").sum().data # do not consider zeroth frequency - def mean_spectral_error(self, mask=None, confidence = 0.95): + def mean_spectral_error(self, mask=None, confidence=0.95): "retrurns spetral error for the x-mean spectral estimate and stores it as coordindate in the dataarray" # make error estimate if mask is not None: - meanspec= self.G.isel(x=mask).mean('x') + meanspec = self.G.isel(x=mask).mean("x") N = int(sum(mask)) else: - meanspec= self.G.mean('x') + meanspec = 
self.G.mean("x") N = self.N_stancils - El_of_mean, Eu_of_mean = spec_error(meanspec, N , confidence ) - El_of_mean.name = 'El_mean' - Eu_of_mean.name = 'Eu_mean' + El_of_mean, Eu_of_mean = spec_error(meanspec, N, confidence) + El_of_mean.name = "El_mean" + Eu_of_mean.name = "Eu_mean" - self.G.coords['mean_El'] = (('k'), El_of_mean.data) - self.G.coords['mean_Eu'] = (('k'), Eu_of_mean.data) + self.G.coords["mean_El"] = (("k"), El_of_mean.data) + self.G.coords["mean_Eu"] = (("k"), Eu_of_mean.data) - def parceval(self, add_attrs=True ): + def parceval(self, add_attrs=True): "test Parceval theorem" DATA = self.data L = self.Lpoints - # derive mean variances of stancils stancil_iter = create_chunk_boundaries(L, DATA.size) def get_stancil_var_apply(stancil): from scipy.signal import detrend + "returns the variance of yy for stancil" - idata = DATA[stancil[0]:stancil[-1]] - idata = detrend(idata)# * win + idata = DATA[stancil[0] : stancil[-1]] + idata = detrend(idata) # * win return stancil[1], idata.var() - D_vars = dict(map(get_stancil_var_apply,stancil_iter)) + D_vars = dict(map(get_stancil_var_apply, stancil_iter)) - stancil_vars =list() + stancil_vars = list() for I in D_vars.values(): stancil_vars.append(I) - print('Parcevals Theorem:') - print('variance of unweighted timeseries: ',DATA.var()) - print('mean variance of detrended chunks: ', np.array(stancil_vars).mean()) - #print('variance of weighted timeseries: ',self.phi.var() ) - #self.calc_var(self) - print('variance of the pwelch Spectrum: ', self.calc_var()) + print("Parcevals Theorem:") + print("variance of unweighted timeseries: ", DATA.var()) + print("mean variance of detrended chunks: ", np.array(stancil_vars).mean()) + # print('variance of weighted timeseries: ',self.phi.var() ) + # self.calc_var(self) + print("variance of the pwelch Spectrum: ", self.calc_var()) if add_attrs: - self.G.attrs['variance_unweighted_data'] = DATA.var() - self.G.attrs['mean_variance_detrended_chunks'] = np.array(stancil_vars).mean() - self.G.attrs['mean_variance_pwelch_spectrum'] = self.calc_var() + self.G.attrs["variance_unweighted_data"] = DATA.var() + self.G.attrs["mean_variance_detrended_chunks"] = np.array( + stancil_vars + ).mean() + self.G.attrs["mean_variance_pwelch_spectrum"] = self.calc_var() + class wavenumber_spectrogram_LS_even: - def __init__(self, x, data, L, waven_method = 'fftX2' , dy=None , ov=None, window=None, kjumps=1): + def __init__( + self, x, data, L, waven_method="fftX2", dy=None, ov=None, window=None, kjumps=1 + ): """ returns a wavenumber spectrogram with the resolution L-ov this uses Lombscargle @@ -532,29 +569,30 @@ def __init__(self, x, data, L, waven_method = 'fftX2' , dy=None , ov=None, wind other arributes are in the .attr dict. 
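+
+        Example (illustrative sketch; array sizes and names are hypothetical,
+        assuming evenly gridded data as required by this class):
+            x    = np.arange(0, 5000, 5.0)     # evenly spaced positions
+            data = np.random.randn(x.size)     # stand-in for surface heights
+            S    = wavenumber_spectrogram_LS_even(x, data, L=256)
+            G    = S.cal_spectrogram(dx=5.0)   # spectrogram with dims (k, x)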
""" from astropy.timeseries import LombScargle - self.L = L - self.ov = int(L/2) if ov is None else ov #when not defined in create_chunk_boundaries then L/2 - self.x = x - self.data = data - self.dy = dy + self.L = L + self.ov = ( + int(L / 2) if ov is None else ov + ) # when not defined in create_chunk_boundaries then L/2 + self.x = x + self.data = data + self.dy = dy # create subsample k - #print(waven_method) + # print(waven_method) if type(waven_method) is str: - self.k, self.dk = calc_freq_LS(x, L, method = waven_method ) + self.k, self.dk = calc_freq_LS(x, L, method=waven_method) elif type(waven_method) is np.ndarray: - self.k, self.dk = waven_method, np.diff(waven_method).mean() + self.k, self.dk = waven_method, np.diff(waven_method).mean() else: - raise ValueError('waven_method is neither string nor an array') + raise ValueError("waven_method is neither string nor an array") - self.k, self.dk = self.k[::kjumps], self.dk*kjumps + self.k, self.dk = self.k[::kjumps], self.dk * kjumps # create window - self.win = None #create_window(L) - - def cal_spectrogram(self, x = None, data=None, name=None, dx=1): + self.win = None # create_window(L) + def cal_spectrogram(self, x=None, data=None, name=None, dx=1): """ defines apply function and calculated all sub-sample sprectra using map dx nominal resolution of the data resolutionif not set, dx= 1 @@ -562,64 +600,71 @@ def cal_spectrogram(self, x = None, data=None, name=None, dx=1): from astropy.timeseries import LombScargle import xarray as xr - X = self.x if x is None else x # all x positions - DATA = self.data if data is None else data # all data points - L, dk = self.L, self.dk - win = self.win + X = self.x if x is None else x # all x positions + DATA = self.data if data is None else data # all data points + L, dk = self.L, self.dk + win = self.win self.dx = dx # init Lomb scargle object with noise as nummy data () - #dy_fake= np.random.randn(len(dy))*0.001 if self.dy is not None else None - self.LS = LombScargle(X[0:L] , np.random.randn(L)*0.001, fit_mean=True) - + # dy_fake= np.random.randn(len(dy))*0.001 if self.dy is not None else None + self.LS = LombScargle(X[0:L], np.random.randn(L) * 0.001, fit_mean=True) def calc_spectrum_apply(stancil): "returns spectrum per stencil, detrends and windows the data" from scipy.signal import detrend - x = X[stancil[0]:stancil[-1]] - #x_mask= (stancil[0] < X) & (X <= stancil[-1]) - #x = X[x_mask] - idata = DATA[stancil[0]:stancil[-1]] - y = detrend(idata)# * win + x = X[stancil[0] : stancil[-1]] + # x_mask= (stancil[0] < X) & (X <= stancil[-1]) + # x = X[x_mask] + idata = DATA[stancil[0] : stancil[-1]] + y = detrend(idata) # * win - LS_PSD, LS_object = calc_spectrum_LS( x, y, self.k, LS= self.LS, dk =self.dk) + LS_PSD, LS_object = calc_spectrum_LS(x, y, self.k, LS=self.LS, dk=self.dk) return stancil[1], LS_PSD # % derive L2 stancil - stancil_iter = create_chunk_boundaries(L, DATA.size, ov= self.ov) + stancil_iter = create_chunk_boundaries(L, DATA.size, ov=self.ov) # apply func to all stancils - D_specs = dict(map(calc_spectrum_apply,stancil_iter)) + D_specs = dict(map(calc_spectrum_apply, stancil_iter)) chunk_positions = np.array(list(D_specs.keys())) - self.N_stancils = len(chunk_positions) # number of spectal relazations + self.N_stancils = len(chunk_positions) # number of spectal relazations # repack data, create xarray - self.spec_name = 'power_spec' if name is None else name - G =dict() - for xi,I in D_specs.items(): - G[xi] = xr.DataArray(I, dims=['k'], coords={'k': self.k, 'x': xi * self.dx } , 
name=self.spec_name) - - self.G = xr.concat(G.values(), dim='x').T#.to_dataset() + self.spec_name = "power_spec" if name is None else name + G = dict() + for xi, I in D_specs.items(): + G[xi] = xr.DataArray( + I, + dims=["k"], + coords={"k": self.k, "x": xi * self.dx}, + name=self.spec_name, + ) + + self.G = xr.concat(G.values(), dim="x").T # .to_dataset() if self.G.k[0] == 0: self.G = self.G[1:, :] - self.G.attrs['ov'] = self.ov - self.G.attrs['L'] = self.L + self.G.attrs["ov"] = self.ov + self.G.attrs["L"] = self.L return self.G def calc_var(self): return wavenumber_spectrogram.calc_var(self) - def parceval(self, add_attrs=True ): - return wavenumber_spectrogram.parceval(self, add_attrs= add_attrs ) + def parceval(self, add_attrs=True): + return wavenumber_spectrogram.parceval(self, add_attrs=add_attrs) + + def mean_spectral_error(self, confidence=0.95): + return wavenumber_spectrogram.mean_spectral_error(self, confidence=confidence) - def mean_spectral_error(self, confidence = 0.95 ): - return wavenumber_spectrogram.mean_spectral_error(self, confidence= confidence ) class wavenumber_spectrogram_LS: - def __init__(self, x, data, L, dx, dy = None, waven_method = 'fftX2', ov=None, window=None): + def __init__( + self, x, data, L, dx, dy=None, waven_method="fftX2", ov=None, window=None + ): """ returns a wavenumber spectrogram with the resolution L-ov this uses Lombscargle @@ -639,31 +684,42 @@ def __init__(self, x, data, L, dx, dy = None, waven_method = 'fftX2', ov=None, w other arributes are in the .attr dict. """ from astropy.timeseries import LombScargle - self.L = L - self.ov = int(L/2) if ov is None else ov #when not defined in create_chunk_boundaries then L/2 - self.x = x - self.dx = dx - self.data = data - self.error = dy if dy is not None else None - self.Lpoints= int(self.L/self.dx) + self.L = L + self.ov = ( + int(L / 2) if ov is None else ov + ) # when not defined in create_chunk_boundaries then L/2 + self.x = x + self.dx = dx + self.data = data + self.error = dy if dy is not None else None + self.Lpoints = int(self.L / self.dx) # create subsample k - #print(waven_method) + # print(waven_method) if type(waven_method) is str: - self.k, self.dk = calc_freq_LS(x, self.Lpoints, method = waven_method ) + self.k, self.dk = calc_freq_LS(x, self.Lpoints, method=waven_method) elif type(waven_method) is np.ndarray: - self.k, self.dk = waven_method, np.diff(waven_method).mean() + self.k, self.dk = waven_method, np.diff(waven_method).mean() else: - raise ValueError('waven_method is neither string nor an array') + raise ValueError("waven_method is neither string nor an array") - #self.k, self.dk = self.k[::kjumps], self.dk*kjumps + # self.k, self.dk = self.k[::kjumps], self.dk*kjumps # create window - self.win = None #create_window(L) - - def cal_spectrogram(self, x = None, data=None, error=None, name=None, xlims =None, weight_data= True, max_nfev = None, map_func=None): - + self.win = None # create_window(L) + + def cal_spectrogram( + self, + x=None, + data=None, + error=None, + name=None, + xlims=None, + weight_data=True, + max_nfev=None, + map_func=None, + ): """ defines apply function and calculated all sub-sample sprectra using map @@ -687,77 +743,81 @@ def cal_spectrogram(self, x = None, data=None, error=None, name=None, xlims =Non import copy import pandas as pd - X = self.x if x is None else x # all x positions - DATA = self.data if data is None else data # all data points - ERR = self.error if error is None else error # all error for points - L, dk = self.L, self.dk - #win = 
self.win - self.xlims = ( np.round(X.min()), X.max() ) if xlims is None else xlims + X = self.x if x is None else x # all x positions + DATA = self.data if data is None else data # all data points + ERR = self.error if error is None else error # all error for points + L, dk = self.L, self.dk + # win = self.win + self.xlims = (np.round(X.min()), X.max()) if xlims is None else xlims # init Lomb scargle object with noise as nummy data () - #dy_fake= np.random.randn(len(dy))*0.001 if self.dy is not None else None - #self.LS = LombScargle(X[0:L] , np.random.randn(L)*0.001, fit_mean=True) + # dy_fake= np.random.randn(len(dy))*0.001 if self.dy is not None else None + # self.LS = LombScargle(X[0:L] , np.random.randn(L)*0.001, fit_mean=True) # define window import scipy.signal.windows as WINDOWS - self.win = WINDOWS.tukey(self.Lpoints, alpha=0.1, sym=True) + self.win = WINDOWS.tukey(self.Lpoints, alpha=0.1, sym=True) def calc_spectrum_and_field_apply(stancil): - """ windows the data accoding to stencil and applies LS spectrogram returns: stancil center, spectrum for this stencil, number of datapoints in stancil """ from scipy.signal import detrend - #x = X[stancil[0]:stancil[-1]] - x_mask= (stancil[0] <= X) & (X <= stancil[-1]) + # x = X[stancil[0]:stancil[-1]] + x_mask = (stancil[0] <= X) & (X <= stancil[-1]) - #print(stancil[1]) + # print(stancil[1]) x = X[x_mask] - if x.size < 200: # if there are not enough photos set results to nan - #return stancil[1], self.k*np.nan, np.fft.rfftfreq( int(self.Lpoints), d=self.dx)*np.nan, x.size - return stancil[1], self.k*np.nan, self.k*np.nan, np.nan, x.size + if x.size < 200: # if there are not enough photos set results to nan + # return stancil[1], self.k*np.nan, np.fft.rfftfreq( int(self.Lpoints), d=self.dx)*np.nan, x.size + return stancil[1], self.k * np.nan, self.k * np.nan, np.nan, x.size y = DATA[x_mask] - #make x positions - x_pos = (np.round( (x - stancil[0])/ 10.0 -1 , 0) ).astype('int') + # make x positions + x_pos = (np.round((x - stancil[0]) / 10.0 - 1, 0)).astype("int") # weight data if weight_data: window = self.win[x_pos] - y = y * window * np.sqrt( np.var(y) / np.var(( y* window) ) ) + y = y * window * np.sqrt(np.var(y) / np.var((y * window))) - #make y gridded - x_model = np.arange(stancil[0], stancil[-1], self.dx) - y_gridded = np.copy(x_model) * np.nan + # make y gridded + x_model = np.arange(stancil[0], stancil[-1], self.dx) + y_gridded = np.copy(x_model) * np.nan y_gridded[x_pos] = y - nan_mask =np.isnan(y_gridded) + nan_mask = np.isnan(y_gridded) err = ERR[x_mask] if ERR is not None else None - #print(x.shape, y.shape, self.k, self.LS) - LS_PSD, LS_object = calc_spectrum_LS( x, y, self.k, err=err, LS= None, dk =self.dk) - - y_model = reconstruct_data_from_LS(LS_object, x_model, self.k) + # print(x.shape, y.shape, self.k, self.LS) + LS_PSD, LS_object = calc_spectrum_LS( + x, y, self.k, err=err, LS=None, dk=self.dk + ) + y_model = reconstruct_data_from_LS(LS_object, x_model, self.k) # print(stancil[-1], x_model[-1]) # print(stancil[0], x_model[0]) # print(np.fft.rfft(y_model).size , kk.size) # print(x_model.size, y_gridded.size, y_model.size) # print('--') - P = conserve_variance(np.fft.rfft(y_model), self.k, y_gridded, nan_mask = nan_mask ) + P = conserve_variance( + np.fft.rfft(y_model), self.k, y_gridded, nan_mask=nan_mask + ) P.set_parameters() - #P.test_ojective_func(P.tanh_weight_function, plot_flag=False) + # P.test_ojective_func(P.tanh_weight_function, plot_flag=False) fitter = P.optimize(max_nfev=max_nfev) return stancil[1], 
LS_PSD, P.best_guess_Z(), fitter.params, x.size # % derive L2 stancil - self.stancil_iter = create_chunk_boundaries_unit_lengths(L, self.xlims, ov= self.ov, iter_flag=True) - #stancil_iter = create_chunk_boundaries_unit_lengths(L, ( np.round(X.min()), X.max() ), ov= self.ov, iter_flag=True) + self.stancil_iter = create_chunk_boundaries_unit_lengths( + L, self.xlims, ov=self.ov, iter_flag=True + ) + # stancil_iter = create_chunk_boundaries_unit_lengths(L, ( np.round(X.min()), X.max() ), ov= self.ov, iter_flag=True) # apply func to all stancils # Spec_returns=list() @@ -767,34 +827,38 @@ def calc_spectrum_and_field_apply(stancil): map_func = map if map_func is None else map_func print(map_func) - Spec_returns = list(map_func( calc_spectrum_and_field_apply, copy.copy(self.stancil_iter) )) + Spec_returns = list( + map_func(calc_spectrum_and_field_apply, copy.copy(self.stancil_iter)) + ) # # linear version - #Spec_returns = list(map( calc_spectrum_and_field_apply, copy.copy(self.stancil_iter) )) + # Spec_returns = list(map( calc_spectrum_and_field_apply, copy.copy(self.stancil_iter) )) # unpack resutls of the mapping: - D_specs = dict() - Y_model = dict() - Pars = dict() - N_per_stancil = list() + D_specs = dict() + Y_model = dict() + Pars = dict() + N_per_stancil = list() for I in Spec_returns: - #print(I[1].shape, I[2].shape) - D_specs[I[0]] = I[1] - Y_model[I[0]] = I[2] - Pars[I[0]] = I[3] + # print(I[1].shape, I[2].shape) + D_specs[I[0]] = I[1] + Y_model[I[0]] = I[2] + Pars[I[0]] = I[3] N_per_stancil.append(I[4]) self.N_per_stancil = N_per_stancil chunk_positions = np.array(list(D_specs.keys())) - self.N_stancils = len(chunk_positions) # number of spectral realizatiobs + self.N_stancils = len(chunk_positions) # number of spectral realizatiobs # repack data, create xarray # 1st LS spectal estimates - self.spec_name = 'LS_spectal_power' if name is None else name - G_LS_power =dict() - for xi,I in D_specs.items(): - G_LS_power[xi] = xr.DataArray(I, dims=['k'], coords={'k': self.k, 'x': xi } , name=self.spec_name) + self.spec_name = "LS_spectal_power" if name is None else name + G_LS_power = dict() + for xi, I in D_specs.items(): + G_LS_power[xi] = xr.DataArray( + I, dims=["k"], coords={"k": self.k, "x": xi}, name=self.spec_name + ) - G_LS_power = xr.concat(G_LS_power.values(), dim='x').T#.to_dataset() + G_LS_power = xr.concat(G_LS_power.values(), dim="x").T # .to_dataset() # 2nd Y_model # G_model =dict() @@ -816,52 +880,52 @@ def calc_spectrum_and_field_apply(stancil): # self.G_model.attrs['Lpoints'] = self.Lpoints # self.G_model.coords['N_per_stancil'] = ( ('x'), N_per_stancil) - - #2nd FFT(Y_model) - G_fft =dict() - Y_model_k_fft = np.fft.rfftfreq( int(self.Lpoints), d=self.dx) - for xi,I in Y_model.items(): + # 2nd FFT(Y_model) + G_fft = dict() + Y_model_k_fft = np.fft.rfftfreq(int(self.Lpoints), d=self.dx) + for xi, I in Y_model.items(): if I.size < Y_model_k_fft.size: I = np.insert(I, -1, I[-1]) - G_fft[xi] = xr.DataArray(I, dims=['k'], coords={'k': Y_model_k_fft, 'x': xi } , name='Y_model_hat') + G_fft[xi] = xr.DataArray( + I, dims=["k"], coords={"k": Y_model_k_fft, "x": xi}, name="Y_model_hat" + ) - G_fft = xr.concat(G_fft.values(), dim='x').T#.to_dataset() + G_fft = xr.concat(G_fft.values(), dim="x").T # .to_dataset() # generate power spec as well self.G = Z_to_power(G_fft, self.dk, self.Lpoints) - self.G.name = 'spectral_power_optm' - + self.G.name = "spectral_power_optm" # merge both datasets self.GG = xr.merge([G_LS_power, G_fft, self.G]) - self.GG.attrs['ov'] = self.ov - 
self.GG.attrs['L'] = self.L - self.GG.attrs['Lpoints'] = self.Lpoints - self.GG.coords['N_per_stancil'] = ( ('x'), N_per_stancil) - - self.GG.expand_dims(dim='eta') - self.GG.coords['eta'] = ( ('eta'), np.arange(0, self.L + self.dx, self.dx) - self.L/2 ) - self.GG['win'] = ( ('eta'), np.insert(self.win, -1, self.win[-1])) + self.GG.attrs["ov"] = self.ov + self.GG.attrs["L"] = self.L + self.GG.attrs["Lpoints"] = self.Lpoints + self.GG.coords["N_per_stancil"] = (("x"), N_per_stancil) + + self.GG.expand_dims(dim="eta") + self.GG.coords["eta"] = ( + ("eta"), + np.arange(0, self.L + self.dx, self.dx) - self.L / 2, + ) + self.GG["win"] = (("eta"), np.insert(self.win, -1, self.win[-1])) # create dataframe with fitted parameters - PP2= dict() + PP2 = dict() for k, I in Pars.items(): if I is not np.nan: - PP2[k] =I + PP2[k] = I keys = PP2[next(iter(PP2))].keys() - params_dataframe = pd.DataFrame(index =keys) + params_dataframe = pd.DataFrame(index=keys) - for k,I in PP2.items(): + for k, I in PP2.items(): I.values() params_dataframe[k] = list(I.valuesdict().values()) return self.GG, params_dataframe - - - def calc_var(self): Gmean = np.nanmean(self.G, 1) @@ -872,61 +936,67 @@ def calc_var(self): # def parceval(self, add_attrs=True ): # return wavenumber_spectrogram.parceval(self, add_attrs= add_attrs ) - def parceval(self, add_attrs=True, weight_data=False ): + def parceval(self, add_attrs=True, weight_data=False): "test Parceval theorem" import copy + DATA = self.data L = self.L X = self.x # derive mean variances of stancils - #stancil_iter = create_chunk_boundaries_unit_lengths(L, self.xlims, ov= self.ov ) + # stancil_iter = create_chunk_boundaries_unit_lengths(L, self.xlims, ov= self.ov ) def get_stancil_var_apply(stancil): from scipy.signal import detrend + "returns the variance of yy for stancil" - x_mask= (stancil[0] < X) & (X <= stancil[-1]) + x_mask = (stancil[0] < X) & (X <= stancil[-1]) idata = DATA[x_mask] if len(idata) < 1: return stancil[1], np.nan, len(idata) idata = detrend(idata) # weight data - x_pos = (np.round( (X[x_mask] - stancil[0])/ 10.0 , 0) ).astype('int') + x_pos = (np.round((X[x_mask] - stancil[0]) / 10.0, 0)).astype("int") if weight_data: window = self.win[x_pos] - idata = idata * window * np.sqrt( np.var(idata) / np.var(( idata* window) ) ) + idata = ( + idata * window * np.sqrt(np.var(idata) / np.var((idata * window))) + ) return stancil[1], idata.var(), len(idata) - D_vars = list(map(get_stancil_var_apply, copy.copy(self.stancil_iter) )) + D_vars = list(map(get_stancil_var_apply, copy.copy(self.stancil_iter))) - stancil_vars, Nphotons =list(), 0 + stancil_vars, Nphotons = list(), 0 for I in D_vars: - stancil_vars.append(I[1] * I[2]) - Nphotons += I[2] + stancil_vars.append(I[1] * I[2]) + Nphotons += I[2] - stancil_weighted_variance = np.nansum(np.array(stancil_vars))/Nphotons + stancil_weighted_variance = np.nansum(np.array(stancil_vars)) / Nphotons - print('Parcevals Theorem:') - print('variance of timeseries: ', DATA.var()) - print('mean variance of stancils: ', stancil_weighted_variance ) - #print('variance of weighted timeseries: ',self.phi.var() ) - #self.calc_var(self) - print('variance of the optimzed windowed LS Spectrum: ', self.calc_var()) + print("Parcevals Theorem:") + print("variance of timeseries: ", DATA.var()) + print("mean variance of stancils: ", stancil_weighted_variance) + # print('variance of weighted timeseries: ',self.phi.var() ) + # self.calc_var(self) + print("variance of the optimzed windowed LS Spectrum: ", self.calc_var()) if add_attrs: - 
self.G.attrs['variance_unweighted_data'] = DATA.var() - self.G.attrs['mean_variance_stancils'] = np.nanmean(np.array(stancil_vars) ) - self.G.attrs['mean_variance_LS_pwelch_spectrum'] = self.calc_var() - - - def mean_spectral_error(self, mask=None, confidence = 0.95 ): - return wavenumber_spectrogram.mean_spectral_error(self, mask=mask, confidence= confidence ) + self.G.attrs["variance_unweighted_data"] = DATA.var() + self.G.attrs["mean_variance_stancils"] = np.nanmean(np.array(stancil_vars)) + self.G.attrs["mean_variance_LS_pwelch_spectrum"] = self.calc_var() + def mean_spectral_error(self, mask=None, confidence=0.95): + return wavenumber_spectrogram.mean_spectral_error( + self, mask=mask, confidence=confidence + ) # class for getting standard Pwelch spectrum. old version, deprechiate class wavenumber_pwelch: - def __init__(self,data, x, L, ov=None, window=None, save_chunks=False, plot_chunks=False): + def __init__( + self, data, x, L, ov=None, window=None, save_chunks=False, plot_chunks=False + ): """ returns a wavenumber spectrum using the pwelch method @@ -948,93 +1018,90 @@ def __init__(self,data, x, L, ov=None, window=None, save_chunks=False, plot_chun """ from scipy import signal + self.data = data # field to be analyzed + self.dx = np.diff(x)[0] # sampling interval + self.save_chunks = save_chunks + dsize = data.size - self.data = data # field to be analyzed - self.dx = np.diff(x)[0] # sampling interval - self.save_chunks= save_chunks - dsize = data.size - - ov=int(np.round(L/2)) if ov is None else ov + ov = int(np.round(L / 2)) if ov is None else ov self.n = L if window is None: - win=np.hanning(self.n) + win = np.hanning(self.n) else: - win=window + win = window - factor=np.sqrt(self.n/(win**2).sum()) - win*=factor + factor = np.sqrt(self.n / (win**2).sum()) + win *= factor # test if n is even - if (self.n%2): + if self.n % 2: self.neven = False else: self.neven = True - #calculate freq - self.k = self.calc_freq() - #del(self.df) + # calculate freq + self.k = self.calc_freq() + # del(self.df) - #print(data.size, L, ov, int(L-ov) ) - nbin=int(np.floor(dsize/(L-ov))) - #print(nbin) + # print(data.size, L, ov, int(L-ov) ) + nbin = int(np.floor(dsize / (L - ov))) + # print(nbin) if save_chunks: - chunks=np.empty([int(nbin),int(L)]) - - self.specs=np.empty([int(nbin),self.k.size]) - #print(chunks.shape) - #result_array = np.empty((0, 100)) - #if plot_chunks: - #M.figure_axis_xy() - last_k=0 - k=0 - #print('iter range', np.arange(0,data.size,int(L-ov))) - for i in np.arange(0,dsize-int(L-ov)+1,int(L-ov)): - - if (plot_chunks) and (i >= dsize-6*int(L-ov)): + chunks = np.empty([int(nbin), int(L)]) + + self.specs = np.empty([int(nbin), self.k.size]) + # print(chunks.shape) + # result_array = np.empty((0, 100)) + # if plot_chunks: + # M.figure_axis_xy() + last_k = 0 + k = 0 + # print('iter range', np.arange(0,data.size,int(L-ov))) + for i in np.arange(0, dsize - int(L - ov) + 1, int(L - ov)): + + if (plot_chunks) and (i >= dsize - 6 * int(L - ov)): M.figure_axis_xy() - self.phi=data[int(i):int(i+L)] + self.phi = data[int(i) : int(i + L)] - #self.ii=np.append(self.ii,[i,i+L]) - #print(self.phi.max()) + # self.ii=np.append(self.ii,[i,i+L]) + # print(self.phi.max()) - #print(self.phi.mean()) - #print(self.phi.shape) - #print('i',int(i), int(i+L)) - #print(chunk.size, l) - if int(i+L) <= data.size-1: + # print(self.phi.mean()) + # print(self.phi.shape) + # print('i',int(i), int(i+L)) + # print(chunk.size, l) + if int(i + L) <= data.size - 1: if save_chunks: - chunks[k,:]=self.phi - + chunks[k, 
:] = self.phi - self.phi=signal.detrend(self.phi)*win + self.phi = signal.detrend(self.phi) * win if plot_chunks: - #MT.stats_format(self.phi, 'chunk '+str(i)) + # MT.stats_format(self.phi, 'chunk '+str(i)) plt.plot(self.phi) - self.specs[k,:]= self.calc_spectrum() - last_k=k - last_used_TS=int(i+L) - #if plot_chunks: + self.specs[k, :] = self.calc_spectrum() + last_k = k + last_used_TS = int(i + L) + # if plot_chunks: # MT.stats_format(self.spec, 'spec '+str(i)) else: if plot_chunks: - print('end of TS is reached') - print('last spec No: '+str(last_k)) - print('spec container: '+str(specs.shape)) - print('last used Timestep: '+str(last_used_TS)) - print('length of TS '+ str(dsize) +'ms') - - k+=1 + print("end of TS is reached") + print("last spec No: " + str(last_k)) + print("spec container: " + str(specs.shape)) + print("last used Timestep: " + str(last_used_TS)) + print("length of TS " + str(dsize) + "ms") + k += 1 if save_chunks: - self.chunks=chunks - #del(chunks) + self.chunks = chunks + # del(chunks) - self.spec_est=self.specs.mean(axis=0) + self.spec_est = self.specs.mean(axis=0) # if prewhite is None: # self.specs=specs[:last_k,:] # self.spec_est=self.specs.mean(axis=0) @@ -1045,69 +1112,74 @@ def __init__(self,data, x, L, ov=None, window=None, save_chunks=False, plot_chun # self.specs=specs[:last_k,:]*(2*np.pi*self.f)**2 # self.spec_est=self.specs.mean(axis=0) - - self.n_spec,_=self.specs.shape + self.n_spec, _ = self.specs.shape self.calc_var() - #self.phi=self.data - #self.phi*=win*np.sqrt(factor) + # self.phi=self.data + # self.phi*=win*np.sqrt(factor) def calc_freq(self): - """ calculate array of spectral variable (frequency or - wavenumber) in cycles per unit of L """ + """calculate array of spectral variable (frequency or + wavenumber) in cycles per unit of L""" - self.df = 1./((self.n-1)*self.dx) + self.df = 1.0 / ((self.n - 1) * self.dx) if self.neven: - f = self.df*np.arange(self.n/2+1) + f = self.df * np.arange(self.n / 2 + 1) else: - f = self.df*np.arange( (self.n-1)/2. + 1 ) + f = self.df * np.arange((self.n - 1) / 2.0 + 1) return f def calc_spectrum(self): - """ compute the 1d spectrum of a field phi """ + """compute the 1d spectrum of a field phi""" self.phih = np.fft.rfft(self.phi) # the factor of 2 comes from the symmetry of the Fourier coeffs - spec = 2.*(self.phih*self.phih.conj()).real / self.df /self.n**2 + spec = 2.0 * (self.phih * self.phih.conj()).real / self.df / self.n**2 # the zeroth frequency should be counted only once - spec[0] = spec[0]/2. + spec[0] = spec[0] / 2.0 if self.neven: - spec[-1] = spec[-1]/2. 
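+            # note: same normalization as the module-level calc_spectrum_fft;
+            # like the zeroth bin, the Nyquist bin has no conjugate partner
+            # and must not be double counted.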
+ spec[-1] = spec[-1] / 2.0 return spec - def error(self, ci=0.95): - self.El, self.Eu =spec_error(self.spec_est,self.n_spec,ci=ci) + self.El, self.Eu = spec_error(self.spec_est, self.n_spec, ci=ci) def parceval(self): - print('Parcevals Theorem:') - print('variance of unweighted timeseries: ',self.data.var()) - print('mean variance of timeseries chunks: ',self.chunks.var(axis=1).mean() if self.save_chunks is True else 'data not saved') - #print('variance of weighted timeseries: ',self.phi.var() ) - #self.calc_var(self) - print('variance of the pwelch Spectrum: ',self.var) + print("Parcevals Theorem:") + print("variance of unweighted timeseries: ", self.data.var()) + print( + "mean variance of timeseries chunks: ", + ( + self.chunks.var(axis=1).mean() + if self.save_chunks is True + else "data not saved" + ), + ) + # print('variance of weighted timeseries: ',self.phi.var() ) + # self.calc_var(self) + print("variance of the pwelch Spectrum: ", self.var) def calc_var(self): - """ Compute total variance from spectrum """ - self.var = self.df* np.nanmean(self.specs[1:], 0).sum() # do not consider zeroth frequency - + """Compute total variance from spectrum""" + self.var = ( + self.df * np.nanmean(self.specs[1:], 0).sum() + ) # do not consider zeroth frequency # %% optimze spectral variance -class conserve_variance: - def __init__(self,Z, freq, data, nan_mask= None): - - """ - """ +class conserve_variance: + def __init__(self, Z, freq, data, nan_mask=None): + """ """ import lmfit as LM - self.LM =LM + + self.LM = LM self.data = data - self.Z = Z + self.Z = Z self.freq = freq self.nan_mask = nan_mask @@ -1115,59 +1187,79 @@ def set_parameters(self): params = self.LM.Parameters() - p_smothed = self.runningmean(np.abs(self.Z ), 20, tailcopy=True) + p_smothed = self.runningmean(np.abs(self.Z), 20, tailcopy=True) f_max = self.freq[p_smothed[~np.isnan(p_smothed)].argmax()] - - lambda_max = 9.81 * 5 **2/ (2* np.pi) - params.add('x_cutoff', 1/lambda_max , min=0, max=1, vary=False) - params.add('x_max_pos', f_max , min=f_max*0.75, max=f_max*5+0.001, vary=False) - params.add('LF_amp', 1 , min=0.5 , max=1.2, vary= True) - params.add('HF_amp', 0.5 , min=0 , max=1.5, vary= True) - params.add('sigma_g', 0.002 , min=0.001 , max=0.05, vary= False) - params.add('Gauss_amp', 0.5 , min=0.01 , max=2, vary= True) - + lambda_max = 9.81 * 5**2 / (2 * np.pi) + params.add("x_cutoff", 1 / lambda_max, min=0, max=1, vary=False) + params.add( + "x_max_pos", f_max, min=f_max * 0.75, max=f_max * 5 + 0.001, vary=False + ) + params.add("LF_amp", 1, min=0.5, max=1.2, vary=True) + params.add("HF_amp", 0.5, min=0, max=1.5, vary=True) + params.add("sigma_g", 0.002, min=0.001, max=0.05, vary=False) + params.add("Gauss_amp", 0.5, min=0.01, max=2, vary=True) self.params = params return params def test_ojective_func(self, weight_func, plot_flag=True): - self.objective_func(self.params, self.data, self.Z, weight_func, self.freq, self.nan_mask, plot_flag=plot_flag) - - def tanh_weight_function(self,ff, params): - return self.tanh_weight(ff, params['x_cutoff'].value, - params['x_max_pos'].value, - params['LF_amp'].value, - params['HF_amp'].value, - params['Gauss_amp'].value, - params['sigma_g'].value ) - - def tanh_weight(self,x, x_cutoff , x_max_pos, LF_amp, HF_amp, Gauss_amp, sigma_g): + self.objective_func( + self.params, + self.data, + self.Z, + weight_func, + self.freq, + self.nan_mask, + plot_flag=plot_flag, + ) + + def tanh_weight_function(self, ff, params): + return self.tanh_weight( + ff, + params["x_cutoff"].value, + 
params["x_max_pos"].value, + params["LF_amp"].value, + params["HF_amp"].value, + params["Gauss_amp"].value, + params["sigma_g"].value, + ) + + def tanh_weight(self, x, x_cutoff, x_max_pos, LF_amp, HF_amp, Gauss_amp, sigma_g): """ - zdgfsg + zdgfsg """ - HF_amp1 = (LF_amp-HF_amp) - decay = 0.5 - np.tanh( (x-x_cutoff)/sigma_g )/2 - y = decay * HF_amp1 + (1 - HF_amp1) - y = y- y[0] +LF_amp + HF_amp1 = LF_amp - HF_amp + decay = 0.5 - np.tanh((x - x_cutoff) / sigma_g) / 2 + y = decay * HF_amp1 + (1 - HF_amp1) + y = y - y[0] + LF_amp - def gaus(x, x_0, amp, sigma_g ): - return amp* np.exp(-0.5 * ( (x-x_0)/sigma_g)**2) + def gaus(x, x_0, amp, sigma_g): + return amp * np.exp(-0.5 * ((x - x_0) / sigma_g) ** 2) - y += gaus(x, x_max_pos, Gauss_amp, sigma_g ) + y += gaus(x, x_max_pos, Gauss_amp, sigma_g) - #y = y * LF_amp + # y = y * LF_amp return y + def objective_func( + self, + params, + data_x, + Z_results, + weight_func, + freq, + nan_mask=None, + plot_flag=False, + ): - def objective_func(self, params, data_x, Z_results, weight_func, freq, nan_mask = None, plot_flag=False): + alpha = 1e7 - alpha =1e7 def model_real_space(Z, weights, n=None): """ Both inputs must have the same length """ - return np.fft.irfft(Z*weights, n = n) + return np.fft.irfft(Z * weights, n=n) weights = weight_func(freq, params) @@ -1175,10 +1267,10 @@ def model_real_space(Z, weights, n=None): weights = np.insert(weights, -1, weights[-1]) if nan_mask is not None: - model = model_real_space(Z_results, weights, n= data_x.size)[~nan_mask] + model = model_real_space(Z_results, weights, n=data_x.size)[~nan_mask] dd = data_x[~nan_mask][:] else: - model = model_real_space(Z_results, weights, n= data_x.size)[:] + model = model_real_space(Z_results, weights, n=data_x.size)[:] dd = data_x[:] if model.size > dd.size: @@ -1190,102 +1282,156 @@ def model_real_space(Z, weights, n=None): import m_general_ph3 as M from matplotlib.gridspec import GridSpec import matplotlib.pyplot as plt - F= M.figure_axis_xy(10, 4.1 * 2.5, view_scale= 0.5, container = True) - gs = GridSpec(5,1, wspace=0.1, hspace=0.4)#figure=fig, - pos0,pos1,pos2 = gs[0:3, 0],gs[3, 0],gs[4, 0]#,gs[3, 0] + F = M.figure_axis_xy(10, 4.1 * 2.5, view_scale=0.5, container=True) + + gs = GridSpec(5, 1, wspace=0.1, hspace=0.4) # figure=fig, + pos0, pos1, pos2 = gs[0:3, 0], gs[3, 0], gs[4, 0] # ,gs[3, 0] ax1 = F.fig.add_subplot(pos0) - plt.title('Stacked Timeseries', loc='left') + plt.title("Stacked Timeseries", loc="left") - chunk_l= 400 - chunk_iter = create_chunk_boundaries(chunk_l, data_x.size, ov=0, iter_flag = True) + chunk_l = 400 + chunk_iter = create_chunk_boundaries( + chunk_l, data_x.size, ov=0, iter_flag=True + ) - ofsett0= 6 + ofsett0 = 6 ofsett = np.copy(ofsett0) for chi in chunk_iter: - v1= np.round(np.nanvar(dd), 4) - plt.plot(ofsett+ data_x[chi[0]:chi[-1]] , linewidth=3, alpha=0.5 , c='black', label=' org. data (var:'+str(v1)+')') - - v1= np.round(model_real_space(Z_results, weights*0 +1)[~nan_mask[1:]].var(), 4) - plt.plot(ofsett + model_real_space(Z_results, weights*0 +1)[chi[0]:chi[-1]] ,linewidth= 0.8, c='red', label='LS model init (var:'+str(v1)+')') - - v1= np.round(model.var(), 4) - plt.plot(ofsett + model_real_space(Z_results, weights)[chi[0]:chi[-1]],linewidth= 0.8, c='blue', label='LS model weighted (var:'+str(v1)+')') + v1 = np.round(np.nanvar(dd), 4) + plt.plot( + ofsett + data_x[chi[0] : chi[-1]], + linewidth=3, + alpha=0.5, + c="black", + label=" org. 
data (var:" + str(v1) + ")", + ) + + v1 = np.round( + model_real_space(Z_results, weights * 0 + 1)[~nan_mask[1:]].var(), 4 + ) + plt.plot( + ofsett + + model_real_space(Z_results, weights * 0 + 1)[chi[0] : chi[-1]], + linewidth=0.8, + c="red", + label="LS model init (var:" + str(v1) + ")", + ) + + v1 = np.round(model.var(), 4) + plt.plot( + ofsett + model_real_space(Z_results, weights)[chi[0] : chi[-1]], + linewidth=0.8, + c="blue", + label="LS model weighted (var:" + str(v1) + ")", + ) if ofsett == ofsett0: plt.legend() ofsett -= 1 - plt.ylim(ofsett, ofsett0+1) - plt.xlim(0, chunk_l*2) - + plt.ylim(ofsett, ofsett0 + 1) + plt.xlim(0, chunk_l * 2) ax2 = F.fig.add_subplot(pos1) - #ax2 = plt.subplot(3, 1, 2) - plt.title('Amplitude Weight Function', loc='left') - plt.plot(weights , c='black') - ax2.set_xscale('log') + # ax2 = plt.subplot(3, 1, 2) + plt.title("Amplitude Weight Function", loc="left") + plt.plot(weights, c="black") + ax2.set_xscale("log") ax3 = F.fig.add_subplot(pos2) - plt.title('Initial and tuned |Z|', loc='left') + plt.title("Initial and tuned |Z|", loc="left") - #ax3 = plt.subplot(3, 1, 3) + # ax3 = plt.subplot(3, 1, 3) # v2_fft= np.fft.rfft(data_x) # v2 = np.round( (2.*(v2_fft*v2_fft.conj()).real /data_x.size**2 ).sum(), 4) # plt.plot(abs(v2_fft) , linewidth=2, alpha=0.5 , c='black', label='org data (var: '+str(v2) +')') - v2 = np.round( (4.*(Z_results*Z_results.conj()).real /data_x.size**2 ).sum(), 4) - plt.plot(abs(Z_results), linewidth= 0.8, c='red', label='Z (var: '+str(v2) +')') - plt.plot(M.runningmean(abs(Z_results) , 20, tailcopy=True), linewidth= 1.5, c='red', zorder=12) - - Z2= Z_results* weights - v2 = np.round( (4.*(Z2*Z2.conj()).real /data_x.size**2 ).sum(), 4) - plt.plot(abs(Z2), linewidth= 0.8, c='blue', label='weighted Z(var: '+str(v2) +')') - plt.plot(M.runningmean(abs(Z2) , 20, tailcopy=True), linewidth= 1.5, c='blue', zorder=12) + v2 = np.round( + (4.0 * (Z_results * Z_results.conj()).real / data_x.size**2).sum(), 4 + ) + plt.plot( + abs(Z_results), linewidth=0.8, c="red", label="Z (var: " + str(v2) + ")" + ) + plt.plot( + M.runningmean(abs(Z_results), 20, tailcopy=True), + linewidth=1.5, + c="red", + zorder=12, + ) + + Z2 = Z_results * weights + v2 = np.round((4.0 * (Z2 * Z2.conj()).real / data_x.size**2).sum(), 4) + plt.plot( + abs(Z2), + linewidth=0.8, + c="blue", + label="weighted Z(var: " + str(v2) + ")", + ) + plt.plot( + M.runningmean(abs(Z2), 20, tailcopy=True), + linewidth=1.5, + c="blue", + zorder=12, + ) plt.legend() - plt.ylim( np.percentile(abs(Z_results), 0.5), abs(Z_results).max()*1.3 ) - plt.xlabel('wavenumber k') - ax3.set_xscale('log') - ax3.set_yscale('log') - - fitting_cost =( abs(dd - model) / dd.std() )**2 - variance_cost =( abs(dd.var() - model.var()) / dd.std() ) **2 + plt.ylim(np.percentile(abs(Z_results), 0.5), abs(Z_results).max() * 1.3) + plt.xlabel("wavenumber k") + ax3.set_xscale("log") + ax3.set_yscale("log") - return fitting_cost.sum() , alpha* variance_cost + fitting_cost = (abs(dd - model) / dd.std()) ** 2 + variance_cost = (abs(dd.var() - model.var()) / dd.std()) ** 2 - def optimize(self, fitting_args= None , method='dual_annealing', max_nfev=None): + return fitting_cost.sum(), alpha * variance_cost + def optimize(self, fitting_args=None, method="dual_annealing", max_nfev=None): if fitting_args is None: fitting_args = (self.data, self.Z, self.tanh_weight_function, self.freq) self.weight_func = fitting_args[2] - self.fitter = self.LM.minimize(self.objective_func, self.params, args=fitting_args, 
kws={'nan_mask':self.nan_mask} , method=method, max_nfev=max_nfev) + self.fitter = self.LM.minimize( + self.objective_func, + self.params, + args=fitting_args, + kws={"nan_mask": self.nan_mask}, + method=method, + max_nfev=max_nfev, + ) return self.fitter def plot_result(self): - self.objective_func(self.fitter.params, self.data, self.Z, self.weight_func, self.freq, self.nan_mask, plot_flag=True) + self.objective_func( + self.fitter.params, + self.data, + self.Z, + self.weight_func, + self.freq, + self.nan_mask, + plot_flag=True, + ) def best_guess_Z(self): return self.Z * self.weight_func(self.freq, self.fitter.params) def runningmean(self, var, m, tailcopy=False): - m=int(m) - s =var.shape - if s[0] <= 2*m: - print('0 Dimension is smaller then averaging length') + m = int(m) + s = var.shape + if s[0] <= 2 * m: + print("0 Dimension is smaller then averaging length") return - rr=np.asarray(var)*np.nan - #print(type(rr)) - var_range=np.arange(m,int(s[0])-m-1,1) - for i in var_range[np.isfinite(var[m:int(s[0])-m-1])]: - #rm.append(var[i-m:i+m].mean()) - rr[int(i)]=np.nanmean(var[i-m:i+m]) + rr = np.asarray(var) * np.nan + # print(type(rr)) + var_range = np.arange(m, int(s[0]) - m - 1, 1) + for i in var_range[np.isfinite(var[m : int(s[0]) - m - 1])]: + # rm.append(var[i-m:i+m].mean()) + rr[int(i)] = np.nanmean(var[i - m : i + m]) if tailcopy: - rr[0:m]=rr[m+1] - rr[-m-1:-1]=rr[-m-2] + rr[0:m] = rr[m + 1] + rr[-m - 1 : -1] = rr[-m - 2] return rr diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/spicke_remover.py b/src/icesat2_tracks/ICEsat2_SI_tools/spicke_remover.py index deb19bb7..1964383e 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/spicke_remover.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/spicke_remover.py @@ -1,7 +1,7 @@ - import numpy as np -def spicke_remover(data, nstd=20.0, spreed=500.0, max_loops=10.0 , verbose=False): + +def spicke_remover(data, nstd=20.0, spreed=500.0, max_loops=10.0, verbose=False): """ This function removes spickes from timeseries based on its std and maximum values inputs: @@ -10,81 +10,77 @@ def spicke_remover(data, nstd=20.0, spreed=500.0, max_loops=10.0 , verbose=False spreed width of data between wich will be linear interpolated around the spike. 
width is in units of data points (dt) max_loops number of maximum possible loobs until the spike removal is stopped, even it the first creteria is not true """ - datastd=np.nanstd(data) - data2=np.copy(data) + datastd = np.nanstd(data) + data2 = np.copy(data) - peak_remove=True - looper_count=0 - act_flag=False + peak_remove = True + looper_count = 0 + act_flag = False while peak_remove is True: - if nstd* datastd < np.nanmax(np.abs(data2)): - act_flag=True + if nstd * datastd < np.nanmax(np.abs(data2)): + act_flag = True if verbose: - print('true: '+ str(nstd* datastd) +' < '+str( np.nanmax(np.abs(data)) ) ) - data2=spickes_to_mean(data2, nloop=0, spreed=spreed, gaussian=False) - looper_count+=1 + print( + "true: " + + str(nstd * datastd) + + " < " + + str(np.nanmax(np.abs(data))) + ) + data2 = spickes_to_mean(data2, nloop=0, spreed=spreed, gaussian=False) + looper_count += 1 else: if verbose: - print('False: '+ str(nstd* datastd) +' > '+str( np.nanmax(np.abs(data)) ) ) - peak_remove=False + print( + "False: " + + str(nstd * datastd) + + " > " + + str(np.nanmax(np.abs(data))) + ) + peak_remove = False if looper_count > max_loops: - peak_remove=False + peak_remove = False if verbose: - print('stoped by max#') - + print("stoped by max#") if verbose: import matplotlib.pyplot as plt - plt.plot(data, 'r') - plt.plot(data2, 'b') - return data2 , act_flag + plt.plot(data, "r") + plt.plot(data2, "b") + + return data2, act_flag def spickes_to_mean(ts, nloop=None, spreed=1, gaussian=True): from scipy import signal - nloop=0 if nloop is None else nloop - i=0 - tsmean=ts.mean() - b=2*spreed - gaus=signal.gaussian(b, std=b/10) + nloop = 0 if nloop is None else nloop + i = 0 + tsmean = ts.mean() + b = 2 * spreed + gaus = signal.gaussian(b, std=b / 10) while i <= nloop: - #print(i) - #ts.max() - #print(np.where(ts == ts.max())) - tsabs=np.abs(ts) - tmax=np.nanmax(tsabs) - #print(tsabs, tmax) - pa=np.where(tsabs == tmax)[0][0] + tsabs = np.abs(ts) + tmax = np.nanmax(tsabs) + pa = np.where(tsabs == tmax)[0][0] if gaussian: - tsm=np.mean([ts[pa-spreed],ts[pa+spreed]]) #ts[pa-spreed:pa+spreed] - #print(ts[pa-spreed:pa+spreed].shape) - #print((gaus*(tmax-tsm)).shape) - #print(np.shape(gaus*(tmax-tsm))) - le=int(pa-spreed) - ue=int(pa+spreed) + tsm = np.mean([ts[pa - spreed], ts[pa + spreed]]) + le = int(pa - spreed) + ue = int(pa + spreed) - ts[le:ue]=ts[le:ue]-gaus*(tmax-tsm) + ts[le:ue] = ts[le:ue] - gaus * (tmax - tsm) else: - #tsm=np.mean([ts[pa-spreed],ts[pa+spreed]]) #ts[pa-spreed:pa+spreed] - #print(ts[pa-spreed:pa+spreed].shape) - #print((gaus*(tmax-tsm)).shape) - #print(np.shape(gaus*(tmax-tsm))) - #print(len(ts)) - #print(pa+spreed) - if pa+spreed > len(ts): - le= int(pa-spreed) - ts[le:-1]=np.linspace(ts[le],ts[-1],len(ts[le:-1])) + if pa + spreed > len(ts): + le = int(pa - spreed) + ts[le:-1] = np.linspace(ts[le], ts[-1], len(ts[le:-1])) else: - le=int(pa-spreed) - ue=int(pa+spreed) - #print(le, ue) - ts[ le : ue ]=np.linspace(ts[le],ts[ue],len(ts[le:ue])) + le = int(pa - spreed) + ue = int(pa + spreed) + ts[le:ue] = np.linspace(ts[le], ts[ue], len(ts[le:ue])) - i=i+1 + i = i + 1 return ts diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/wave_tools.py b/src/icesat2_tracks/ICEsat2_SI_tools/wave_tools.py index 62732c16..09288f95 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/wave_tools.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/wave_tools.py @@ -1,17 +1,17 @@ -def to_vec(amp, angle, deg = True): +def to_vec(amp, angle, deg=True): "from anlge deg to vect" import numpy as np if deg: - u, v = amp 
* np.cos(angle * np.pi/180), amp * np.sin(angle * np.pi/180) + u, v = amp * np.cos(angle * np.pi / 180), amp * np.sin(angle * np.pi / 180) else: - u, v = amp * np.cos(angle ), amp * np.sin(angle ) + u, v = amp * np.cos(angle), amp * np.sin(angle) - return u,v + return u, v -def to_deg(u,v, deg = True): +def to_deg(u, v, deg=True): """ from vect to angle, amp angle is -180 to 180 @@ -20,26 +20,22 @@ def to_deg(u,v, deg = True): import numpy as np amp = np.sqrt(u**2 + v**2) - angle = np.arctan2(v, u)# + 2 * np.pi - - # nan_mask = np.isnan(angle) - # angle= np.where(angle > np.pi, angle - 2 * np.pi , angle) - # angle= np.where(angle <= - np.pi, angle + 2 * np.pi , angle) - # angle= np.where(nan_mask, np.nan , angle) + angle = np.arctan2(v, u) if deg: - angle = angle * 180/np.pi + angle = angle * 180 / np.pi return amp, angle -def get_ave_amp_angle(amp, angle, deg = True): + +def get_ave_amp_angle(amp, angle, deg=True): import numpy as np - u,v = to_vec(amp, angle , deg= deg) + + u, v = to_vec(amp, angle, deg=deg) # average angle in vector space - #print(u, v) - _ , ave_deg = to_deg( np.nanmean(u) , np.nanmean(v), deg = deg) - _ , std_deg = to_deg( np.nanstd(u) , np.nanstd(v) , deg = deg) + _, ave_deg = to_deg(np.nanmean(u), np.nanmean(v), deg=deg) + _, std_deg = to_deg(np.nanstd(u), np.nanstd(v), deg=deg) - #average amp in angle space + # average amp in angle space ave_amp = np.nanmean(amp) std_amp = np.nanstd(amp) From 45dca27d200e13d31bb7d097c79799dba3382a1d Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Mon, 26 Feb 2024 07:53:58 -0500 Subject: [PATCH 02/22] applying review comments --- src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py | 3 +-- src/icesat2_tracks/ICEsat2_SI_tools/iotools.py | 2 +- src/icesat2_tracks/ICEsat2_SI_tools/lanczos.py | 10 +++++----- src/icesat2_tracks/ICEsat2_SI_tools/spicke_remover.py | 2 +- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py b/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py index e3e28b53..44ace1ef 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py @@ -19,7 +19,6 @@ def get_wavenumbers_polar(amp, angle_rad): returns: wavenumber k,l """ - import numpy as np k0 = amp * np.cos(angle_rad) l0 = amp * np.sin(angle_rad) @@ -105,7 +104,7 @@ def likelyhood_func( pars, x, y, z, z_error=None, test_flag=False, prior=None, prior_weight=2 ): """ - ---not well testet --- + ---not well tested --- likelyhood function using log(p) """ diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py index 3fb63f35..27e2653b 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py @@ -183,7 +183,7 @@ def set_ATL03_trackname(self): def set_ATL10_trackname(self): block1 = (self.YY, self.MM, self.DD) block1b = (self.HH, self.MN, self.SS) - block2 = (self.TRK, self.CYC, "01") + block2 = (self.TRK, self.CYC, "01") # granule is always '01' for ATL10 if self.RL is "": raise ValueError("RL not set") if self.VRS is "": diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/lanczos.py b/src/icesat2_tracks/ICEsat2_SI_tools/lanczos.py index f8ade32a..ad5bd1d0 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/lanczos.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/lanczos.py @@ -1,5 +1,5 @@ import numpy as np - +import scipy.signal as signal def lanczos_1d(width, dx, a=2): """ @@ -53,12 +53,12 @@ def lanczos_filter_1d(x, data, width, 
a=2, mode="same", method="direct"): returns data_lp low-passed data, same size as before. """ - import scipy.signal as signal + dx = np.diff(x).mean() x, L = lanczos_1d(width, dx, a=a) - data_lp = signal.convolve(data, L, mode=mode, method=method) # * + data_lp = signal.convolve(data, L, mode=mode, method=method) return data_lp @@ -110,7 +110,7 @@ def lanczos_filter_2d(x, data, width, a=2, mode="same"): dx = abs(np.diff(x).mean()) x, L2d = lanczos_2d(width, dx, a=a) - data_lp = signal.convolve(data, L2d, mode=mode) # * + data_lp = signal.convolve(data, L2d, mode=mode) return data_lp @@ -147,6 +147,6 @@ def lanczos_filter_3d(x, data, width, a=2, mode="same"): dx = abs(np.diff(x).mean()) x, L3d = lanczos_3d(width, dx, a=a) - data_lp = signal.convolve(data, L3d, mode=mode) # * + data_lp = signal.convolve(data, L3d, mode=mode) return data_lp diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/spicke_remover.py b/src/icesat2_tracks/ICEsat2_SI_tools/spicke_remover.py index 1964383e..9ea22e47 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/spicke_remover.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/spicke_remover.py @@ -3,7 +3,7 @@ def spicke_remover(data, nstd=20.0, spreed=500.0, max_loops=10.0, verbose=False): """ - This function removes spickes from timeseries based on its std and maximum values + This function removes spikes from timeseries based on its std and maximum values inputs: data timeseries as1d arrays nstd number of standard deviations that have to be exceeded by the maximum data value From d2dcd3696b1da2eaf0aee2b05b2c4674c0b3c5ce Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Wed, 28 Feb 2024 15:40:19 -0500 Subject: [PATCH 03/22] applying suggested changed in review --- .../ICEsat2_SI_tools/angle_optimizer.py | 13 +--- .../ICEsat2_SI_tools/beam_stats.py | 21 +++--- .../ICEsat2_SI_tools/filter_regrid.py | 65 ++++++++----------- .../ICEsat2_SI_tools/generalized_FT.py | 2 +- .../ICEsat2_SI_tools/lanczos.py | 13 ++-- .../ICEsat2_SI_tools/read_ground_tracks.py | 6 +- .../sliderule_converter_tools.py | 38 ++++------- .../ICEsat2_SI_tools/spicke_remover.py | 5 +- .../ICEsat2_SI_tools/wave_tools.py | 7 +- .../analysis_db/B02_make_spectra_gFT.py | 2 +- 10 files changed, 67 insertions(+), 105 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py b/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py index 44ace1ef..dac43084 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py @@ -4,7 +4,8 @@ from numba import jit import numpy as np - +import lmfit as LM +import matplotlib.pyplot as plt numba_parallel = False @@ -28,8 +29,6 @@ def get_wavenumbers_polar(amp, angle_rad): def wavemodel(XX, YY, ks, ls, amps, group_phase=0): - import numpy as np - G = np.vstack( [ np.cos(np.outer(XX, ks) + np.outer(YY, ls)).T, @@ -174,8 +173,6 @@ class sample_with_mcmc: def __init__(self, params): - import lmfit as LM - self.LM = LM self.set_parameters(params) @@ -241,7 +238,6 @@ def sample( print("results at self.fitter") def plot_sample(self, **kargs): - import matplotlib.pyplot as plt chain = self.chain() nwalkers = self.nwalkers @@ -266,7 +262,6 @@ def optimize(self, fitting_args=None, method="dual_annealing", verbose=True): print("results at self.fitter_optimize") def plot_optimze(self, **kargs): - import matplotlib.pyplot as plt plt.plot( self.fitter_optimize.params["phase"].value, @@ -295,8 +290,6 @@ def brute(self, fitting_args=None, method="brute", verbose=True, N_grid=30): def plot_brute(self, 
clevel=np.linspace(-3.2, 3.2, 30), **kargs): - import matplotlib.pyplot as plt - fitter_brute = self.fitter_brute dd = ( @@ -355,8 +348,6 @@ def get_marginal_dist( y_hist = y_hist / var_dx / y_hist.sum() if plot_flag: - import matplotlib.pyplot as plt - plt.stairs(y_hist, bins) return y_hist, bins, bins_pos diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py b/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py index 90473ffa..8b13e3cf 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py @@ -5,6 +5,7 @@ import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec +import h5py def derive_beam_statistics(Gd, all_beams, Lmeter=10e3, dx=10): @@ -15,7 +16,6 @@ def derive_beam_statistics(Gd, all_beams, Lmeter=10e3, dx=10): Lemter is the length of the segment in meters for the statistics dx is the nominal resolution of the ATL06 data in meters """ - import h5py D = dict() for k in all_beams: @@ -82,13 +82,10 @@ def plot_beam_statistics(D, high_beams, low_beams, col_dict, track_name=None): col_dict is a dict with the colors for the beams track_name is the name of the track """ - import matplotlib.pyplot as plt if track_name is not None: plt.suptitle(track_name, fontsize=10) - import matplotlib.gridspec as gridspec - gs = gridspec.GridSpec(2, 3) # make 2 x 2 plot @@ -146,10 +143,12 @@ def plot_beam_statistics(D, high_beams, low_beams, col_dict, track_name=None): plt.title("low beams N", loc="left") plt.xlabel("along track distance (km)") + ax5 = plt.subplot(gs[0:2, 2]) + lat_shift = 0 for k in low_beams: Di = D[k] - plt.scatter( + ax5.scatter( Di["x"] / 1e3, Di["lat"] + lat_shift, s=np.exp(Di["N"] * 5), @@ -162,7 +161,7 @@ def plot_beam_statistics(D, high_beams, low_beams, col_dict, track_name=None): for k in high_beams: Di = D[k] - plt.scatter( + ax5.scatter( Di["x"] / 1e3, Di["lat"] + lat_shift, s=np.exp(Di["N"] * 5), @@ -173,11 +172,11 @@ def plot_beam_statistics(D, high_beams, low_beams, col_dict, track_name=None): ) lat_shift = lat_shift + 2 - plt.title("Density in space", loc="left") - plt.ylabel("Latitude (deg)") - plt.xlabel("along track distance (km)") - plt.legend() - plt.show() + ax5.title("Density in space", loc="left") + ax5.ylabel("Latitude (deg)") + ax5.xlabel("along track distance (km)") + ax5.legend() + ax5.show() ## plot track stats basics for sliderules ATL06 output diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/filter_regrid.py b/src/icesat2_tracks/ICEsat2_SI_tools/filter_regrid.py index 77766490..5799f0ca 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/filter_regrid.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/filter_regrid.py @@ -1,5 +1,10 @@ import numpy as np from numba import jit +import pandas as pd + + +def get_hemis(B, beams_list): + return "SH" if B[beams_list[0]]["lats"].iloc[0] < 0 else "NH" def correct_heights(T03, T03c, coord="delta_time"): @@ -58,10 +63,7 @@ def lat_min_max_extended(B, beams_list, accent=None): accent = regrid.track_type(B[beams_list[0]]) if accent is None else accent - if B[beams_list[0]]["lats"].iloc[0] < 0: - hemis = "SH" - else: - hemis = "NH" + hemis = get_hemis(B, beams_list) track_pos_start, track_pos_end = list(), list() for k in beams_list: @@ -83,9 +85,6 @@ def lat_min_max_extended(B, beams_list, accent=None): track_lat_end.append(ll["lats"]) track_lon_end.append(ll["lons"]) - if accent: - track_lon_start - if (hemis == "SH") & accent: return ( [max(track_lat_start), min(track_lat_end)], @@ -117,26 +116,22 @@ def lat_min_max_extended(B, beams_list, 
accent=None): def lat_min_max(B, beams_list, accent=None): """ defines common boundaries for beams_list in B - iunputs: - beams_list list of concidered beams + inputs: + beams_list list of considered beams B is dict of Pandas tables with beams - accent if track is accending or decending. if None, this will try to use the track time to get this + accent if track is ascending or descending. if None, this will try to use the track time to get this returns: - min_lat, max_lat, accent min and max latitudes of the beams, (True/False) True if the track is accending + min_lat, max_lat, accent min and max latitudes of the beams, (True/False) True if the track is ascending """ accent = track_type(B[beams_list[0]]) if accent is None else accent - if B[beams_list[0]]["lats"].iloc[0] < 0: - hemis = "SH" - else: - hemis = "NH" + hemis = get_hemis(B, beams_list) - track_lat_mins, track_lat_maxs = list(), list() - for k in beams_list: - track_lat_mins.append(B[k]["lats"].min()) - track_lat_maxs.append(B[k]["lats"].max()) + track_lat_mins, track_lat_maxs = zip( + *[(B[k]["lats"].min(), B[k]["lats"].max()) for k in beams_list] + ) if hemis == "SH": return max(track_lat_maxs), min(track_lat_mins), accent @@ -177,8 +172,6 @@ def derive_axis(TT, lat_lims=None): # set 1st dist to 0, not used if global limits are used if lat_lims is None: TT["dist"] = TT["dist"] - TT["dist"].min() - else: - TT["dist"] = TT["dist"] # - lat_lims[0] TT = TT.sort_values(by="dist") return TT @@ -194,12 +187,9 @@ def reduce_to_height_distance(TT, key, dx=1, lat_lims=None): returns: x1, y1 position, height """ - from scipy.interpolate import interp1d - if type(dx) is np.ndarray: - x1 = dx - else: - x1 = np.arange(0, TT["dist"].max(), dx) + x1 = dx if isinstance(dx, np.ndarray) else np.arange(0, TT["dist"].max(), dx) + y1 = np.interp(x1, TT["dist"], TT[key]) return x1, y1 @@ -238,7 +228,6 @@ def poly_correct(x, y, poly_order=7, plot_flag=False): ### regridding -# @jit(nopython=True) def get_mode(y, bins=np.arange(-5, 5, 0.1)): "returns modes of histogram of y defined by bins" hist, xbin = np.histogram(y, bins=bins) @@ -249,7 +238,6 @@ def get_mode(y, bins=np.arange(-5, 5, 0.1)): def weighted_mean(x_rel, y): "returns the gaussian weighted mean for stencil" - # @jit(nopython=True, parallel= False) def weight_fnk(x): "returns gaussian weight given the distance to the center x" return np.exp(-((x / 0.5) ** 2)) @@ -287,7 +275,6 @@ def get_stencil_stats_shift( the column 'key' is rename to key+'_median' """ - import pandas as pd stencil_1 = stencil_iter[:, ::2] stencil_1half = stencil_iter[:, 1::2] @@ -320,7 +307,7 @@ def calc_stencil_stats(group, key, key_x_coord, stancil_width, stancils): return Tweight.T T_sets = list() - stancil_set = stencil_1 + for stancil_set in [stencil_1, stencil_1half]: # select photons that are in bins @@ -424,12 +411,12 @@ def calc_stencil_stats(istencil): y = y_data[i_mask] Tmedian = T2[i_mask].median() - Tmedian[key + "_weighted_mean"] = weighted_mean(x_rel, y) - Tmedian[key + "_mode"] = get_mode(y) + Tmedian[f"{key}_weighted_mean"] = weighted_mean(x_rel, y) + Tmedian[f"{key}_mode"] = get_mode(y) Tmedian["N_photos"] = Nphoton - Tmedian[key + "_std"] = y.std() + Tmedian[f"{key}_std"] = y.std() - print(str(istencil[1]) + " s" + str(time.time() - tstart)) + print(f"{istencil[1]} s{time.time() - tstart}") return istencil[1], Tmedian # apply func to all stancils @@ -438,10 +425,10 @@ def calc_stencil_stats(istencil): DF_filt = pd.DataFrame.from_dict(D_filt, orient="index") DF_filt = DF_filt.rename( - columns={key: 
key + "_median", key_x_coord: "median_" + key_x_coord} + columns={key: f"{key}_median", key_x_coord: f"median_{key_x_coord}"} ) - DF_filt[key + "_median"][ - np.isnan(DF_filt[key + "_std"]) + DF_filt[f"{key}_median"][ + np.isnan(DF_filt[f"{key}_std"]) ] = np.nan # replace median calculation with nans DF_filt[key_x_coord] = DF_filt.index DF_filt = DF_filt.reset_index() @@ -452,10 +439,10 @@ def calc_stencil_stats(istencil): # derive bin means def bin_means(T2, dist_grid): dF_mean = pd.DataFrame(index=T2.columns) - ilim = int(len(dist_grid)) + ilim = len(dist_grid) N_i = list() - for i in np.arange(1, ilim - 1, 1): + for i in np.arange(1, ilim - 1): if i % 5000 == 0: print(i) i_mask = (T2["dist"] >= dist_grid[i - 1]) & (T2["dist"] < dist_grid[i + 1]) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py b/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py index abec0dbc..bcb174da 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py @@ -668,7 +668,7 @@ def get_stancil_var_apply(stancil): self.G.attrs["mean_variance_LS_pwelch_spectrum"] = self.calc_var() def mean_spectral_error(self, mask=None, confidence=0.95): - return spec.wavenumber_spectrogram.mean_spectral_error( + return spec.WavenumberSpectrogram.mean_spectral_error( self, mask=mask, confidence=confidence ) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/lanczos.py b/src/icesat2_tracks/ICEsat2_SI_tools/lanczos.py index ad5bd1d0..5a7771ee 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/lanczos.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/lanczos.py @@ -1,5 +1,8 @@ import numpy as np import scipy.signal as signal +import scipy.ndimage +import scipy.ndimage.filters as signal + def lanczos_1d(width, dx, a=2): """ @@ -53,12 +56,11 @@ def lanczos_filter_1d(x, data, width, a=2, mode="same", method="direct"): returns data_lp low-passed data, same size as before. """ - dx = np.diff(x).mean() x, L = lanczos_1d(width, dx, a=a) - data_lp = signal.convolve(data, L, mode=mode, method=method) + data_lp = signal.convolve(data, L, mode=mode, method=method) return data_lp @@ -80,7 +82,6 @@ def lanczos_filter_1d_wrapping(x, data, width, a=2, mode="wrap"): returns data_lp low-passed data, same size as before. """ - import scipy.ndimage dx = np.diff(x).mean() x, L = lanczos_1d(width, dx, a=a) @@ -105,12 +106,11 @@ def lanczos_filter_2d(x, data, width, a=2, mode="same"): returns data_lp low-passed data, same size as before. """ - import scipy.ndimage.filters as signal dx = abs(np.diff(x).mean()) x, L2d = lanczos_2d(width, dx, a=a) - data_lp = signal.convolve(data, L2d, mode=mode) + data_lp = signal.convolve(data, L2d, mode=mode) return data_lp @@ -142,11 +142,10 @@ def lanczos_filter_3d(x, data, width, a=2, mode="same"): returns data_lp low-passed data, same size as before. 
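    A minimal usage sketch (added for illustration only; not part of the
    patched module, and the sample grid and values below are assumptions):
    the 1d variant is called the same way on along-track data,

        import numpy as np
        x = np.arange(0, 5000, 10.0)                     # hypothetical 10 m along-track grid
        noisy = np.sin(2 * np.pi * x / 500.0) + 0.3 * np.random.randn(x.size)
        smooth = lanczos_filter_1d(x, noisy, width=100)  # low-pass with a 100 m stencil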
""" - import scipy.ndimage.filters as signal dx = abs(np.diff(x).mean()) x, L3d = lanczos_3d(width, dx, a=a) - data_lp = signal.convolve(data, L3d, mode=mode) + data_lp = signal.convolve(data, L3d, mode=mode) return data_lp diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/read_ground_tracks.py b/src/icesat2_tracks/ICEsat2_SI_tools/read_ground_tracks.py index 1777d1f6..85fdf230 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/read_ground_tracks.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/read_ground_tracks.py @@ -50,7 +50,8 @@ def read_ICESat2_groundtrack(input_file): gdf = geopandas.GeoDataFrame(rows) RGTS.append(gdf) # return the concatenated geodataframe - return geopandas.pd.concat(RGTS).set_index("date") + concatenated_df = geopandas.pd.concat(RGTS).set_index("date") + return concatenated_df # PURPOSE: read ICESat-2 ground tracks @@ -100,7 +101,8 @@ def ICESat2_mission_groundtrack(input_file): ) GTs.append(gdf) # return the concatenated geodataframe - return geopandas.pd.concat(GTs) + concatenated_df = geopandas.pd.concat(GTs) + return concatenated_df # PURPOSE: read ICESat-2 mission ground tracks as points diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/sliderule_converter_tools.py b/src/icesat2_tracks/ICEsat2_SI_tools/sliderule_converter_tools.py index f084db97..47b25490 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/sliderule_converter_tools.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/sliderule_converter_tools.py @@ -1,4 +1,15 @@ -from ipyleaflet import basemaps +from ipyleaflet import ( + Map, + basemaps, + Polygon, +) +import numpy as np +from math import radians, cos, sin, asin, sqrt +from shapely.geometry import Polygon +import pandas as pd +import geopandas as gpd + +import matplotlib.pyplot as plt # height correction tools @@ -17,8 +28,6 @@ def correct_and_remove_height(Gi, height_limit): def make_plot_polygon(poly_test, color="green"): """create a plot polygon from the given coordinates""" - from ipyleaflet import Polygon - bb = [ poly_test[0]["lon"], poly_test[0]["lat"], @@ -32,20 +41,7 @@ def make_plot_polygon(poly_test, color="green"): def plot_polygon(poly_test, basemap=basemaps.Esri.WorldImagery, zoom=3): """plots polygon in the map""" - from ipyleaflet import ( - Map, - GeoData, - LayersControl, - Rectangle, - basemaps, - basemap_to_tiles, - TileLayer, - SplitMapControl, - Polygon, - ) - # icepx will want a bounding box with LL lon/lat, UR lon/lat - import numpy as np polygon_plot = make_plot_polygon(poly_test, color="green") @@ -60,8 +56,6 @@ def plot_polygon(poly_test, basemap=basemaps.Esri.WorldImagery, zoom=3): # gemetric tools def haversine(lon1, lat1, lon2, lat2, arc=False): - from math import radians, cos, sin, asin, sqrt - """ Calculate the great circle distance between two points on the earth (specified in decimal degrees) @@ -98,7 +92,6 @@ def get_min_eq_dist(ppoly): def create_polygons(latR, lonR): - from shapely.geometry import Polygon, Point latR.sort() lonR.sort() @@ -123,8 +116,6 @@ def find_highest_point_on_RGT(Gs, RGT): # find_lowest_point_on_RGT(Gs, 2).geometry def get_RGT_start_points(Gs, RGT="RGT"): - import pandas as pd - import geopandas as gpd G_lowest = pd.concat( [find_lowest_point_on_RGT(Gs, rgt).T for rgt in Gs[RGT].unique()], axis=1 @@ -134,8 +125,6 @@ def get_RGT_start_points(Gs, RGT="RGT"): def get_RGT_end_points(Gs, RGT="RGT"): - import pandas as pd - import geopandas as gpd G_lowest = pd.concat( [find_highest_point_on_RGT(Gs, rgt).T for rgt in Gs[RGT].unique()], axis=1 @@ -225,7 +214,6 @@ def plot_reference_point_coordinates(tmp, 
start_point_dist, start_point): start_point_dist: the distance from the equator to the reference point start_point: the reference point (GeoDataFrame) """ - import matplotlib.pyplot as plt rgt = tmp["rgt"].unique()[0] spoint_color = "black" @@ -273,7 +261,6 @@ def plot_data_in_domain(gdf2, polygon_list): inputs: gdf: GeoDataFrame with the down """ - import matplotlib.pyplot as plt # make two panel figure, on the left plot the photon postions, on the the hoffmoeller diagram fig, axx = plt.subplots(1, 2, figsize=(8, 4)) @@ -368,7 +355,6 @@ def check_RGT_in_domain(Gtrack_lowest, gdf): returns: result: set of RGT that are in both Gtrack_lowest and gdf """ - import collections gdf_list = list(gdf["rgt"].unique()) result = set(gdf_list).intersection(set(Gtrack_lowest["RGT"])) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/spicke_remover.py b/src/icesat2_tracks/ICEsat2_SI_tools/spicke_remover.py index 9ea22e47..13828fe5 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/spicke_remover.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/spicke_remover.py @@ -1,4 +1,6 @@ import numpy as np +import matplotlib.pyplot as plt +from scipy import signal def spicke_remover(data, nstd=20.0, spreed=500.0, max_loops=10.0, verbose=False): @@ -45,7 +47,6 @@ def spicke_remover(data, nstd=20.0, spreed=500.0, max_loops=10.0, verbose=False) print("stoped by max#") if verbose: - import matplotlib.pyplot as plt plt.plot(data, "r") plt.plot(data2, "b") @@ -55,8 +56,6 @@ def spicke_remover(data, nstd=20.0, spreed=500.0, max_loops=10.0, verbose=False) def spickes_to_mean(ts, nloop=None, spreed=1, gaussian=True): - from scipy import signal - nloop = 0 if nloop is None else nloop i = 0 tsmean = ts.mean() diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/wave_tools.py b/src/icesat2_tracks/ICEsat2_SI_tools/wave_tools.py index 09288f95..42d9f8ff 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/wave_tools.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/wave_tools.py @@ -1,8 +1,9 @@ +import numpy as np + + def to_vec(amp, angle, deg=True): "from anlge deg to vect" - import numpy as np - if deg: u, v = amp * np.cos(angle * np.pi / 180), amp * np.sin(angle * np.pi / 180) else: @@ -17,7 +18,6 @@ def to_deg(u, v, deg=True): angle is -180 to 180 this is a different definiton then WW3 [0, 360 ), but (-180, 180] is more convient for the problem """ - import numpy as np amp = np.sqrt(u**2 + v**2) angle = np.arctan2(v, u) @@ -28,7 +28,6 @@ def to_deg(u, v, deg=True): def get_ave_amp_angle(amp, angle, deg=True): - import numpy as np u, v = to_vec(amp, angle, deg=deg) # average angle in vector space diff --git a/src/icesat2_tracks/analysis_db/B02_make_spectra_gFT.py b/src/icesat2_tracks/analysis_db/B02_make_spectra_gFT.py index fca302da..b5c2f8eb 100644 --- a/src/icesat2_tracks/analysis_db/B02_make_spectra_gFT.py +++ b/src/icesat2_tracks/analysis_db/B02_make_spectra_gFT.py @@ -459,7 +459,7 @@ def _get_stancil_nans(stancil, Gd_cut=Gd_cut): print("FFT") dd[dd_nans] = 0 - S = spec.wavenumber_spectrogram(x, dd, Lpoints) + S = spec.WavenumberSpectrogram(x, dd, Lpoints) G = S.cal_spectrogram() S.mean_spectral_error() # add x-mean spectral error estimate to xarray S.parceval(add_attrs=True) From 9aef485ed17d4658e722ed4c790c425430524ae0 Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Wed, 28 Feb 2024 17:03:46 -0500 Subject: [PATCH 04/22] fix bug on beam_stats.py --- src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py 
b/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py index 8b13e3cf..6315da16 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py @@ -7,7 +7,6 @@ import matplotlib.gridspec as gridspec import h5py - def derive_beam_statistics(Gd, all_beams, Lmeter=10e3, dx=10): """ this method returns a dict of dataframes with the beam statistics @@ -16,6 +15,7 @@ def derive_beam_statistics(Gd, all_beams, Lmeter=10e3, dx=10): Lemter is the length of the segment in meters for the statistics dx is the nominal resolution of the ATL06 data in meters """ + D = dict() for k in all_beams: @@ -86,6 +86,7 @@ def plot_beam_statistics(D, high_beams, low_beams, col_dict, track_name=None): if track_name is not None: plt.suptitle(track_name, fontsize=10) + gs = gridspec.GridSpec(2, 3) # make 2 x 2 plot @@ -172,7 +173,7 @@ def plot_beam_statistics(D, high_beams, low_beams, col_dict, track_name=None): ) lat_shift = lat_shift + 2 - ax5.title("Density in space", loc="left") + ax5.set_title("Density in space", loc="left") ax5.ylabel("Latitude (deg)") ax5.xlabel("along track distance (km)") ax5.legend() From 2f471626b5bbc82fe228da3d7dc9f8fb1cf5b34e Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Wed, 28 Feb 2024 18:49:18 -0500 Subject: [PATCH 05/22] fix: update xlabel, ylabel to set_xlabel, set_ylabel --- src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py b/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py index 6315da16..28c8bc6e 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py @@ -174,10 +174,10 @@ def plot_beam_statistics(D, high_beams, low_beams, col_dict, track_name=None): lat_shift = lat_shift + 2 ax5.set_title("Density in space", loc="left") - ax5.ylabel("Latitude (deg)") - ax5.xlabel("along track distance (km)") + ax5.set_ylabel("Latitude (deg)") + ax5.set_xlabel("along track distance (km)") ax5.legend() - ax5.show() + plt.show() ## plot track stats basics for sliderules ATL06 output From f5953705ad8f0991d3b7d2a25674323ee7b1fbd5 Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Thu, 29 Feb 2024 09:29:13 -0500 Subject: [PATCH 06/22] it seems spectral_estimates.py was not included in the last commit --- .../ICEsat2_SI_tools/spectral_estimates.py | 1276 +++++++---------- 1 file changed, 518 insertions(+), 758 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py index bcc8afde..2e9e54bd 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py @@ -1,10 +1,50 @@ import numpy as np +from astropy.timeseries import LombScargle +import scipy.signal.windows as WINDOWS +from scipy.special import gammainc +import copy +import xarray as xr +from scipy.signal import detrend +import pandas as pd +from scipy import signal +import lmfit as LM +import icesat2_tracks.local_modules.m_general_ph3 as M + + +def is_even(number): + return not (number % 2) + +def define_chunk_boundaries_per_type(L_unit, ov, limits_size): + + ov = int(np.round(L_unit / 2)) if ov is None else ov + dl = L_unit - ov + boundaries = [] + + # Handling when limits_size is an integer + if isinstance(limits_size, int): + boundaries = [ + (0, limits_size - dl,dl), + (dl, limits_size - dl + 1,dl), + (dl * 2, limits_size + 1,dl) + ] + + # Handling 
when limits_size is a list + elif isinstance(limits_size, list) and len(limits_size) == 2: + start, end = limits_size + boundaries = [ + (start, end - dl,dl), + (start + L_unit / 2, end - dl + 1,dl), + (start + L_unit, end + 1,dl) + ] + else: + raise ValueError("limits_size must be either an int or a list of two elements") + return boundaries # basic functions -def create_chunk_boundaries(L, dsize, ov=None, iter_flag=True): +def create_chunk_boundaries(L, dsize, ov= None, iter_flag=True): """ - returns all need chunk boudaries and center position given L, and ov + returns all need chunk boundaries and center position given L, and ov inputs: L desired length of window, dsize size of the data @@ -13,108 +53,68 @@ def create_chunk_boundaries(L, dsize, ov=None, iter_flag=True): if iter_flag True returns iter else it returns an ndarray """ - ov = int(np.round(L / 2)) if ov is None else ov - - xleft = np.arange(0, dsize - int(L - ov), int(L - ov)) - xright = np.arange(int(L - ov) * 2, dsize + 1, int(L - ov)) - xcenter_pos = np.arange(int(L - ov), dsize - int(L - ov) + 1, int(L - ov)) - max_size = min([xleft.size, xcenter_pos.size, xright.size]) - # if xright[max_size-1] < dsize: - # print('left out last ' + str(dsize- xright[max_size-1]) + ' data points' ) - # print([xleft[0:max_size], xcenter_pos[0:max_size], xright[0:max_size]]) - position_stancil = np.vstack( - [xleft[0:max_size], xcenter_pos[0:max_size], xright[0:max_size]] - ) - - if iter_flag is True: - return iter(position_stancil.T.tolist()) - else: - return position_stancil + boundaries = define_chunk_boundaries_per_type(L,ov,dsize) + + xleft = np.arange(boundaries[0]) + xcenter_pos = np.arange(boundaries[1]) + xright = np.arange(boundaries[3]) + + max_size = min([xleft.size , xcenter_pos.size, xright.size]) -def create_chunk_boundaries_unit_lengths(L_unit, data_limits, ov=None, iter_flag=True): - """ - returns all need chunk boudaries and center position given L, and ov - inputs: - L desired length of window in units of the x axis of the data, - data_limits (x_min, x_max) tuple with the beginning and end the the derived window stancils - - if ov is None, = L/2 - if iter_flag True returns iter else it returns an ndarray + position_stancil = np.vstack([xleft[0:max_size], xcenter_pos[0:max_size], xright[0:max_size]]) - """ - L = L_unit - ov = np.round(L / 2) if ov is None else ov - # print(ov) - dl = L - ov - xleft = np.arange(data_limits[0], data_limits[1] - dl, dl) - xcenter_pos = np.arange(data_limits[0] + L / 2, data_limits[1] - dl + 1, dl) - xright = np.arange(data_limits[0] + L, data_limits[1] + 1, dl) - - max_size = min([xleft.size, xcenter_pos.size, xright.size]) - # if xright[max_size-1] < data_limits[1]: - # print('left out last ' + str(data_limits[1]- xright[max_size-1]) + ' data points' ) - # print([xleft[0:max_size], xcenter_pos[0:max_size], xright[0:max_size]]) - position_stancil = np.vstack( - [xleft[0:max_size], xcenter_pos[0:max_size], xright[0:max_size]] - ) - - if iter_flag is True: - return iter(position_stancil.T.tolist()) - else: - return position_stancil + return iter(position_stancil.T.tolist()) if iter_flag else position_stancil def Z_to_power(Z, df, N): - """compute the 1d spectrum of a field phi + """ compute the 1d spectrum of a field phi inputs: Z complex fourier coefficients df frequency / or wavenumber step N length of data vector in real space (= L) """ - spec = 2.0 * (Z * Z.conj()).real / df / N**2 - neven = True if (N % 2) else False + spec = 2.*(Z*Z.conj()).real / df /N**2 + neven = is_even(N) # the 
zeroth frequency should be counted only once - spec[0] = spec[0] / 2.0 + spec[0] = spec[0]/2. if neven: - spec[-1] = spec[-1] / 2.0 + spec[-1] = spec[-1]/2. return spec - # 2nd cal spectra def calc_spectrum_fft(phi, df, N): - """compute the 1d spectrum of a field phi + """ compute the 1d spectrum of a field phi inputs: df frequency / or wavenumber step N length of data vector in real space (= L) neven bool, if True """ - neven = True if (N % 2) else False + neven = is_even(N) phih = np.fft.rfft(phi) # the factor of 2 comes from the symmetry of the Fourier coeffs - spec = 2.0 * (phih * phih.conj()).real / df / N**2 + spec = 2.*(phih*phih.conj()).real / df /N**2 # the zeroth frequency should be counted only once - spec[0] = spec[0] / 2.0 + spec[0] = spec[0]/2. if neven: - spec[-1] = spec[-1] / 2.0 + spec[-1] = spec[-1]/2. return spec -def LS_power_to_PSD(ls_power, L, dff): +def LS_power_to_PSD( ls_power, L , dff): """ returns Power spectral density (unit^2/dfreq) ls_power output of astropy.timeseries.LombScargle.power with normalization='psd' """ - return 2 * ls_power / L / dff - + return 2 * ls_power / L /dff -def calc_spectrum_LS(x, y, k, err=None, LS=None, dk=None): +def calc_spectrum_LS( x, y, k, err=None, LS= None, dk =None): """ returns: Power spectral density of y given postitions x, for wanumbers k @@ -123,19 +123,16 @@ def calc_spectrum_LS(x, y, k, err=None, LS=None, dk=None): LS LombScargle object """ if LS is None: - from astropy.timeseries import LombScargle - - LS = LombScargle(x, y, dy=err, fit_mean=False, center_data=True) + LS = LombScargle(x , y, dy = err, fit_mean=False, center_data=True) else: LS.t = x LS.y = y - LS.dy = err - - ls_power = LS.power(k, normalization="psd", assume_regular_frequency="False") + LS.dy= err - dk = np.diff(k).mean() if dk is None else dk - return 2 * ls_power / y.size / dk, LS + ls_power = LS.power(k, normalization='psd', assume_regular_frequency='False') + dk = np.diff(k).mean() if dk is None else dk + return 2 * ls_power / y.size / dk , LS def reconstruct_data_from_LS(LS, x_real_axis, freq): """ @@ -148,42 +145,32 @@ def reconstruct_data_from_LS(LS, x_real_axis, freq): y_reconstruct = LS.offset() * np.ones(len(x_real_axis)) freq_seq = freq[1:] if freq[0] == 0 else freq - freq_mask = freq <= 1 / 100 + freq_mask= freq <= 1/100 - # print(freq_seq) for fi in freq_seq: try: theta = LS.model_parameters(fi) except: - theta = [0, 0] - y_reconstruct += theta[0] * np.sin(x_real_axis * 2 * np.pi * fi) + theta[ - 1 - ] * np.cos(x_real_axis * 2 * np.pi * fi) - # y_reconstruct += LS.model(x_real_axis, fi) - - # for fi in freq_seq[:]: - # theta = LS.model_parameters(fi) - # y_reconstruct += theta[0] + theta[1] * np.sin(x_real_axis * 2 * np.pi *fi ) + theta[2]* np.cos(x_real_axis * 2 * np.pi *fi) - # #y_reconstruct += LS.model(x_real_axis, fi) + theta = [0,0] + y_reconstruct += theta[0] * np.sin(x_real_axis * 2 * np.pi *fi ) + theta[1]* np.cos(x_real_axis * 2 * np.pi *fi) + return y_reconstruct - def calc_freq_fft(x_grid, N): - """calculate array of spectral variable (frequency or - wavenumber) in cycles per unit of L""" + """ calculate array of spectral variable (frequency or + wavenumber) in cycles per unit of L """ - neven = True if (N % 2) else False - # dx=np.diff(x_grid).mean() - dx = np.round(np.median(np.diff(x_grid)), 1) - df = 1.0 / ((N - 1) * dx) + neven = is_even(N) + + dx = np.round(np.median(np.diff(x_grid) ), 1) + df = 1./((N-1)*dx) if neven: - f = df * np.arange(N / 2 + 1) + f = df*np.arange(N/2+1) else: - f = df * np.arange((N - 1) / 2.0 + 1) 
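    # Illustrative check (added comment, not part of the original patch; the
    # numbers are assumed example values): for N = 1000 samples on a 10 m grid,
    #   df = 1 / ((N - 1) * dx) = 1 / 9990 ~ 1.0e-4 cycles per meter,
    # and the last entry of f, df * N / 2 ~ 0.05 cycles per meter, lies close to
    # the Nyquist frequency 1 / (2 * dx) = 0.05. The WavenumberSpectrogram class
    # further down converts f and df to radian wavenumbers by multiplying by 2*pi.
    # >>> f, df = calc_freq_fft(np.arange(0, 10000, 10.0), 1000)
    # >>> np.isclose(df, 1 / 9990)   # True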
- return f, df - + f = df*np.arange( (N-1)/2. + 1 ) + return f,df def calc_freq_fft_given_dx(dx, N): """ @@ -193,25 +180,17 @@ def calc_freq_fft_given_dx(dx, N): N number of datapoints used in window """ - neven = True if (N % 2) else False - df = 1.0 / ((N - 1) * dx) + neven = is_even(N) + df = 1./((N-1)*dx) if neven: - f = df * np.arange(N / 2 + 1) + f = df*np.arange(N/2+1) else: - f = df * np.arange((N - 1) / 2.0 + 1) - return f, df - - -def calc_freq_LS( - x, - N, - method="fftX2", - dx=None, - minimum_frequency=None, - maximum_frequency=None, - samples_per_peak=0.01, -): + f = df*np.arange( (N-1)/2. + 1 ) + return f,df + + +def calc_freq_LS(x, N, method='fftX2', dx=None, minimum_frequency=None, maximum_frequency=None, samples_per_peak=0.01): """ calculate array of spectral variable (frequency or wavenumber) in cycles per unit of N (window length in number of data points) @@ -223,48 +202,40 @@ def calc_freq_LS( minimum_frequency, maximum_frequency only used for LS_auto """ - if method is "fftX2": - neven = True if (N % 2) else False + if method is 'fftX2': + neven = is_even(N) dx = np.diff(x).mean() if dx is None else dx - df = 1.0 / ((N - 1) * dx) / 2 + df = 1./((N-1)*dx) /2 if neven: - f = df * np.arange(df, N + 1) + f = df*np.arange(df, N+1) else: - f = df * np.arange(df, (N - 1) + 1) + f = df* np.arange(df, (N-1) + 1 ) - elif method is "fft": - neven = True if (N % 2) else False + elif method is 'fft': + neven = is_even(N) dx = np.diff(x).mean() if dx is None else dx - df = 1.0 / ((N - 1) * dx) + df = 1./((N-1)*dx) if neven: - f = df * np.arange(N / 2 + 1) + f = df*np.arange(N/2+1) else: - f = df * np.arange((N - 1) / 2.0 + 1) + f = df*np.arange( (N-1)/2. + 1 ) - elif method is "LS_auto": - from astropy.timeseries import LombScargle - - f = LombScargle(x, np.random.randn(len(x)), fit_mean=True).autofrequency( - minimum_frequency=minimum_frequency, - maximum_frequency=maximum_frequency, - samples_per_peak=samples_per_peak, - ) ##0.1) + elif method is 'LS_auto': + + f = LombScargle(x , np.random.randn(len(x)), fit_mean=True).autofrequency(minimum_frequency=minimum_frequency, maximum_frequency=maximum_frequency, samples_per_peak=samples_per_peak)##0.1) df = np.diff(f).mean() df = np.round(df, 5) - elif method is "fixed_ratio": + elif method is 'fixed_ratio': - neven = True if (N % 2) else False + neven = is_even(N) dx = np.diff(x).mean() if dx is None else dx df = dx / 50 - if neven: - f = df * np.arange(df, N + 1) - else: - f = df * np.arange(df, N) - - return f, df + f = df * np.arange(df, N + 1) if neven else df * np.arange(df, N) + + return f ,df def create_window(L, window=None): """ @@ -272,66 +243,63 @@ def create_window(L, window=None): if window is not None it show have a length of N """ if window is None: - win = np.hanning(L) + win=np.hanning(L) else: - win = window + win=window - factor = np.sqrt(L / (win**2).sum()) - win *= factor + factor=np.sqrt(L/(win**2).sum()) + win*=factor return win - def create_weighted_window(data, window=None): """ define window function and weight it to conserve variance if window is not None it show have a length of N """ - import scipy.signal.windows as WINDOWS - + L = data.size if window is None: - # win=np.hanning(L) - win = WINDOWS.tukey(L, alpha=0.1, sym=True) + win = WINDOWS.tukey(L, alpha=0.1, sym=True) else: - win = window + win=window - factor = np.sqrt(np.var(data) / np.var((data * win))) - # factor=np.sqrt( L/(win**2).sum()) - win *= factor + factor = np.sqrt( np.var(data) / np.var(( data* win) ) ) + win *= factor return win +def 
spec_error(E,sn,ci=.95): + + """ Computes confidence interval for one-dimensional spectral + estimate E (the power spectra). -def spec_error(E, sn, ci=0.95): - """Computes confidence interval for one-dimensional spectral - estimate E (the power spectra). + Parameters + =========== + - sn is the number of spectral realizations; + it can be either an scalar or an array of size(E) + - ci = .95 for 95 % confidence interval - Parameters - =========== - - sn is the number of spectral realizations; - it can be either an scalar or an array of size(E) - - ci = .95 for 95 % confidence interval + Output + ========== + lower (El) and upper (Eu) bounds on E """ - Output - ========== - lower (El) and upper (Eu) bounds on E""" - def yNlu(sn, yN, ci): - """compute yN[l] yN[u], that is, the lower and - upper limit of yN""" - from scipy.special import gammainc + def yNlu(sn,yN,ci): + """ compute yN[l] yN[u], that is, the lower and + upper limit of yN """ + # cdf of chi^2 dist. with 2*sn DOF - cdf = gammainc(sn, sn * yN) + cdf = gammainc(sn,sn*yN) # indices that delimit the wedge of the conf. interval fl = np.abs(cdf - ci).argmin() - fu = np.abs(cdf - 1.0 + ci).argmin() + fu = np.abs(cdf - 1. + ci).argmin() - return yN[fl], yN[fu] + return yN[fl],yN[fu] - dbin = 0.005 - yN = np.arange(0, 2.0 + dbin, dbin) + dbin = .005 + yN = np.arange(0,2.+dbin,dbin) El, Eu = np.empty_like(E), np.empty_like(E) @@ -345,19 +313,19 @@ def yNlu(sn, yN, ci): assert n == E.size, " *** sn has different size than E " for i in range(n): - yNl, yNu = yNlu(sn[i], yN=yN, ci=ci) - El[i] = E[i] / yNl - Eu[i] = E[i] / yNu + yNl,yNu = yNlu(sn[i],yN=yN,ci=ci) + El[i] = E[i]/yNl + Eu[i] = E[i]/yNu else: - yNl, yNu = yNlu(sn, yN=yN, ci=ci) - El = E / yNl - Eu = E / yNu + yNl,yNu = yNlu(sn,yN=yN,ci=ci) + El = E/yNl + Eu = E/yNu return El, Eu - def linear_gap_fill(F, key_lead, key_int): + """ F pd.DataFrame key_lead key in F that determined the independent coordindate @@ -365,13 +333,13 @@ def linear_gap_fill(F, key_lead, key_int): """ y_g = np.array(F[key_int]) - nans, x2 = np.isnan(y_g), lambda z: z.nonzero()[0] - y_g[nans] = np.interp(x2(nans), x2(~nans), y_g[~nans]) + nans, x2= np.isnan(y_g), lambda z: z.nonzero()[0] + y_g[nans]= np.interp(x2(nans), x2(~nans), y_g[~nans]) return y_g +def sub_sample_coords(X, lons, lats, stancils, map_func =None): -def sub_sample_coords(X, lons, lats, stancils, map_func=None): """ X non-nan array of coodinate along beam lons, lats arrays of postion data that should be mapped. 
must have same size as X @@ -382,28 +350,25 @@ def sub_sample_coords(X, lons, lats, stancils, map_func=None): nparray(3, N) 1st column is the stancil center, 2nd and 3rd collumn are the mapped lons and lats """ - import copy - + def get_lon_lat_coords(stancil): - x_mask = (stancil[0] <= X) & (X <= stancil[-1]) - # print(stancil[1]) - if sum(x_mask) == 0: # if there are not enough photos set results to nan + x_mask= (stancil[0] <= X) & (X <= stancil[-1]) + if sum(x_mask) ==0: # if there are not enough photos set results to nan return np.array([stancil[1], np.nan, np.nan]) lon_bin, lat_bin = lons[x_mask].mean(), lats[x_mask].mean() - return np.array([stancil[1], lon_bin, lat_bin]) + return np.array([stancil[1],lon_bin, lat_bin]) map_func = map if map_func is None else map_func - # print(map_func) - coord_positions = list(map_func(get_lon_lat_coords, copy.copy(stancils))) + coord_positions = list(map_func( get_lon_lat_coords, copy.copy(stancils) )) coord_positions = np.vstack(coord_positions) return coord_positions -class wavenumber_spectrogram: +class WavenumberSpectrogram: def __init__(self, x_grid, data, Lpoints, ov=None, window=None): """ returns a wavenumber spectrogram with the resolution L-ov @@ -421,136 +386,119 @@ def __init__(self, x_grid, data, Lpoints, ov=None, window=None): other arributes are in the .attr dict. """ - self.Lpoints = Lpoints - self.ov = ( - int(Lpoints / 2) if ov is None else ov - ) # when not defined in create_chunk_boundaries then L/2 + self.Lpoints = Lpoints + self.ov = int(Lpoints/2) if ov is None else ov #when not defined in create_chunk_boundaries then L/2 - self.data = data + self.data = data # create subsample k - self.k, self.dk = calc_freq_fft( - x_grid, Lpoints - ) # return 1/ unit of frid points + self.k, self.dk = calc_freq_fft(x_grid, Lpoints) # return 1/ unit of frid points # to get the waveumber units (2 pi/ lambda), multiply by 2 pi - self.k, self.dk = self.k * 2 * np.pi, self.dk * 2 * np.pi + self.k, self.dk = self.k * 2 * np.pi, self.dk * 2 * np.pi - # print(self.k[0], self.k[-1]) - # print(self.dk) # create window - self.win = create_window(Lpoints) + self.win = create_window(Lpoints) def cal_spectrogram(self, data=None, name=None): + """ defines apply function and calculated all sub-sample sprectra using map """ - import xarray as xr + DATA = self.data if data is None else data Lpoints, dk = self.Lpoints, self.dk - win = self.win + win =self.win def calc_spectrum_apply(stancil): "returns spectrum per stencil, detrends and windows the data" - from scipy.signal import detrend + - idata = DATA[stancil[0] : stancil[-1]] + idata = DATA[stancil[0]:stancil[-1]] idata = detrend(idata) * win - return stancil[1], calc_spectrum_fft(idata, dk, Lpoints) + return stancil[1], calc_spectrum_fft(idata , dk, Lpoints) - # def test_func(i_stancil): - # return i_stancil[1], yy[i_stancil[0]:i_stancil[-1]].shape - - # %% derive L2 stancil - stancil_iter = create_chunk_boundaries(Lpoints, DATA.size, ov=self.ov) + # derive L2 stancil + stancil_iter = create_chunk_boundaries(Lpoints, DATA.size, ov= self.ov) # apply func to all stancils - D_specs = dict(map(calc_spectrum_apply, stancil_iter)) + D_specs = dict(map(calc_spectrum_apply,stancil_iter)) chunk_positions = np.array(list(D_specs.keys())) - self.N_stancils = len(chunk_positions) # number of spectal relazations + self.N_stancils = len(chunk_positions) # number of spectal relazations # repack data, create xarray - self.spec_name = "power_spec" if name is None else name - G = dict() - for xi, I in D_specs.items(): - 
G[xi] = xr.DataArray( - I, dims=["k"], coords={"k": self.k, "x": xi}, name=self.spec_name - ) + self.spec_name = 'power_spec' if name is None else name + G =dict() + for xi,I in D_specs.items(): + G[xi] = xr.DataArray(I, dims=['k'], coords={'k': self.k, 'x': xi } , name=self.spec_name) - self.G = xr.concat(G.values(), dim="x").T # .to_dataset() + self.G = xr.concat(G.values(), dim='x').T if self.G.k[0] == 0: self.G = self.G[1:, :] - self.G.attrs["ov"] = self.ov - self.G.attrs["L"] = self.Lpoints + self.G.attrs['ov'] = self.ov + self.G.attrs['L'] = self.Lpoints return self.G - # cal variance + def calc_var(self): - """Compute total variance from spectragram""" - return self.dk * self.G.mean("x").sum().data # do not consider zeroth frequency + """ Compute total variance from spectragram """ + return self.dk*self.G.mean('x').sum().data # do not consider zeroth frequency - def mean_spectral_error(self, mask=None, confidence=0.95): + def mean_spectral_error(self, mask=None, confidence = 0.95): "retrurns spetral error for the x-mean spectral estimate and stores it as coordindate in the dataarray" # make error estimate if mask is not None: - meanspec = self.G.isel(x=mask).mean("x") + meanspec= self.G.isel(x=mask).mean('x') N = int(sum(mask)) else: - meanspec = self.G.mean("x") + meanspec= self.G.mean('x') N = self.N_stancils - El_of_mean, Eu_of_mean = spec_error(meanspec, N, confidence) - El_of_mean.name = "El_mean" - Eu_of_mean.name = "Eu_mean" + El_of_mean, Eu_of_mean = spec_error(meanspec, N , confidence ) + El_of_mean.name = 'El_mean' + Eu_of_mean.name = 'Eu_mean' - self.G.coords["mean_El"] = (("k"), El_of_mean.data) - self.G.coords["mean_Eu"] = (("k"), Eu_of_mean.data) + self.G.coords['mean_El'] = (('k'), El_of_mean.data) + self.G.coords['mean_Eu'] = (('k'), Eu_of_mean.data) - def parceval(self, add_attrs=True): + def parceval(self, add_attrs=True ): "test Parceval theorem" DATA = self.data L = self.Lpoints + # derive mean variances of stancils stancil_iter = create_chunk_boundaries(L, DATA.size) def get_stancil_var_apply(stancil): - from scipy.signal import detrend - + "returns the variance of yy for stancil" - idata = DATA[stancil[0] : stancil[-1]] - idata = detrend(idata) # * win + idata = DATA[stancil[0]:stancil[-1]] + idata = detrend(idata) return stancil[1], idata.var() - D_vars = dict(map(get_stancil_var_apply, stancil_iter)) + D_vars = dict(map(get_stancil_var_apply,stancil_iter)) - stancil_vars = list() + stancil_vars =list() for I in D_vars.values(): stancil_vars.append(I) - print("Parcevals Theorem:") - print("variance of unweighted timeseries: ", DATA.var()) - print("mean variance of detrended chunks: ", np.array(stancil_vars).mean()) - # print('variance of weighted timeseries: ',self.phi.var() ) - # self.calc_var(self) - print("variance of the pwelch Spectrum: ", self.calc_var()) + print('Parcevals Theorem:') + print('variance of unweighted timeseries: ',DATA.var()) + print('mean variance of detrended chunks: ', np.array(stancil_vars).mean()) + print('variance of the pwelch Spectrum: ', self.calc_var()) if add_attrs: - self.G.attrs["variance_unweighted_data"] = DATA.var() - self.G.attrs["mean_variance_detrended_chunks"] = np.array( - stancil_vars - ).mean() - self.G.attrs["mean_variance_pwelch_spectrum"] = self.calc_var() + self.G.attrs['variance_unweighted_data'] = DATA.var() + self.G.attrs['mean_variance_detrended_chunks'] = np.array(stancil_vars).mean() + self.G.attrs['mean_variance_pwelch_spectrum'] = self.calc_var() - -class wavenumber_spectrogram_LS_even: - def __init__( - 
self, x, data, L, waven_method="fftX2", dy=None, ov=None, window=None, kjumps=1 - ): +class WavenumberSpectrogramLSEven: + def __init__(self, x, data, L, waven_method = 'fftX2' , dy=None , ov=None, window=None, kjumps=1): """ returns a wavenumber spectrogram with the resolution L-ov this uses Lombscargle @@ -568,103 +516,96 @@ def __init__( xr.Dataset with x, k as cooridates of the spectrogram and the mean error other arributes are in the .attr dict. """ - from astropy.timeseries import LombScargle + + self.L = L + self.ov = int(L/2) if ov is None else ov #when not defined in create_chunk_boundaries then L/2 - self.L = L - self.ov = ( - int(L / 2) if ov is None else ov - ) # when not defined in create_chunk_boundaries then L/2 + self.x = x + self.data = data + self.dy = dy - self.x = x - self.data = data - self.dy = dy # create subsample k - # print(waven_method) if type(waven_method) is str: - self.k, self.dk = calc_freq_LS(x, L, method=waven_method) + self.k, self.dk = calc_freq_LS(x, L, method = waven_method ) elif type(waven_method) is np.ndarray: - self.k, self.dk = waven_method, np.diff(waven_method).mean() + self.k, self.dk = waven_method, np.diff(waven_method).mean() else: - raise ValueError("waven_method is neither string nor an array") + raise ValueError('waven_method is neither string nor an array') - self.k, self.dk = self.k[::kjumps], self.dk * kjumps + self.k, self.dk = self.k[::kjumps], self.dk*kjumps # create window - self.win = None # create_window(L) + self.win = None + + def cal_spectrogram(self, x = None, data=None, name=None, dx=1): - def cal_spectrogram(self, x=None, data=None, name=None, dx=1): """ defines apply function and calculated all sub-sample sprectra using map dx nominal resolution of the data resolutionif not set, dx= 1 """ - from astropy.timeseries import LombScargle - import xarray as xr + - X = self.x if x is None else x # all x positions - DATA = self.data if data is None else data # all data points - L, dk = self.L, self.dk - win = self.win + X = self.x if x is None else x # all x positions + DATA = self.data if data is None else data # all data points + L, dk = self.L, self.dk + win = self.win self.dx = dx # init Lomb scargle object with noise as nummy data () - # dy_fake= np.random.randn(len(dy))*0.001 if self.dy is not None else None - self.LS = LombScargle(X[0:L], np.random.randn(L) * 0.001, fit_mean=True) + self.LS = LombScargle(X[0:L] , np.random.randn(L)*0.001, fit_mean=True) + def calc_spectrum_apply(stancil): "returns spectrum per stencil, detrends and windows the data" - from scipy.signal import detrend + - x = X[stancil[0] : stancil[-1]] - # x_mask= (stancil[0] < X) & (X <= stancil[-1]) - # x = X[x_mask] - idata = DATA[stancil[0] : stancil[-1]] - y = detrend(idata) # * win + x = X[stancil[0]:stancil[-1]] + idata = DATA[stancil[0]:stancil[-1]] + y = detrend(idata) - LS_PSD, LS_object = calc_spectrum_LS(x, y, self.k, LS=self.LS, dk=self.dk) + LS_PSD, LS_object = calc_spectrum_LS( x, y, self.k, LS= self.LS, dk =self.dk) return stancil[1], LS_PSD # % derive L2 stancil - stancil_iter = create_chunk_boundaries(L, DATA.size, ov=self.ov) + stancil_iter = create_chunk_boundaries(L, DATA.size, ov= self.ov) # apply func to all stancils - D_specs = dict(map(calc_spectrum_apply, stancil_iter)) + D_specs = dict(map(calc_spectrum_apply,stancil_iter)) chunk_positions = np.array(list(D_specs.keys())) - self.N_stancils = len(chunk_positions) # number of spectal relazations + self.N_stancils = len(chunk_positions) # number of spectal relazations # repack data, 
create xarray - self.spec_name = "power_spec" if name is None else name - G = dict() - for xi, I in D_specs.items(): - G[xi] = xr.DataArray( + self.spec_name = 'power_spec' if name is None else name + G = { + xi: xr.DataArray( I, dims=["k"], coords={"k": self.k, "x": xi * self.dx}, name=self.spec_name, ) + for xi, I in D_specs.items() + } - self.G = xr.concat(G.values(), dim="x").T # .to_dataset() + self.G = xr.concat(G.values(), dim='x').T if self.G.k[0] == 0: self.G = self.G[1:, :] - self.G.attrs["ov"] = self.ov - self.G.attrs["L"] = self.L + self.G.attrs['ov'] = self.ov + self.G.attrs['L'] = self.L return self.G def calc_var(self): - return wavenumber_spectrogram.calc_var(self) + return WavenumberSpectrogram.calc_var(self) - def parceval(self, add_attrs=True): - return wavenumber_spectrogram.parceval(self, add_attrs=add_attrs) + def parceval(self, add_attrs=True ): + return WavenumberSpectrogram.parceval(self, add_attrs= add_attrs ) - def mean_spectral_error(self, confidence=0.95): - return wavenumber_spectrogram.mean_spectral_error(self, confidence=confidence) + def mean_spectral_error(self, confidence = 0.95 ): + return WavenumberSpectrogram.mean_spectral_error(self, confidence= confidence ) - -class wavenumber_spectrogram_LS: - def __init__( - self, x, data, L, dx, dy=None, waven_method="fftX2", ov=None, window=None - ): +class WavenumberSpectrogramLS: + def __init__(self, x, data, L, dx, dy = None, waven_method = 'fftX2', ov=None, window=None): """ returns a wavenumber spectrogram with the resolution L-ov this uses Lombscargle @@ -683,43 +624,31 @@ def __init__( xr.Dataset with x, k as cooridates of the spectrogram and the mean error other arributes are in the .attr dict. """ - from astropy.timeseries import LombScargle - - self.L = L - self.ov = ( - int(L / 2) if ov is None else ov - ) # when not defined in create_chunk_boundaries then L/2 + + self.L = L + self.ov = int(L/2) if ov is None else ov #when not defined in create_chunk_boundaries then L/2 - self.x = x - self.dx = dx - self.data = data + self.x = x + self.dx = dx + self.data = data self.error = dy if dy is not None else None - self.Lpoints = int(self.L / self.dx) + self.Lpoints= int(self.L/self.dx) - # create subsample k - # print(waven_method) + + # create subsample k if type(waven_method) is str: - self.k, self.dk = calc_freq_LS(x, self.Lpoints, method=waven_method) + self.k, self.dk = calc_freq_LS(x, self.Lpoints, method = waven_method ) elif type(waven_method) is np.ndarray: - self.k, self.dk = waven_method, np.diff(waven_method).mean() + self.k, self.dk = waven_method, np.diff(waven_method).mean() else: - raise ValueError("waven_method is neither string nor an array") + raise ValueError('waven_method is neither string nor an array') - # self.k, self.dk = self.k[::kjumps], self.dk*kjumps + # create window - self.win = None # create_window(L) - - def cal_spectrogram( - self, - x=None, - data=None, - error=None, - name=None, - xlims=None, - weight_data=True, - max_nfev=None, - map_func=None, - ): + self.win = None + + def cal_spectrogram(self, x = None, data=None, error=None, name=None, xlims =None, weight_data= True, max_nfev = None, map_func=None): + """ defines apply function and calculated all sub-sample sprectra using map @@ -738,194 +667,143 @@ def cal_spectrogram( self.GG, params_dataframe params_dataframe is a pd.DataFrame that containes all the parameters of the fitting process (and may contain uncertainties too once they are calculated) """ - from astropy.timeseries import LombScargle - import xarray as xr - 
import copy - import pandas as pd - - X = self.x if x is None else x # all x positions - DATA = self.data if data is None else data # all data points - ERR = self.error if error is None else error # all error for points - L, dk = self.L, self.dk - # win = self.win - self.xlims = (np.round(X.min()), X.max()) if xlims is None else xlims + + - # init Lomb scargle object with noise as nummy data () - # dy_fake= np.random.randn(len(dy))*0.001 if self.dy is not None else None - # self.LS = LombScargle(X[0:L] , np.random.randn(L)*0.001, fit_mean=True) + X = self.x if x is None else x # all x positions + DATA = self.data if data is None else data # all data points + ERR = self.error if error is None else error # all error for points + L, dk = self.L, self.dk + self.xlims = ( np.round(X.min()), X.max() ) if xlims is None else xlims - # define window - import scipy.signal.windows as WINDOWS + # define window self.win = WINDOWS.tukey(self.Lpoints, alpha=0.1, sym=True) + def calc_spectrum_and_field_apply(stancil): + """ windows the data accoding to stencil and applies LS spectrogram returns: stancil center, spectrum for this stencil, number of datapoints in stancil """ - from scipy.signal import detrend + - # x = X[stancil[0]:stancil[-1]] - x_mask = (stancil[0] <= X) & (X <= stancil[-1]) + x_mask= (stancil[0] <= X) & (X <= stancil[-1]) - # print(stancil[1]) x = X[x_mask] - if x.size < 200: # if there are not enough photos set results to nan - # return stancil[1], self.k*np.nan, np.fft.rfftfreq( int(self.Lpoints), d=self.dx)*np.nan, x.size - return stancil[1], self.k * np.nan, self.k * np.nan, np.nan, x.size + if x.size < 200: # if there are not enough photos set results to nan + return stancil[1], self.k*np.nan, self.k*np.nan, np.nan, x.size y = DATA[x_mask] - # make x positions - x_pos = (np.round((x - stancil[0]) / 10.0 - 1, 0)).astype("int") + #make x positions + x_pos = (np.round( (x - stancil[0])/ 10.0 -1 , 0) ).astype('int') # weight data if weight_data: window = self.win[x_pos] - y = y * window * np.sqrt(np.var(y) / np.var((y * window))) + y = y * window * np.sqrt( np.var(y) / np.var(( y* window) ) ) - # make y gridded - x_model = np.arange(stancil[0], stancil[-1], self.dx) - y_gridded = np.copy(x_model) * np.nan + #make y gridded + x_model = np.arange(stancil[0], stancil[-1], self.dx) + y_gridded = np.copy(x_model) * np.nan y_gridded[x_pos] = y - nan_mask = np.isnan(y_gridded) + nan_mask =np.isnan(y_gridded) err = ERR[x_mask] if ERR is not None else None - # print(x.shape, y.shape, self.k, self.LS) - LS_PSD, LS_object = calc_spectrum_LS( - x, y, self.k, err=err, LS=None, dk=self.dk - ) + LS_PSD, LS_object = calc_spectrum_LS( x, y, self.k, err=err, LS= None, dk =self.dk) - y_model = reconstruct_data_from_LS(LS_object, x_model, self.k) + y_model = reconstruct_data_from_LS(LS_object, x_model, self.k) - # print(stancil[-1], x_model[-1]) - # print(stancil[0], x_model[0]) - # print(np.fft.rfft(y_model).size , kk.size) - # print(x_model.size, y_gridded.size, y_model.size) - # print('--') - P = conserve_variance( - np.fft.rfft(y_model), self.k, y_gridded, nan_mask=nan_mask - ) + + + P = conserve_variance(np.fft.rfft(y_model), self.k, y_gridded, nan_mask = nan_mask ) P.set_parameters() - # P.test_ojective_func(P.tanh_weight_function, plot_flag=False) fitter = P.optimize(max_nfev=max_nfev) return stancil[1], LS_PSD, P.best_guess_Z(), fitter.params, x.size - # % derive L2 stancil - self.stancil_iter = create_chunk_boundaries_unit_lengths( - L, self.xlims, ov=self.ov, iter_flag=True - ) - # stancil_iter = 
create_chunk_boundaries_unit_lengths(L, ( np.round(X.min()), X.max() ), ov= self.ov, iter_flag=True) + # derive L2 stancil + self.stancil_iter = create_chunk_boundaries(L, self.xlims, ov= self.ov, iter_flag=True) - # apply func to all stancils - # Spec_returns=list() - # for ss in stancil_iter: - # print(ss) - # Spec_returns.append( calc_spectrum_apply(ss) ) map_func = map if map_func is None else map_func print(map_func) - Spec_returns = list( - map_func(calc_spectrum_and_field_apply, copy.copy(self.stancil_iter)) - ) - # # linear version - # Spec_returns = list(map( calc_spectrum_and_field_apply, copy.copy(self.stancil_iter) )) + Spec_returns = list(map_func( calc_spectrum_and_field_apply, copy.copy(self.stancil_iter) )) + # unpack resutls of the mapping: - D_specs = dict() - Y_model = dict() - Pars = dict() - N_per_stancil = list() + D_specs = dict() + Y_model = dict() + Pars = dict() + N_per_stancil = list() for I in Spec_returns: - # print(I[1].shape, I[2].shape) - D_specs[I[0]] = I[1] - Y_model[I[0]] = I[2] - Pars[I[0]] = I[3] + D_specs[I[0]] = I[1] + Y_model[I[0]] = I[2] + Pars[I[0]] = I[3] N_per_stancil.append(I[4]) self.N_per_stancil = N_per_stancil chunk_positions = np.array(list(D_specs.keys())) - self.N_stancils = len(chunk_positions) # number of spectral realizatiobs + self.N_stancils = len(chunk_positions) # number of spectral realizatiobs # repack data, create xarray # 1st LS spectal estimates - self.spec_name = "LS_spectal_power" if name is None else name - G_LS_power = dict() - for xi, I in D_specs.items(): - G_LS_power[xi] = xr.DataArray( - I, dims=["k"], coords={"k": self.k, "x": xi}, name=self.spec_name - ) + self.spec_name = 'LS_spectal_power' if name is None else name + G_LS_power =dict() + for xi,I in D_specs.items(): + G_LS_power[xi] = xr.DataArray(I, dims=['k'], coords={'k': self.k, 'x': xi } , name=self.spec_name) + + G_LS_power = xr.concat(G_LS_power.values(), dim='x').T + + - G_LS_power = xr.concat(G_LS_power.values(), dim="x").T # .to_dataset() - - # 2nd Y_model - # G_model =dict() - # # define relative x-coodinate. 
relative to center - # #eta = np.arange(0, self.L, self.dx) - self.L/2 - # eta = np.arange(0, self.L + self.dx, self.dx) - self.L/2 - # for xi,I in Y_model.items(): - # if I.size < eta.size: - # I = np.insert(I, -1, I[-1]) - # - # G_model[xi] = xr.DataArray(I, dims=['eta'], coords={'eta': eta, 'x': xi } , name="y_model") - # - # self.G_model = xr.concat(G_model.values(), dim='x').T#.to_dataset() - # # if self.G_fft.k[0] == 0: - # # self.G_fft = self.G_fft[1:, :] - # - # self.G_model.attrs['ov'] = self.ov - # self.G_model.attrs['L'] = self.L - # self.G_model.attrs['Lpoints'] = self.Lpoints - # self.G_model.coords['N_per_stancil'] = ( ('x'), N_per_stancil) - - # 2nd FFT(Y_model) - G_fft = dict() - Y_model_k_fft = np.fft.rfftfreq(int(self.Lpoints), d=self.dx) - for xi, I in Y_model.items(): + #2nd FFT(Y_model) + G_fft =dict() + Y_model_k_fft = np.fft.rfftfreq( int(self.Lpoints), d=self.dx) + for xi,I in Y_model.items(): if I.size < Y_model_k_fft.size: I = np.insert(I, -1, I[-1]) - G_fft[xi] = xr.DataArray( - I, dims=["k"], coords={"k": Y_model_k_fft, "x": xi}, name="Y_model_hat" - ) + G_fft[xi] = xr.DataArray(I, dims=['k'], coords={'k': Y_model_k_fft, 'x': xi } , name='Y_model_hat') - G_fft = xr.concat(G_fft.values(), dim="x").T # .to_dataset() + G_fft = xr.concat(G_fft.values(), dim='x').T # generate power spec as well self.G = Z_to_power(G_fft, self.dk, self.Lpoints) - self.G.name = "spectral_power_optm" + self.G.name = 'spectral_power_optm' + # merge both datasets self.GG = xr.merge([G_LS_power, G_fft, self.G]) - self.GG.attrs["ov"] = self.ov - self.GG.attrs["L"] = self.L - self.GG.attrs["Lpoints"] = self.Lpoints - self.GG.coords["N_per_stancil"] = (("x"), N_per_stancil) - - self.GG.expand_dims(dim="eta") - self.GG.coords["eta"] = ( - ("eta"), - np.arange(0, self.L + self.dx, self.dx) - self.L / 2, - ) - self.GG["win"] = (("eta"), np.insert(self.win, -1, self.win[-1])) + self.GG.attrs['ov'] = self.ov + self.GG.attrs['L'] = self.L + self.GG.attrs['Lpoints'] = self.Lpoints + self.GG.coords['N_per_stancil'] = ( ('x'), N_per_stancil) + + self.GG.expand_dims(dim='eta') + self.GG.coords['eta'] = ( ('eta'), np.arange(0, self.L + self.dx, self.dx) - self.L/2 ) + self.GG['win'] = ( ('eta'), np.insert(self.win, -1, self.win[-1])) # create dataframe with fitted parameters - PP2 = dict() + PP2= dict() for k, I in Pars.items(): if I is not np.nan: - PP2[k] = I + PP2[k] =I keys = PP2[next(iter(PP2))].keys() - params_dataframe = pd.DataFrame(index=keys) + params_dataframe = pd.DataFrame(index =keys) - for k, I in PP2.items(): + for k,I in PP2.items(): I.values() params_dataframe[k] = list(I.valuesdict().values()) return self.GG, params_dataframe + + + def calc_var(self): Gmean = np.nanmean(self.G, 1) @@ -933,70 +811,60 @@ def calc_var(self): return self.dk * Gmean[~infmask].sum().data - # def parceval(self, add_attrs=True ): - # return wavenumber_spectrogram.parceval(self, add_attrs= add_attrs ) + - def parceval(self, add_attrs=True, weight_data=False): + def parceval(self, add_attrs=True, weight_data=False ): "test Parceval theorem" - import copy - + DATA = self.data L = self.L X = self.x - # derive mean variances of stancils - # stancil_iter = create_chunk_boundaries_unit_lengths(L, self.xlims, ov= self.ov ) def get_stancil_var_apply(stancil): - from scipy.signal import detrend - + "returns the variance of yy for stancil" - x_mask = (stancil[0] < X) & (X <= stancil[-1]) + x_mask= (stancil[0] < X) & (X <= stancil[-1]) idata = DATA[x_mask] if len(idata) < 1: return stancil[1], np.nan, len(idata) idata = 
detrend(idata) # weight data - x_pos = (np.round((X[x_mask] - stancil[0]) / 10.0, 0)).astype("int") + x_pos = (np.round( (X[x_mask] - stancil[0])/ 10.0 , 0) ).astype('int') if weight_data: window = self.win[x_pos] - idata = ( - idata * window * np.sqrt(np.var(idata) / np.var((idata * window))) - ) + idata = idata * window * np.sqrt( np.var(idata) / np.var(( idata* window) ) ) return stancil[1], idata.var(), len(idata) - D_vars = list(map(get_stancil_var_apply, copy.copy(self.stancil_iter))) + D_vars = list(map(get_stancil_var_apply, copy.copy(self.stancil_iter) )) - stancil_vars, Nphotons = list(), 0 + stancil_vars, Nphotons =list(), 0 for I in D_vars: - stancil_vars.append(I[1] * I[2]) - Nphotons += I[2] + stancil_vars.append(I[1] * I[2]) + Nphotons += I[2] - stancil_weighted_variance = np.nansum(np.array(stancil_vars)) / Nphotons + stancil_weighted_variance = np.nansum(np.array(stancil_vars))/Nphotons - print("Parcevals Theorem:") - print("variance of timeseries: ", DATA.var()) - print("mean variance of stancils: ", stancil_weighted_variance) - # print('variance of weighted timeseries: ',self.phi.var() ) - # self.calc_var(self) - print("variance of the optimzed windowed LS Spectrum: ", self.calc_var()) + print('Parcevals Theorem:') + print('variance of timeseries: ', DATA.var()) + print('mean variance of stancils: ', stancil_weighted_variance ) + + print('variance of the optimzed windowed LS Spectrum: ', self.calc_var()) if add_attrs: - self.G.attrs["variance_unweighted_data"] = DATA.var() - self.G.attrs["mean_variance_stancils"] = np.nanmean(np.array(stancil_vars)) - self.G.attrs["mean_variance_LS_pwelch_spectrum"] = self.calc_var() + self.G.attrs['variance_unweighted_data'] = DATA.var() + self.G.attrs['mean_variance_stancils'] = np.nanmean(np.array(stancil_vars) ) + self.G.attrs['mean_variance_LS_pwelch_spectrum'] = self.calc_var() + + + def mean_spectral_error(self, mask=None, confidence = 0.95 ): + return WavenumberSpectrogram.mean_spectral_error(self, mask=mask, confidence= confidence ) - def mean_spectral_error(self, mask=None, confidence=0.95): - return wavenumber_spectrogram.mean_spectral_error( - self, mask=mask, confidence=confidence - ) # class for getting standard Pwelch spectrum. 
old version, deprechiate -class wavenumber_pwelch: - def __init__( - self, data, x, L, ov=None, window=None, save_chunks=False, plot_chunks=False - ): +class WavenumberPwelch: + def __init__(self,data, x, L, ov=None, window=None, save_chunks=False, plot_chunks=False): """ returns a wavenumber spectrum using the pwelch method @@ -1016,170 +884,143 @@ def __init__( self.dx self.n_spec """ - from scipy import signal + + - self.data = data # field to be analyzed - self.dx = np.diff(x)[0] # sampling interval - self.save_chunks = save_chunks - dsize = data.size + self.data = data # field to be analyzed + self.dx = np.diff(x)[0] # sampling interval + self.save_chunks= save_chunks + dsize = data.size - ov = int(np.round(L / 2)) if ov is None else ov + ov=int(np.round(L/2)) if ov is None else ov self.n = L if window is None: - win = np.hanning(self.n) + win=np.hanning(self.n) else: - win = window + win=window - factor = np.sqrt(self.n / (win**2).sum()) - win *= factor + factor=np.sqrt(self.n/(win**2).sum()) + win*=factor # test if n is even - if self.n % 2: + if (self.n%2): self.neven = False else: self.neven = True - # calculate freq - self.k = self.calc_freq() - # del(self.df) + #calculate freq + self.k = self.calc_freq() + nbin=int(np.floor(dsize/(L-ov))) - # print(data.size, L, ov, int(L-ov) ) - nbin = int(np.floor(dsize / (L - ov))) - # print(nbin) if save_chunks: - chunks = np.empty([int(nbin), int(L)]) - - self.specs = np.empty([int(nbin), self.k.size]) - # print(chunks.shape) - # result_array = np.empty((0, 100)) - # if plot_chunks: - # M.figure_axis_xy() - last_k = 0 - k = 0 - # print('iter range', np.arange(0,data.size,int(L-ov))) - for i in np.arange(0, dsize - int(L - ov) + 1, int(L - ov)): - - if (plot_chunks) and (i >= dsize - 6 * int(L - ov)): + chunks=np.empty([int(nbin),int(L)]) + + self.specs=np.empty([int(nbin),self.k.size]) + + last_k=0 + k=0 + + for i in np.arange(0,dsize-int(L-ov)+1,int(L-ov)): + + if (plot_chunks) and (i >= dsize-6*int(L-ov)): M.figure_axis_xy() - self.phi = data[int(i) : int(i + L)] + self.phi=data[int(i):int(i+L)] - # self.ii=np.append(self.ii,[i,i+L]) - # print(self.phi.max()) - # print(self.phi.mean()) - # print(self.phi.shape) - # print('i',int(i), int(i+L)) - # print(chunk.size, l) - if int(i + L) <= data.size - 1: + if int(i+L) <= data.size-1: if save_chunks: - chunks[k, :] = self.phi + chunks[k,:]=self.phi - self.phi = signal.detrend(self.phi) * win + + self.phi=signal.detrend(self.phi)*win if plot_chunks: - # MT.stats_format(self.phi, 'chunk '+str(i)) plt.plot(self.phi) - self.specs[k, :] = self.calc_spectrum() - last_k = k - last_used_TS = int(i + L) - # if plot_chunks: - # MT.stats_format(self.spec, 'spec '+str(i)) + self.specs[k,:]= self.calc_spectrum() + last_k=k + last_used_TS=int(i+L) + else: if plot_chunks: - print("end of TS is reached") - print("last spec No: " + str(last_k)) - print("spec container: " + str(specs.shape)) - print("last used Timestep: " + str(last_used_TS)) - print("length of TS " + str(dsize) + "ms") + print('end of TS is reached') + print('last spec No: '+str(last_k)) + print('spec container: '+str(specs.shape)) + print('last used Timestep: '+str(last_used_TS)) + print('length of TS '+ str(dsize) +'ms') + + k+=1 - k += 1 if save_chunks: - self.chunks = chunks - # del(chunks) - - self.spec_est = self.specs.mean(axis=0) - # if prewhite is None: - # self.specs=specs[:last_k,:] - # self.spec_est=self.specs.mean(axis=0) - # elif prewhite ==1: - # self.specs=specs[:last_k,:]*(2*np.pi*self.f) - # 
self.spec_est=self.specs.mean(axis=0) - # elif prewhite ==2: - # self.specs=specs[:last_k,:]*(2*np.pi*self.f)**2 - # self.spec_est=self.specs.mean(axis=0) - - self.n_spec, _ = self.specs.shape + self.chunks=chunks + + + self.spec_est=self.specs.mean(axis=0) + + + + self.n_spec,_=self.specs.shape self.calc_var() - # self.phi=self.data - # self.phi*=win*np.sqrt(factor) + def calc_freq(self): - """calculate array of spectral variable (frequency or - wavenumber) in cycles per unit of L""" + """ calculate array of spectral variable (frequency or + wavenumber) in cycles per unit of L """ - self.df = 1.0 / ((self.n - 1) * self.dx) + self.df = 1./((self.n-1)*self.dx) if self.neven: - f = self.df * np.arange(self.n / 2 + 1) + f = self.df*np.arange(self.n/2+1) else: - f = self.df * np.arange((self.n - 1) / 2.0 + 1) + f = self.df*np.arange( (self.n-1)/2. + 1 ) return f def calc_spectrum(self): - """compute the 1d spectrum of a field phi""" + """ compute the 1d spectrum of a field phi """ self.phih = np.fft.rfft(self.phi) # the factor of 2 comes from the symmetry of the Fourier coeffs - spec = 2.0 * (self.phih * self.phih.conj()).real / self.df / self.n**2 + spec = 2.*(self.phih*self.phih.conj()).real / self.df /self.n**2 # the zeroth frequency should be counted only once - spec[0] = spec[0] / 2.0 + spec[0] = spec[0]/2. if self.neven: - spec[-1] = spec[-1] / 2.0 + spec[-1] = spec[-1]/2. return spec + def error(self, ci=0.95): - self.El, self.Eu = spec_error(self.spec_est, self.n_spec, ci=ci) + self.El, self.Eu =spec_error(self.spec_est,self.n_spec,ci=ci) def parceval(self): - print("Parcevals Theorem:") - print("variance of unweighted timeseries: ", self.data.var()) - print( - "mean variance of timeseries chunks: ", - ( - self.chunks.var(axis=1).mean() - if self.save_chunks is True - else "data not saved" - ), - ) - # print('variance of weighted timeseries: ',self.phi.var() ) - # self.calc_var(self) - print("variance of the pwelch Spectrum: ", self.var) + print('Parcevals Theorem:') + print('variance of unweighted timeseries: ',self.data.var()) + print('mean variance of timeseries chunks: ',self.chunks.var(axis=1).mean() if self.save_chunks is True else 'data not saved') + print('variance of the pwelch Spectrum: ',self.var) def calc_var(self): - """Compute total variance from spectrum""" - self.var = ( - self.df * np.nanmean(self.specs[1:], 0).sum() - ) # do not consider zeroth frequency + """ Compute total variance from spectrum """ + self.var = self.df* np.nanmean(self.specs[1:], 0).sum() # do not consider zeroth frequency -# %% optimze spectral variance +# optimze spectral variance class conserve_variance: - def __init__(self, Z, freq, data, nan_mask=None): - """ """ - import lmfit as LM + def __init__(self,Z, freq, data, nan_mask= None): - self.LM = LM + """ + + """ + + self.LM =LM self.data = data - self.Z = Z + self.Z = Z self.freq = freq self.nan_mask = nan_mask @@ -1187,79 +1028,58 @@ def set_parameters(self): params = self.LM.Parameters() - p_smothed = self.runningmean(np.abs(self.Z), 20, tailcopy=True) + p_smothed = self.runningmean(np.abs(self.Z ), 20, tailcopy=True) f_max = self.freq[p_smothed[~np.isnan(p_smothed)].argmax()] - lambda_max = 9.81 * 5**2 / (2 * np.pi) - params.add("x_cutoff", 1 / lambda_max, min=0, max=1, vary=False) - params.add( - "x_max_pos", f_max, min=f_max * 0.75, max=f_max * 5 + 0.001, vary=False - ) - params.add("LF_amp", 1, min=0.5, max=1.2, vary=True) - params.add("HF_amp", 0.5, min=0, max=1.5, vary=True) - params.add("sigma_g", 0.002, min=0.001, max=0.05, 
vary=False) - params.add("Gauss_amp", 0.5, min=0.01, max=2, vary=True) + + lambda_max = 9.81 * 5 **2/ (2* np.pi) + params.add('x_cutoff', 1/lambda_max , min=0, max=1, vary=False) + params.add('x_max_pos', f_max , min=f_max*0.75, max=f_max*5+0.001, vary=False) + params.add('LF_amp', 1 , min=0.5 , max=1.2, vary= True) + params.add('HF_amp', 0.5 , min=0 , max=1.5, vary= True) + params.add('sigma_g', 0.002 , min=0.001 , max=0.05, vary= False) + params.add('Gauss_amp', 0.5 , min=0.01 , max=2, vary= True) + self.params = params return params def test_ojective_func(self, weight_func, plot_flag=True): - self.objective_func( - self.params, - self.data, - self.Z, - weight_func, - self.freq, - self.nan_mask, - plot_flag=plot_flag, - ) - - def tanh_weight_function(self, ff, params): - return self.tanh_weight( - ff, - params["x_cutoff"].value, - params["x_max_pos"].value, - params["LF_amp"].value, - params["HF_amp"].value, - params["Gauss_amp"].value, - params["sigma_g"].value, - ) - - def tanh_weight(self, x, x_cutoff, x_max_pos, LF_amp, HF_amp, Gauss_amp, sigma_g): + self.objective_func(self.params, self.data, self.Z, weight_func, self.freq, self.nan_mask, plot_flag=plot_flag) + + def tanh_weight_function(self,ff, params): + return self.tanh_weight(ff, params['x_cutoff'].value, + params['x_max_pos'].value, + params['LF_amp'].value, + params['HF_amp'].value, + params['Gauss_amp'].value, + params['sigma_g'].value ) + + def tanh_weight(self,x, x_cutoff , x_max_pos, LF_amp, HF_amp, Gauss_amp, sigma_g): """ - zdgfsg + zdgfsg """ - HF_amp1 = LF_amp - HF_amp - decay = 0.5 - np.tanh((x - x_cutoff) / sigma_g) / 2 - y = decay * HF_amp1 + (1 - HF_amp1) - y = y - y[0] + LF_amp + HF_amp1 = (LF_amp-HF_amp) + decay = 0.5 - np.tanh( (x-x_cutoff)/sigma_g )/2 + y = decay * HF_amp1 + (1 - HF_amp1) + y = y- y[0] +LF_amp - def gaus(x, x_0, amp, sigma_g): - return amp * np.exp(-0.5 * ((x - x_0) / sigma_g) ** 2) + def gaus(x, x_0, amp, sigma_g ): + return amp* np.exp(-0.5 * ( (x-x_0)/sigma_g)**2) - y += gaus(x, x_max_pos, Gauss_amp, sigma_g) + y += gaus(x, x_max_pos, Gauss_amp, sigma_g ) - # y = y * LF_amp return y - def objective_func( - self, - params, - data_x, - Z_results, - weight_func, - freq, - nan_mask=None, - plot_flag=False, - ): - alpha = 1e7 + def objective_func(self, params, data_x, Z_results, weight_func, freq, nan_mask = None, plot_flag=False): + alpha =1e7 def model_real_space(Z, weights, n=None): """ Both inputs must have the same length """ - return np.fft.irfft(Z * weights, n=n) + return np.fft.irfft(Z*weights, n = n) weights = weight_func(freq, params) @@ -1267,10 +1087,10 @@ def model_real_space(Z, weights, n=None): weights = np.insert(weights, -1, weights[-1]) if nan_mask is not None: - model = model_real_space(Z_results, weights, n=data_x.size)[~nan_mask] + model = model_real_space(Z_results, weights, n= data_x.size)[~nan_mask] dd = data_x[~nan_mask][:] else: - model = model_real_space(Z_results, weights, n=data_x.size)[:] + model = model_real_space(Z_results, weights, n= data_x.size)[:] dd = data_x[:] if model.size > dd.size: @@ -1279,159 +1099,99 @@ def model_real_space(Z, weights, n=None): dd = dd[:-1] if plot_flag: - import m_general_ph3 as M + from matplotlib.gridspec import GridSpec import matplotlib.pyplot as plt + F= M.figure_axis_xy(10, 4.1 * 2.5, view_scale= 0.5, container = True) - F = M.figure_axis_xy(10, 4.1 * 2.5, view_scale=0.5, container=True) - - gs = GridSpec(5, 1, wspace=0.1, hspace=0.4) # figure=fig, - pos0, pos1, pos2 = gs[0:3, 0], gs[3, 0], gs[4, 0] # ,gs[3, 0] + gs = 
GridSpec(5,1, wspace=0.1, hspace=0.4) + pos0,pos1,pos2 = gs[0:3, 0],gs[3, 0],gs[4, 0] ax1 = F.fig.add_subplot(pos0) - plt.title("Stacked Timeseries", loc="left") + plt.title('Stacked Timeseries', loc='left') - chunk_l = 400 - chunk_iter = create_chunk_boundaries( - chunk_l, data_x.size, ov=0, iter_flag=True - ) + chunk_l= 400 + chunk_iter = create_chunk_boundaries(chunk_l, data_x.size, ov=0, iter_flag = True) - ofsett0 = 6 + ofsett0= 6 ofsett = np.copy(ofsett0) for chi in chunk_iter: - v1 = np.round(np.nanvar(dd), 4) - plt.plot( - ofsett + data_x[chi[0] : chi[-1]], - linewidth=3, - alpha=0.5, - c="black", - label=" org. data (var:" + str(v1) + ")", - ) - - v1 = np.round( - model_real_space(Z_results, weights * 0 + 1)[~nan_mask[1:]].var(), 4 - ) - plt.plot( - ofsett - + model_real_space(Z_results, weights * 0 + 1)[chi[0] : chi[-1]], - linewidth=0.8, - c="red", - label="LS model init (var:" + str(v1) + ")", - ) - - v1 = np.round(model.var(), 4) - plt.plot( - ofsett + model_real_space(Z_results, weights)[chi[0] : chi[-1]], - linewidth=0.8, - c="blue", - label="LS model weighted (var:" + str(v1) + ")", - ) + v1= np.round(np.nanvar(dd), 4) + plt.plot(ofsett+ data_x[chi[0]:chi[-1]] , linewidth=3, alpha=0.5 , c='black', label=' org. data (var:'+str(v1)+')') + + v1= np.round(model_real_space(Z_results, weights*0 +1)[~nan_mask[1:]].var(), 4) + plt.plot(ofsett + model_real_space(Z_results, weights*0 +1)[chi[0]:chi[-1]] ,linewidth= 0.8, c='red', label='LS model init (var:'+str(v1)+')') + + v1= np.round(model.var(), 4) + plt.plot(ofsett + model_real_space(Z_results, weights)[chi[0]:chi[-1]],linewidth= 0.8, c='blue', label='LS model weighted (var:'+str(v1)+')') if ofsett == ofsett0: plt.legend() ofsett -= 1 - plt.ylim(ofsett, ofsett0 + 1) - plt.xlim(0, chunk_l * 2) + plt.ylim(ofsett, ofsett0+1) + plt.xlim(0, chunk_l*2) + ax2 = F.fig.add_subplot(pos1) - # ax2 = plt.subplot(3, 1, 2) - plt.title("Amplitude Weight Function", loc="left") - plt.plot(weights, c="black") - ax2.set_xscale("log") + plt.title('Amplitude Weight Function', loc='left') + plt.plot(weights , c='black') + ax2.set_xscale('log') ax3 = F.fig.add_subplot(pos2) - plt.title("Initial and tuned |Z|", loc="left") + plt.title('Initial and tuned |Z|', loc='left') - # ax3 = plt.subplot(3, 1, 3) - # v2_fft= np.fft.rfft(data_x) - # v2 = np.round( (2.*(v2_fft*v2_fft.conj()).real /data_x.size**2 ).sum(), 4) - # plt.plot(abs(v2_fft) , linewidth=2, alpha=0.5 , c='black', label='org data (var: '+str(v2) +')') + v2 = np.round( (4.*(Z_results*Z_results.conj()).real /data_x.size**2 ).sum(), 4) + plt.plot(abs(Z_results), linewidth= 0.8, c='red', label='Z (var: '+str(v2) +')') + plt.plot(M.runningmean(abs(Z_results) , 20, tailcopy=True), linewidth= 1.5, c='red', zorder=12) - v2 = np.round( - (4.0 * (Z_results * Z_results.conj()).real / data_x.size**2).sum(), 4 - ) - plt.plot( - abs(Z_results), linewidth=0.8, c="red", label="Z (var: " + str(v2) + ")" - ) - plt.plot( - M.runningmean(abs(Z_results), 20, tailcopy=True), - linewidth=1.5, - c="red", - zorder=12, - ) - - Z2 = Z_results * weights - v2 = np.round((4.0 * (Z2 * Z2.conj()).real / data_x.size**2).sum(), 4) - plt.plot( - abs(Z2), - linewidth=0.8, - c="blue", - label="weighted Z(var: " + str(v2) + ")", - ) - plt.plot( - M.runningmean(abs(Z2), 20, tailcopy=True), - linewidth=1.5, - c="blue", - zorder=12, - ) + Z2= Z_results* weights + v2 = np.round( (4.*(Z2*Z2.conj()).real /data_x.size**2 ).sum(), 4) + plt.plot(abs(Z2), linewidth= 0.8, c='blue', label='weighted Z(var: '+str(v2) +')') + 
plt.plot(M.runningmean(abs(Z2) , 20, tailcopy=True), linewidth= 1.5, c='blue', zorder=12) plt.legend() - plt.ylim(np.percentile(abs(Z_results), 0.5), abs(Z_results).max() * 1.3) - plt.xlabel("wavenumber k") - ax3.set_xscale("log") - ax3.set_yscale("log") + plt.ylim( np.percentile(abs(Z_results), 0.5), abs(Z_results).max()*1.3 ) + plt.xlabel('wavenumber k') + ax3.set_xscale('log') + ax3.set_yscale('log') + + fitting_cost =( abs(dd - model) / dd.std() )**2 + variance_cost =( abs(dd.var() - model.var()) / dd.std() ) **2 - fitting_cost = (abs(dd - model) / dd.std()) ** 2 - variance_cost = (abs(dd.var() - model.var()) / dd.std()) ** 2 + return fitting_cost.sum() , alpha* variance_cost - return fitting_cost.sum(), alpha * variance_cost + def optimize(self, fitting_args= None , method='dual_annealing', max_nfev=None): - def optimize(self, fitting_args=None, method="dual_annealing", max_nfev=None): if fitting_args is None: fitting_args = (self.data, self.Z, self.tanh_weight_function, self.freq) self.weight_func = fitting_args[2] - self.fitter = self.LM.minimize( - self.objective_func, - self.params, - args=fitting_args, - kws={"nan_mask": self.nan_mask}, - method=method, - max_nfev=max_nfev, - ) + self.fitter = self.LM.minimize(self.objective_func, self.params, args=fitting_args, kws={'nan_mask':self.nan_mask} , method=method, max_nfev=max_nfev) return self.fitter def plot_result(self): - self.objective_func( - self.fitter.params, - self.data, - self.Z, - self.weight_func, - self.freq, - self.nan_mask, - plot_flag=True, - ) + self.objective_func(self.fitter.params, self.data, self.Z, self.weight_func, self.freq, self.nan_mask, plot_flag=True) def best_guess_Z(self): return self.Z * self.weight_func(self.freq, self.fitter.params) def runningmean(self, var, m, tailcopy=False): - m = int(m) - s = var.shape - if s[0] <= 2 * m: - print("0 Dimension is smaller then averaging length") + m=int(m) + s =var.shape + if s[0] <= 2*m: + print('0 Dimension is smaller then averaging length') return - rr = np.asarray(var) * np.nan - # print(type(rr)) + rr=np.asarray(var)*np.nan + var_range = np.arange(m, int(s[0]) - m - 1, 1) for i in var_range[np.isfinite(var[m : int(s[0]) - m - 1])]: - # rm.append(var[i-m:i+m].mean()) rr[int(i)] = np.nanmean(var[i - m : i + m]) + if tailcopy: - rr[0:m] = rr[m + 1] - rr[-m - 1 : -1] = rr[-m - 2] + rr[0:m]=rr[m+1] + rr[-m-1:-1]=rr[-m-2] return rr From 708e6a9847f388cf40697a14f0bbd12d709fa14d Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Thu, 29 Feb 2024 09:47:08 -0500 Subject: [PATCH 07/22] added astropy dependency --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index e340d58d..c10d9f40 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -123,6 +123,8 @@ dependencies = [ # Optional "h5py >=3.5.0, < 4.0.0", "termcolor >=2.4.0, < 3.0.0", "typer >=0.9.0, < 1.0.0", + "getmem >=1.0, <= 1.0.0", + "astropy >=5.2, <= 6.0.0" ] # List additional groups of dependencies here (e.g. 
development From 0e5dc6d4117105379c0bae5421a265af32732c6e Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Thu, 29 Feb 2024 10:20:06 -0500 Subject: [PATCH 08/22] removing calls to create_chunk_boundaries --- src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py | 8 ++++---- src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py | 2 +- .../analysis_db/A01b_ALT07_SHNH_variance_tester.py | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py b/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py index 28c8bc6e..aed0c10e 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py @@ -42,21 +42,21 @@ def get_lat(sti): mask = (sti[0] < xx) & (xx <= sti[1]) return np.nanmean(Gi["lats"][mask]) - iter_x = spec.create_chunk_boundaries_unit_lengths( + iter_x = spec.create_chunk_boundaries( Lmeter, [xx.min(), xx.max()], ov=0, iter_flag=False )[1, :] - stencil_iter = spec.create_chunk_boundaries_unit_lengths( + stencil_iter = spec.create_chunk_boundaries( Lmeter, [xx.min(), xx.max()], ov=0, iter_flag=True ) var_list = np.array(list(map(get_var, stencil_iter))) - stencil_iter = spec.create_chunk_boundaries_unit_lengths( + stencil_iter = spec.create_chunk_boundaries( Lmeter, [xx.min(), xx.max()], ov=0, iter_flag=True ) N_list = np.array(list(map(get_N, stencil_iter))) - stencil_iter = spec.create_chunk_boundaries_unit_lengths( + stencil_iter = spec.create_chunk_boundaries( Lmeter, [xx.min(), xx.max()], ov=0, iter_flag=True ) lat_list = np.array(list(map(get_lat, stencil_iter))) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py b/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py index bcb174da..153eb4a6 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py @@ -384,7 +384,7 @@ def calc_gFT_apply(stancil, prior): return return_dict # derive L2 stancil - self.stancil_iter_list = spec.create_chunk_boundaries_unit_lengths( + self.stancil_iter_list = spec.create_chunk_boundaries( Lmeters, self.xlims, ov=self.ov, iter_flag=False ) self.stancil_iter = iter(self.stancil_iter_list.T.tolist()) diff --git a/src/icesat2_tracks/analysis_db/A01b_ALT07_SHNH_variance_tester.py b/src/icesat2_tracks/analysis_db/A01b_ALT07_SHNH_variance_tester.py index 21abbaa1..d455ee37 100644 --- a/src/icesat2_tracks/analysis_db/A01b_ALT07_SHNH_variance_tester.py +++ b/src/icesat2_tracks/analysis_db/A01b_ALT07_SHNH_variance_tester.py @@ -20,8 +20,8 @@ from random import sample import imp import icesat2_tracks.ICEsat2_SI_tools.convert_GPS_time as cGPS -import icesat2_tracks.ICEsat2_SI_tools.io as io -from icesat2_tracks.ICEsat2_SI_tools.spectral_estimates import create_chunk_boundaries_unit_lengths, create_chunk_boundaries +import icesat2_tracks.ICEsat2_SI_tools.iotools as io +from icesat2_tracks.ICEsat2_SI_tools.spectral_estimates import create_chunk_boundaries import icesat2_tracks.ICEsat2_SI_tools.spectral_estimates as spec import icesat2_tracks.ICEsat2_SI_tools.filter_regrid as regrid @@ -149,8 +149,8 @@ def get_var(sti): def get_breakingpoints(xx, dd ,Lmeter= 3000): nsize = dd.size - stencil_iter = spec.create_chunk_boundaries_unit_lengths( Lmeter, [ xx.min(), xx.max()],ov =Lmeter*3/4, iter_flag= True) - iter_x = spec.create_chunk_boundaries_unit_lengths( Lmeter, [ xx.min(), xx.max()],ov =Lmeter*3/4, iter_flag= False)[1,:] + stencil_iter = spec.create_chunk_boundaries( Lmeter, [ xx.min(), xx.max()],ov =Lmeter*3/4, iter_flag= 
True) + iter_x = spec.create_chunk_boundaries( Lmeter, [ xx.min(), xx.max()],ov =Lmeter*3/4, iter_flag= False)[1,:] def get_var(sti): mask = (sti[0] < xx) & (xx <= sti[1]) From d3e5d55b1b46437fdf1de0407613c0bdba530243 Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Thu, 29 Feb 2024 11:10:16 -0500 Subject: [PATCH 09/22] Fixed create_chunk_boundaries implementaiton --- src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py index 2e9e54bd..1e4bfbba 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py @@ -55,10 +55,10 @@ def create_chunk_boundaries(L, dsize, ov= None, iter_flag=True): """ boundaries = define_chunk_boundaries_per_type(L,ov,dsize) - - xleft = np.arange(boundaries[0]) - xcenter_pos = np.arange(boundaries[1]) - xright = np.arange(boundaries[3]) + + xleft, xcenter_pos, xright = [ + np.arange(*boundary) for boundary in boundaries + ] max_size = min([xleft.size , xcenter_pos.size, xright.size]) From 4beefa8875a21353aeeb79402fd81ace2565ba78 Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Thu, 29 Feb 2024 13:14:55 -0500 Subject: [PATCH 10/22] fixed imports and function argument types --- src/icesat2_tracks/ICEsat2_SI_tools/lanczos.py | 14 +++++++------- .../ICEsat2_SI_tools/spectral_estimates.py | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/lanczos.py b/src/icesat2_tracks/ICEsat2_SI_tools/lanczos.py index 5a7771ee..530b4e32 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/lanczos.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/lanczos.py @@ -1,7 +1,7 @@ import numpy as np import scipy.signal as signal -import scipy.ndimage -import scipy.ndimage.filters as signal +from scipy.ndimage import convolve +import scipy.ndimage.filters as filters def lanczos_1d(width, dx, a=2): @@ -59,8 +59,8 @@ def lanczos_filter_1d(x, data, width, a=2, mode="same", method="direct"): dx = np.diff(x).mean() x, L = lanczos_1d(width, dx, a=a) - - data_lp = signal.convolve(data, L, mode=mode, method=method) + _method = method + data_lp = signal.convolve(data, L, mode=mode, method=_method) return data_lp @@ -86,7 +86,7 @@ def lanczos_filter_1d_wrapping(x, data, width, a=2, mode="wrap"): dx = np.diff(x).mean() x, L = lanczos_1d(width, dx, a=a) - data_lp = scipy.ndimage.convolve(data, L, mode=mode) # * + data_lp = convolve(data, L, mode=mode) # * return data_lp @@ -110,7 +110,7 @@ def lanczos_filter_2d(x, data, width, a=2, mode="same"): dx = abs(np.diff(x).mean()) x, L2d = lanczos_2d(width, dx, a=a) - data_lp = signal.convolve(data, L2d, mode=mode) + data_lp = filters.convolve(data, L2d, mode=mode) return data_lp @@ -146,6 +146,6 @@ def lanczos_filter_3d(x, data, width, a=2, mode="same"): dx = abs(np.diff(x).mean()) x, L3d = lanczos_3d(width, dx, a=a) - data_lp = signal.convolve(data, L3d, mode=mode) + data_lp = filters.convolve(data, L3d, mode=mode) return data_lp diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py index 1e4bfbba..f705c575 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py @@ -29,7 +29,7 @@ def define_chunk_boundaries_per_type(L_unit, ov, limits_size): ] # Handling when limits_size is a list - elif 
isinstance(limits_size, list) and len(limits_size) == 2: + elif isinstance(limits_size, tuple) and len(limits_size) == 2: start, end = limits_size boundaries = [ (start, end - dl,dl), From 8c05ea45e012df4ce49c70d1b74f0960d5df039c Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Thu, 29 Feb 2024 15:42:35 -0500 Subject: [PATCH 11/22] refactor create_chunk_boundaries function --- .../ICEsat2_SI_tools/spectral_estimates.py | 48 ++++++++++--------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py index f705c575..47797b58 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py @@ -9,40 +9,42 @@ from scipy import signal import lmfit as LM import icesat2_tracks.local_modules.m_general_ph3 as M - +from functools import singledispatch def is_even(number): return not (number % 2) -def define_chunk_boundaries_per_type(L_unit, ov, limits_size): - - ov = int(np.round(L_unit / 2)) if ov is None else ov - dl = L_unit - ov - boundaries = [] +@singledispatch +def define_chunk_boundaries(L_unit,dl, limits_size): + raise ValueError("limits_size must be either an int or a list of two elements") - # Handling when limits_size is an integer - if isinstance(limits_size, int): - boundaries = [ - (0, limits_size - dl,dl), +@define_chunk_boundaries.register +def _(limits_size:int,L_unit:int,dl:int): + L_unit = 0 + boundaries = [ + (L_unit, limits_size - dl,dl), (dl, limits_size - dl + 1,dl), (dl * 2, limits_size + 1,dl) ] - - # Handling when limits_size is a list - elif isinstance(limits_size, tuple) and len(limits_size) == 2: - start, end = limits_size - boundaries = [ + return boundaries + +@define_chunk_boundaries.register +def _( limits_size:tuple,L_unit:int,dl:int): + return define_chunk_boundaries(list(limits_size),L_unit,dl) + +@define_chunk_boundaries.register +def _(limits_size:list, L_unit:int,dl:int ): + start, end = limits_size + boundaries = [ (start, end - dl,dl), (start + L_unit / 2, end - dl + 1,dl), (start + L_unit, end + 1,dl) - ] - else: - raise ValueError("limits_size must be either an int or a list of two elements") - + ] return boundaries + # basic functions -def create_chunk_boundaries(L, dsize, ov= None, iter_flag=True): +def create_chunk_boundaries(L_unit, dsize, ov= None, iter_flag=True): """ returns all need chunk boundaries and center position given L, and ov inputs: @@ -53,8 +55,10 @@ def create_chunk_boundaries(L, dsize, ov= None, iter_flag=True): if iter_flag True returns iter else it returns an ndarray """ - - boundaries = define_chunk_boundaries_per_type(L,ov,dsize) + ov = int(np.round(L_unit / 2)) if ov is None else ov + dl = int( L_unit - ov) + + boundaries = define_chunk_boundaries(dsize,int(L_unit),dl) xleft, xcenter_pos, xright = [ np.arange(*boundary) for boundary in boundaries From e9ddc4e40f99cf5e3edd1589423956e74a78c69c Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Fri, 1 Mar 2024 06:56:30 -0500 Subject: [PATCH 12/22] added review suggestions --- .../ICEsat2_SI_tools/convert_GPS_time.py | 1 - .../ICEsat2_SI_tools/filter_regrid.py | 136 +++++++++--------- 2 files changed, 72 insertions(+), 65 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/convert_GPS_time.py b/src/icesat2_tracks/ICEsat2_SI_tools/convert_GPS_time.py index 7a2b7bc8..0aa326f5 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/convert_GPS_time.py +++ 
b/src/icesat2_tracks/ICEsat2_SI_tools/convert_GPS_time.py @@ -31,7 +31,6 @@ """ import numpy as np from .convert_julian import convert_julian -import pdb # -- PURPOSE: Define GPS leap seconds diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/filter_regrid.py b/src/icesat2_tracks/ICEsat2_SI_tools/filter_regrid.py index 5799f0ca..e70bf994 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/filter_regrid.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/filter_regrid.py @@ -1,6 +1,42 @@ import numpy as np from numba import jit import pandas as pd +import matplotlib as plt + +def process_single_stencil_set( + stancil_set, T2, key_var, key_x_coord, stancil_width, calc_stencil_stats +): + # Select photons that are in bins + Ti_sel = T2[(stancil_set[0, 0] < T2["x"]) & (T2["x"] < stancil_set[2, -1])] + + # Put each photon in a bin + bin_labels = np.searchsorted(stancil_set[0, :], Ti_sel["x"]) + Ti_sel["x_bins"] = bin_labels + + # Group data by this bin + Ti_g = Ti_sel.groupby("x_bins", dropna=False, as_index=True) + + # Take median of the data + Ti_median = Ti_g.median() + + # Apply weighted mean and count photons + Ti_weight = Ti_g.apply( + calc_stencil_stats, key_var, key_x_coord, stancil_width, stancil_set + ) + + # Merge both datasets + T_merged = pd.concat([Ti_median, Ti_weight], axis=1) + + # Rename columns + T_merged = T_merged.rename( + columns={key_var: key_var + "_median", key_x_coord: key_x_coord + "_median"} + ) + T_merged[key_var + "_median"][np.isnan(T_merged[key_var + "_std"])] = np.nan + + # Set stencil center as new x-coordinate + T_merged["x"] = stancil_set[1, T_merged.index - 1] + + return T_merged def get_hemis(B, beams_list): @@ -61,7 +97,7 @@ def lat_min_max_extended(B, beams_list, accent=None): min_lat, max_lat, accent min and max latitudes of the beams, (True/False) True if the track is accending """ - accent = regrid.track_type(B[beams_list[0]]) if accent is None else accent + accent = track_type(B[beams_list[0]]) if accent is None else accent hemis = get_hemis(B, beams_list) @@ -85,32 +121,36 @@ def lat_min_max_extended(B, beams_list, accent=None): track_lat_end.append(ll["lats"]) track_lon_end.append(ll["lons"]) - if (hemis == "SH") & accent: - return ( - [max(track_lat_start), min(track_lat_end)], - [max(track_lon_start), min(track_lon_end)], - accent, - ) # accenting SH mean start is in the top right - elif (hemis == "SH") & ~accent: - return ( - [max(track_lat_start), min(track_lat_end)], - [min(track_lon_start), max(track_lon_end)], - accent, - ) # decent SH mean start is in the top left - elif (hemis == "NH") & accent: - return ( - [min(track_lat_start), max(track_lat_end)], - [min(track_lon_start), max(track_lon_end)], - accent, - ) # accent NH mean start is in the lower left - elif (hemis == "NH") & ~accent: - return ( - [min(track_lat_start), max(track_lat_end)], - [max(track_lon_start), min(track_lon_end)], - accent, - ) # decent NH mean start is in the lower right - else: - raise ValueError("some defintions went wrong") + # Define a dictionary to map the conditions to the functions + func_map = { + ("SH", True): ( + max, + min, + max, + min, + ), # accenting SH mean start is in the top right + ("SH", False): (max, min, min, max), # decent SH mean start is in the top left + ("NH", True): (min, max, min, max), # accent NH mean start is in the lower left + ("NH", False): ( + min, + max, + max, + min, + ), # decent NH mean start is in the lower right + } + # Get the functions based on the conditions + funcs = func_map.get((hemis, accent)) + # If the key is not found in the 
dictionary, raise an error + if funcs is None: + raise ValueError("some definitions went wrong") + lat_start_func, lat_end_func, lon_start_func, lon_end_func = funcs + # Use the functions to calculate the start and end of latitude and longitude + # Return these values along with accent + return ( + [lat_start_func(track_lat_start), lat_end_func(track_lat_end)], + [lon_start_func(track_lon_start), lon_end_func(track_lon_end)], + accent, + ) def lat_min_max(B, beams_list, accent=None): @@ -306,42 +346,12 @@ def calc_stencil_stats(group, key, key_x_coord, stancil_width, stancils): return Tweight.T - T_sets = list() - - for stancil_set in [stencil_1, stencil_1half]: - - # select photons that are in bins - Ti_sel = T2[(stancil_set[0, 0] < T2["x"]) & (T2["x"] < stancil_set[2, -1])] - - # put each photon in a bin - bin_labels = np.searchsorted(stancil_set[0, :], Ti_sel["x"]) - - Ti_sel["x_bins"] = bin_labels - # group data by this bin - Ti_g = Ti_sel.groupby(Ti_sel["x_bins"], dropna=False, as_index=True) - # take median of the data - Ti_median = Ti_g.median() - - # apply weighted mean and count photons - args = [key_var, key_x_coord, stancil_width, stancil_set] - - Ti_weight = Ti_g.apply(calc_stencil_stats, *args) - - # merge both datasets - T_merged = pd.concat([Ti_median, Ti_weight], axis=1) - - # rename columns - T_merged = T_merged.rename( - columns={key_var: key_var + "_median", key_x_coord: key_x_coord + "_median"} + T_sets = [ + process_single_stencil_set( + stancil_set, T2, key_var, key_x_coord, stancil_width, calc_stencil_stats ) - T_merged[key_var + "_median"][ - np.isnan(T_merged[key_var + "_std"]) - ] = np.nan # replace median calculation with nans - - # set stancil center an new x-coodinate - T_merged["x"] = stancil_set[1, T_merged.index - 1] - - T_sets.append(T_merged) + for stancil_set in [stencil_1, stencil_1half] + ] # mergeboth stancils T3 = pd.concat(T_sets).sort_values(by="x").reset_index() @@ -443,8 +453,6 @@ def bin_means(T2, dist_grid): N_i = list() for i in np.arange(1, ilim - 1): - if i % 5000 == 0: - print(i) i_mask = (T2["dist"] >= dist_grid[i - 1]) & (T2["dist"] < dist_grid[i + 1]) dF_mean[i] = T2[i_mask].mean() N_i.append(i_mask.sum()) From 082478fbc508cba983da4dd56a5d5bcb1f15427d Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Wed, 28 Feb 2024 18:49:18 -0500 Subject: [PATCH 13/22] fix: update xlabel, ylabel to set_xlabel, set_ylabel From 99c2eb7b9d7e3de377edd9a2c7097754d77d0ff6 Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Fri, 1 Mar 2024 13:22:24 -0500 Subject: [PATCH 14/22] cleaning and formatting more files in ICEsat2_SI_tools folder --- .../ICEsat2_SI_tools/beam_stats.py | 11 +- .../ICEsat2_SI_tools/generalized_FT.py | 2 +- .../ICEsat2_SI_tools/spectral_estimates.py | 1123 +++++++++-------- 3 files changed, 615 insertions(+), 521 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py b/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py index aed0c10e..4cc0ba97 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py @@ -7,6 +7,7 @@ import matplotlib.gridspec as gridspec import h5py + def derive_beam_statistics(Gd, all_beams, Lmeter=10e3, dx=10): """ this method returns a dict of dataframes with the beam statistics @@ -15,7 +16,6 @@ def derive_beam_statistics(Gd, all_beams, Lmeter=10e3, dx=10): Lemter is the length of the segment in meters for the statistics dx is the nominal resolution of the ATL06 data in meters """ - D = dict() for k in all_beams: @@ -42,21 +42,21 @@ def 
get_lat(sti): mask = (sti[0] < xx) & (xx <= sti[1]) return np.nanmean(Gi["lats"][mask]) - iter_x = spec.create_chunk_boundaries( + iter_x = spec.create_chunk_boundaries_unit_lengths( Lmeter, [xx.min(), xx.max()], ov=0, iter_flag=False )[1, :] - stencil_iter = spec.create_chunk_boundaries( + stencil_iter = spec.create_chunk_boundaries_unit_lengths( Lmeter, [xx.min(), xx.max()], ov=0, iter_flag=True ) var_list = np.array(list(map(get_var, stencil_iter))) - stencil_iter = spec.create_chunk_boundaries( + stencil_iter = spec.create_chunk_boundaries_unit_lengths( Lmeter, [xx.min(), xx.max()], ov=0, iter_flag=True ) N_list = np.array(list(map(get_N, stencil_iter))) - stencil_iter = spec.create_chunk_boundaries( + stencil_iter = spec.create_chunk_boundaries_unit_lengths( Lmeter, [xx.min(), xx.max()], ov=0, iter_flag=True ) lat_list = np.array(list(map(get_lat, stencil_iter))) @@ -86,7 +86,6 @@ def plot_beam_statistics(D, high_beams, low_beams, col_dict, track_name=None): if track_name is not None: plt.suptitle(track_name, fontsize=10) - gs = gridspec.GridSpec(2, 3) # make 2 x 2 plot diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py b/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py index 153eb4a6..bcb174da 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py @@ -384,7 +384,7 @@ def calc_gFT_apply(stancil, prior): return return_dict # derive L2 stancil - self.stancil_iter_list = spec.create_chunk_boundaries( + self.stancil_iter_list = spec.create_chunk_boundaries_unit_lengths( Lmeters, self.xlims, ov=self.ov, iter_flag=False ) self.stancil_iter = iter(self.stancil_iter_list.T.tolist()) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py index 47797b58..9759b3c5 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py @@ -9,44 +9,18 @@ from scipy import signal import lmfit as LM import icesat2_tracks.local_modules.m_general_ph3 as M -from functools import singledispatch - -def is_even(number): - return not (number % 2) - -@singledispatch -def define_chunk_boundaries(L_unit,dl, limits_size): - raise ValueError("limits_size must be either an int or a list of two elements") - -@define_chunk_boundaries.register -def _(limits_size:int,L_unit:int,dl:int): - L_unit = 0 - boundaries = [ - (L_unit, limits_size - dl,dl), - (dl, limits_size - dl + 1,dl), - (dl * 2, limits_size + 1,dl) - ] - return boundaries - -@define_chunk_boundaries.register -def _( limits_size:tuple,L_unit:int,dl:int): - return define_chunk_boundaries(list(limits_size),L_unit,dl) - -@define_chunk_boundaries.register -def _(limits_size:list, L_unit:int,dl:int ): - start, end = limits_size - boundaries = [ - (start, end - dl,dl), - (start + L_unit / 2, end - dl + 1,dl), - (start + L_unit, end + 1,dl) - ] - return boundaries +from matplotlib.gridspec import GridSpec +import matplotlib.pyplot as plt + + +def is_not_even(number): + return True if (number % 2) else False # basic functions -def create_chunk_boundaries(L_unit, dsize, ov= None, iter_flag=True): +def create_chunk_boundaries(L, dsize, ov=None, iter_flag=True): """ - returns all need chunk boundaries and center position given L, and ov + returns all need chunk boudaries and center position given L, and ov inputs: L desired length of window, dsize size of the data @@ -55,70 +29,101 @@ def create_chunk_boundaries(L_unit, dsize, ov= None, 
iter_flag=True): if iter_flag True returns iter else it returns an ndarray """ - ov = int(np.round(L_unit / 2)) if ov is None else ov - dl = int( L_unit - ov) - - boundaries = define_chunk_boundaries(dsize,int(L_unit),dl) + ov = int(np.round(L / 2)) if ov is None else ov + + xleft = np.arange(0, dsize - int(L - ov), int(L - ov)) + xright = np.arange(int(L - ov) * 2, dsize + 1, int(L - ov)) + xcenter_pos = np.arange(int(L - ov), dsize - int(L - ov) + 1, int(L - ov)) + max_size = min([xleft.size, xcenter_pos.size, xright.size]) + position_stancil = np.vstack( + [xleft[0:max_size], xcenter_pos[0:max_size], xright[0:max_size]] + ) + + if iter_flag is True: + return iter(position_stancil.T.tolist()) + else: + return position_stancil - xleft, xcenter_pos, xright = [ - np.arange(*boundary) for boundary in boundaries - ] - - max_size = min([xleft.size , xcenter_pos.size, xright.size]) - position_stancil = np.vstack([xleft[0:max_size], xcenter_pos[0:max_size], xright[0:max_size]]) +def create_chunk_boundaries_unit_lengths(L_unit, data_limits, ov=None, iter_flag=True): + """ + returns all need chunk boudaries and center position given L, and ov + inputs: + L desired length of window in units of the x axis of the data, + data_limits (x_min, x_max) tuple with the beginning and end the the derived window stancils - return iter(position_stancil.T.tolist()) if iter_flag else position_stancil + if ov is None, = L/2 + if iter_flag True returns iter else it returns an ndarray + + """ + L = L_unit + ov = np.round(L / 2) if ov is None else ov + dl = L - ov + xleft = np.arange(data_limits[0], data_limits[1] - dl, dl) + xcenter_pos = np.arange(data_limits[0] + L / 2, data_limits[1] - dl + 1, dl) + xright = np.arange(data_limits[0] + L, data_limits[1] + 1, dl) + + max_size = min([xleft.size, xcenter_pos.size, xright.size]) + + position_stancil = np.vstack( + [xleft[0:max_size], xcenter_pos[0:max_size], xright[0:max_size]] + ) + + if iter_flag is True: + return iter(position_stancil.T.tolist()) + else: + return position_stancil def Z_to_power(Z, df, N): - """ compute the 1d spectrum of a field phi + """compute the 1d spectrum of a field phi inputs: Z complex fourier coefficients df frequency / or wavenumber step N length of data vector in real space (= L) """ - spec = 2.*(Z*Z.conj()).real / df /N**2 - neven = is_even(N) - # the zeroth frequency should be counted only once - spec[0] = spec[0]/2. + spec = 2.0 * (Z * Z.conj()).real / df / N**2 + neven = is_not_even(N) + spec[0] = spec[0] / 2.0 if neven: - spec[-1] = spec[-1]/2. + spec[-1] = spec[-1] / 2.0 return spec + # 2nd cal spectra def calc_spectrum_fft(phi, df, N): - """ compute the 1d spectrum of a field phi + """compute the 1d spectrum of a field phi inputs: df frequency / or wavenumber step N length of data vector in real space (= L) neven bool, if True """ - neven = is_even(N) + neven = is_not_even(N) phih = np.fft.rfft(phi) # the factor of 2 comes from the symmetry of the Fourier coeffs - spec = 2.*(phih*phih.conj()).real / df /N**2 + spec = 2.0 * (phih * phih.conj()).real / df / N**2 # the zeroth frequency should be counted only once - spec[0] = spec[0]/2. + spec[0] = spec[0] / 2.0 if neven: - spec[-1] = spec[-1]/2. 
+ spec[-1] = spec[-1] / 2.0 return spec -def LS_power_to_PSD( ls_power, L , dff): +def LS_power_to_PSD(ls_power, L, dff): """ returns Power spectral density (unit^2/dfreq) ls_power output of astropy.timeseries.LombScargle.power with normalization='psd' """ - return 2 * ls_power / L /dff + return 2 * ls_power / L / dff -def calc_spectrum_LS( x, y, k, err=None, LS= None, dk =None): + +def calc_spectrum_LS(x, y, k, err=None, LS=None, dk=None): """ returns: Power spectral density of y given postitions x, for wanumbers k @@ -127,16 +132,17 @@ def calc_spectrum_LS( x, y, k, err=None, LS= None, dk =None): LS LombScargle object """ if LS is None: - LS = LombScargle(x , y, dy = err, fit_mean=False, center_data=True) + LS = LombScargle(x, y, dy=err, fit_mean=False, center_data=True) else: LS.t = x LS.y = y - LS.dy= err + LS.dy = err + + ls_power = LS.power(k, normalization="psd", assume_regular_frequency="False") - ls_power = LS.power(k, normalization='psd', assume_regular_frequency='False') + dk = np.diff(k).mean() if dk is None else dk + return 2 * ls_power / y.size / dk, LS - dk = np.diff(k).mean() if dk is None else dk - return 2 * ls_power / y.size / dk , LS def reconstruct_data_from_LS(LS, x_real_axis, freq): """ @@ -145,36 +151,39 @@ def reconstruct_data_from_LS(LS, x_real_axis, freq): x_real_axis is the x coordinate of the original data (np.array) freq is the frequency grid on which the field is reconstructed """ - # t_base =np.array(Gi['dist']) + y_reconstruct = LS.offset() * np.ones(len(x_real_axis)) freq_seq = freq[1:] if freq[0] == 0 else freq - freq_mask= freq <= 1/100 + freq_mask = freq <= 1 / 100 for fi in freq_seq: try: theta = LS.model_parameters(fi) except: - theta = [0,0] - y_reconstruct += theta[0] * np.sin(x_real_axis * 2 * np.pi *fi ) + theta[1]* np.cos(x_real_axis * 2 * np.pi *fi) - + theta = [0, 0] + y_reconstruct += theta[0] * np.sin(x_real_axis * 2 * np.pi * fi) + theta[ + 1 + ] * np.cos(x_real_axis * 2 * np.pi * fi) return y_reconstruct + def calc_freq_fft(x_grid, N): - """ calculate array of spectral variable (frequency or - wavenumber) in cycles per unit of L """ + """calculate array of spectral variable (frequency or + wavenumber) in cycles per unit of L""" - neven = is_even(N) - - dx = np.round(np.median(np.diff(x_grid) ), 1) - df = 1./((N-1)*dx) + neven = is_not_even(N) + + dx = np.round(np.median(np.diff(x_grid)), 1) + df = 1.0 / ((N - 1) * dx) if neven: - f = df*np.arange(N/2+1) + f = df * np.arange(N / 2 + 1) else: - f = df*np.arange( (N-1)/2. + 1 ) - return f,df + f = df * np.arange((N - 1) / 2.0 + 1) + return f, df + def calc_freq_fft_given_dx(dx, N): """ @@ -184,17 +193,25 @@ def calc_freq_fft_given_dx(dx, N): N number of datapoints used in window """ - neven = is_even(N) - df = 1./((N-1)*dx) + neven = is_not_even(N) + df = 1.0 / ((N - 1) * dx) if neven: - f = df*np.arange(N/2+1) + f = df * np.arange(N / 2 + 1) else: - f = df*np.arange( (N-1)/2. 
+ 1 ) - return f,df - - -def calc_freq_LS(x, N, method='fftX2', dx=None, minimum_frequency=None, maximum_frequency=None, samples_per_peak=0.01): + f = df * np.arange((N - 1) / 2.0 + 1) + return f, df + + +def calc_freq_LS( + x, + N, + method="fftX2", + dx=None, + minimum_frequency=None, + maximum_frequency=None, + samples_per_peak=0.01, +): """ calculate array of spectral variable (frequency or wavenumber) in cycles per unit of N (window length in number of data points) @@ -206,40 +223,47 @@ def calc_freq_LS(x, N, method='fftX2', dx=None, minimum_frequency=None, maximum_ minimum_frequency, maximum_frequency only used for LS_auto """ - if method is 'fftX2': - neven = is_even(N) + if method is "fftX2": + neven = is_not_even(N) dx = np.diff(x).mean() if dx is None else dx - df = 1./((N-1)*dx) /2 + df = 1.0 / ((N - 1) * dx) / 2 if neven: - f = df*np.arange(df, N+1) + f = df * np.arange(df, N + 1) else: - f = df* np.arange(df, (N-1) + 1 ) + f = df * np.arange(df, (N - 1) + 1) - elif method is 'fft': - neven = is_even(N) + elif method is "fft": + neven = is_not_even(N) dx = np.diff(x).mean() if dx is None else dx - df = 1./((N-1)*dx) + df = 1.0 / ((N - 1) * dx) if neven: - f = df*np.arange(N/2+1) + f = df * np.arange(N / 2 + 1) else: - f = df*np.arange( (N-1)/2. + 1 ) + f = df * np.arange((N - 1) / 2.0 + 1) + + elif method is "LS_auto": - elif method is 'LS_auto': - - f = LombScargle(x , np.random.randn(len(x)), fit_mean=True).autofrequency(minimum_frequency=minimum_frequency, maximum_frequency=maximum_frequency, samples_per_peak=samples_per_peak)##0.1) + f = LombScargle(x, np.random.randn(len(x)), fit_mean=True).autofrequency( + minimum_frequency=minimum_frequency, + maximum_frequency=maximum_frequency, + samples_per_peak=samples_per_peak, + ) df = np.diff(f).mean() df = np.round(df, 5) - elif method is 'fixed_ratio': + elif method is "fixed_ratio": - neven = is_even(N) + neven = is_not_even(N) dx = np.diff(x).mean() if dx is None else dx df = dx / 50 - f = df * np.arange(df, N + 1) if neven else df * np.arange(df, N) - + if neven: + f = df * np.arange(df, N + 1) + else: + f = df * np.arange(df, N) + + return f, df - return f ,df def create_window(L, window=None): """ @@ -247,63 +271,61 @@ def create_window(L, window=None): if window is not None it show have a length of N """ if window is None: - win=np.hanning(L) + win = np.hanning(L) else: - win=window + win = window - factor=np.sqrt(L/(win**2).sum()) - win*=factor + factor = np.sqrt(L / (win**2).sum()) + win *= factor return win + def create_weighted_window(data, window=None): """ define window function and weight it to conserve variance if window is not None it show have a length of N """ - + L = data.size if window is None: - win = WINDOWS.tukey(L, alpha=0.1, sym=True) + win = WINDOWS.tukey(L, alpha=0.1, sym=True) else: - win=window + win = window - factor = np.sqrt( np.var(data) / np.var(( data* win) ) ) - win *= factor + factor = np.sqrt(np.var(data) / np.var((data * win))) + win *= factor return win -def spec_error(E,sn,ci=.95): - """ Computes confidence interval for one-dimensional spectral - estimate E (the power spectra). +def spec_error(E, sn, ci=0.95): + """Computes confidence interval for one-dimensional spectral + estimate E (the power spectra). 
- Parameters - =========== - - sn is the number of spectral realizations; - it can be either an scalar or an array of size(E) - - ci = .95 for 95 % confidence interval + Parameters + =========== + - sn is the number of spectral realizations; + it can be either an scalar or an array of size(E) + - ci = .95 for 95 % confidence interval - Output - ========== - lower (El) and upper (Eu) bounds on E """ + Output + ========== + lower (El) and upper (Eu) bounds on E""" + def yNlu(sn, yN, ci): + """compute yN[l] yN[u], that is, the lower and + upper limit of yN""" - - def yNlu(sn,yN,ci): - """ compute yN[l] yN[u], that is, the lower and - upper limit of yN """ - - # cdf of chi^2 dist. with 2*sn DOF - cdf = gammainc(sn,sn*yN) + cdf = gammainc(sn, sn * yN) # indices that delimit the wedge of the conf. interval fl = np.abs(cdf - ci).argmin() - fu = np.abs(cdf - 1. + ci).argmin() + fu = np.abs(cdf - 1.0 + ci).argmin() - return yN[fl],yN[fu] + return yN[fl], yN[fu] - dbin = .005 - yN = np.arange(0,2.+dbin,dbin) + dbin = 0.005 + yN = np.arange(0, 2.0 + dbin, dbin) El, Eu = np.empty_like(E), np.empty_like(E) @@ -317,19 +339,19 @@ def yNlu(sn,yN,ci): assert n == E.size, " *** sn has different size than E " for i in range(n): - yNl,yNu = yNlu(sn[i],yN=yN,ci=ci) - El[i] = E[i]/yNl - Eu[i] = E[i]/yNu + yNl, yNu = yNlu(sn[i], yN=yN, ci=ci) + El[i] = E[i] / yNl + Eu[i] = E[i] / yNu else: - yNl,yNu = yNlu(sn,yN=yN,ci=ci) - El = E/yNl - Eu = E/yNu + yNl, yNu = yNlu(sn, yN=yN, ci=ci) + El = E / yNl + Eu = E / yNu return El, Eu -def linear_gap_fill(F, key_lead, key_int): +def linear_gap_fill(F, key_lead, key_int): """ F pd.DataFrame key_lead key in F that determined the independent coordindate @@ -337,13 +359,13 @@ def linear_gap_fill(F, key_lead, key_int): """ y_g = np.array(F[key_int]) - nans, x2= np.isnan(y_g), lambda z: z.nonzero()[0] - y_g[nans]= np.interp(x2(nans), x2(~nans), y_g[~nans]) + nans, x2 = np.isnan(y_g), lambda z: z.nonzero()[0] + y_g[nans] = np.interp(x2(nans), x2(~nans), y_g[~nans]) return y_g -def sub_sample_coords(X, lons, lats, stancils, map_func =None): +def sub_sample_coords(X, lons, lats, stancils, map_func=None): """ X non-nan array of coodinate along beam lons, lats arrays of postion data that should be mapped. must have same size as X @@ -354,19 +376,18 @@ def sub_sample_coords(X, lons, lats, stancils, map_func =None): nparray(3, N) 1st column is the stancil center, 2nd and 3rd collumn are the mapped lons and lats """ - def get_lon_lat_coords(stancil): - x_mask= (stancil[0] <= X) & (X <= stancil[-1]) - if sum(x_mask) ==0: # if there are not enough photos set results to nan + x_mask = (stancil[0] <= X) & (X <= stancil[-1]) + if sum(x_mask) == 0: return np.array([stancil[1], np.nan, np.nan]) lon_bin, lat_bin = lons[x_mask].mean(), lats[x_mask].mean() - return np.array([stancil[1],lon_bin, lat_bin]) + return np.array([stancil[1], lon_bin, lat_bin]) map_func = map if map_func is None else map_func - coord_positions = list(map_func( get_lon_lat_coords, copy.copy(stancils) )) + coord_positions = list(map_func(get_lon_lat_coords, copy.copy(stancils))) coord_positions = np.vstack(coord_positions) return coord_positions @@ -390,119 +411,124 @@ def __init__(self, x_grid, data, Lpoints, ov=None, window=None): other arributes are in the .attr dict. 
""" - self.Lpoints = Lpoints - self.ov = int(Lpoints/2) if ov is None else ov #when not defined in create_chunk_boundaries then L/2 + self.Lpoints = Lpoints + self.ov = int(Lpoints / 2) if ov is None else ov - self.data = data + self.data = data # create subsample k - self.k, self.dk = calc_freq_fft(x_grid, Lpoints) # return 1/ unit of frid points + self.k, self.dk = calc_freq_fft(x_grid, Lpoints) # to get the waveumber units (2 pi/ lambda), multiply by 2 pi - self.k, self.dk = self.k * 2 * np.pi, self.dk * 2 * np.pi + self.k, self.dk = self.k * 2 * np.pi, self.dk * 2 * np.pi # create window - self.win = create_window(Lpoints) + self.win = create_window(Lpoints) def cal_spectrogram(self, data=None, name=None): - """ defines apply function and calculated all sub-sample sprectra using map """ - DATA = self.data if data is None else data Lpoints, dk = self.Lpoints, self.dk - win =self.win + win = self.win def calc_spectrum_apply(stancil): "returns spectrum per stencil, detrends and windows the data" - - - idata = DATA[stancil[0]:stancil[-1]] + idata = DATA[stancil[0] : stancil[-1]] idata = detrend(idata) * win - return stancil[1], calc_spectrum_fft(idata , dk, Lpoints) + return stancil[1], calc_spectrum_fft(idata, dk, Lpoints) - # derive L2 stancil - stancil_iter = create_chunk_boundaries(Lpoints, DATA.size, ov= self.ov) + # derive L2 stancil + stancil_iter = create_chunk_boundaries(Lpoints, DATA.size, ov=self.ov) # apply func to all stancils - D_specs = dict(map(calc_spectrum_apply,stancil_iter)) + D_specs = dict(map(calc_spectrum_apply, stancil_iter)) chunk_positions = np.array(list(D_specs.keys())) - self.N_stancils = len(chunk_positions) # number of spectal relazations + + # number of spectal relazations + self.N_stancils = len(chunk_positions) # repack data, create xarray - self.spec_name = 'power_spec' if name is None else name - G =dict() - for xi,I in D_specs.items(): - G[xi] = xr.DataArray(I, dims=['k'], coords={'k': self.k, 'x': xi } , name=self.spec_name) + self.spec_name = "power_spec" if name is None else name + G = dict() + for xi, I in D_specs.items(): + G[xi] = xr.DataArray( + I, dims=["k"], coords={"k": self.k, "x": xi}, name=self.spec_name + ) - self.G = xr.concat(G.values(), dim='x').T + self.G = xr.concat(G.values(), dim="x").T if self.G.k[0] == 0: self.G = self.G[1:, :] - self.G.attrs['ov'] = self.ov - self.G.attrs['L'] = self.Lpoints + self.G.attrs["ov"] = self.ov + self.G.attrs["L"] = self.Lpoints return self.G - + # cal variance def calc_var(self): - """ Compute total variance from spectragram """ - return self.dk*self.G.mean('x').sum().data # do not consider zeroth frequency + """Compute total variance from spectragram""" + # do not consider zeroth frequency + return self.dk * self.G.mean("x").sum().data - def mean_spectral_error(self, mask=None, confidence = 0.95): + def mean_spectral_error(self, mask=None, confidence=0.95): "retrurns spetral error for the x-mean spectral estimate and stores it as coordindate in the dataarray" # make error estimate if mask is not None: - meanspec= self.G.isel(x=mask).mean('x') + meanspec = self.G.isel(x=mask).mean("x") N = int(sum(mask)) else: - meanspec= self.G.mean('x') + meanspec = self.G.mean("x") N = self.N_stancils - El_of_mean, Eu_of_mean = spec_error(meanspec, N , confidence ) - El_of_mean.name = 'El_mean' - Eu_of_mean.name = 'Eu_mean' + El_of_mean, Eu_of_mean = spec_error(meanspec, N, confidence) + El_of_mean.name = "El_mean" + Eu_of_mean.name = "Eu_mean" - self.G.coords['mean_El'] = (('k'), El_of_mean.data) - 
self.G.coords['mean_Eu'] = (('k'), Eu_of_mean.data) + self.G.coords["mean_El"] = (("k"), El_of_mean.data) + self.G.coords["mean_Eu"] = (("k"), Eu_of_mean.data) - def parceval(self, add_attrs=True ): + def parceval(self, add_attrs=True): "test Parceval theorem" DATA = self.data L = self.Lpoints - # derive mean variances of stancils stancil_iter = create_chunk_boundaries(L, DATA.size) def get_stancil_var_apply(stancil): - + "returns the variance of yy for stancil" - idata = DATA[stancil[0]:stancil[-1]] + idata = DATA[stancil[0] : stancil[-1]] idata = detrend(idata) return stancil[1], idata.var() - D_vars = dict(map(get_stancil_var_apply,stancil_iter)) + D_vars = dict(map(get_stancil_var_apply, stancil_iter)) - stancil_vars =list() + stancil_vars = list() for I in D_vars.values(): stancil_vars.append(I) - print('Parcevals Theorem:') - print('variance of unweighted timeseries: ',DATA.var()) - print('mean variance of detrended chunks: ', np.array(stancil_vars).mean()) - print('variance of the pwelch Spectrum: ', self.calc_var()) + print("Parcevals Theorem:") + print("variance of unweighted timeseries: ", DATA.var()) + print("mean variance of detrended chunks: ", np.array(stancil_vars).mean()) + print("variance of the pwelch Spectrum: ", self.calc_var()) if add_attrs: - self.G.attrs['variance_unweighted_data'] = DATA.var() - self.G.attrs['mean_variance_detrended_chunks'] = np.array(stancil_vars).mean() - self.G.attrs['mean_variance_pwelch_spectrum'] = self.calc_var() + self.G.attrs["variance_unweighted_data"] = DATA.var() + self.G.attrs["mean_variance_detrended_chunks"] = np.array( + stancil_vars + ).mean() + self.G.attrs["mean_variance_pwelch_spectrum"] = self.calc_var() + class WavenumberSpectrogramLSEven: - def __init__(self, x, data, L, waven_method = 'fftX2' , dy=None , ov=None, window=None, kjumps=1): + def __init__( + self, x, data, L, waven_method="fftX2", dy=None, ov=None, window=None, kjumps=1 + ): """ returns a wavenumber spectrogram with the resolution L-ov this uses Lombscargle @@ -520,96 +546,96 @@ def __init__(self, x, data, L, waven_method = 'fftX2' , dy=None , ov=None, wind xr.Dataset with x, k as cooridates of the spectrogram and the mean error other arributes are in the .attr dict. 
""" - - self.L = L - self.ov = int(L/2) if ov is None else ov #when not defined in create_chunk_boundaries then L/2 - self.x = x - self.data = data - self.dy = dy + self.L = L + self.ov = ( + int(L / 2) if ov is None else ov + ) # when not defined in create_chunk_boundaries then L/2 + self.x = x + self.data = data + self.dy = dy # create subsample k if type(waven_method) is str: - self.k, self.dk = calc_freq_LS(x, L, method = waven_method ) + self.k, self.dk = calc_freq_LS(x, L, method=waven_method) elif type(waven_method) is np.ndarray: - self.k, self.dk = waven_method, np.diff(waven_method).mean() + self.k, self.dk = waven_method, np.diff(waven_method).mean() else: - raise ValueError('waven_method is neither string nor an array') + raise ValueError("waven_method is neither string nor an array") - self.k, self.dk = self.k[::kjumps], self.dk*kjumps - # create window - self.win = None - - def cal_spectrogram(self, x = None, data=None, name=None, dx=1): + self.k, self.dk = self.k[::kjumps], self.dk * kjumps + self.win = None + def cal_spectrogram(self, x=None, data=None, name=None, dx=1): """ defines apply function and calculated all sub-sample sprectra using map dx nominal resolution of the data resolutionif not set, dx= 1 """ - - X = self.x if x is None else x # all x positions - DATA = self.data if data is None else data # all data points - L, dk = self.L, self.dk - win = self.win + X = self.x if x is None else x # all x positions + DATA = self.data if data is None else data # all data points + L, dk = self.L, self.dk + win = self.win self.dx = dx - # init Lomb scargle object with noise as nummy data () - self.LS = LombScargle(X[0:L] , np.random.randn(L)*0.001, fit_mean=True) + self.LS = LombScargle(X[0:L], np.random.randn(L) * 0.001, fit_mean=True) def calc_spectrum_apply(stancil): "returns spectrum per stencil, detrends and windows the data" - - x = X[stancil[0]:stancil[-1]] - idata = DATA[stancil[0]:stancil[-1]] + x = X[stancil[0] : stancil[-1]] + idata = DATA[stancil[0] : stancil[-1]] y = detrend(idata) - LS_PSD, LS_object = calc_spectrum_LS( x, y, self.k, LS= self.LS, dk =self.dk) + LS_PSD, LS_object = calc_spectrum_LS(x, y, self.k, LS=self.LS, dk=self.dk) return stancil[1], LS_PSD # % derive L2 stancil - stancil_iter = create_chunk_boundaries(L, DATA.size, ov= self.ov) + stancil_iter = create_chunk_boundaries(L, DATA.size, ov=self.ov) # apply func to all stancils - D_specs = dict(map(calc_spectrum_apply,stancil_iter)) + D_specs = dict(map(calc_spectrum_apply, stancil_iter)) chunk_positions = np.array(list(D_specs.keys())) - self.N_stancils = len(chunk_positions) # number of spectal relazations + + # number of spectal relazations + self.N_stancils = len(chunk_positions) # repack data, create xarray - self.spec_name = 'power_spec' if name is None else name - G = { - xi: xr.DataArray( + self.spec_name = "power_spec" if name is None else name + G = dict() + for xi, I in D_specs.items(): + G[xi] = xr.DataArray( I, dims=["k"], coords={"k": self.k, "x": xi * self.dx}, name=self.spec_name, ) - for xi, I in D_specs.items() - } - self.G = xr.concat(G.values(), dim='x').T + self.G = xr.concat(G.values(), dim="x").T if self.G.k[0] == 0: self.G = self.G[1:, :] - self.G.attrs['ov'] = self.ov - self.G.attrs['L'] = self.L + self.G.attrs["ov"] = self.ov + self.G.attrs["L"] = self.L return self.G def calc_var(self): return WavenumberSpectrogram.calc_var(self) - def parceval(self, add_attrs=True ): - return WavenumberSpectrogram.parceval(self, add_attrs= add_attrs ) + def parceval(self, 
add_attrs=True): + return WavenumberSpectrogram.parceval(self, add_attrs=add_attrs) + + def mean_spectral_error(self, confidence=0.95): + return WavenumberSpectrogram.mean_spectral_error(self, confidence=confidence) - def mean_spectral_error(self, confidence = 0.95 ): - return WavenumberSpectrogram.mean_spectral_error(self, confidence= confidence ) class WavenumberSpectrogramLS: - def __init__(self, x, data, L, dx, dy = None, waven_method = 'fftX2', ov=None, window=None): + def __init__( + self, x, data, L, dx, dy=None, waven_method="fftX2", ov=None, window=None + ): """ returns a wavenumber spectrogram with the resolution L-ov this uses Lombscargle @@ -628,31 +654,38 @@ def __init__(self, x, data, L, dx, dy = None, waven_method = 'fftX2', ov=None, w xr.Dataset with x, k as cooridates of the spectrogram and the mean error other arributes are in the .attr dict. """ - - self.L = L - self.ov = int(L/2) if ov is None else ov #when not defined in create_chunk_boundaries then L/2 - self.x = x - self.dx = dx - self.data = data - self.error = dy if dy is not None else None - self.Lpoints= int(self.L/self.dx) + self.L = L + self.ov = ( + int(L / 2) if ov is None else ov + ) # when not defined in create_chunk_boundaries then L/2 + self.x = x + self.dx = dx + self.data = data + self.error = dy if dy is not None else None + self.Lpoints = int(self.L / self.dx) - # create subsample k + # create subsample k if type(waven_method) is str: - self.k, self.dk = calc_freq_LS(x, self.Lpoints, method = waven_method ) + self.k, self.dk = calc_freq_LS(x, self.Lpoints, method=waven_method) elif type(waven_method) is np.ndarray: - self.k, self.dk = waven_method, np.diff(waven_method).mean() + self.k, self.dk = waven_method, np.diff(waven_method).mean() else: - raise ValueError('waven_method is neither string nor an array') - - - # create window - self.win = None - - def cal_spectrogram(self, x = None, data=None, error=None, name=None, xlims =None, weight_data= True, max_nfev = None, map_func=None): - + raise ValueError("waven_method is neither string nor an array") + self.win = None + + def cal_spectrogram( + self, + x=None, + data=None, + error=None, + name=None, + xlims=None, + weight_data=True, + max_nfev=None, + map_func=None, + ): """ defines apply function and calculated all sub-sample sprectra using map @@ -671,143 +704,144 @@ def cal_spectrogram(self, x = None, data=None, error=None, name=None, xlims =Non self.GG, params_dataframe params_dataframe is a pd.DataFrame that containes all the parameters of the fitting process (and may contain uncertainties too once they are calculated) """ - - - X = self.x if x is None else x # all x positions - DATA = self.data if data is None else data # all data points - ERR = self.error if error is None else error # all error for points - L, dk = self.L, self.dk - self.xlims = ( np.round(X.min()), X.max() ) if xlims is None else xlims + X = self.x if x is None else x # all x positions + DATA = self.data if data is None else data # all data points + ERR = self.error if error is None else error # all error for points + L, dk = self.L, self.dk + self.xlims = (np.round(X.min()), X.max()) if xlims is None else xlims # define window self.win = WINDOWS.tukey(self.Lpoints, alpha=0.1, sym=True) - def calc_spectrum_and_field_apply(stancil): - """ windows the data accoding to stencil and applies LS spectrogram returns: stancil center, spectrum for this stencil, number of datapoints in stancil """ - - x_mask= (stancil[0] <= X) & (X <= stancil[-1]) + x_mask = (stancil[0] <= X) & (X <= 
stancil[-1]) x = X[x_mask] - if x.size < 200: # if there are not enough photos set results to nan - return stancil[1], self.k*np.nan, self.k*np.nan, np.nan, x.size + if x.size < 200: # if there are not enough photos set results to nan + return stancil[1], self.k * np.nan, self.k * np.nan, np.nan, x.size y = DATA[x_mask] - #make x positions - x_pos = (np.round( (x - stancil[0])/ 10.0 -1 , 0) ).astype('int') + # make x positions + x_pos = (np.round((x - stancil[0]) / 10.0 - 1, 0)).astype("int") # weight data if weight_data: window = self.win[x_pos] - y = y * window * np.sqrt( np.var(y) / np.var(( y* window) ) ) + y = y * window * np.sqrt(np.var(y) / np.var((y * window))) - #make y gridded - x_model = np.arange(stancil[0], stancil[-1], self.dx) - y_gridded = np.copy(x_model) * np.nan + # make y gridded + x_model = np.arange(stancil[0], stancil[-1], self.dx) + y_gridded = np.copy(x_model) * np.nan y_gridded[x_pos] = y - nan_mask =np.isnan(y_gridded) + nan_mask = np.isnan(y_gridded) err = ERR[x_mask] if ERR is not None else None - LS_PSD, LS_object = calc_spectrum_LS( x, y, self.k, err=err, LS= None, dk =self.dk) - - y_model = reconstruct_data_from_LS(LS_object, x_model, self.k) - + LS_PSD, LS_object = calc_spectrum_LS( + x, y, self.k, err=err, LS=None, dk=self.dk + ) + y_model = reconstruct_data_from_LS(LS_object, x_model, self.k) - P = conserve_variance(np.fft.rfft(y_model), self.k, y_gridded, nan_mask = nan_mask ) + P = ConserveVariance( + np.fft.rfft(y_model), self.k, y_gridded, nan_mask=nan_mask + ) P.set_parameters() + fitter = P.optimize(max_nfev=max_nfev) return stancil[1], LS_PSD, P.best_guess_Z(), fitter.params, x.size # derive L2 stancil - self.stancil_iter = create_chunk_boundaries(L, self.xlims, ov= self.ov, iter_flag=True) - + self.stancil_iter = create_chunk_boundaries_unit_lengths( + L, self.xlims, ov=self.ov, iter_flag=True + ) map_func = map if map_func is None else map_func print(map_func) - Spec_returns = list(map_func( calc_spectrum_and_field_apply, copy.copy(self.stancil_iter) )) - + Spec_returns = list( + map_func(calc_spectrum_and_field_apply, copy.copy(self.stancil_iter)) + ) # unpack resutls of the mapping: - D_specs = dict() - Y_model = dict() - Pars = dict() - N_per_stancil = list() + D_specs = dict() + Y_model = dict() + Pars = dict() + N_per_stancil = list() for I in Spec_returns: - D_specs[I[0]] = I[1] - Y_model[I[0]] = I[2] - Pars[I[0]] = I[3] + D_specs[I[0]] = I[1] + Y_model[I[0]] = I[2] + Pars[I[0]] = I[3] N_per_stancil.append(I[4]) self.N_per_stancil = N_per_stancil chunk_positions = np.array(list(D_specs.keys())) - self.N_stancils = len(chunk_positions) # number of spectral realizatiobs + self.N_stancils = len(chunk_positions) # number of spectral realizatiobs # repack data, create xarray # 1st LS spectal estimates - self.spec_name = 'LS_spectal_power' if name is None else name - G_LS_power =dict() - for xi,I in D_specs.items(): - G_LS_power[xi] = xr.DataArray(I, dims=['k'], coords={'k': self.k, 'x': xi } , name=self.spec_name) - - G_LS_power = xr.concat(G_LS_power.values(), dim='x').T - + self.spec_name = "LS_spectal_power" if name is None else name + G_LS_power = dict() + for xi, I in D_specs.items(): + G_LS_power[xi] = xr.DataArray( + I, dims=["k"], coords={"k": self.k, "x": xi}, name=self.spec_name + ) + G_LS_power = xr.concat(G_LS_power.values(), dim="x").T - #2nd FFT(Y_model) - G_fft =dict() - Y_model_k_fft = np.fft.rfftfreq( int(self.Lpoints), d=self.dx) - for xi,I in Y_model.items(): + # 2nd FFT(Y_model) + G_fft = dict() + Y_model_k_fft = 
np.fft.rfftfreq(int(self.Lpoints), d=self.dx) + for xi, I in Y_model.items(): if I.size < Y_model_k_fft.size: I = np.insert(I, -1, I[-1]) - G_fft[xi] = xr.DataArray(I, dims=['k'], coords={'k': Y_model_k_fft, 'x': xi } , name='Y_model_hat') + G_fft[xi] = xr.DataArray( + I, dims=["k"], coords={"k": Y_model_k_fft, "x": xi}, name="Y_model_hat" + ) - G_fft = xr.concat(G_fft.values(), dim='x').T + G_fft = xr.concat(G_fft.values(), dim="x").T # generate power spec as well self.G = Z_to_power(G_fft, self.dk, self.Lpoints) - self.G.name = 'spectral_power_optm' - + self.G.name = "spectral_power_optm" # merge both datasets self.GG = xr.merge([G_LS_power, G_fft, self.G]) - self.GG.attrs['ov'] = self.ov - self.GG.attrs['L'] = self.L - self.GG.attrs['Lpoints'] = self.Lpoints - self.GG.coords['N_per_stancil'] = ( ('x'), N_per_stancil) - - self.GG.expand_dims(dim='eta') - self.GG.coords['eta'] = ( ('eta'), np.arange(0, self.L + self.dx, self.dx) - self.L/2 ) - self.GG['win'] = ( ('eta'), np.insert(self.win, -1, self.win[-1])) + self.GG.attrs["ov"] = self.ov + self.GG.attrs["L"] = self.L + self.GG.attrs["Lpoints"] = self.Lpoints + self.GG.coords["N_per_stancil"] = (("x"), N_per_stancil) + + self.GG.expand_dims(dim="eta") + self.GG.coords["eta"] = ( + ("eta"), + np.arange(0, self.L + self.dx, self.dx) - self.L / 2, + ) + self.GG["win"] = (("eta"), np.insert(self.win, -1, self.win[-1])) # create dataframe with fitted parameters - PP2= dict() + PP2 = dict() for k, I in Pars.items(): if I is not np.nan: - PP2[k] =I + PP2[k] = I keys = PP2[next(iter(PP2))].keys() - params_dataframe = pd.DataFrame(index =keys) + params_dataframe = pd.DataFrame(index=keys) - for k,I in PP2.items(): + for k, I in PP2.items(): I.values() params_dataframe[k] = list(I.valuesdict().values()) return self.GG, params_dataframe - - - def calc_var(self): Gmean = np.nanmean(self.G, 1) @@ -815,60 +849,59 @@ def calc_var(self): return self.dk * Gmean[~infmask].sum().data - - - def parceval(self, add_attrs=True, weight_data=False ): + def parceval(self, add_attrs=True, weight_data=False): "test Parceval theorem" - + DATA = self.data - L = self.L X = self.x - def get_stancil_var_apply(stancil): - + "returns the variance of yy for stancil" - x_mask= (stancil[0] < X) & (X <= stancil[-1]) + x_mask = (stancil[0] < X) & (X <= stancil[-1]) idata = DATA[x_mask] if len(idata) < 1: return stancil[1], np.nan, len(idata) idata = detrend(idata) # weight data - x_pos = (np.round( (X[x_mask] - stancil[0])/ 10.0 , 0) ).astype('int') + x_pos = (np.round((X[x_mask] - stancil[0]) / 10.0, 0)).astype("int") if weight_data: window = self.win[x_pos] - idata = idata * window * np.sqrt( np.var(idata) / np.var(( idata* window) ) ) + idata = ( + idata * window * np.sqrt(np.var(idata) / np.var((idata * window))) + ) return stancil[1], idata.var(), len(idata) - D_vars = list(map(get_stancil_var_apply, copy.copy(self.stancil_iter) )) + D_vars = list(map(get_stancil_var_apply, copy.copy(self.stancil_iter))) - stancil_vars, Nphotons =list(), 0 + stancil_vars, Nphotons = list(), 0 for I in D_vars: - stancil_vars.append(I[1] * I[2]) - Nphotons += I[2] - - stancil_weighted_variance = np.nansum(np.array(stancil_vars))/Nphotons + stancil_vars.append(I[1] * I[2]) + Nphotons += I[2] - print('Parcevals Theorem:') - print('variance of timeseries: ', DATA.var()) - print('mean variance of stancils: ', stancil_weighted_variance ) + stancil_weighted_variance = np.nansum(np.array(stancil_vars)) / Nphotons - print('variance of the optimzed windowed LS Spectrum: ', self.calc_var()) + 
print("Parcevals Theorem:") + print("variance of timeseries: ", DATA.var()) + print("mean variance of stancils: ", stancil_weighted_variance) + print("variance of the optimzed windowed LS Spectrum: ", self.calc_var()) if add_attrs: - self.G.attrs['variance_unweighted_data'] = DATA.var() - self.G.attrs['mean_variance_stancils'] = np.nanmean(np.array(stancil_vars) ) - self.G.attrs['mean_variance_LS_pwelch_spectrum'] = self.calc_var() - - - def mean_spectral_error(self, mask=None, confidence = 0.95 ): - return WavenumberSpectrogram.mean_spectral_error(self, mask=mask, confidence= confidence ) + self.G.attrs["variance_unweighted_data"] = DATA.var() + self.G.attrs["mean_variance_stancils"] = np.nanmean(np.array(stancil_vars)) + self.G.attrs["mean_variance_LS_pwelch_spectrum"] = self.calc_var() + def mean_spectral_error(self, mask=None, confidence=0.95): + return WavenumberSpectrogram.mean_spectral_error( + self, mask=mask, confidence=confidence + ) # class for getting standard Pwelch spectrum. old version, deprechiate class WavenumberPwelch: - def __init__(self,data, x, L, ov=None, window=None, save_chunks=False, plot_chunks=False): + def __init__( + self, data, x, L, ov=None, window=None, save_chunks=False, plot_chunks=False + ): """ returns a wavenumber spectrum using the pwelch method @@ -888,143 +921,137 @@ def __init__(self,data, x, L, ov=None, window=None, save_chunks=False, plot_chun self.dx self.n_spec """ - - - - self.data = data # field to be analyzed - self.dx = np.diff(x)[0] # sampling interval - self.save_chunks= save_chunks - dsize = data.size + # field to be analyzed + self.data = data + # sampling interval + self.dx = np.diff(x)[0] + self.save_chunks = save_chunks + dsize = data.size - ov=int(np.round(L/2)) if ov is None else ov + ov = int(np.round(L / 2)) if ov is None else ov self.n = L if window is None: - win=np.hanning(self.n) + win = np.hanning(self.n) else: - win=window + win = window - factor=np.sqrt(self.n/(win**2).sum()) - win*=factor + factor = np.sqrt(self.n / (win**2).sum()) + win *= factor # test if n is even - if (self.n%2): + if self.n % 2: self.neven = False else: self.neven = True - #calculate freq - self.k = self.calc_freq() - nbin=int(np.floor(dsize/(L-ov))) + # calculate freq + self.k = self.calc_freq() + nbin = int(np.floor(dsize / (L - ov))) if save_chunks: - chunks=np.empty([int(nbin),int(L)]) + chunks = np.empty([int(nbin), int(L)]) - self.specs=np.empty([int(nbin),self.k.size]) + self.specs = np.empty([int(nbin), self.k.size]) - last_k=0 - k=0 + last_k = 0 + k = 0 - for i in np.arange(0,dsize-int(L-ov)+1,int(L-ov)): + for i in np.arange(0, dsize - int(L - ov) + 1, int(L - ov)): - if (plot_chunks) and (i >= dsize-6*int(L-ov)): + if (plot_chunks) and (i >= dsize - 6 * int(L - ov)): M.figure_axis_xy() - self.phi=data[int(i):int(i+L)] + self.phi = data[int(i) : int(i + L)] - - if int(i+L) <= data.size-1: + if int(i + L) <= data.size - 1: if save_chunks: - chunks[k,:]=self.phi - + chunks[k, :] = self.phi - self.phi=signal.detrend(self.phi)*win + self.phi = signal.detrend(self.phi) * win if plot_chunks: plt.plot(self.phi) - self.specs[k,:]= self.calc_spectrum() - last_k=k - last_used_TS=int(i+L) - + self.specs[k, :] = self.calc_spectrum() + last_k = k + last_used_TS = int(i + L) else: if plot_chunks: - print('end of TS is reached') - print('last spec No: '+str(last_k)) - print('spec container: '+str(specs.shape)) - print('last used Timestep: '+str(last_used_TS)) - print('length of TS '+ str(dsize) +'ms') - - k+=1 + print("end of TS is reached") + print("last 
spec No: " + str(last_k)) + print("spec container: " + str(specs.shape)) + print("last used Timestep: " + str(last_used_TS)) + print("length of TS " + str(dsize) + "ms") + k += 1 if save_chunks: - self.chunks=chunks - - - self.spec_est=self.specs.mean(axis=0) - + self.chunks = chunks + self.spec_est = self.specs.mean(axis=0) - self.n_spec,_=self.specs.shape + self.n_spec, _ = self.specs.shape self.calc_var() - def calc_freq(self): - """ calculate array of spectral variable (frequency or - wavenumber) in cycles per unit of L """ + """calculate array of spectral variable (frequency or + wavenumber) in cycles per unit of L""" - self.df = 1./((self.n-1)*self.dx) + self.df = 1.0 / ((self.n - 1) * self.dx) if self.neven: - f = self.df*np.arange(self.n/2+1) + f = self.df * np.arange(self.n / 2 + 1) else: - f = self.df*np.arange( (self.n-1)/2. + 1 ) + f = self.df * np.arange((self.n - 1) / 2.0 + 1) return f def calc_spectrum(self): - """ compute the 1d spectrum of a field phi """ + """compute the 1d spectrum of a field phi""" self.phih = np.fft.rfft(self.phi) # the factor of 2 comes from the symmetry of the Fourier coeffs - spec = 2.*(self.phih*self.phih.conj()).real / self.df /self.n**2 + spec = 2.0 * (self.phih * self.phih.conj()).real / self.df / self.n**2 # the zeroth frequency should be counted only once - spec[0] = spec[0]/2. + spec[0] = spec[0] / 2.0 if self.neven: - spec[-1] = spec[-1]/2. + spec[-1] = spec[-1] / 2.0 return spec - def error(self, ci=0.95): - self.El, self.Eu =spec_error(self.spec_est,self.n_spec,ci=ci) + self.El, self.Eu = spec_error(self.spec_est, self.n_spec, ci=ci) def parceval(self): - print('Parcevals Theorem:') - print('variance of unweighted timeseries: ',self.data.var()) - print('mean variance of timeseries chunks: ',self.chunks.var(axis=1).mean() if self.save_chunks is True else 'data not saved') - print('variance of the pwelch Spectrum: ',self.var) + print("Parcevals Theorem:") + print("variance of unweighted timeseries: ", self.data.var()) + print( + "mean variance of timeseries chunks: ", + ( + self.chunks.var(axis=1).mean() + if self.save_chunks is True + else "data not saved" + ), + ) + print("variance of the pwelch Spectrum: ", self.var) def calc_var(self): - """ Compute total variance from spectrum """ - self.var = self.df* np.nanmean(self.specs[1:], 0).sum() # do not consider zeroth frequency - + """Compute total variance from spectrum""" + self.var = ( + self.df * np.nanmean(self.specs[1:], 0).sum() + ) # do not consider zeroth frequency # optimze spectral variance +class ConserveVariance: + def __init__(self, Z, freq, data, nan_mask=None): + """ """ -class conserve_variance: - def __init__(self,Z, freq, data, nan_mask= None): - - """ - - """ - - self.LM =LM + self.LM = LM self.data = data - self.Z = Z + self.Z = Z self.freq = freq self.nan_mask = nan_mask @@ -1032,58 +1059,78 @@ def set_parameters(self): params = self.LM.Parameters() - p_smothed = self.runningmean(np.abs(self.Z ), 20, tailcopy=True) + p_smothed = self.runningmean(np.abs(self.Z), 20, tailcopy=True) f_max = self.freq[p_smothed[~np.isnan(p_smothed)].argmax()] - - lambda_max = 9.81 * 5 **2/ (2* np.pi) - params.add('x_cutoff', 1/lambda_max , min=0, max=1, vary=False) - params.add('x_max_pos', f_max , min=f_max*0.75, max=f_max*5+0.001, vary=False) - params.add('LF_amp', 1 , min=0.5 , max=1.2, vary= True) - params.add('HF_amp', 0.5 , min=0 , max=1.5, vary= True) - params.add('sigma_g', 0.002 , min=0.001 , max=0.05, vary= False) - params.add('Gauss_amp', 0.5 , min=0.01 , max=2, vary= True) - + 
lambda_max = 9.81 * 5**2 / (2 * np.pi) + params.add("x_cutoff", 1 / lambda_max, min=0, max=1, vary=False) + params.add( + "x_max_pos", f_max, min=f_max * 0.75, max=f_max * 5 + 0.001, vary=False + ) + params.add("LF_amp", 1, min=0.5, max=1.2, vary=True) + params.add("HF_amp", 0.5, min=0, max=1.5, vary=True) + params.add("sigma_g", 0.002, min=0.001, max=0.05, vary=False) + params.add("Gauss_amp", 0.5, min=0.01, max=2, vary=True) self.params = params return params def test_ojective_func(self, weight_func, plot_flag=True): - self.objective_func(self.params, self.data, self.Z, weight_func, self.freq, self.nan_mask, plot_flag=plot_flag) - - def tanh_weight_function(self,ff, params): - return self.tanh_weight(ff, params['x_cutoff'].value, - params['x_max_pos'].value, - params['LF_amp'].value, - params['HF_amp'].value, - params['Gauss_amp'].value, - params['sigma_g'].value ) - - def tanh_weight(self,x, x_cutoff , x_max_pos, LF_amp, HF_amp, Gauss_amp, sigma_g): + self.objective_func( + self.params, + self.data, + self.Z, + weight_func, + self.freq, + self.nan_mask, + plot_flag=plot_flag, + ) + + def tanh_weight_function(self, ff, params): + return self.tanh_weight( + ff, + params["x_cutoff"].value, + params["x_max_pos"].value, + params["LF_amp"].value, + params["HF_amp"].value, + params["Gauss_amp"].value, + params["sigma_g"].value, + ) + + def tanh_weight(self, x, x_cutoff, x_max_pos, LF_amp, HF_amp, Gauss_amp, sigma_g): """ - zdgfsg + zdgfsg """ - HF_amp1 = (LF_amp-HF_amp) - decay = 0.5 - np.tanh( (x-x_cutoff)/sigma_g )/2 - y = decay * HF_amp1 + (1 - HF_amp1) - y = y- y[0] +LF_amp + HF_amp1 = LF_amp - HF_amp + decay = 0.5 - np.tanh((x - x_cutoff) / sigma_g) / 2 + y = decay * HF_amp1 + (1 - HF_amp1) + y = y - y[0] + LF_amp - def gaus(x, x_0, amp, sigma_g ): - return amp* np.exp(-0.5 * ( (x-x_0)/sigma_g)**2) + def gaus(x, x_0, amp, sigma_g): + return amp * np.exp(-0.5 * ((x - x_0) / sigma_g) ** 2) - y += gaus(x, x_max_pos, Gauss_amp, sigma_g ) + y += gaus(x, x_max_pos, Gauss_amp, sigma_g) return y + def objective_func( + self, + params, + data_x, + Z_results, + weight_func, + freq, + nan_mask=None, + plot_flag=False, + ): - def objective_func(self, params, data_x, Z_results, weight_func, freq, nan_mask = None, plot_flag=False): + alpha = 1e7 - alpha =1e7 def model_real_space(Z, weights, n=None): """ Both inputs must have the same length """ - return np.fft.irfft(Z*weights, n = n) + return np.fft.irfft(Z * weights, n=n) weights = weight_func(freq, params) @@ -1091,10 +1138,10 @@ def model_real_space(Z, weights, n=None): weights = np.insert(weights, -1, weights[-1]) if nan_mask is not None: - model = model_real_space(Z_results, weights, n= data_x.size)[~nan_mask] + model = model_real_space(Z_results, weights, n=data_x.size)[~nan_mask] dd = data_x[~nan_mask][:] else: - model = model_real_space(Z_results, weights, n= data_x.size)[:] + model = model_real_space(Z_results, weights, n=data_x.size)[:] dd = data_x[:] if model.size > dd.size: @@ -1103,99 +1150,147 @@ def model_real_space(Z, weights, n=None): dd = dd[:-1] if plot_flag: - - from matplotlib.gridspec import GridSpec - import matplotlib.pyplot as plt - F= M.figure_axis_xy(10, 4.1 * 2.5, view_scale= 0.5, container = True) - gs = GridSpec(5,1, wspace=0.1, hspace=0.4) - pos0,pos1,pos2 = gs[0:3, 0],gs[3, 0],gs[4, 0] + F = M.figure_axis_xy(10, 4.1 * 2.5, view_scale=0.5, container=True) + + gs = GridSpec(5, 1, wspace=0.1, hspace=0.4) + pos0, pos1, pos2 = gs[0:3, 0], gs[3, 0], gs[4, 0] ax1 = F.fig.add_subplot(pos0) - plt.title('Stacked Timeseries', 
loc='left') + plt.title("Stacked Timeseries", loc="left") - chunk_l= 400 - chunk_iter = create_chunk_boundaries(chunk_l, data_x.size, ov=0, iter_flag = True) + chunk_l = 400 + chunk_iter = create_chunk_boundaries( + chunk_l, data_x.size, ov=0, iter_flag=True + ) - ofsett0= 6 + ofsett0 = 6 ofsett = np.copy(ofsett0) for chi in chunk_iter: - v1= np.round(np.nanvar(dd), 4) - plt.plot(ofsett+ data_x[chi[0]:chi[-1]] , linewidth=3, alpha=0.5 , c='black', label=' org. data (var:'+str(v1)+')') - - v1= np.round(model_real_space(Z_results, weights*0 +1)[~nan_mask[1:]].var(), 4) - plt.plot(ofsett + model_real_space(Z_results, weights*0 +1)[chi[0]:chi[-1]] ,linewidth= 0.8, c='red', label='LS model init (var:'+str(v1)+')') - - v1= np.round(model.var(), 4) - plt.plot(ofsett + model_real_space(Z_results, weights)[chi[0]:chi[-1]],linewidth= 0.8, c='blue', label='LS model weighted (var:'+str(v1)+')') + v1 = np.round(np.nanvar(dd), 4) + plt.plot( + ofsett + data_x[chi[0] : chi[-1]], + linewidth=3, + alpha=0.5, + c="black", + label=" org. data (var:" + str(v1) + ")", + ) + + v1 = np.round( + model_real_space(Z_results, weights * 0 + 1)[~nan_mask[1:]].var(), 4 + ) + plt.plot( + ofsett + + model_real_space(Z_results, weights * 0 + 1)[chi[0] : chi[-1]], + linewidth=0.8, + c="red", + label="LS model init (var:" + str(v1) + ")", + ) + + v1 = np.round(model.var(), 4) + plt.plot( + ofsett + model_real_space(Z_results, weights)[chi[0] : chi[-1]], + linewidth=0.8, + c="blue", + label="LS model weighted (var:" + str(v1) + ")", + ) if ofsett == ofsett0: plt.legend() ofsett -= 1 - plt.ylim(ofsett, ofsett0+1) - plt.xlim(0, chunk_l*2) - + plt.ylim(ofsett, ofsett0 + 1) + plt.xlim(0, chunk_l * 2) ax2 = F.fig.add_subplot(pos1) - plt.title('Amplitude Weight Function', loc='left') - plt.plot(weights , c='black') - ax2.set_xscale('log') + plt.title("Amplitude Weight Function", loc="left") + plt.plot(weights, c="black") + ax2.set_xscale("log") ax3 = F.fig.add_subplot(pos2) - plt.title('Initial and tuned |Z|', loc='left') - + plt.title("Initial and tuned |Z|", loc="left") - v2 = np.round( (4.*(Z_results*Z_results.conj()).real /data_x.size**2 ).sum(), 4) - plt.plot(abs(Z_results), linewidth= 0.8, c='red', label='Z (var: '+str(v2) +')') - plt.plot(M.runningmean(abs(Z_results) , 20, tailcopy=True), linewidth= 1.5, c='red', zorder=12) + v2 = np.round( + (4.0 * (Z_results * Z_results.conj()).real / data_x.size**2).sum(), 4 + ) + plt.plot( + abs(Z_results), linewidth=0.8, c="red", label="Z (var: " + str(v2) + ")" + ) + plt.plot( + M.runningmean(abs(Z_results), 20, tailcopy=True), + linewidth=1.5, + c="red", + zorder=12, + ) - Z2= Z_results* weights - v2 = np.round( (4.*(Z2*Z2.conj()).real /data_x.size**2 ).sum(), 4) - plt.plot(abs(Z2), linewidth= 0.8, c='blue', label='weighted Z(var: '+str(v2) +')') - plt.plot(M.runningmean(abs(Z2) , 20, tailcopy=True), linewidth= 1.5, c='blue', zorder=12) + Z2 = Z_results * weights + v2 = np.round((4.0 * (Z2 * Z2.conj()).real / data_x.size**2).sum(), 4) + plt.plot( + abs(Z2), + linewidth=0.8, + c="blue", + label="weighted Z(var: " + str(v2) + ")", + ) + plt.plot( + M.runningmean(abs(Z2), 20, tailcopy=True), + linewidth=1.5, + c="blue", + zorder=12, + ) plt.legend() - plt.ylim( np.percentile(abs(Z_results), 0.5), abs(Z_results).max()*1.3 ) - plt.xlabel('wavenumber k') - ax3.set_xscale('log') - ax3.set_yscale('log') - - fitting_cost =( abs(dd - model) / dd.std() )**2 - variance_cost =( abs(dd.var() - model.var()) / dd.std() ) **2 + plt.ylim(np.percentile(abs(Z_results), 0.5), abs(Z_results).max() * 1.3) 
+ plt.xlabel("wavenumber k") + ax3.set_xscale("log") + ax3.set_yscale("log") - return fitting_cost.sum() , alpha* variance_cost + fitting_cost = (abs(dd - model) / dd.std()) ** 2 + variance_cost = (abs(dd.var() - model.var()) / dd.std()) ** 2 - def optimize(self, fitting_args= None , method='dual_annealing', max_nfev=None): + return fitting_cost.sum(), alpha * variance_cost + def optimize(self, fitting_args=None, method="dual_annealing", max_nfev=None): if fitting_args is None: fitting_args = (self.data, self.Z, self.tanh_weight_function, self.freq) self.weight_func = fitting_args[2] - self.fitter = self.LM.minimize(self.objective_func, self.params, args=fitting_args, kws={'nan_mask':self.nan_mask} , method=method, max_nfev=max_nfev) + self.fitter = self.LM.minimize( + self.objective_func, + self.params, + args=fitting_args, + kws={"nan_mask": self.nan_mask}, + method=method, + max_nfev=max_nfev, + ) return self.fitter def plot_result(self): - self.objective_func(self.fitter.params, self.data, self.Z, self.weight_func, self.freq, self.nan_mask, plot_flag=True) + self.objective_func( + self.fitter.params, + self.data, + self.Z, + self.weight_func, + self.freq, + self.nan_mask, + plot_flag=True, + ) def best_guess_Z(self): return self.Z * self.weight_func(self.freq, self.fitter.params) def runningmean(self, var, m, tailcopy=False): - m=int(m) - s =var.shape - if s[0] <= 2*m: - print('0 Dimension is smaller then averaging length') + m = int(m) + s = var.shape + if s[0] <= 2 * m: + print("0 Dimension is smaller then averaging length") return - rr=np.asarray(var)*np.nan - + rr = np.asarray(var) * np.nan var_range = np.arange(m, int(s[0]) - m - 1, 1) for i in var_range[np.isfinite(var[m : int(s[0]) - m - 1])]: rr[int(i)] = np.nanmean(var[i - m : i + m]) - if tailcopy: - rr[0:m]=rr[m+1] - rr[-m-1:-1]=rr[-m-2] + rr[0:m] = rr[m + 1] + rr[-m - 1 : -1] = rr[-m - 2] return rr From 18c665d802a6f70ecbaf996eb5f933b93578cb38 Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Sun, 3 Mar 2024 08:26:36 -0500 Subject: [PATCH 15/22] applied suggested changes in review --- .../ICEsat2_SI_tools/angle_optimizer.py | 36 ++++++++++--------- .../ICEsat2_SI_tools/filter_regrid.py | 14 +++++--- src/icesat2_tracks/analysis_db/B04_angle.py | 2 +- 3 files changed, 30 insertions(+), 22 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py b/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py index dac43084..a3b40d86 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py @@ -55,9 +55,10 @@ def get_z_model(x_positions, y_position, K_prime, K_amp, alpha_rad, group_phase) k = K_abs * np.cos(alpha_rad) l = K_abs * np.sin(alpha_rad) - return wavemodel( + _wavemodel = wavemodel( x_positions, y_position, k, l, np.array(K_amp), group_phase=group_phase ) + return _wavemodel @jit(nopython=True, parallel=False) @@ -70,10 +71,12 @@ def get_z_model_single_wave( k = K_abs * np.cos(alpha_rad) l = K_abs * np.sin(alpha_rad) - return wavemodel_single_wave( + _wavemodel_single_wave = wavemodel_single_wave( x_positions, y_position, k, l, K_amp, group_phase=group_phase ) + return _wavemodel_single_wave + def objective_func(pars, x, y, z, test_flag=False, prior=None, prior_weight=2): @@ -88,15 +91,16 @@ def objective_func(pars, x, y, z, test_flag=False, prior=None, prior_weight=2): ) if prior is not None: a_0, a_std = prior["alpha"] - penalties = np.array([(abs(a_0 - pars["alpha"]) ** 2 / a_std**2)]) + penalties = np.array([((a_0 - 
pars["alpha"]) / a_std) ** 2]) else: penalties = np.array([0]) - cost = (abs(z - z_model)) ** 2 / z.std() ** 2 + cost = ((z - z_model) / z.std()) ** 2 if test_flag: return z_model else: - return np.concatenate([cost, prior_weight * penalties]) + residual = np.concatenate([cost, prior_weight * penalties]) + return residual def likelyhood_func( @@ -112,7 +116,7 @@ def likelyhood_func( x, y, pars["K_prime"], pars["K_amp"], pars["alpha"], pars["phase"] ) # define cost - cost_sqrt = (abs(z - z_model)) ** 2 + cost_sqrt = (z - z_model) ** 2 # estimate total variance if z_error is None: @@ -140,7 +144,7 @@ def simple_log_panelty(x, x0, sigma): ) -class sample_with_mcmc: +class SampleWithMcmc: """ sample a 2nd surface using mcmc and other methods. its made for getting a quick estimate! @@ -190,14 +194,13 @@ def set_parameters(self, par_dict, verbose=False): params = self.LM.Parameters() - var_seeds = list() for k, I in par_dict.items(): params.add(k, (I[0] + I[1]) / 2, vary=True, min=I[0], max=I[1]) - var_seeds.append(np.linspace(I[0], I[1], I[2])) + var_seeds = [np.linspace(I[0], I[1], I[2]) for _, I in par_dict.items()] if len(var_seeds) > 2: - raise ValueError("nor proframmed for 3d") + raise ValueError("Dimensions larger than 2 not supported") self.nwalkers = int(var_seeds[0].size * var_seeds[1].size) @@ -210,12 +213,13 @@ def set_parameters(self, par_dict, verbose=False): print(self.params) def test_objective_func(self): - return self.objective_func( + obj_func = self.objective_func( self.params, *self.fitting_args, **self.fitting_kargs ) + return obj_func def sample( - self, fitting_args=None, method="emcee", steps=100, verbose=True, **kargs + self, fitting_args=None, method="emcee", steps=100, verbose=True, **kwargs ): fitting_args, fitting_kargs = self.fitting_args, self.fitting_kargs @@ -231,19 +235,19 @@ def sample( steps=steps, pos=self.seeds, nan_policy="omit", - **kargs, + **kwargs, ) if verbose: print(self.LM.report_fit(self.fitter)) print("results at self.fitter") - def plot_sample(self, **kargs): + def plot_sample(self, **kwargs): chain = self.chain() nwalkers = self.nwalkers for n in np.arange(nwalkers): - plt.plot(chain[:, n, 1], chain[:, n, 0], "-", **kargs) - plt.plot(chain[:, n, 1], chain[:, n, 0], ".", **kargs) + plt.plot(chain[:, n, 1], chain[:, n, 0], "-", **kwargs) + plt.plot(chain[:, n, 1], chain[:, n, 0], ".", **kwargs) def optimize(self, fitting_args=None, method="dual_annealing", verbose=True): diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/filter_regrid.py b/src/icesat2_tracks/ICEsat2_SI_tools/filter_regrid.py index e70bf994..50c60b79 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/filter_regrid.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/filter_regrid.py @@ -71,7 +71,10 @@ def track_pole_ward_file(hdf5_file, product="ALT03"): "1st lat =" + str(abs(T_lat[T_time.argmin()])), ";last lat =" + str(abs(T_lat[T_time.argmax()])), ) - return abs(T_lat[T_time.argmax()]) > abs(T_lat[T_time.argmin()]) + + _lhs = abs(T_lat[T_time.argmax()]) + _rhs = abs(T_lat[T_time.argmin()]) + return _lhs > _rhs def track_type(T): @@ -79,10 +82,11 @@ def track_type(T): Returns if track acending or desending T is a pandas table """ - return ( - T["lats"].iloc[T["delta_time"].argmax()] - - T["lats"].iloc[T["delta_time"].argmin()] - ) < 0 + + max_lat = T["lats"].iloc[T["delta_time"].argmax()] + min_lat = T["lats"].iloc[T["delta_time"].argmin()] + delta_lat = max_lat - min_lat + return delta_lat < 0 def lat_min_max_extended(B, beams_list, accent=None): diff --git 
a/src/icesat2_tracks/analysis_db/B04_angle.py b/src/icesat2_tracks/analysis_db/B04_angle.py index 6c85ec8a..38905c5d 100644 --- a/src/icesat2_tracks/analysis_db/B04_angle.py +++ b/src/icesat2_tracks/analysis_db/B04_angle.py @@ -615,7 +615,7 @@ def plot_instance( Marginals[ikey] = make_fake_data(xi, group) continue - SM = angle_optimizer.sample_with_mcmc(params_dict) + SM = angle_optimizer.SampleWithMcmc(params_dict) SM.set_objective_func(angle_optimizer.objective_func) nan_list = np.isnan(x_concat) | np.isnan(y_concat) | np.isnan(y_concat) x_concat[nan_list] = [] From 8988ae7acaa0aaeb0bf2785f522682b457f1e119 Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Sun, 3 Mar 2024 08:30:36 -0500 Subject: [PATCH 16/22] remove int cast --- src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py b/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py index a3b40d86..b7cfbfd8 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py @@ -202,7 +202,7 @@ def set_parameters(self, par_dict, verbose=False): if len(var_seeds) > 2: raise ValueError("Dimensions larger than 2 not supported") - self.nwalkers = int(var_seeds[0].size * var_seeds[1].size) + self.nwalkers = var_seeds[0].size * var_seeds[1].size pxx, pyy = np.meshgrid(var_seeds[0], var_seeds[1]) self.seeds = np.vstack([pxx.flatten(), pyy.flatten()]).T @@ -223,7 +223,7 @@ def sample( ): fitting_args, fitting_kargs = self.fitting_args, self.fitting_kargs - # TODO: this funciton throws an error in CI. The nan_policy='omit' policiy was added to avoid this issue + # TODO: this function throws an error in CI. The nan_policy='omit' policy was added to avoid this issue # according to the guidelines in https://lmfit.github.io/lmfit-py/faq.html#i-get-errors-from-nan-in-my-fit-what-can-i-do self.fitter = self.LM.minimize( self.objective_func, From cdcbd40d5710adb633b04cdf12d4e797e805de5c Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Tue, 5 Mar 2024 16:09:23 -0500 Subject: [PATCH 17/22] Adding some changes missed in the last commit --- .../ICEsat2_SI_tools/angle_optimizer.py | 2 +- .../ICEsat2_SI_tools/filter_regrid.py | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py b/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py index b7cfbfd8..497e835a 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/angle_optimizer.py @@ -125,7 +125,7 @@ def likelyhood_func( tot_var = z_error**2 + z_model**2 def simple_log_panelty(x, x0, sigma): - return -np.power((x - x0) / sigma, 2.0) / 2.0 + return -((x - x0) / sigma) ** 2.0 / 2.0 # try ot get prior if prior is not None: diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/filter_regrid.py b/src/icesat2_tracks/ICEsat2_SI_tools/filter_regrid.py index 50c60b79..9b3b5954 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/filter_regrid.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/filter_regrid.py @@ -3,6 +3,7 @@ import pandas as pd import matplotlib as plt + def process_single_stencil_set( stancil_set, T2, key_var, key_x_coord, stancil_width, calc_stencil_stats ): @@ -72,9 +73,9 @@ def track_pole_ward_file(hdf5_file, product="ALT03"): ";last lat =" + str(abs(T_lat[T_time.argmax()])), ) - _lhs = abs(T_lat[T_time.argmax()]) + _lhs = abs(T_lat[T_time.argmax()]) _rhs = abs(T_lat[T_time.argmin()]) - 
return _lhs > _rhs + return _lhs > _rhs def track_type(T): @@ -83,7 +84,7 @@ def track_type(T): T is a pandas table """ - max_lat = T["lats"].iloc[T["delta_time"].argmax()] + max_lat = T["lats"].iloc[T["delta_time"].argmax()] min_lat = T["lats"].iloc[T["delta_time"].argmin()] delta_lat = max_lat - min_lat return delta_lat < 0 @@ -414,10 +415,10 @@ def calc_stencil_stats(istencil): Tmedian = T2[i_mask].median() - Tmedian[key + "_weighted_mean"] = np.nan - Tmedian[key + "_mode"] = np.nan + Tmedian[f"{key}_weighted_mean"] = np.nan + Tmedian[f"{key}_mode"] = np.nan Tmedian["N_photos"] = Nphoton - Tmedian[key + "_std"] = np.nan + Tmedian[f"{key}_std"] = np.nan return istencil[1], Tmedian From 7d5e579a4d8364a80debb4d6a492ada80c05c089 Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Tue, 5 Mar 2024 16:38:19 -0500 Subject: [PATCH 18/22] more missed changes in previous commits --- .../ICEsat2_SI_tools/spectral_estimates.py | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py index 9759b3c5..145087cb 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py @@ -20,7 +20,7 @@ def is_not_even(number): # basic functions def create_chunk_boundaries(L, dsize, ov=None, iter_flag=True): """ - returns all need chunk boudaries and center position given L, and ov + returns all need chunk boundaries and center position given L, and ov inputs: L desired length of window, dsize size of the data @@ -39,10 +39,7 @@ def create_chunk_boundaries(L, dsize, ov=None, iter_flag=True): [xleft[0:max_size], xcenter_pos[0:max_size], xright[0:max_size]] ) - if iter_flag is True: - return iter(position_stancil.T.tolist()) - else: - return position_stancil + return iter(position_stancil.T.tolist()) if iter_flag else position_stancil def create_chunk_boundaries_unit_lengths(L_unit, data_limits, ov=None, iter_flag=True): @@ -257,10 +254,7 @@ def calc_freq_LS( neven = is_not_even(N) dx = np.diff(x).mean() if dx is None else dx df = dx / 50 - if neven: - f = df * np.arange(df, N + 1) - else: - f = df * np.arange(df, N) + f = df * np.arange(df, N + 1) if neven else df * np.arange(df, N) return f, df @@ -453,11 +447,15 @@ def calc_spectrum_apply(stancil): # repack data, create xarray self.spec_name = "power_spec" if name is None else name - G = dict() - for xi, I in D_specs.items(): - G[xi] = xr.DataArray( - I, dims=["k"], coords={"k": self.k, "x": xi}, name=self.spec_name - ) + G = { + xi: xr.DataArray( + I, + dims=["k"], + coords={"k": self.k, "x": xi * self.dx}, + name=self.spec_name, + ) + for xi, I in D_specs.items() + } self.G = xr.concat(G.values(), dim="x").T if self.G.k[0] == 0: @@ -468,7 +466,7 @@ def calc_spectrum_apply(stancil): return self.G - # cal variance + def calc_var(self): """Compute total variance from spectragram""" # do not consider zeroth frequency From b9d963cd06eee896c8a2e3a121cd310640934480 Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Wed, 6 Mar 2024 10:28:40 -0500 Subject: [PATCH 19/22] fixed bug in spectral_estimates.py --- src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py index 145087cb..fd619cd1 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py +++ 
b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py @@ -451,7 +451,7 @@ def calc_spectrum_apply(stancil): xi: xr.DataArray( I, dims=["k"], - coords={"k": self.k, "x": xi * self.dx}, + coords={"k": self.k, "x": xi }, name=self.spec_name, ) for xi, I in D_specs.items() From 290c3860551ad05a93712e64d0e73887871ca903 Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Thu, 7 Mar 2024 12:11:22 -0500 Subject: [PATCH 20/22] applied suggested changed in review --- .../ICEsat2_SI_tools/spectral_estimates.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py index fd619cd1..7aef0a5d 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py @@ -220,7 +220,7 @@ def calc_freq_LS( minimum_frequency, maximum_frequency only used for LS_auto """ - if method is "fftX2": + if method == "fftX2": neven = is_not_even(N) dx = np.diff(x).mean() if dx is None else dx df = 1.0 / ((N - 1) * dx) / 2 @@ -406,19 +406,20 @@ def __init__(self, x_grid, data, Lpoints, ov=None, window=None): """ self.Lpoints = Lpoints + #when not defined in create_chunk_boundaries then L/2 self.ov = int(Lpoints / 2) if ov is None else ov self.data = data # create subsample k - self.k, self.dk = calc_freq_fft(x_grid, Lpoints) + self.k, self.dk = calc_freq_fft(x_grid, Lpoints) # return 1/ unit of frid points # to get the waveumber units (2 pi/ lambda), multiply by 2 pi self.k, self.dk = self.k * 2 * np.pi, self.dk * 2 * np.pi # create window self.win = create_window(Lpoints) - def cal_spectrogram(self, data=None, name=None): + def cal_spectrogram(self, data=None, name="power_spec"): """ defines apply function and calculated all sub-sample sprectra using map """ @@ -446,7 +447,7 @@ def calc_spectrum_apply(stancil): self.N_stancils = len(chunk_positions) # repack data, create xarray - self.spec_name = "power_spec" if name is None else name + self.spec_name = name G = { xi: xr.DataArray( I, @@ -565,7 +566,7 @@ def __init__( self.k, self.dk = self.k[::kjumps], self.dk * kjumps self.win = None - def cal_spectrogram(self, x=None, data=None, name=None, dx=1): + def cal_spectrogram(self, x=None, data=None, name="power_spec", dx=1): """ defines apply function and calculated all sub-sample sprectra using map dx nominal resolution of the data resolutionif not set, dx= 1 @@ -576,7 +577,7 @@ def cal_spectrogram(self, x=None, data=None, name=None, dx=1): L, dk = self.L, self.dk win = self.win self.dx = dx - + # init Lomb scargle object with noise as nummy data () self.LS = LombScargle(X[0:L], np.random.randn(L) * 0.001, fit_mean=True) def calc_spectrum_apply(stancil): @@ -601,7 +602,7 @@ def calc_spectrum_apply(stancil): self.N_stancils = len(chunk_positions) # repack data, create xarray - self.spec_name = "power_spec" if name is None else name + self.spec_name = name G = dict() for xi, I in D_specs.items(): G[xi] = xr.DataArray( From c60d0d7287e86cd14c92ea7550520add8799cccb Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Thu, 7 Mar 2024 12:14:32 -0500 Subject: [PATCH 21/22] file spectral_estimates.py formatted --- .../ICEsat2_SI_tools/spectral_estimates.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py index 7aef0a5d..829426fa 100644 --- 
a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py @@ -406,13 +406,15 @@ def __init__(self, x_grid, data, Lpoints, ov=None, window=None): """ self.Lpoints = Lpoints - #when not defined in create_chunk_boundaries then L/2 + # when not defined in create_chunk_boundaries then L/2 self.ov = int(Lpoints / 2) if ov is None else ov self.data = data # create subsample k - self.k, self.dk = calc_freq_fft(x_grid, Lpoints) # return 1/ unit of frid points + self.k, self.dk = calc_freq_fft( + x_grid, Lpoints + ) # return 1/ unit of frid points # to get the waveumber units (2 pi/ lambda), multiply by 2 pi self.k, self.dk = self.k * 2 * np.pi, self.dk * 2 * np.pi @@ -449,14 +451,14 @@ def calc_spectrum_apply(stancil): # repack data, create xarray self.spec_name = name G = { - xi: xr.DataArray( - I, - dims=["k"], - coords={"k": self.k, "x": xi }, - name=self.spec_name, - ) - for xi, I in D_specs.items() - } + xi: xr.DataArray( + I, + dims=["k"], + coords={"k": self.k, "x": xi}, + name=self.spec_name, + ) + for xi, I in D_specs.items() + } self.G = xr.concat(G.values(), dim="x").T if self.G.k[0] == 0: @@ -467,7 +469,6 @@ def calc_spectrum_apply(stancil): return self.G - def calc_var(self): """Compute total variance from spectragram""" # do not consider zeroth frequency From 356d9f2001bcc10fb3cbf24448e552b3b5c73655 Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Thu, 7 Mar 2024 13:19:07 -0500 Subject: [PATCH 22/22] added some code refactoring --- .../ICEsat2_SI_tools/spectral_estimates.py | 31 +++++-------------- 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py index 829426fa..ed772575 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/spectral_estimates.py @@ -44,7 +44,7 @@ def create_chunk_boundaries(L, dsize, ov=None, iter_flag=True): def create_chunk_boundaries_unit_lengths(L_unit, data_limits, ov=None, iter_flag=True): """ - returns all need chunk boudaries and center position given L, and ov + returns all need chunk boundaries and center position given L, and ov inputs: L desired length of window in units of the x axis of the data, data_limits (x_min, x_max) tuple with the beginning and end the the derived window stancils @@ -65,11 +65,8 @@ def create_chunk_boundaries_unit_lengths(L_unit, data_limits, ov=None, iter_flag position_stancil = np.vstack( [xleft[0:max_size], xcenter_pos[0:max_size], xright[0:max_size]] ) - - if iter_flag is True: - return iter(position_stancil.T.tolist()) - else: - return position_stancil + + return iter(position_stancil.T.tolist()) if iter_flag else position_stancil def Z_to_power(Z, df, N): @@ -174,11 +171,7 @@ def calc_freq_fft(x_grid, N): dx = np.round(np.median(np.diff(x_grid)), 1) df = 1.0 / ((N - 1) * dx) - - if neven: - f = df * np.arange(N / 2 + 1) - else: - f = df * np.arange((N - 1) / 2.0 + 1) + f = df * np.arange(N / 2 + 1) if neven else df * np.arange((N - 1) / 2.0 + 1) return f, df @@ -192,11 +185,7 @@ def calc_freq_fft_given_dx(dx, N): neven = is_not_even(N) df = 1.0 / ((N - 1) * dx) - - if neven: - f = df * np.arange(N / 2 + 1) - else: - f = df * np.arange((N - 1) / 2.0 + 1) + f = df * np.arange(N / 2 + 1) if neven else df * np.arange((N - 1) / 2.0 + 1) return f, df @@ -224,19 +213,13 @@ def calc_freq_LS( neven = is_not_even(N) dx = np.diff(x).mean() if dx is None else dx df = 1.0 / ((N - 1) * 
dx) / 2 - if neven: - f = df * np.arange(df, N + 1) - else: - f = df * np.arange(df, (N - 1) + 1) + f = df * np.arange(df, N + 1) if neven else df * np.arange(df, N) elif method is "fft": neven = is_not_even(N) dx = np.diff(x).mean() if dx is None else dx df = 1.0 / ((N - 1) * dx) - if neven: - f = df * np.arange(N / 2 + 1) - else: - f = df * np.arange((N - 1) / 2.0 + 1) + f = df * np.arange(N / 2 + 1) if neven else df * np.arange((N - 1) / 2.0 + 1) elif method is "LS_auto":
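
For readers following the calc_freq_LS / calc_freq_fft refactor above, here is a minimal sketch of the standard even/odd one-sided frequency-axis logic, written in the conditional-expression style these hunks adopt. The function name, sample spacing, and record lengths are illustrative only and are not taken from the patched module:

    import numpy as np

    def fft_freq_axis(dx, N):
        # One-sided frequency axis for an N-point record with sample spacing dx,
        # using a conditional expression instead of an if/else block.
        df = 1.0 / ((N - 1) * dx)
        f = df * np.arange(N / 2 + 1) if N % 2 == 0 else df * np.arange((N - 1) / 2.0 + 1)
        return f, df

    f_even, df_even = fft_freq_axis(dx=10.0, N=8)  # 5 frequencies, spaced by 1/((N-1)*dx)
    f_odd, df_odd = fft_freq_axis(dx=10.0, N=9)    # also 5 frequencies for the odd-length record

Note that string comparisons in this kind of method dispatch are only reliable with ==, as in the "fftX2" hunk of PATCH 20/22; `is` tests object identity and may return False for equal strings.

The dict-comprehension repacking introduced in PATCH 18/22 and reformatted in PATCH 21/22 can likewise be exercised on toy data. The k grid and the per-stancil spectra below are made up for the sketch; only the D_specs/G structure and the xr.concat call mirror the patched code:

    import numpy as np
    import xarray as xr

    # Stand-in for the per-stancil spectra: {stancil center position -> 1D spectrum over k}
    k = np.linspace(0.01, 0.5, 50)
    D_specs = {xi: np.exp(-k * xi / 100.0) for xi in (100, 300, 500)}

    # Wrap each spectrum as a DataArray tagged with its x position, then stack along "x",
    # following the pattern used in cal_spectrogram.
    G = {
        xi: xr.DataArray(I, dims=["k"], coords={"k": k, "x": xi}, name="power_spec")
        for xi, I in D_specs.items()
    }
    spec = xr.concat(G.values(), dim="x").T  # dimensions ("k", "x"), one column per stancil

Concatenating along "x" promotes the scalar x coordinate of each DataArray to the new stancil axis, which is why the hunks only change how G is built and leave the xr.concat call untouched.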