diff --git a/LiCSBAS_lib/LiCSBAS_inv_lib.py b/LiCSBAS_lib/LiCSBAS_inv_lib.py
index 1c0beeb..b24cc4c 100644
--- a/LiCSBAS_lib/LiCSBAS_inv_lib.py
+++ b/LiCSBAS_lib/LiCSBAS_inv_lib.py
@@ -8,6 +8,8 @@
 =========
 Changelog
 =========
+v1.5 20210305 Yu Morishita, GSI
+ - Add GPU option to invert_nsbas
 v1.4.2 20201118 Yu Morishita, GSI
  - Again bug fix of multiprocessing
 v1.4.1 20201116 Yu Morishita, GSI
@@ -80,7 +82,7 @@ def make_sb_matrix2(ifgdates):


 #%%
-def invert_nsbas(unw, G, dt_cum, gamma, n_core):
+def invert_nsbas(unw, G, dt_cum, gamma, n_core, gpu):
     """
     Calculate incremental displacement differences by NSBAS inversion. Points with full unw data are first solved all at once by simple SB inversion.

@@ -91,6 +93,7 @@ def invert_nsbas(unw, G, dt_cum, gamma, n_core):
       dt_cum : Cumulative years (or days) for each image (n_im)
       gamma  : Gamma value for NSBAS inversion, should be small enough (e.g., 0.0001)
       n_core : Number of cores for parallel processing
+      gpu    : GPU flag (True/False); if True, use cupy for the batched lstsq

     Returns:
       inc : Incremental displacement (n_im-1, n_pt)
@@ -120,20 +123,30 @@ def invert_nsbas(unw, G, dt_cum, gamma, n_core):

     if n_pt_full!=0:
         print('  Solving {0:6}/{1:6}th points with full unw at a time...'.format(n_pt_full, n_pt), flush=True)
-
+
         ## Solve
         unw_tmp = np.concatenate((unw[bool_pt_full, :], np.zeros((n_pt_full, n_im), dtype=np.float32)), axis=1).transpose()
-        result[:, bool_pt_full] = np.linalg.lstsq(Gall, unw_tmp, rcond=None)[0]
+        if gpu:
+            print('  Using GPU')
+            import cupy as cp
+            unw_tmp_cp = cp.asarray(unw_tmp)
+            Gall_cp = cp.asarray(Gall)
+            _sol = cp.linalg.lstsq(Gall_cp, unw_tmp_cp, rcond=None)[0]
+            result[:, bool_pt_full] = cp.asnumpy(_sol)
+            del unw_tmp_cp, Gall_cp, _sol
+        else:
+            result[:, bool_pt_full] = np.linalg.lstsq(Gall, unw_tmp, rcond=None)[0]
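
For reference, the GPU branch above is the standard cupy drop-in pattern: move the operands to the device, call the same lstsq, and copy the solution back to host memory. A minimal standalone sketch of that pattern (the helper name is illustrative; cupy is assumed to be installed):

    import numpy as np

    def lstsq_maybe_gpu(G, d, gpu=False):
        """Solve min ||G x - d|| on the GPU if requested, otherwise with NumPy."""
        if gpu:
            import cupy as cp  # optional dependency, imported only on demand
            sol = cp.linalg.lstsq(cp.asarray(G), cp.asarray(d), rcond=None)[0]
            return cp.asnumpy(sol)  # copy the solution back to host memory
        return np.linalg.lstsq(G, d, rcond=None)[0]

The host-device transfer cost is amortized only when the stacked system is large, which is why the diff restricts the GPU path to the full-data points that are solved in one batch.
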

     ### Solve other points with nan point by point.
+    ## The GPU is not used here; lstsq on many small matrices is slower than on the CPU
     unw_tmp = np.concatenate((unw[~bool_pt_full, :], np.zeros((n_pt-n_pt_full, n_im), dtype=np.float32)), axis=1).transpose()
     mask = (~np.isnan(unw_tmp))
     unw_tmp[np.isnan(unw_tmp)] = 0
     print('  Next, solve {0} points including nan point-by-point...'.format(n_pt-n_pt_full), flush=True)
-
+
     if n_core == 1:
-        result[:, ~bool_pt_full] = censored_lstsq_slow(Gall, unw_tmp, mask) #(n_im+1, n_pt)
+        result[:, ~bool_pt_full] = censored_lstsq_slow(Gall, unw_tmp, mask) #(n_im+1, n_pt)
     else:
         print('  {} parallel processing'.format(n_core), flush=True)
@@ -206,7 +219,7 @@
     if n_core == 1:
         for i in range(n_pt):
-            result[:, i] = wls_nsbas(i) #(n_im+1, n_pt)
+            result[:, i] = wls_nsbas(i) #(n_im+1, n_pt)
     else:
         print('  {} parallel processing'.format(n_core), flush=True)
@@ -227,7 +240,7 @@ def wls_nsbas(i):
     ### Use global values of Gall, unw_tmp, mask
     if np.mod(i, 1000) == 0:
         print('  Running {0:6}/{1:6}th point...'.format(i, unw_tmp.shape[1]), flush=True)
-
+
     ## Weight unw and G
     Gall_w = Gall/np.sqrt(np.float64(var_tmp[:,i][:,np.newaxis]))
@@ -256,7 +269,7 @@
     """
     n_pt, n_im = cum.shape
     result = np.zeros((2, n_pt), dtype=np.float32)*np.nan #[vconst, vel]
-
+
     G = np.stack((np.ones_like(dt_cum), dt_cum), axis=1)
     vconst = np.zeros((n_pt), dtype=np.float32)*np.nan
     vel = np.zeros((n_pt), dtype=np.float32)*np.nan
@@ -266,7 +279,7 @@

     if n_pt_full!=0:
         print('  Solving {0:6}/{1:6}th points with full cum at a time...'.format(n_pt_full, n_pt), flush=True)
-
+
         ## Solve
         result[:, bool_pt_full] = np.linalg.lstsq(G, cum[bool_pt_full, :].transpose(), rcond=None)[0]
@@ -275,9 +288,9 @@
     mask = (~np.isnan(cum_tmp))
     cum_tmp[np.isnan(cum_tmp)] = 0
     print('  Next, solve {0} points including nan point-by-point...'.format(n_pt-n_pt_full), flush=True)
-
-    result[:, ~bool_pt_full] = censored_lstsq_slow(G, cum_tmp, mask) #(n_im+1, n_pt)
-
+
+    result[:, ~bool_pt_full] = censored_lstsq_slow(G, cum_tmp, mask) #(2, n_pt)
+
     vconst = result[0, :]
     vel = result[1, :]
@@ -301,11 +314,11 @@
       dt : Time difference of sin function wrt Jan 1 (day)
     """
     doy0 = (dt.datetime.strptime(imd0, '%Y%m%d')-dt.datetime.strptime(imd0[0:4]+'0101', '%Y%m%d')).days
-
+
     n_pt, n_im = cum.shape
     result = np.zeros((4, n_pt), dtype=np.float32)*np.nan #[vconst, vel, coef_s, coef_c]
-
-
+
+
     sin = np.sin(2*np.pi*dt_cum)
     cos = np.cos(2*np.pi*dt_cum)
     G = np.stack((np.ones_like(dt_cum), dt_cum, sin, cos), axis=1)
@@ -320,7 +333,7 @@

     if n_pt_full!=0:
         print('  Solving {0:6}/{1:6}th points with full cum at a time...'.format(n_pt_full, n_pt), flush=True)
-
+
         ## Solve
         result[:, bool_pt_full] = np.linalg.lstsq(G, cum[bool_pt_full, :].transpose(), rcond=None)[0]
@@ -329,9 +342,9 @@
     mask = (~np.isnan(cum_tmp))
     cum_tmp[np.isnan(cum_tmp)] = 0
     print('  Next, solve {0} points including nan point-by-point...'.format(n_pt-n_pt_full), flush=True)
-
-    result[:, ~bool_pt_full] = censored_lstsq_slow(G, cum_tmp, mask) #(n_im+1, n_pt)
-
+
+    result[:, ~bool_pt_full] = censored_lstsq_slow(G, cum_tmp, mask) #(4, n_pt)
+
     vconst = result[0, :]
     vel = result[1, :]
     coef_s = result[2, :]
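
For context, calc_velsin above fits cum(t) = vconst + vel*t + coef_s*sin(2*pi*t) + coef_c*cos(2*pi*t), with t = dt_cum in years, so the sin/cos pair captures an annual cycle. A small self-contained sketch of the same fit on synthetic data (all names and values illustrative):

    import numpy as np

    dt_cum = np.arange(0, 3, 12/365.25)   # image epochs in years, ~12-day spacing
    rng = np.random.default_rng(1)
    cum = (2.0 + 30.0*dt_cum + 5.0*np.sin(2*np.pi*dt_cum)
           + rng.normal(0, 1, dt_cum.size))   # synthetic displacement (mm)

    G = np.stack((np.ones_like(dt_cum), dt_cum,
                  np.sin(2*np.pi*dt_cum), np.cos(2*np.pi*dt_cum)), axis=1)
    vconst, vel, coef_s, coef_c = np.linalg.lstsq(G, cum, rcond=None)[0]
    amp = np.sqrt(coef_s**2 + coef_c**2)  # amplitude of the annual term
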
@@ -362,21 +375,21 @@ def calc_velstd_withnan(cum, dt_cum):
     global bootcount, bootnum
     n_pt, n_im = cum.shape
     bootnum = 100
-    bootcount = 0
+    bootcount = 0
     vstd = np.zeros((n_pt), dtype=np.float32)
     G = np.stack((np.ones_like(dt_cum), dt_cum), axis=1)
-
+
     data = cum.transpose().copy()
     ixs_day = np.arange(n_im)
     mask = (~np.isnan(data))
     data[np.isnan(data)] = 0
-
+
     velinv = lambda x : censored_lstsq2(G[x, :], data[x, :], mask[x, :])[1]

     with NumpyRNGContext(1):
         bootresult = bootstrap(ixs_day, bootnum, bootfunc=velinv)
-
+
     vstd = np.nanstd(bootresult, axis=0)

     print('')
@@ -401,10 +414,10 @@
         X = np.squeeze(np.linalg.solve(T, rhs)).T # transpose to get r x n
     except: ## In case of singular matrix
         X = np.zeros((B.shape[1]), dtype=np.float32)*np.nan
-
+
     return X
-
+

 #%%
 def calc_stc(cum):
     """
@@ -427,21 +440,21 @@
     _stc = np.ones((length, width, 8), dtype=np.float32)*np.nan
     pixels = [[0, 0], [0, 1], [0, 2], [1, 0], [1, 2], [2, 0], [2, 1], [2, 2]]
     ## Left Top = [0, 0], Right Bottom = [2, 2], Center = [1, 1]
-
+
     for i, pixel in enumerate(pixels):
         ### Spatial difference (surrounding pixel-center)
         d_cum = cum1[:, pixel[0]:length+pixel[0], pixel[1]:width+pixel[1]] - cum1[:, 1:length+1, 1:width+1]
-
+
         ### Temporal difference (double difference)
         dd_cum = d_cum[:-1,:,:]-d_cum[1:,:,:]
-
+
         ### STC (i.e., RMS of DD)
         sumsq_dd_cum = np.nansum(dd_cum**2, axis=0)
         n_dd_cum = np.float32(np.sum(~np.isnan(dd_cum), axis=0)) #number of non-nan
         n_dd_cum[n_dd_cum==0] = np.nan #to avoid division by 0
         _stc[:, :, i] = np.sqrt(sumsq_dd_cum/n_dd_cum)

-    ### Strange but some adjacent pixels can have identical time series,
+    ### Strange but some adjacent pixels can have identical time series,
     ### resulting in 0 of stc. To avoid this, replace 0 with nan.
     _stc[_stc==0] = np.nan
@@ -516,6 +529,6 @@ def censored_lstsq_slow(A, B, M):
             X[:,i] = np.linalg.lstsq(A[m], B[m,i], rcond=None)[0]
         except:
             X[:,i] = np.nan
-
+
     print('')
     return X
diff --git a/batch_LiCSBAS.sh b/batch_LiCSBAS.sh
index c441aa8..34235cd 100755
--- a/batch_LiCSBAS.sh
+++ b/batch_LiCSBAS.sh
@@ -41,6 +41,7 @@
 p05_clip_range_geo=""	# e.g. 130.11/131.12/34.34/34.6 (in deg)
 p11_unw_thre=""	# default: 0.3
 p11_coh_thre=""	# default: 0.05
 p12_loop_thre=""	# default: 1.5 rad
+p13_gpu="n"	# y/n. default: n
 p15_coh_thre=""	# default: 0.05
 p15_n_unw_r_thre=""	# default: 1.5
 p15_vstd_thre=""	# default: 100 mm/yr
@@ -285,6 +286,7 @@ if [ $start_step -le 13 -a $end_step -ge 13 ];then
 	if [ ! -z $p13_n_para ];then p13_op="$p13_op --n_para $p13_n_para"; fi
 	if [ ! -z $p13_n_unw_r_thre ];then p13_op="$p13_op --n_unw_r_thre $p13_n_unw_r_thre"; fi
 	if [ $p13_keep_incfile == "y" ];then p13_op="$p13_op --keep_incfile"; fi
+	if [ $p13_gpu == "y" ];then p13_op="$p13_op --gpu"; fi

 	if [ $check_only == "y" ];then
 		echo "LiCSBAS13_sb_inv.py $p13_op"
diff --git a/bin/LiCSBAS13_sb_inv.py b/bin/LiCSBAS13_sb_inv.py
index 4221731..576d35d 100755
--- a/bin/LiCSBAS13_sb_inv.py
+++ b/bin/LiCSBAS13_sb_inv.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-v1.4.8 20210127 Yu Morishita, GSI
+v1.5 20210305 Yu Morishita, GSI

 This script inverts the SB network of unw to obtain the time series and
 velocity using the NSBAS (López-Quiroz et al., 2009; Doin et al., 2011) approach.
@@ -47,7 +47,7 @@
 =====
 Usage
 =====
-LiCSBAS13_sb_inv.py -d ifgdir [-t tsadir] [--inv_alg LS|WLS] [--mem_size float] [--gamma float] [--n_para int] [--n_unw_r_thre float] [--keep_incfile]
+LiCSBAS13_sb_inv.py -d ifgdir [-t tsadir] [--inv_alg LS|WLS] [--mem_size float] [--gamma float] [--n_para int] [--n_unw_r_thre float] [--keep_incfile] [--gpu]

  -d  Path to the GEOCml* dir containing stack of unw data
  -t  Path to the output TS_GEOCml* dir.
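
The bootstrap in calc_velstd_withnan above relies on censored_lstsq2, which solves one masked normal equation per point in a single batched call rather than looping as censored_lstsq_slow does. A minimal sketch of the same broadcasting trick (function name illustrative; A is the (m, r) design matrix, B the (m, n) data with zeros at gaps, M the (m, n) boolean mask):

    import numpy as np

    def censored_lstsq_batched(A, B, M):
        """Solve min ||A x - b_i|| for every column b_i of B, honoring mask M."""
        # Per-column normal matrices: T[i] = A.T @ diag(M[:, i]) @ A -> (n, r, r)
        T = np.matmul(A.T[None, :, :], M.T[:, :, None] * A[None, :, :])
        # Per-column right-hand sides: rhs[i] = A.T @ (M[:, i]*B[:, i]) -> (n, r, 1)
        rhs = (A.T @ (M * B)).T[:, :, None]
        return np.linalg.solve(T, rhs)[:, :, 0].T  # (r, n)

Unlike the library version, this sketch does not catch singular systems, which censored_lstsq2 maps to nan.
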
@@ -65,10 +65,14 @@
      (Default: 1 and 0.5 for C- and L-band, respectively)
  --keep_incfile  Do not remove inc and resid files (Default: remove them)
+ --gpu           Use GPU (requires the cupy module)

 """
 #%% Change log
 '''
+v1.5 20210305 Yu Morishita, GSI
+ - Add GPU option
+ - Speed up by activating n_para_inv and setting OMP_NUM_THREADS=1
 v1.4.8 20210127 Yu Morishita, GSI
  - Automatically reduce mem_size if available RAM is small
 v1.4.7 20201124 Yu Morishita, GSI
@@ -136,7 +140,7 @@ def main(argv=None):
         argv = sys.argv

     start = time.time()
-    ver="1.4.8"; date=20210127; author="Y. Morishita"
+    ver="1.5"; date=20210305; author="Y. Morishita"
     print("\n{} ver{} {} {}".format(os.path.basename(argv[0]), ver, date, author), flush=True)
     print("{} {}".format(os.path.basename(argv[0]), ' '.join(argv[1:])), flush=True)
@@ -150,12 +154,16 @@
     ifgdir = []
     tsadir = []
     inv_alg = 'LS'
+    gpu = False

     try:
         n_para = len(os.sched_getaffinity(0))
     except:
         n_para = multi.cpu_count()

-    n_para_inv = 1
+
+    os.environ["OMP_NUM_THREADS"] = "1"
+    # np.linalg.lstsq uses all CPU cores but is not much faster than with one;
+    # parallelize with multiprocessing instead.

     memory_size = 4000
     gamma = 0.0001
@@ -173,7 +181,10 @@
     #%% Read options
     try:
         try:
-            opts, args = getopt.getopt(argv[1:], "hd:t:", ["help", "mem_size=", "gamma=", "n_unw_r_thre=", "keep_incfile", "inv_alg=", "n_para="])
+            opts, args = getopt.getopt(argv[1:], "hd:t:",
+                                       ["help", "mem_size=", "gamma=",
+                                        "n_unw_r_thre=", "keep_incfile",
+                                        "inv_alg=", "n_para=", "gpu"])
         except getopt.error as msg:
             raise Usage(msg)
         for o, a in opts:
@@ -196,6 +207,8 @@
                 inv_alg = a
             elif o == '--n_para':
                 n_para = int(a)
+            elif o == '--gpu':
+                gpu = True

         if not ifgdir:
             raise Usage('No data directory given, -d is not optional!')
         elif not os.path.exists(ifgdir):
             raise Usage('No {} dir exists!'.format(ifgdir))
         elif not os.path.exists(os.path.join(ifgdir, 'slc.mli.par')):
             raise Usage('No slc.mli.par file exists in {}!'.format(ifgdir))
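
The "import cupy as cp" just below fails with a bare ImportError when cupy is missing; a slightly more defensive variant of the same optional-dependency check could read as follows (a sketch only, not the script's actual behavior):

    def import_cupy_or_exit():
        """Return the cupy module, exiting with a clear message if unavailable."""
        try:
            import cupy as cp
            return cp
        except ImportError as e:
            raise SystemExit('--gpu was given but the cupy module '
                             'is not installed') from e
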
+        if gpu:
+            print("\nGPU option is activated. The cupy module is required.\n")
+            import cupy as cp

     except Usage as err:
         print("\nERROR:", file=sys.stderr, end='')
@@ -241,6 +257,12 @@

     cumh5file = os.path.join(tsadir,'cum.h5')

+    if n_para > 32:
+        # Empirically, >32 cores gives little extra speedup despite using more resources
+        n_para_inv = 32
+    else:
+        n_para_inv = n_para
+

     #%% Check files
     try:
@@ -531,27 +553,74 @@
         gap_patch = np.zeros((n_im-1, n_pt_all), dtype=np.int8)
         ns_ifg_noloop_patch = np.zeros((n_pt_all), dtype=np.float32)*np.nan
         maxTlen_patch = np.zeros((n_pt_all), dtype=np.float32)*np.nan
-
-        ### Determine n_para
-        n_pt_patch_min = 1000
-        if n_pt_patch_min*n_para > n_pt_unnan:
-            ## Too much n_para
-            n_para_gap = int(np.floor(n_pt_unnan/n_pt_patch_min))
-            if n_para_gap == 0: n_para_gap = 1
-        else:
-            n_para_gap = n_para

         print('\n  Identifying gaps, and counting n_gap and n_ifg_noloop,')
-        print('  with {} parallel processing...'.format(n_para_gap), flush=True)

-        ### Divide unwpatch by n_para for parallel processing
-        p = q.Pool(n_para_gap)
-        _result = np.array(p.map(count_gaps_wrapper, range(n_para_gap)), dtype=object)
-        p.close()

+        if gpu:
+            print('  using GPU...', flush=True)
+            n_loop, _ = Aloop.shape
+            unwpatch_cp = cp.asarray(unwpatch)
+            G_cp = cp.asarray(G)
+            Aloop_cp = cp.asarray(Aloop)
+
+            ns_unw_unnan4inc = cp.array(
+                [(G_cp[:, i]*(~cp.isnan(unwpatch_cp))).sum(
+                    axis=1, dtype=cp.int16) for i in range(n_im-1)])
+            # n_ifg*(n_pt,n_ifg) -> (n_im-1,n_pt)
+            ns_gap_patch[ix_unnan_pt] = cp.asnumpy(
+                (ns_unw_unnan4inc==0).sum(axis=0)) #n_pt
+            gap_patch[:, ix_unnan_pt] = cp.asnumpy(ns_unw_unnan4inc==0)
+
+            del ns_unw_unnan4inc
+            del G_cp
+
+            ### n_ifg_noloop
+            # n_ifg*(n_pt,n_ifg)->(n_loop,n_pt)
+            # Number of ifgs for each loop at each point.
+            # 3 means complete loop, 1 or 2 means broken loop.
+            ns_ifg4loop = cp.array([
+                (cp.abs(Aloop_cp[i, :])*(~cp.isnan(unwpatch_cp))).sum(axis=1)
+                for i in range(n_loop)])
+            bool_loop = (ns_ifg4loop==3)
+            #(n_loop,n_pt) identify complete loops only
+
+            # n_loop*(n_loop,n_pt)*n_pt->(n_ifg,n_pt)
+            # Number of loops for each ifg at each point.
+            ns_loop4ifg = cp.array([(
+                (cp.abs(Aloop_cp[:, i])*bool_loop.T).T*
+                (~cp.isnan(unwpatch_cp[:, i]))
+                ).sum(axis=0) for i in range(n_ifg)])
+
+            ns_ifg_noloop_tmp = (ns_loop4ifg==0).sum(axis=0) #n_pt
+            ns_nan_ifg = cp.isnan(unwpatch_cp).sum(axis=1) #n_pt, nan ifg count
+            ns_ifg_noloop_patch[ix_unnan_pt] = cp.asnumpy(
+                ns_ifg_noloop_tmp - ns_nan_ifg)
+
+            del bool_loop, ns_ifg4loop, ns_loop4ifg
+            del ns_ifg_noloop_tmp, ns_nan_ifg
+            del unwpatch_cp, Aloop_cp

-        ns_gap_patch[ix_unnan_pt] = np.hstack(_result[:, 0]) #n_pt
-        gap_patch[:, ix_unnan_pt] = np.hstack(_result[:, 1]) #n_im-1, n_pt
-        ns_ifg_noloop_patch[ix_unnan_pt] = np.hstack(_result[:, 2])
+        else:
+            ### Determine n_para
+            n_pt_patch_min = 1000
+            if n_pt_patch_min*n_para > n_pt_unnan:
+                ## Too much n_para
+                n_para_gap = int(np.floor(n_pt_unnan/n_pt_patch_min))
+                if n_para_gap == 0: n_para_gap = 1
+            else:
+                n_para_gap = n_para
+
+            print('  with {} parallel processing...'.format(n_para_gap),
+                  flush=True)
+
+            ### Divide unwpatch by n_para for parallel processing
+            p = q.Pool(n_para_gap)
+            _result = np.array(p.map(count_gaps_wrapper, range(n_para_gap)),
+                               dtype=object)
+            p.close()
+
+            ns_gap_patch[ix_unnan_pt] = np.hstack(_result[:, 0]) #n_pt
+            gap_patch[:, ix_unnan_pt] = np.hstack(_result[:, 1]) #n_im-1, n_pt
+            ns_ifg_noloop_patch[ix_unnan_pt] = np.hstack(_result[:, 2])

         ### maxTlen
         _maxTlen = np.zeros((n_pt_unnan), dtype=np.float32) #temporary
@@ -566,9 +635,11 @@

     #%% Time series inversion
     print('\n  Small Baseline inversion by {}...\n'.format(inv_alg), flush=True)
     if inv_alg == 'WLS':
-        inc_tmp, vel_tmp, vconst_tmp = inv_lib.invert_nsbas_wls(unwpatch, varpatch, G, dt_cum, gamma, n_para_inv)
+        inc_tmp, vel_tmp, vconst_tmp = inv_lib.invert_nsbas_wls(
+            unwpatch, varpatch, G, dt_cum, gamma, n_para_inv)
     else:
-        inc_tmp, vel_tmp, vconst_tmp = inv_lib.invert_nsbas(unwpatch, G, dt_cum, gamma, n_para_inv)
+        inc_tmp, vel_tmp, vconst_tmp = inv_lib.invert_nsbas(
+            unwpatch, G, dt_cum, gamma, n_para_inv, gpu)

     ### Set to variables
     inc_patch = np.zeros((n_im-1, n_pt_all), dtype=np.float32)*np.nan
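
For intuition on the gap counting above: an increment i -> i+1 is a gap at a point when no non-nan interferogram spans it, i.e. when column i of the design matrix G meets only nan observations. A CPU-only sketch of the same reduction on synthetic shapes (the random G stands in for the real incidence matrix from make_sb_matrix):

    import numpy as np

    rng = np.random.default_rng(0)
    n_ifg, n_im, n_pt = 6, 5, 4
    G = (rng.random((n_ifg, n_im-1)) > 0.5).astype(np.int8)  # stand-in SB matrix
    unw = rng.random((n_pt, n_ifg)).astype(np.float32)
    unw[rng.random((n_pt, n_ifg)) < 0.3] = np.nan            # knock out some data

    # (n_im-1, n_pt): non-nan ifgs covering each increment at each point
    ns_unw_unnan4inc = np.array(
        [(G[:, i]*(~np.isnan(unw))).sum(axis=1) for i in range(n_im-1)])
    gap = (ns_unw_unnan4inc == 0)  # True where the SB network is disconnected
    n_gap = gap.sum(axis=0)        # number of gaps per point

The cupy version in the diff is line-for-line the same, with np replaced by cp and the results copied back to host arrays with cp.asnumpy.
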