From db82aa8450c4fa5f7b830dee425420adec73961e Mon Sep 17 00:00:00 2001
From: Carlos Paniagua
Date: Mon, 4 Mar 2024 14:02:43 -0500
Subject: [PATCH] chore: delete data_prehandling/

Closes #154
---
 data_prehandling/A01_download_tracks.py     |  51 --------
 .../S01_derive_tracklist_from_file.py       | 105 -----------------
 .../S02_download_gloabl_ocean_CMEMS_L3.py   |  59 ----------
 data_prehandling/S03_create_ALT07_batch.py  | 109 ------------------
 data_prehandling/S03_create_ALT10_batch.py  |  89 --------------
 .../S03_download_WW3_hindcast_GLOBAL-30M.py |  91 ---------------
 6 files changed, 504 deletions(-)
 delete mode 100644 data_prehandling/A01_download_tracks.py
 delete mode 100644 data_prehandling/S01_derive_tracklist_from_file.py
 delete mode 100644 data_prehandling/S02_download_gloabl_ocean_CMEMS_L3.py
 delete mode 100644 data_prehandling/S03_create_ALT07_batch.py
 delete mode 100644 data_prehandling/S03_create_ALT10_batch.py
 delete mode 100644 data_prehandling/S03_download_WW3_hindcast_GLOBAL-30M.py

diff --git a/data_prehandling/A01_download_tracks.py b/data_prehandling/A01_download_tracks.py
deleted file mode 100644
index dca2003f..00000000
--- a/data_prehandling/A01_download_tracks.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import os, sys
-#execfile(os.environ['PYTHONSTARTUP'])
-
-"""
-This file opens an ICESat-2 track, applies filters and corrections, and returns smoothed photon heights on a regular grid in an .nc file.
-This is Python 3.
-"""
-
-exec(open(os.environ['PYTHONSTARTUP']).read())
-exec(open(STARTUP_2021_IceSAT2).read())
-
-#%matplotlib inline
-#from pprint import pprint
-
-import icepyx as ipx
-
-# %%
-download_path = mconfig['paths']['scratch'] + '/SH_batch01/'
-
-MT.mkdirs_r(download_path)
-
-# %%
-date_range = ['2019-06-01', '2019-06-05']
-region_a = ipx.Query('ATL03', [30, -70, -30, -55], date_range, \
-    start_time='09:00:00', end_time='11:59:59')
-
-region_a.earthdata_login('mhell', 'mhell@ucsd.edu')
-# @[49[4tK\-qBWB%5
-
-# %%
-#region_a.visualize_spatial_extent()
-#region_a.order_vars.remove(all=True)
-
-ATL03_var_list = ['dem_h', 'delta_time', 'lon_ph', 'lat_ph', 'h_ph', 'dist_ph_along', 'dist_ph_across', 'atlas_sdp_gps_epoch', 'signal_conf_ph', 'reference_photon_index', 'segment_dist_x', 'ph_index_beg', 'ph_id_count', 'segment_id']
-#ATL03_var_list = ['dem_h', 'delta_time', 'lon_ph', 'lat_ph', 'h_ph', 'dist_ph_along', 'atlas_sdp_gps_epoch', 'signal_conf_ph']
-
-region_a.order_vars.append(var_list=ATL03_var_list)#, keyword_list=['orbit_info'])
-region_a.order_vars.append(keyword_list=['orbit_info'])
-region_a.order_vars.wanted
-
-region_a.subsetparams(Coverage=region_a.order_vars.wanted)
-#region_a.tracks
-#region_a.file_vars
-
-print('check how many granules are available')
-download_stats = region_a.avail_granules()
-print(download_stats)
-# %%
-
-print('download ' + str(download_stats['Number of available granules']) + ' granules')
-region_a.download_granules(download_path)
diff --git a/data_prehandling/S01_derive_tracklist_from_file.py b/data_prehandling/S01_derive_tracklist_from_file.py
deleted file mode 100644
index 61eddade..00000000
--- a/data_prehandling/S01_derive_tracklist_from_file.py
+++ /dev/null
@@ -1,105 +0,0 @@
-import os, sys
-#execfile(os.environ['PYTHONSTARTUP'])
-
-"""
-This script takes in a list of ATL07 files and derives the matching list of available ATL03 granules.
-"""
-
-exec(open(os.environ['PYTHONSTARTUP']).read())
-exec(open(STARTUP_2021_IceSAT2).read())
-
-base_path = '/Users/Shared/Projects/2021_IceSAT2_tracks/'
-sys.path.append(base_path + 'modules/')
-sys.path.append(base_path + 'modules/ICEsat2_SI_tools/')
-
-#import m_general as M
-#import m_tools as MT
-import numpy as np
-import os
-import pandas as pd
-
-import imp
-import icepyx as ipx
-
-# %%
-path = mconfig['paths']['analysis'] + '../track_lists/'
-
-# batch = 'Batch02_alex'
-# with open(path + 'alex_ATL07_filelist.txt', 'r') as f:
-#     contents = f.readlines()
-
-batch = 'batch03'
-with open(path + 'batch03_ATL07_filelist.txt', 'r') as f:
-    contents = f.readlines()
-
-h5_files = list()
-for l in contents:
-    if '.h5' in l:
-        h5_files.append(l)
-
-file_instances = list()
-for h in h5_files:
-    #h.split('.')[0].split('_')
-    file_instances.append(h.split('.')[0].split('_')[1:4])
-
-
-MT.json_save(batch + '_tracks_components', path, file_instances)
-
-#file_instances
-## make dataframe and derive the IDs needed to compare the data:
-D = pd.DataFrame(file_instances)
-
-def str2dt64(s):
-    return np.datetime64(s[0:4] + '-' + s[4:6] + '-' + s[6:8])
-
-D['date'] = D[0].apply(lambda row: str2dt64(row[0:8]))
-
-dmin, dmax = D['date'].min(), D['date'].max() # needed for icepyx modules
-
-D['RGT'] = D[1].apply(lambda row: row[0:4])
-D['cycle'] = D[1].apply(lambda row: row[4:6])
-D['segment'] = D[1].apply(lambda row: row[6:8])
-#D['segment'].hist()
-
-D['id'] = D[0] + '_' + D[1]
-#D['id_compare'] = D[0]+'_'+
-D['id_compare'] = D['RGT'] + D['cycle']
-
-# len(D['id_compare'])
-# len(set(D['id_compare']))
-
-# %%
-dx = 100
-all_wanted_tracks = list()
-for x in np.arange(0, int(len(D)), dx):
-    Dsub = D[x:x + dx]
-
-    print('set ', x)
-    # % login to Earthdata ..
-
-    date_range = [str(dmin).split(' ')[0], str(dmax).split(' ')[0]]
-    region_a = ipx.Query('ATL03', [180, -70, -180, -55], date_range, \
-        start_time='00:00:00', end_time='23:59:59', \
-        tracks=list(Dsub['RGT']))
-
-    region_a.earthdata_login('mhell', 'mhell@ucsd.edu')
-    # pw
-    # @[49[4tK\-qBWB%5
-
-    # % request available granules in region and time frame
-    region_a.avail_granules()
-    region_a.avail_granules(ids=True)
-
-    # % compare available IDs with the wanted IDs
-    gran_list = [i['producer_granule_id'] for i in region_a.granules.avail]
-    sub_set = list()
-    for id_wanted in Dsub['id_compare']:
-        sub_set.append([i for i in gran_list if id_wanted in i])
-
-    all_possible_tracks = [item for sublist in sub_set for item in sublist]
-    print(len(all_possible_tracks), ' matching granules found')
-
-    all_wanted_tracks.extend(all_possible_tracks)
-
-# %% save clean file list
-MT.json_save(batch + '_ATL03_A00', path, all_wanted_tracks)
diff --git a/data_prehandling/S02_download_gloabl_ocean_CMEMS_L3.py b/data_prehandling/S02_download_gloabl_ocean_CMEMS_L3.py
deleted file mode 100644
index d7754081..00000000
--- a/data_prehandling/S02_download_gloabl_ocean_CMEMS_L3.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import os, sys
-#execfile(os.environ['PYTHONSTARTUP'])
-
-"""
-This file downloads all L3 data from
-https://resources.marine.copernicus.eu/product-detail/WAVE_GLO_WAV_L3_SPC_NRT_OBSERVATIONS_014_002/INFORMATION
-"""
-
-exec(open(os.environ['PYTHONSTARTUP']).read())
-exec(open(STARTUP_2021_IceSAT2).read())
-
-#%matplotlib inline
-
-import imp
-import subprocess
-
-save_path = mconfig['paths']['work'] + '/CMEMS_WAVE_GLO_L3/'
-
-# print(time_range)
-# # create timestamps according to the files on the ftp server:
-# time_stamps_ftp = np.arange(time_range[0].astype('datetime64[3h]'), time_range[1].astype('datetime64[3h]') + np.timedelta64(3, 'h'), np.timedelta64(3, 'h'))
-# time_stamps_ftp_str = [str(t).replace('-', '') for t in time_stamps_ftp]
-# #plt.plot(G1['lons'], G1['lats'], '.')
-
-
-username = 'mhell'
-pw = 'BkUexT#72'
-
-# NRT product
-# paths look like this:
-#ftp://nrt.cmems-du.eu/Core/WAVE_GLO_WAV_L3_SPC_NRT_OBSERVATIONS_014_002/dataset-wav-sar-l3-spc-nrt-global-s1a/2020/03/dataset-wav-sar-l3-spc-nrt-global-s1a_20200302T000000Z_20200302T030000Z_P20200323T0646Z-3H-rep.nc
-
-subpath = 'WAVE_GLO_WAV_L3_SPC_NRT_OBSERVATIONS_014_002'
-path = 'ftp://nrt.cmems-du.eu/Core/' + subpath + '/' #dataset-wav-sar-l3-spc-rep-global-'+sat+'/'+y+'/'+m+'/
-file_card = 'dataset-wav-sar-l3-spc-nrt-global-*-3H-rep.nc'
-
-# REP product
-# paths look like this:
-#ftp://my.cmems-du.eu/Core/WAVE_GLO_PHY_SPC_L3_MY_014_006/dataset-wav-sar-l3-spc-rep-global-s1a/2019/07/dataset-wav-sar-l3-spc-rep-global-s1a_20190702T120000Z_20190702T150000Z_P20210619T1207Z-3H-rep.nc
-
-# subpath = 'WAVE_GLO_WAV_L3_SPC_REP_OBSERVATIONS_014_002'
-# path = 'ftp://my.cmems-du.eu/Core/' + subpath + '/'
-# file_card = 'dataset-wav-sar-l3-spc-rep-global-*-3H-rep.nc'
-
-wget_str = ['wget', '-r', path, '--ftp-user=' + username, '--ftp-password=' + pw, '--no-parent', '-A', file_card, '-nd', '-c']
-wget_str.append('-P')
-wget_str.append(save_path + '/' + subpath)
-
-
-print(wget_str)
-print('save to ' + save_path + '/' + subpath)
-
-list_files = subprocess.run(' '.join(wget_str), shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-
-#list_files.stdout
-#print(list_files.stderr)
-flist_parameters = list()
-#list_files.check_returncode()
-print('download success:', list_files.returncode == 0)
diff --git a/data_prehandling/S03_create_ALT07_batch.py b/data_prehandling/S03_create_ALT07_batch.py
deleted file mode 100644
index fbf4b2fe..00000000
--- a/data_prehandling/S03_create_ALT07_batch.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import os, sys
-#execfile(os.environ['PYTHONSTARTUP'])
-
-"""
-This is a test file, playing with the Earthdata login and icepyx.
-
-"""
-
-exec(open(os.environ['PYTHONSTARTUP']).read())
-exec(open(STARTUP_2021_IceSAT2).read())
-
-#%matplotlib inline
-#from pprint import pprint
-
-import icepyx as ipx
-import m_tools_ph3 as MT
-
-
-
-# %%
-#download_path = mconfig['paths']['scratch'] + '/SH_batch02/'
-path = mconfig['paths']['analysis'] + '../track_lists/'
-#MT.mkdirs_r(download_path)
-
-
-# batch = 'Batch02_alex'
-# with open(path + 'ALT10_tracks_complete.txt', 'r') as f:
-#     h5_files = f.readlines()
-
-with open(path + 'batch05_NA_ATL10.txt', 'r') as f:
-    h5_files = f.readlines()
-
-print('total number of tracks:', len(h5_files))
-
-all_file_names = list()
-for h in h5_files:
-    all_file_names.append(h.split('/')[-1].split('.')[0])
-len(all_file_names)
-#MT.json_save(batch+'_tracks_components', path, file_instances)
-
-all_file_names_split = list()
-for h in all_file_names:
-    all_file_names_split.append(h.split('_'))
-
-
-#flist = MT.json_load('Batch02_alex_tracks', path)
-
-D = pd.DataFrame(all_file_names_split, index=all_file_names, columns=['ALT', 'datestr', 'ttttccss', 'version', 'revision'])
-
-s = D.iloc[0]['datestr']
-s
-def str2dt64(s):
-    return np.datetime64(s[0:4] + '-' + s[4:6] + '-' + s[6:8] + 'T' + s[8:10] + ':' + s[10:12] + ':' + s[12:14])
-
-D['date'] = D['datestr'].apply(lambda row: str2dt64(row))
-
-dmin, dmax = D['date'].min(), D['date'].max()
-dmin, dmax
-
-D['RGT'] = D['ttttccss'].apply(lambda row: row[0:4])
-D['cycle'] = D['ttttccss'].apply(lambda row: row[4:6])
-D['segment'] = D['ttttccss'].apply(lambda row: int(row[6:8]))
-D['hemis'] = D['ALT'].apply(lambda row: 'NH' if row[6:] == '01' else 'SH')
-
-# make ALT07 tracks
-
-D['ALT'] = [i[0:3] + '07' + i[5:] for i in D['ALT']]
-D['revision'] = '01'
-# redefine index:
-
-D.index = D.T.apply(lambda row: '_'.join(row[['ALT', 'datestr', 'ttttccss', 'version', 'revision']]))
-
-
-
-#D['segment'].hist()
-
-# D['id'] = D[0]+'_'+D[1]
-# #D['id_compare'] = D[0]+'_'+
-# D['id_compare'] = D['RGT']+D['cycle']
-
-D['date'].min()
-D['date'].max()
-
-# %% select wanted date range
-# batch = 'batch04'
-# dmin, dmax = np.datetime64('2019-01-01'), np.datetime64('2019-01-30')
-# hemis = 'SH'
-
-# batch = 'batch04_test'
-# dmin, dmax = np.datetime64('2019-01-01'), np.datetime64('2019-01-03')
-# hemis = 'SH'
-#
-
-batch = 'batch05_test'
-dmin, dmax = np.datetime64('2019-03-01'), np.datetime64('2019-03-01T12:00:00')
-hemis = 'NH'
-
-
-Dsel = D[(D['date'] >= dmin) & (D['date'] <= dmax) & (D['hemis'] == hemis)]
-len(Dsel)
-
-# Dsel = D[(D['date'] > dmin) & (D['date'] < dmax) & (D['hemis'] == 'NH')]
-# len(Dsel)
-Dsel = D
-batch = 'batch05'
-
-
-MT.json_save(batch + '_ATL07_A00', path, list(Dsel.index))
-MT.save_pandas_table(Dsel, batch + '_ATL07_A00', path)
diff --git a/data_prehandling/S03_create_ALT10_batch.py b/data_prehandling/S03_create_ALT10_batch.py
deleted file mode 100644
index 02e00536..00000000
--- a/data_prehandling/S03_create_ALT10_batch.py
+++ /dev/null
@@ -1,89 +0,0 @@
-import os, sys
-#execfile(os.environ['PYTHONSTARTUP'])
-
-"""
-This is a test file, playing with the Earthdata login and icepyx.
-
-"""
-
-exec(open(os.environ['PYTHONSTARTUP']).read())
-exec(open(STARTUP_2021_IceSAT2).read())
-
-#%matplotlib inline
-#from pprint import pprint
-
-import icepyx as ipx
-import m_tools_ph3 as MT
-
-
-
-# %%
-#download_path = mconfig['paths']['scratch'] + '/SH_batch02/'
-path = mconfig['paths']['analysis'] + '../track_lists/'
-#MT.mkdirs_r(download_path)
-
-
-# batch = 'Batch02_alex'
-with open(path + 'ALT10_tracks_complete.txt', 'r') as f:
-    h5_files = f.readlines()
-
-print('total number of tracks:', len(h5_files))
-
-all_file_names = list()
-for h in h5_files:
-    all_file_names.append(h.split('/')[-1].split('.')[0])
-len(all_file_names)
-#MT.json_save(batch+'_tracks_components', path, file_instances)
-
-all_file_names_split = list()
-for h in all_file_names:
-    all_file_names_split.append(h.split('_'))
-
-
-#flist = MT.json_load('Batch02_alex_tracks', path)
-
-D = pd.DataFrame(all_file_names_split, index=all_file_names, columns=['ALT', 'datestr', 'ttttccss', 'version', 'revision'])
-
-s = D.iloc[0]['datestr']
-s
-def str2dt64(s):
-    return np.datetime64(s[0:4] + '-' + s[4:6] + '-' + s[6:8] + 'T' + s[8:10] + ':' + s[10:12] + ':' + s[12:14])
-
-D['date'] = D['datestr'].apply(lambda row: str2dt64(row))
-
-dmin, dmax = D['date'].min(), D['date'].max()
-dmin, dmax
-
-D['RGT'] = D['ttttccss'].apply(lambda row: row[0:4])
-D['cycle'] = D['ttttccss'].apply(lambda row: row[4:6])
-D['segment'] = D['ttttccss'].apply(lambda row: int(row[6:8]))
-D['hemis'] = D['ALT'].apply(lambda row: 'NH' if row[6:] == '01' else 'SH')
-
-#D['segment'].hist()
-
-# D['id'] = D[0]+'_'+D[1]
-# #D['id_compare'] = D[0]+'_'+
-# D['id_compare'] = D['RGT']+D['cycle']
-
-D['date'].min()
-D['date'].max()
-
-# %% select wanted date range
-# batch = 'batch04'
-# dmin, dmax = np.datetime64('2019-01-01'), np.datetime64('2019-01-30')
-# hemis = 'SH'
-
-batch = 'batch04_test'
-dmin, dmax = np.datetime64('2019-01-01'), np.datetime64('2019-01-03')
-hemis = 'SH'
-
-Dsel = D[(D['date'] > dmin) & (D['date'] < dmax) & (D['hemis'] == hemis)]
-len(Dsel)
-
-# Dsel = D[(D['date'] > dmin) & (D['date'] < dmax) & (D['hemis'] == 'NH')]
-# len(Dsel)
-
-
-
-MT.json_save(batch + '_ATL10_A00', path, list(Dsel.index))
-MT.save_pandas_table(Dsel, batch + '_ATL10_A00', path)
diff --git a/data_prehandling/S03_download_WW3_hindcast_GLOBAL-30M.py b/data_prehandling/S03_download_WW3_hindcast_GLOBAL-30M.py
deleted file mode 100644
index e803eb38..00000000
--- a/data_prehandling/S03_download_WW3_hindcast_GLOBAL-30M.py
+++ /dev/null
@@ -1,91 +0,0 @@
-import os, sys
-#execfile(os.environ['PYTHONSTARTUP'])
-
-"""
-This file downloads all WW3 hindcast field data from
-#ftp://ftp.ifremer.fr/ifremer/ww3/HINDCAST/GLOBMULTI_ERA5_GLOBCUR_01/GLOB-30M/2019/FIELD_NC/
-"""
-
-exec(open(os.environ['PYTHONSTARTUP']).read())
-exec(open(STARTUP_2021_IceSAT2).read())
-
-#%matplotlib inline
-
-import imp
-import subprocess
-import glob
-import time
-
-save_path = mconfig['paths']['scratch']# + 'GLOBMULTI_ERA5_GLOBCUR_01/'
-save_path2 = mconfig['paths']['work']
-#ftp://ftp.ifremer.fr/ifremer/ww3/HINDCAST/GLOBMULTI_ERA5_GLOBCUR_01/GLOB-30M/2019/FIELD_NC/LOPS_WW3-GLOB-30M_201905.nc
-
-lat_lim = 50 # deg north or south
-var_list = ['dir', 'dp', 'fp', 'hs', 'ice', 'lm', 'spr', 't01', 't02', 't0m1', 'tws',
-'pdp0', 'pdp1', 'pdp2', 'pdp3', 'pdp4', 'pdp5',
-'pspr0', 'pspr1', 'pspr2', 'pspr3', 'pspr4', 'pspr5',
-'ptp0', 'ptp1', 'ptp2', 'ptp3', 'ptp4', 'ptp5',
-'phs0', 'phs1', 'phs2', 'phs3', 'phs4', 'phs5']
-
-
-flist_parameters = list()
-subpath = 'GLOBMULTI_ERA5_GLOBCUR_01/'
-try:
-    os.mkdir(save_path2 + '/' + subpath)
-except FileExistsError:
-    pass
-
-#year = 2018
-for year in np.arange(2018, 2022):
-
-    year_str = str(year)
-    print('-----' + year_str)
-    path = 'ftp://ftp.ifremer.fr/ifremer/ww3/HINDCAST/' + subpath + '/GLOB-30M/' + year_str + '/FIELD_NC/' #dataset-wav-sar-l3-spc-rep-global-'+sat+'/'+y+'/'+m+'/
-    file_card = 'LOPS_WW3-GLOB-30M_' + year_str + '*.nc'
-
-    wget_str = ['wget', '-r', path, '--no-parent', '-A', file_card, '-nd', '-c']
-    wget_str.append('-P')
-    wget_str.append(save_path + '/' + subpath)
-
-    print(' '.join(wget_str))
-    print('save to ' + save_path + '/' + subpath)
-
-    # list_files = subprocess.run(' '.join(wget_str), shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    #
-    # print('download success:', list_files.returncode == 0)
-    # if list_files.returncode == 0:
-    #     flist_parameters.append(list_files)
-
-    year_file_list = glob.glob(save_path + subpath + '*' + file_card)
-    year_file_list2 = list()
-    for f in year_file_list:
-        if '_p2l.nc' in f:
-            #os.remove(f)
-            print(f)
-        else:
-            year_file_list2.append(f)
-    print('open:')
-    year_file_list2.sort()
-    print(year_file_list2)
-    G_all = xr.open_mfdataset(year_file_list2)
-
-    # NH
-    G2 = G_all[var_list].isel(latitude=G_all.latitude > lat_lim)
-    mm, datasets = zip(*G2.groupby("time.month"))
-    paths = [save_path2 + '/' + subpath + '/LOPS_WW3-GLOB-30M_' + year_str + '_' + str(m).zfill(2) + '_NH_select.nc' for m in mm]
-    xr.save_mfdataset(datasets, paths)
-    #G2.to_netcdf(save_path2 + '/'+ subpath +'/' + save_name )
-
-    # SH
-    G2 = G_all[var_list].isel(latitude=G_all.latitude < -lat_lim)
-    mm, datasets = zip(*G2.groupby("time.month"))
-    paths = [save_path2 + '/' + subpath + '/LOPS_WW3-GLOB-30M_' + year_str + '_' + str(m).zfill(2) + '_SH_select.nc' for m in mm]
-    xr.save_mfdataset(datasets, paths)
-    #G2.to_netcdf(save_path2 + '/'+ subpath +'/' + save_name )
-
-    print('merged and saved needed variables in work directory')
-    #time.sleep(5)
-    #os.remove(year_file_list2)
-
-
-print(flist_parameters)
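
Note: if any of these prehandling scripts are needed again, they can be recovered from history rather than reverting this patch. A minimal sketch, assuming the deletion commit above is reachable on the current branch:

    # restore a single deleted script from the parent of the deletion commit
    git show db82aa8450c4fa5f7b830dee425420adec73961e^:data_prehandling/A01_download_tracks.py > A01_download_tracks.py

    # or restore the whole directory into the working tree
    git checkout db82aa8450c4fa5f7b830dee425420adec73961e^ -- data_prehandling/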