From db82aa8450c4fa5f7b830dee425420adec73961e Mon Sep 17 00:00:00 2001
From: Carlos Paniagua
Date: Mon, 4 Mar 2024 14:02:43 -0500
Subject: [PATCH] chore: delete data_prehandling/

Closes #154
---
 data_prehandling/A01_download_tracks.py     |  51 --------
 .../S01_derive_tracklist_from_file.py       | 105 -----------------
 .../S02_download_gloabl_ocean_CMEMS_L3.py   |  59 ----------
 data_prehandling/S03_create_ALT07_batch.py  | 109 ------------------
 data_prehandling/S03_create_ALT10_batch.py  |  89 --------------
 .../S03_download_WW3_hindcast_GLOBAL-30M.py |  91 ---------------
 6 files changed, 504 deletions(-)
 delete mode 100644 data_prehandling/A01_download_tracks.py
 delete mode 100644 data_prehandling/S01_derive_tracklist_from_file.py
 delete mode 100644 data_prehandling/S02_download_gloabl_ocean_CMEMS_L3.py
 delete mode 100644 data_prehandling/S03_create_ALT07_batch.py
 delete mode 100644 data_prehandling/S03_create_ALT10_batch.py
 delete mode 100644 data_prehandling/S03_download_WW3_hindcast_GLOBAL-30M.py

diff --git a/data_prehandling/A01_download_tracks.py b/data_prehandling/A01_download_tracks.py
deleted file mode 100644
index dca2003f..00000000
--- a/data_prehandling/A01_download_tracks.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import os, sys
-#execfile(os.environ['PYTHONSTARTUP'])
-
-"""
-This file opens an ICESat-2 track, applies filters and corrections, and returns smoothed photon heights on a regular grid in an .nc file.
-This is Python 3.
-"""
-
-exec(open(os.environ['PYTHONSTARTUP']).read())
-exec(open(STARTUP_2021_IceSAT2).read())
-
-#%matplotlib inline
-#from pprint import pprint
-
-import icepyx as ipx
-
-# %%
-download_path = mconfig['paths']['scratch'] + '/SH_batch01/'
-
-MT.mkdirs_r(download_path)
-
-# %%
-date_range = ['2019-06-01', '2019-06-05']
-region_a = ipx.Query('ATL03', [30, -70, -30, -55], date_range, \
-    start_time='09:00:00', end_time='11:59:59')
-
-region_a.earthdata_login('mhell', 'mhell@ucsd.edu')
-# @[49[4tK\-qBWB%5
-
-# %%
-#region_a.visualize_spatial_extent()
-#region_a.order_vars.remove(all=True)
-
-ATL03_var_list = ['dem_h', 'delta_time', 'lon_ph', 'lat_ph', 'h_ph', 'dist_ph_along', 'dist_ph_across', 'atlas_sdp_gps_epoch', 'signal_conf_ph', 'reference_photon_index', 'segment_dist_x', 'ph_index_beg', 'ph_id_count', 'segment_id']
-#ATL03_var_list = ['dem_h', 'delta_time', 'lon_ph', 'lat_ph', 'h_ph', 'dist_ph_along', 'atlas_sdp_gps_epoch', 'signal_conf_ph']
-
-region_a.order_vars.append(var_list=ATL03_var_list)#, keyword_list=['orbit_info'])
-region_a.order_vars.append(keyword_list=['orbit_info'])
-region_a.order_vars.wanted
-
-region_a.subsetparams(Coverage=region_a.order_vars.wanted)
-#region_a.tracks
-#region_a.file_vars
-
-print('check how many granules are available')
-download_stats = region_a.avail_granules()
-print(download_stats)
-# %%
-
-print('download ' + str(download_stats['Number of available granules']) + ' granules')
-region_a.download_granules(download_path)
diff --git a/data_prehandling/S01_derive_tracklist_from_file.py b/data_prehandling/S01_derive_tracklist_from_file.py
deleted file mode 100644
index 61eddade..00000000
--- a/data_prehandling/S01_derive_tracklist_from_file.py
+++ /dev/null
@@ -1,105 +0,0 @@
-import os, sys
-#execfile(os.environ['PYTHONSTARTUP'])
-
-"""
-This script takes in a list of ATL07 files and derives the matching list of available ATL03 granules.
-"""
-
-exec(open(os.environ['PYTHONSTARTUP']).read())
-exec(open(STARTUP_2021_IceSAT2).read())
-
-base_path = '/Users/Shared/Projects/2021_IceSAT2_tracks/'
-sys.path.append(base_path + 'modules/')
-sys.path.append(base_path + 'modules/ICEsat2_SI_tools/')
-
-#import m_general as M
-#import m_tools as MT
-import numpy as np
-import os
-import pandas as pd
-
-import imp
-import icepyx as ipx
-
-# %%
-path = mconfig['paths']['analysis'] + '../track_lists/'
-
-# batch = 'Batch02_alex'
-# with open(path + 'alex_ATL07_filelist.txt', 'r') as f:
-#     contents = f.readlines()
-
-batch = 'batch03'
-with open(path + 'batch03_ATL07_filelist.txt', 'r') as f:
-    contents = f.readlines()
-
-h5_files = list()
-for l in contents:
-    if '.h5' in l:
-        h5_files.append(l)
-
-file_instances = list()
-for h in h5_files:
-    #h.split('.')[0].split('_')
-    file_instances.append(h.split('.')[0].split('_')[1:4])
-
-
-MT.json_save(batch + '_tracks_components', path, file_instances)
-
-#file_instances
-## make dataframe and derive the IDs needed to compare the data:
-D = pd.DataFrame(file_instances)
-
-def str2dt64(s):
-    return np.datetime64(s[0:4] + '-' + s[4:6] + '-' + s[6:8])
-
-D['date'] = D[0].apply(lambda row: str2dt64(row[0:8]))
-
-dmin, dmax = D['date'].min(), D['date'].max() # needed for icepyx modules
-
-D['RGT'] = D[1].apply(lambda row: row[0:4])
-D['cycle'] = D[1].apply(lambda row: row[4:6])
-D['segment'] = D[1].apply(lambda row: row[6:8])
-#D['segment'].hist()
-
-D['id'] = D[0] + '_' + D[1]
-#D['id_compare'] = D[0]+'_'+
-D['id_compare'] = D['RGT'] + D['cycle']
-
-# len(D['id_compare'])
-# len(set(D['id_compare']))
-
-# %%
-dx = 100
-all_wanted_tracks = list()
-for x in np.arange(0, int(len(D)), dx):
-    Dsub = D[x:x + dx]
-
-    print('set ', x)
-    # % login to Earthdata ..
-
-    date_range = [str(dmin).split(' ')[0], str(dmax).split(' ')[0]]
-    region_a = ipx.Query('ATL03', [180, -70, -180, -55], date_range, \
-        start_time='00:00:00', end_time='23:59:59', \
-        tracks=list(Dsub['RGT']))
-
-    region_a.earthdata_login('mhell', 'mhell@ucsd.edu')
-    # pw
-    # @[49[4tK\-qBWB%5
-
-    # % request available granules in region and time frame
-    region_a.avail_granules()
-    region_a.avail_granules(ids=True)
-
-    # % compare available IDs with the wanted IDs
-    gran_list = [i['producer_granule_id'] for i in region_a.granules.avail]
-    sub_set = list()
-    for id_wanted in Dsub['id_compare']:
-        sub_set.append([i for i in gran_list if id_wanted in i])
-
-    all_possible_tracks = [item for sublist in sub_set for item in sublist]
-    print(len(all_possible_tracks), ' matching granules found')
-
-    all_wanted_tracks.extend(all_possible_tracks)
-
-# %% save clean file list
-MT.json_save(batch + '_ATL03_A00', path, all_wanted_tracks)
diff --git a/data_prehandling/S02_download_gloabl_ocean_CMEMS_L3.py b/data_prehandling/S02_download_gloabl_ocean_CMEMS_L3.py
deleted file mode 100644
index d7754081..00000000
--- a/data_prehandling/S02_download_gloabl_ocean_CMEMS_L3.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import os, sys
-#execfile(os.environ['PYTHONSTARTUP'])
-
-"""
-This file downloads all L3 data from
-https://resources.marine.copernicus.eu/product-detail/WAVE_GLO_WAV_L3_SPC_NRT_OBSERVATIONS_014_002/INFORMATION
-"""
-
-exec(open(os.environ['PYTHONSTARTUP']).read())
-exec(open(STARTUP_2021_IceSAT2).read())
-
-#%matplotlib inline
-
-import imp
-import subprocess
-
-save_path = mconfig['paths']['work'] + '/CMEMS_WAVE_GLO_L3/'
-
-# print(time_range)
-# # create timestamps according to the files on the ftp server:
-# time_stamps_ftp = np.arange(time_range[0].astype('datetime64[3h]'), time_range[1].astype('datetime64[3h]') + np.timedelta64(3, 'h'), np.timedelta64(3, 'h'))
-# time_stamps_ftp_str = [str(t).replace('-', '') for t in time_stamps_ftp]
-# #plt.plot(G1['lons'], G1['lats'], '.')
-
-
-username = 'mhell'
-pw = 'BkUexT#72'
-
-# NRT product
-# paths look like this:
-#ftp://nrt.cmems-du.eu/Core/WAVE_GLO_WAV_L3_SPC_NRT_OBSERVATIONS_014_002/dataset-wav-sar-l3-spc-nrt-global-s1a/2020/03/dataset-wav-sar-l3-spc-nrt-global-s1a_20200302T000000Z_20200302T030000Z_P20200323T0646Z-3H-rep.nc
-
-subpath = 'WAVE_GLO_WAV_L3_SPC_NRT_OBSERVATIONS_014_002'
-path = 'ftp://nrt.cmems-du.eu/Core/' + subpath + '/' #dataset-wav-sar-l3-spc-rep-global-'+sat+'/'+y+'/'+m+'/
-file_card = 'dataset-wav-sar-l3-spc-nrt-global-*-3H-rep.nc'
-
-# REP product
-# paths look like this:
-#ftp://my.cmems-du.eu/Core/WAVE_GLO_PHY_SPC_L3_MY_014_006/dataset-wav-sar-l3-spc-rep-global-s1a/2019/07/dataset-wav-sar-l3-spc-rep-global-s1a_20190702T120000Z_20190702T150000Z_P20210619T1207Z-3H-rep.nc
-
-# subpath = 'WAVE_GLO_WAV_L3_SPC_REP_OBSERVATIONS_014_002'
-# path = 'ftp://my.cmems-du.eu/Core/' + subpath + '/'
-# file_card = 'dataset-wav-sar-l3-spc-rep-global-*-3H-rep.nc'
-
-wget_str = ['wget', '-r', path, '--ftp-user=' + username, '--ftp-password=' + pw, '--no-parent', '-A', file_card, '-nd', '-c']
-wget_str.append('-P')
-wget_str.append(save_path + '/' + subpath)
-
-
-print(wget_str)
-print('save to ' + save_path + '/' + subpath)
-
-list_files = subprocess.run(' '.join(wget_str), shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-
-#list_files.stdout
-#print(list_files.stderr)
-flist_parameters = list()
-#list_files.check_returncode()
-print('download success:', list_files.returncode == 0)
diff --git a/data_prehandling/S03_create_ALT07_batch.py b/data_prehandling/S03_create_ALT07_batch.py
deleted file mode 100644
index fbf4b2fe..00000000
--- a/data_prehandling/S03_create_ALT07_batch.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import os, sys
-#execfile(os.environ['PYTHONSTARTUP'])
-
-"""
-This is a test file, playing with the Earthdata login and icepyx.
-
-"""
-
-exec(open(os.environ['PYTHONSTARTUP']).read())
-exec(open(STARTUP_2021_IceSAT2).read())
-
-#%matplotlib inline
-#from pprint import pprint
-
-import icepyx as ipx
-import m_tools_ph3 as MT
-
-
-
-# %%
-#download_path = mconfig['paths']['scratch'] + '/SH_batch02/'
-path = mconfig['paths']['analysis'] + '../track_lists/'
-#MT.mkdirs_r(download_path)
-
-
-# batch = 'Batch02_alex'
-# with open(path + 'ALT10_tracks_complete.txt', 'r') as f:
-#     h5_files = f.readlines()
-
-with open(path + 'batch05_NA_ATL10.txt', 'r') as f:
-    h5_files = f.readlines()
-
-print('total number of tracks:', len(h5_files))
-
-all_file_names = list()
-for h in h5_files:
-    all_file_names.append(h.split('/')[-1].split('.')[0])
-len(all_file_names)
-#MT.json_save(batch+'_tracks_components', path, file_instances)
-
-all_file_names_split = list()
-for h in all_file_names:
-    all_file_names_split.append(h.split('_'))
-
-
-#flist = MT.json_load('Batch02_alex_tracks', path)
-
-D = pd.DataFrame(all_file_names_split, index=all_file_names, columns=['ALT', 'datestr', 'ttttccss', 'version', 'revision'])
-
-s = D.iloc[0]['datestr']
-s
-def str2dt64(s):
-    return np.datetime64(s[0:4] + '-' + s[4:6] + '-' + s[6:8] + 'T' + s[8:10] + ':' + s[10:12] + ':' + s[12:14])
-
-D['date'] = D['datestr'].apply(lambda row: str2dt64(row))
-
-dmin, dmax = D['date'].min(), D['date'].max()
-dmin, dmax
-
-D['RGT'] = D['ttttccss'].apply(lambda row: row[0:4])
-D['cycle'] = D['ttttccss'].apply(lambda row: row[4:6])
-D['segment'] = D['ttttccss'].apply(lambda row: int(row[6:8]))
-D['hemis'] = D['ALT'].apply(lambda row: 'NH' if row[6:] == '01' else 'SH')
-
-# make ALT07 tracks
-
-D['ALT'] = [i[0:3] + '07' + i[5:] for i in D['ALT']]
-D['revision'] = '01'
-# redefine index:
-
-D.index = D.T.apply(lambda row: '_'.join(row[['ALT', 'datestr', 'ttttccss', 'version', 'revision']]))
-
-
-
-#D['segment'].hist()
-
-# D['id'] = D[0]+'_'+D[1]
-# #D['id_compare'] = D[0]+'_'+
-# D['id_compare'] = D['RGT']+D['cycle']
-
-D['date'].min()
-D['date'].max()
-
-# %% select wanted date range
-# batch = 'batch04'
-# dmin, dmax = np.datetime64('2019-01-01'), np.datetime64('2019-01-30')
-# hemis = 'SH'
-
-# batch = 'batch04_test'
-# dmin, dmax = np.datetime64('2019-01-01'), np.datetime64('2019-01-03')
-# hemis = 'SH'
-#
-
-batch = 'batch05_test'
-dmin, dmax = np.datetime64('2019-03-01'), np.datetime64('2019-03-01T12:00:00')
-hemis = 'NH'
-
-
-Dsel = D[(D['date'] >= dmin) & (D['date'] <= dmax) & (D['hemis'] == hemis)]
-len(Dsel)
-
-# Dsel = D[(D['date'] > dmin) & (D['date'] < dmax) & (D['hemis'] == 'NH')]
-# len(Dsel)
-Dsel = D
-batch = 'batch05'
-
-
-MT.json_save(batch + '_ATL07_A00', path, list(Dsel.index))
-MT.save_pandas_table(Dsel, batch + '_ATL07_A00', path)
diff --git a/data_prehandling/S03_create_ALT10_batch.py b/data_prehandling/S03_create_ALT10_batch.py
deleted file mode 100644
index 02e00536..00000000
--- a/data_prehandling/S03_create_ALT10_batch.py
+++ /dev/null
@@ -1,89 +0,0 @@
-import os, sys
-#execfile(os.environ['PYTHONSTARTUP'])
-
-"""
-This is a test file, playing with the Earthdata login and icepyx.
-
-"""
-
-exec(open(os.environ['PYTHONSTARTUP']).read())
-exec(open(STARTUP_2021_IceSAT2).read())
-
-#%matplotlib inline
-#from pprint import pprint
-
-import icepyx as ipx
-import m_tools_ph3 as MT
-
-
-
-# %%
-#download_path = mconfig['paths']['scratch'] + '/SH_batch02/'
-path = mconfig['paths']['analysis'] + '../track_lists/'
-#MT.mkdirs_r(download_path)
-
-
-# batch = 'Batch02_alex'
-with open(path + 'ALT10_tracks_complete.txt', 'r') as f:
-    h5_files = f.readlines()
-
-print('total number of tracks:', len(h5_files))
-
-all_file_names = list()
-for h in h5_files:
-    all_file_names.append(h.split('/')[-1].split('.')[0])
-len(all_file_names)
-#MT.json_save(batch+'_tracks_components', path, file_instances)
-
-all_file_names_split = list()
-for h in all_file_names:
-    all_file_names_split.append(h.split('_'))
-
-
-#flist = MT.json_load('Batch02_alex_tracks', path)
-
-D = pd.DataFrame(all_file_names_split, index=all_file_names, columns=['ALT', 'datestr', 'ttttccss', 'version', 'revision'])
-
-s = D.iloc[0]['datestr']
-s
-def str2dt64(s):
-    return np.datetime64(s[0:4] + '-' + s[4:6] + '-' + s[6:8] + 'T' + s[8:10] + ':' + s[10:12] + ':' + s[12:14])
-
-D['date'] = D['datestr'].apply(lambda row: str2dt64(row))
-
-dmin, dmax = D['date'].min(), D['date'].max()
-dmin, dmax
-
-D['RGT'] = D['ttttccss'].apply(lambda row: row[0:4])
-D['cycle'] = D['ttttccss'].apply(lambda row: row[4:6])
-D['segment'] = D['ttttccss'].apply(lambda row: int(row[6:8]))
-D['hemis'] = D['ALT'].apply(lambda row: 'NH' if row[6:] == '01' else 'SH')
-
-#D['segment'].hist()
-
-# D['id'] = D[0]+'_'+D[1]
-# #D['id_compare'] = D[0]+'_'+
-# D['id_compare'] = D['RGT']+D['cycle']
-
-D['date'].min()
-D['date'].max()
-
-# %% select wanted date range
-# batch = 'batch04'
-# dmin, dmax = np.datetime64('2019-01-01'), np.datetime64('2019-01-30')
-# hemis = 'SH'
-
-batch = 'batch04_test'
-dmin, dmax = np.datetime64('2019-01-01'), np.datetime64('2019-01-03')
-hemis = 'SH'
-
-Dsel = D[(D['date'] > dmin) & (D['date'] < dmax) & (D['hemis'] == hemis)]
-len(Dsel)
-
-# Dsel = D[(D['date'] > dmin) & (D['date'] < dmax) & (D['hemis'] == 'NH')]
-# len(Dsel)
-
-
-
-MT.json_save(batch + '_ATL10_A00', path, list(Dsel.index))
-MT.save_pandas_table(Dsel, batch + '_ATL10_A00', path)
diff --git a/data_prehandling/S03_download_WW3_hindcast_GLOBAL-30M.py b/data_prehandling/S03_download_WW3_hindcast_GLOBAL-30M.py
deleted file mode 100644
index e803eb38..00000000
--- a/data_prehandling/S03_download_WW3_hindcast_GLOBAL-30M.py
+++ /dev/null
@@ -1,91 +0,0 @@
-import os, sys
-#execfile(os.environ['PYTHONSTARTUP'])
-
-"""
-This file downloads all WW3 hindcast field data from
-#ftp://ftp.ifremer.fr/ifremer/ww3/HINDCAST/GLOBMULTI_ERA5_GLOBCUR_01/GLOB-30M/2019/FIELD_NC/
-"""
-
-exec(open(os.environ['PYTHONSTARTUP']).read())
-exec(open(STARTUP_2021_IceSAT2).read())
-
-#%matplotlib inline
-
-import imp
-import subprocess
-import glob
-import time
-
-save_path = mconfig['paths']['scratch']# + 'GLOBMULTI_ERA5_GLOBCUR_01/'
-save_path2 = mconfig['paths']['work']
-#ftp://ftp.ifremer.fr/ifremer/ww3/HINDCAST/GLOBMULTI_ERA5_GLOBCUR_01/GLOB-30M/2019/FIELD_NC/LOPS_WW3-GLOB-30M_201905.nc
-
-lat_lim = 50 # deg north or south
-var_list = ['dir', 'dp', 'fp', 'hs', 'ice', 'lm', 'spr', 't01', 't02', 't0m1', 'tws',
-'pdp0', 'pdp1', 'pdp2', 'pdp3', 'pdp4', 'pdp5',
-'pspr0', 'pspr1', 'pspr2', 'pspr3', 'pspr4', 'pspr5',
-'ptp0', 'ptp1', 'ptp2', 'ptp3', 'ptp4', 'ptp5',
-'phs0', 'phs1', 'phs2', 'phs3', 'phs4', 'phs5']
-
-
-flist_parameters = list()
-subpath = 'GLOBMULTI_ERA5_GLOBCUR_01/'
-try:
-    os.mkdir(save_path2 + '/' + subpath)
-except FileExistsError:
-    pass
-
-#year = 2018
-for year in np.arange(2018, 2022):
-
-    year_str = str(year)
-    print('-----' + year_str)
-    path = 'ftp://ftp.ifremer.fr/ifremer/ww3/HINDCAST/' + subpath + '/GLOB-30M/' + year_str + '/FIELD_NC/' #dataset-wav-sar-l3-spc-rep-global-'+sat+'/'+y+'/'+m+'/
-    file_card = 'LOPS_WW3-GLOB-30M_' + year_str + '*.nc'
-
-    wget_str = ['wget', '-r', path, '--no-parent', '-A', file_card, '-nd', '-c']
-    wget_str.append('-P')
-    wget_str.append(save_path + '/' + subpath)
-
-    print(' '.join(wget_str))
-    print('save to ' + save_path + '/' + subpath)
-
-    # list_files = subprocess.run(' '.join(wget_str), shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    #
-    # print('download success:', list_files.returncode == 0)
-    # if list_files.returncode == 0:
-    #     flist_parameters.append(list_files)
-
-    year_file_list = glob.glob(save_path + subpath + '*' + file_card)
-    year_file_list2 = list()
-    for f in year_file_list:
-        if '_p2l.nc' in f:
-            #os.remove(f)
-            print(f)
-        else:
-            year_file_list2.append(f)
-    print('open:')
-    year_file_list2.sort()
-    print(year_file_list2)
-    G_all = xr.open_mfdataset(year_file_list2)
-
-    # NH
-    G2 = G_all[var_list].isel(latitude=G_all.latitude > lat_lim)
-    mm, datasets = zip(*G2.groupby("time.month"))
-    paths = [save_path2 + '/' + subpath + '/LOPS_WW3-GLOB-30M_' + year_str + '_' + str(m).zfill(2) + '_NH_select.nc' for m in mm]
-    xr.save_mfdataset(datasets, paths)
-    #G2.to_netcdf(save_path2 + '/'+ subpath +'/' + save_name )
-
-    # SH
-    G2 = G_all[var_list].isel(latitude=G_all.latitude < -lat_lim)
-    mm, datasets = zip(*G2.groupby("time.month"))
-    paths = [save_path2 + '/' + subpath + '/LOPS_WW3-GLOB-30M_' + year_str + '_' + str(m).zfill(2) + '_SH_select.nc' for m in mm]
-    xr.save_mfdataset(datasets, paths)
-    #G2.to_netcdf(save_path2 + '/'+ subpath +'/' + save_name )
-
-    print('merged and saved needed variables in work directory')
-    #time.sleep(5)
-    #os.remove(year_file_list2)
-
-
-print(flist_parameters)
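
Note: if any of these prehandling scripts are needed again, they can be recovered from history rather than reverting this patch. A minimal sketch, assuming the deletion commit above is reachable on the current branch:

    # restore a single deleted script from the parent of the deletion commit
    git show db82aa8450c4fa5f7b830dee425420adec73961e^:data_prehandling/A01_download_tracks.py > A01_download_tracks.py

    # or restore the whole directory into the working tree
    git checkout db82aa8450c4fa5f7b830dee425420adec73961e^ -- data_prehandling/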