"""
Background:
===========
SOAP2ArchiveCSV.py
Purpose:
========
Format results from soap call to clean "traditional" csv archival format. This format is
known as the .y or 'year files' and was previously created by DGK and Benny's routines
File Format:
============
Currently only processes csv returned files from SOAP (not xml).
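
Input columns referenced by this script (semicolon-delimited): platformId,
platformType, latitude, longitude, bestDate, locationDate, value,
value.1 .. value.31, locationClass. Whether a single export carries both
bestDate and locationDate is an assumption based on the code paths below.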
(Very Long) Example Usage:
==========================
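A minimal sketch of an invocation (the input filename here is a placeholder, not
taken from the original):

    python SOAP2ArchiveCSV.py soap_export.csv -year 2023 -drifteryearfiles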

History:
========
2023-06-27: New beacon types were added in 2023; they show up as SUBSURF and
MOOR_BUOY. Address these by now splitting on DRIFT or SUBSURF/MOOR_BUOY.

Compatibility:
==============
python >=3.6 **tested**
python 2.7 **tested** but may break in the future (pd.read_csv's on_bad_lines
argument requires pandas >= 1.3, which no longer supports python 2.7)
"""
import argparse
import pandas as pd
import numpy as np
import datetime

# parse incoming command line options
parser = argparse.ArgumentParser(description='Format ARGOS SOAP retrieved files to archive csv files')
parser.add_argument('infile',
                    metavar='infile',
                    type=str,
                    help='full path to infile')
parser.add_argument('-year', '--archive_year',
                    type=str,
                    help='year of data being input, will default to current year if omitted')
parser.add_argument('-getactiveids', '--getactiveids',
                    action="store_true",
                    help='get active listing of platformIds')
parser.add_argument('-buoyyearfiles', '--buoyyearfiles',
                    action="store_true",
                    help='create buoy year files - 28882 or 28883')
parser.add_argument('-buoyid', '--buoyid',
                    type=str,
                    default='28882',
                    help='default - 28882')
parser.add_argument('-drifteryearfiles', '--drifteryearfiles',
                    action="store_true",
                    help='create all drifter year files from activeid listing')
parser.add_argument('-beaconyearfiles', '--beaconyearfiles',
                    action="store_true",
                    help='create all beacon year files from platform types "SUBSURF" or "MOOR_BUOY"')
args = parser.parse_args()

# read the semicolon-delimited SOAP csv export, skipping malformed lines
df = pd.read_csv(args.infile, sep=';', index_col=False, dtype=object, on_bad_lines='skip')

if not args.archive_year:
    year = str(datetime.datetime.now().year)
else:
    year = args.archive_year

if args.getactiveids:
    gb = df.groupby('platformId')
    print(gb.groups.keys())

if args.buoyyearfiles:
    gb = df.groupby('platformId')
    keep_columns = (['platformId', 'latitude', 'longitude', 'bestDate', 'value']
                    + ['value.' + str(i) for i in range(1, 32)] + ['locationClass'])
    try:
        bd = gb.get_group(args.buoyid)
        bd_thinned = bd[keep_columns].copy()
        # split bestDate into the year / day-of-year / HHMM fields used by the .y format
        best_dates = pd.to_datetime(bd_thinned['bestDate'])
        bd_thinned['year'] = best_dates.dt.year.astype(str)
        bd_thinned['doy'] = best_dates.dt.dayofyear.astype(str)
        bd_thinned['hhmm'] = best_dates.dt.strftime('%H%M')
        out_columns = (['platformId', 'latitude', 'longitude', 'year', 'doy', 'hhmm', 'value']
                       + ['value.' + str(i) for i in range(1, 32)] + ['locationClass'])
        bd_thinned[out_columns].to_csv('0' + args.buoyid + '.y' + year, sep=' ',
                                       header=False, index=False, na_rep=np.nan, mode='a')
    except KeyError:
        # get_group raises KeyError when the requested buoyid is absent from the file
        print("no " + args.buoyid + " data in this file")

if args.drifteryearfiles:
    pb = df.groupby('platformType')
    keep_columns = (['platformId', 'latitude', 'longitude', 'locationDate', 'value']
                    + ['value.' + str(i) for i in range(1, 7)] + ['locationClass'])
    for j in pb.groups.keys():
        if j in ['DRIFT']:
            print(j)
            cd = pb.get_group(j)
            gb = cd.groupby('platformId')
            for k in gb.groups.keys():
                print(k)
                bd = gb.get_group(k)
                bd_thinned = bd[keep_columns].copy()
                location_dates = pd.to_datetime(bd_thinned['locationDate'])
                bd_thinned['year'] = location_dates.dt.year.astype(str)
                bd_thinned['doy'] = location_dates.dt.dayofyear.astype(str)
                bd_thinned['hhmm'] = location_dates.dt.strftime('%H%M')
                # make special case for 122531, the peggy backup locator buoy
                if k == '122531':
                    out_columns = (['platformId', 'latitude', 'longitude', 'year', 'doy', 'hhmm', 'value']
                                   + ['locationClass'])
                    bd_thinned[out_columns].dropna().to_csv(k + '.y' + year, sep=' ',
                                                            header=False, index=False,
                                                            na_rep=np.nan, mode='a')
                else:
                    out_columns = (['platformId', 'latitude', 'longitude', 'year', 'doy', 'hhmm', 'value']
                                   + ['value.' + str(i) for i in range(1, 7)] + ['locationClass'])
                    bd_thinned[out_columns].dropna(subset=['latitude', 'longitude']).to_csv(
                        k + '.y' + year, sep=' ', header=False, index=False,
                        na_rep=np.nan, mode='a')

if args.beaconyearfiles:
    pb = df.groupby('platformType')
    keep_columns = (['platformId', 'latitude', 'longitude', 'locationDate', 'value']
                    + ['value.' + str(i) for i in range(1, 7)] + ['locationClass'])
    for j in pb.groups.keys():
        if j in ['MOOR_BUOY', 'SUBSURF']:
            print(j)
            cd = pb.get_group(j)
            gb = cd.groupby('platformId')
            for k in gb.groups.keys():
                print(k)
                bd = gb.get_group(k)
                bd_thinned = bd[keep_columns].copy()
                location_dates = pd.to_datetime(bd_thinned['locationDate'])
                bd_thinned['year'] = location_dates.dt.year.astype(str)
                bd_thinned['doy'] = location_dates.dt.dayofyear.astype(str)
                bd_thinned['hhmm'] = location_dates.dt.strftime('%H%M')
                out_columns = (['platformId', 'latitude', 'longitude', 'year', 'doy', 'hhmm', 'value']
                               + ['locationClass'])
                bd_thinned[out_columns].dropna().to_csv(k + '.y' + year, sep=' ',
                                                        header=False, index=False,
                                                        na_rep=np.nan, mode='a')