-
Notifications
You must be signed in to change notification settings - Fork 0
/
diag.trajectory.drifters.colloc.discrete.triple.jl
286 lines (256 loc) · 14.8 KB
/
diag.trajectory.drifters.colloc.discrete.triple.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
#=
= Perform a single or a series of analysis calibration and performance estimates,
= with the single option employing all available collocations, or for a series of
= estimates performed across a parameter space defined in terms of a set of
= hypercube dimensions (e.g., surface current and lat/lon position, mainly because
= these are readily available from in situ obs, inertial oscillation proxy, Lagrangian
= or Eulerian parameters such as MLD, SSH/SST/SSS structure, bathymetry or nearness
= to the coast, and possibly date or season). For the hypercube option, fixed-size
= subsets of available collocations are selected based on geometrical closeness to
= the target parameters (by equating units and ignoring pdf shape, for example) and
= employed to obtain each calibration and performance estimate. Target parameters
= are then varied with each subset, yielding the actual mean parameters by a simple
= average (and where two averages are close, these should be combined). The ungridded
= variations in analysis quality are then gridded in order to permit a lookup table
= for maps of analysis quality that depend on these parameters. Simple polynomials
= (whose coefficients are found by least squares) are employed to regrid - RD May 2016.
=#
using My, Optim
const ODAT = 1 # identify indecies of the input data:
const OLAT = 2 # date/lat/lon on the collocation grid
const OLON = 3
const OCUR = 4 # drifter current component as well as
const TOTB = 11 # the two total current estimates from
const TOTA = 12 # before and after
const FRAC = 0.9 # fractional update during iterations
const DELTA = 0.001 # generic convergence criterion
const SDTRIM = 6.0 # standard deviation trimming limit
const RECALIB = true # perform an affine recalibration
const GLOBAL = false # employ all collocations or a targeted subsets
const ANALYS = 1 # number of current analyses
const HYPER = 1 # number of hypercube dimensions (independent variables)
const DIRS = ["v2.0_global_025_deg_total_15m"]
const UCUAC = [ 0.00312866]
const UCUBC = [ 1.45594908]
const VCUAC = [ 0.00055068]
const VCUBC = [ 1.48561760]
const UCUAV = [ 0.00387766]
const UCUBV = [ 1.30775895]
const VCUAV = [ 0.00135119]
const VCUBV = [ 1.32680928]
const UCLAC = [ 0.04417926 -0.10298563 0.02980934]
const UCLBC = [ 4.01156863 -6.30790060 3.07021902]
const VCLAC = [ -0.05374986 0.06217939 -0.01455402]
const VCLBC = [ 1.60881144 -3.01035929 2.12685222]
const UCLAV = [ -0.11382001 0.08965406 0.00104184]
const UCLBV = [ 2.98988430 -4.72762832 2.52096027]
const VCLAV = [ -0.02881019 0.00653482 0.01137809]
const VCLBV = [ 4.51764345 -6.78495552 3.07755129]
if (argc = length(ARGS)) != 1
print("\nUsage: jjj $(basename(@__FILE__)) buoydata_1993_2014_drogON.asc.nonmdt.locate_2.0_calib.ucur.got2000_obs.comb\n\n")
exit(1)
end
#=
= Function returning triple collocation cal/val measures for a group of analyses, following McColl
= et al. (2014). Inputs are an array of collocated values and stats are returned for each analysis,
= where it is assumed that extrapolation from before and after is done using the same analysis, so
= no consideration of relative effective resolution (and no iteration as in Vogelzang et al. 2011)
= is necessary (i.e., in situ is highest resolution, but there is no representativeness error
= associated with one analysis being intermediate resolution and another being even lower resolution).
=#
function triple(curr::Array{Float64,3})
allalp = Array(Float64, ANALYS)
allbet = Array(Float64, ANALYS)
allsig = Array(Float64, ANALYS)
allcor = Array(Float64, ANALYS)
allmas = Array(Float64, ANALYS, HYPER)
for a = 1:ANALYS # get the parametric center of mass
mask = masquextreme(curr[1, :,ANALYS+1], SDTRIM) & # that defines each subset in terms of
masquextreme(curr[1, :, a], SDTRIM) & # hypercube independent variable (after
masquextreme(curr[2, :, a], SDTRIM) # trimming extreme values first)
sampsitu = curr[1,mask,ANALYS+1]
samprefa = curr[1,mask, a]
samprefb = curr[2,mask, a]
allmas[a,1] = mean(curr[2,mask,ANALYS+1])
avg1 = mean(sampsitu) # and use a robust calculation of covariance
avg2 = mean(samprefa) # (two-pass here, but more algorithms are at
avg3 = mean(samprefb) # en.wikipedia.org/wiki/Algorithms_for_calculating_variance)
cv11 = mean((sampsitu - avg1) .* (sampsitu - avg1))
cv12 = mean((sampsitu - avg1) .* (samprefa - avg2))
cv13 = mean((sampsitu - avg1) .* (samprefb - avg3))
cv22 = mean((samprefa - avg2) .* (samprefa - avg2))
cv23 = mean((samprefa - avg2) .* (samprefb - avg3))
cv33 = mean((samprefb - avg3) .* (samprefb - avg3))
bet2 = cv23 / cv13
bet3 = cv23 / cv12
alp2 = avg2 - bet2 * avg1
alp3 = avg3 - bet3 * avg1
tmpval = cv11 - cv12 * cv13 / cv23 ; sig1 = tmpval > 0 ? sqrt(tmpval) : 0.0
tmpval = cv22 - cv12 * cv23 / cv13 ; sig2 = tmpval > 0 ? sqrt(tmpval) : 0.0
tmpval = cv33 - cv13 * cv23 / cv12 ; sig3 = tmpval > 0 ? sqrt(tmpval) : 0.0
tmpval = cv12 * cv13 / cv11 / cv23 ; cor1 = tmpval > 0 ? sqrt(tmpval) : 0.0
tmpval = cv12 * cv23 / cv22 / cv13 ; cor2 = tmpval > 0 ? sqrt(tmpval) : 0.0
tmpval = cv13 * cv23 / cv33 / cv12 ; cor3 = tmpval > 0 ? sqrt(tmpval) : 0.0
allalp[a] = 0.5 * (alp2 + alp3)
allbet[a] = 0.5 * (bet2 + bet3)
allsig[a] = 0.5 * (sig2 + sig3)
allcor[a] = 0.5 * (cor2 + cor3)
end
return(allmas, allalp, allbet, allsig, allcor) # then return the average stats
end
#=
= main program
=#
const MISS = -9999.0 # generic missing value
if GLOBAL || RECALIB
const RANGE = 0.0:10.0: 0.0 # target sampling range for current speed
const CUTOFF = 4500000000 # number of collocations in a subset
else
const RANGE = 0.1: 0.1: 1.3 # target sampling range for current speed
const CUTOFF = 200 # number of collocations in a subset
end
const ALPH = 1 # error model x = ALPH + BETA * truth + error
const BETA = 2 # error model x = ALPH + BETA * truth + error
const SIGM = 3 # triple coll RMSE
const CORR = 4 # triple coll correlation coefficient
const MSPD = 5 # center-of-mass parameter
const PARAMS = 5 # number of triple collocation parameters
contains(ARGS[1], "calib.ucur") && (alph = UCUAV ; beta = UCUBV ; alpl = UCLAV ; betl = UCLBV ; varname = "UC..C")
contains(ARGS[1], "calib.vcur") && (alph = VCUAV ; beta = VCUBV ; alpl = VCLAV ; betl = VCLBV ; varname = "VC..C")
contains(ARGS[1], "valid.ucur") && (alph = UCUAC ; beta = UCUBC ; alpl = UCLAC ; betl = UCLBC ; varname = "UC..V")
contains(ARGS[1], "valid.vcur") && (alph = VCUAC ; beta = VCUBC ; alpl = VCLAC ; betl = VCLBC ; varname = "VC..V")
contains(ARGS[1], ".ucur") && (uvuv = replace(ARGS[1], "ucur", "vcur"))
contains(ARGS[1], ".vcur") && (uvuv = replace(ARGS[1], "vcur", "ucur"))
contains(ARGS[1], "calib.ucur") && (alpu = UCUAV ; betu = UCUBV ; alpv = VCUAV ; betv = VCUBV)
contains(ARGS[1], "calib.vcur") && (alpu = VCUAV ; betu = VCUBV ; alpv = UCUAV ; betv = UCUBV)
contains(ARGS[1], "valid.ucur") && (alpu = UCUAC ; betu = UCUBC ; alpv = VCUAC ; betv = VCUBC)
contains(ARGS[1], "valid.vcur") && (alpu = VCUAC ; betu = VCUBC ; alpv = UCUAC ; betv = UCUBC)
fpa = My.ouvre(ARGS[1], "r") ; lines = readlines(fpa) ; close(fpa) # read and count all triple collocations
fpa = My.ouvre( uvuv, "r") ; linez = readlines(fpa) ; close(fpa) # (including both current components) and
linum = length(lines) ; linuz = length(linez) # allocate for the target parameters and
if linum != linuz
print("\nERROR: number of lines in $(ARGS[1]) and $uvuv are $linum != $linuz\n\n")
exit(-1)
end
dist = zeros(linum) # distance to them, the resulting mean params,
chnk = linum < CUTOFF ? linum : CUTOFF # and the triple collocation cal/val estimates
curr = zeros(2, chnk, ANALYS + 1)
calval = [MISS for a = RANGE, b = 1:ANALYS, c = 1:PARAMS]
for a = 1:ANALYS # delete files so as to append below
filename = ARGS[1] * "." * DIRS[a] * ".cali.locl"
isfile(filename) && (print("rm $filename\n") ; rm(filename))
end
for (a, rana) in enumerate(RANGE) # loop through the target parameters and
for d = 1:linum # isolate the nearest CUTOFF set of obs
vala = float(split(lines[d]))
valb = float(split(linez[d]))
cspd = (vala[OCUR]^2.0 + valb[OCUR]^2.0)^0.5
dist[d] = abs(rana - cspd)
end
lims = sort(dist)[chnk]
e = 1 # get cal/val parameters for this subset
for d = 1:linum # (possibly after recalibrating)
if dist[d] <= lims && e <= chnk
vala = float(split(lines[d]))
valb = float(split(linez[d]))
cspd = (vala[OCUR]^2.0 + valb[OCUR]^2.0)^0.5
curr[1,e,:] = [vala[TOTB] vala[OCUR]]
curr[2,e,:] = [vala[TOTA] cspd]
if RECALIB
if GLOBAL
for f = 1:ANALYS
curr[1,e,f] = (curr[1,e,f] - alph[f]) / beta[f]
curr[2,e,f] = (curr[2,e,f] - alph[f]) / beta[f]
end
else
for f = 1:ANALYS
cspd = (((vala[TOTB] - alpu[f]) / betu[f])^2.0 + ((valb[TOTB] - alpv[f]) / betv[f])^2.0)^0.5
localph = alpl[1] * cspd^2 + alpl[2] * cspd + alpl[3]
locbeta = betl[1] * cspd^2 + betl[2] * cspd + betl[3]
curr[1,e,f] = (curr[1,e,f] - localph) / locbeta
cspd = (((vala[TOTA] - alpu[f]) / betu[f])^2.0 + ((valb[TOTA] - alpv[f]) / betv[f])^2.0)^0.5
localph = alpl[1] * cspd^2 + alpl[2] * cspd + alpl[3]
locbeta = betl[1] * cspd^2 + betl[2] * cspd + betl[3]
curr[2,e,f] = (curr[2,e,f] - localph) / locbeta
end
end
end
e += 1
end
end
(allmas, allalp, allbet, allsig, allcor) = triple(curr)
calval[a,:,ALPH] = allalp
calval[a,:,BETA] = allbet
calval[a,:,SIGM] = allsig
calval[a,:,CORR] = allcor
calval[a,:,MSPD] = allmas[:,1]
if GLOBAL || RECALIB
fpb = My.ouvre(ARGS[1] * ".cali.glob", "w")
form = @sprintf("const %sUA%c = [%15.8lf]\n", varname[1:2], varname[5], allalp[1])
write(fpb, form)
form = @sprintf("const %sUB%c = [%15.8lf]\n", varname[1:2], varname[5], allbet[1])
write(fpb, form)
form = @sprintf("\ntarget param CSPD is %6.2f\n", rana)
write(fpb, form)
form = @sprintf(" mean param CSPD is %6.2f\n\n", mean(allmas[:,1]))
write(fpb, form)
form = @sprintf("%33s %8s %8s %8s %8s\n", " ", "allalp", "allbet", "allsig", "allcor")
write(fpb, form)
for d = 1:ANALYS
form = @sprintf("%33s %8.3f %8.3f %8.3f %8.3f\n", DIRS[d], allalp[d], allbet[d], allsig[d], allcor[d])
write(fpb, form)
end
close(fpb)
else
for d = 1:ANALYS
fpb = My.ouvre(ARGS[1] * "." * DIRS[d] * ".cali.locl", "a")
form = @sprintf("%33s %15s %15s %8s %8s %8s %8s\n",
"analysis", "target value", "mean value", "allalp", "allbet", "allsig", "allcor")
a == 1 && write(fpb, form)
form = @sprintf("%33s %15.3f %15.3f %8.3f %8.3f %8.3f %8.3f\n",
DIRS[d], rana, mean(allmas[d,1]), allalp[d], allbet[d], allsig[d], allcor[d])
write(fpb, form)
close(fpb)
end
end
@printf("cala = [%15.8lf]\n", allalp[1])
@printf("calb = [%15.8lf]\n", allbet[1])
@printf("\ntarget param CSPD is %6.2f\n", rana)
@printf(" mean param CSPD is %6.2f\n\n", mean(allmas[:,1]))
@printf("%33s %8s %8s %8s %8s\n", " ", "allalp", "allbet", "allsig", "allcor")
for d = 1:ANALYS
@printf("%33s %8.3f %8.3f %8.3f %8.3f\n", DIRS[d], allalp[d], allbet[d], allsig[d], allcor[d])
end
end
if GLOBAL || RECALIB exit(0) end
varspd = Array(Float64, 0) # the sqerror closure requires data
varcol = Array(Float64, 0) # arrays in global scope
function sqerror(coef::Array{Float64,1}) # define the least squares metric
err = 0.0
for i in 1:length(varspd)
res = coef[1] * varspd[i]^2 + coef[2] * varspd[i] + coef[3]
err += (varcol[i] - res)^2
end
return err
end
for a = 1:ANALYS # for each analysis, store the dependence
fpa = My.ouvre(ARGS[1] * "." * DIRS[a] * ".cali.locl", "a") # of the four triple-collocation statistics
for b = 1:4 # in terms of second-order poynomial coeffs
varspd = Array(Float64, 0) # obtained using unconstrained optimization
varcol = Array(Float64, 0) # (so the stats' "hypercubes" are reduced to
for (c, ranc) in enumerate(RANGE) # variations on a multidimensional curve, but
push!(varspd, calval[c,a,MSPD]) # with values everywhere in parameter space)
push!(varcol, calval[c,a,b])
end
res = optimize(sqerror, [0.0, 0.0, 0.0], iterations = 10000)
b == 1 && (stra = "const" ; strb = "A")
b == 2 && (stra = "const" ; strb = "B")
b == 3 && (stra = " " ; strb = "S")
b == 4 && (stra = " " ; strb = "C")
line = @sprintf("%s %sL%s%c = [%15.8f %15.8f %15.8f]\n",
stra, varname[1:2], strb, varname[5], res.minimum[1], res.minimum[2], res.minimum[3])
write(fpa, line) ; print("$(DIRS[a]) $b $(show(res)) \n")
end
close(fpa)
end
exit(0)