-
Notifications
You must be signed in to change notification settings - Fork 0
/
coads.gts.ncepnrt.heat.flux.colloc.discrete.triple.cfsr.jl
325 lines (295 loc) · 17.3 KB
/
coads.gts.ncepnrt.heat.flux.colloc.discrete.triple.cfsr.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
#=
= Perform a single or a series of analysis calibration and performance estimates,
= with the single option employing all available collocations, or for a series of
= estimates performed for a range of values of the variable of interest (e.g., air
= temperature or sensible heat flux). Fixed-size subsets are defined by in situ
= values being as close as possible to the target values and each calibration and
= performance estimate is obtained from these. Note that the actual mean parameters
= are calculated for each subset, since extreme values are few and extreme averages
= that are close should probably be treated as one. These irregularly distributed
= variations in analysis quality are then gridded in order to permit a lookup table
= for maps of analysis quality that depend on these parameters. Simple polynomials
= (whose coefficients are found by least squares) are employed to regrid - RD May 2016.
=#
using My, Optim
const ODAT = 1 # identify indecies of the input data:
const OLAT = 2 # date/lat/lon on the collocation grid
const OLON = 3
const OFLX = 4 # then five buoy parameters
const SUHM = 5
const WSPD = 6
const AIRT = 7
const SSTT = 8
const ERAB = 9 # and for each of eight analyses, two
const ERAA = 10 # extrapolations from before and after
const HOAB = 11
const HOAA = 12
const IFRB = 13
const IFRA = 14
const JOFB = 15
const JOFA = 16
const MERB = 17
const MERA = 18
const OAFB = 19
const OAFA = 20
const SEAB = 21
const SEAA = 22
const FRAC = 0.9 # fractional update during iterations
const DELTA = 0.001 # generic convergence criterion
const PERTRIM = 0.7 # percent deviation from mean trimming limit
const RECALIB = true # perform an affine recalibration
const GLOBAL = true # employ all collocations or a targeted subsets
const ANALYS = 7 # number of analyses
const HYPER = 1 # number of hypercube dimensions (independent variables)
const DIRS = [ "erainterim", "hoaps", "ifremerflux", "jofuro", "merra", "oaflux", "seaflux"]
const LHFAC = [ 4.54226165, -24.23769546, 10.57257240, 0.78621753, 5.30069338, 0.87558627, 15.56280765]
const LHFBC = [ 1.27420735, 1.52948296, 1.03009329, 1.18665526, 1.08436103, 1.19315736, 0.91969119]
const LHFAV = [ 0.67161582, -27.65864935, 9.89104997, -5.64814519, 2.63421698, -0.05127182, 5.60035426]
const LHFBV = [ 1.23816519, 1.48545609, 0.96254007, 1.21224506, 1.04406791, 1.12319564, 0.98863245]
const LHLAC = [ 0.04417926 -0.10298563 0.02980934]
const LHLBC = [ 4.01156863 -6.30790060 3.07021902]
const LHLAV = [ -0.02881019 0.00653482 0.01137809]
const LHLBV = [ 4.51764345 -6.78495552 3.07755129]
if (argc = length(ARGS)) != 1
print("\nUsage: jjj $(basename(@__FILE__)) all.flux.daily.locate_2.0_calib.airt.got2000_obs.comb\n\n")
exit(1)
end
#=
= Function returning triple collocation cal/val measures for a group of analyses, following McColl
= et al. (2014). Inputs are an array of collocated values and stats are returned for each analysis,
= where it is assumed that extrapolation from before and after is done using the same analysis, so
= no consideration of relative effective resolution (and no iteration as in Vogelzang et al. 2011)
= is necessary (i.e., in situ is highest resolution, but there is no representativeness error
= associated with one analysis being intermediate resolution and another being even lower resolution).
=#
function triple(coll::Array{Float64,3})
allalp = Array(Float64, ANALYS)
allbet = Array(Float64, ANALYS)
allsig = Array(Float64, ANALYS)
allcor = Array(Float64, ANALYS)
allmas = Array(Float64, ANALYS, HYPER)
totlen = msklen = 0
for a = 1:ANALYS # get the parametric center of mass
mask = masquepourcent(coll[1, :,ANALYS+1], PERTRIM) & # that defines each subset in terms of
masquepourcent(coll[1, :, a], PERTRIM) & # hypercube independent variable (after
masquepourcent(coll[2, :, a], PERTRIM) # trimming extreme values first)
sampsitu = coll[1,mask,ANALYS+1]
samprefa = coll[1,mask, a]
samprefb = coll[2,mask, a]
allmas[a,1] = mean(coll[2,mask,ANALYS+1])
totlen += length(coll[1,:,a]) ; msklen += length(coll[1,mask,a])
avg1 = mean(sampsitu) # and use a robust calculation of covariance
avg2 = mean(samprefa) # (two-pass here, but more algorithms are at
avg3 = mean(samprefb) # en.wikipedia.org/wiki/Algorithms_for_calculating_variance)
cv11 = mean((sampsitu - avg1) .* (sampsitu - avg1))
cv12 = mean((sampsitu - avg1) .* (samprefa - avg2))
cv13 = mean((sampsitu - avg1) .* (samprefb - avg3))
cv22 = mean((samprefa - avg2) .* (samprefa - avg2))
cv23 = mean((samprefa - avg2) .* (samprefb - avg3))
cv33 = mean((samprefb - avg3) .* (samprefb - avg3))
bet2 = cv23 / cv13
bet3 = cv23 / cv12
alp2 = avg2 - bet2 * avg1
alp3 = avg3 - bet3 * avg1
tmpval = cv11 - cv12 * cv13 / cv23 ; sig1 = tmpval > 0 ? sqrt(tmpval) : 0.0
tmpval = cv22 - cv12 * cv23 / cv13 ; sig2 = tmpval > 0 ? sqrt(tmpval) : 0.0
tmpval = cv33 - cv13 * cv23 / cv12 ; sig3 = tmpval > 0 ? sqrt(tmpval) : 0.0
tmpval = cv12 * cv13 / cv11 / cv23 ; cor1 = tmpval > 0 ? sqrt(tmpval) : 0.0
tmpval = cv12 * cv23 / cv22 / cv13 ; cor2 = tmpval > 0 ? sqrt(tmpval) : 0.0
tmpval = cv13 * cv23 / cv33 / cv12 ; cor3 = tmpval > 0 ? sqrt(tmpval) : 0.0
allalp[a] = 0.5 * (alp2 + alp3)
allbet[a] = 0.5 * (bet2 + bet3)
allsig[a] = 0.5 * (sig2 + sig3)
allcor[a] = 0.5 * (cor2 + cor3)
end
totlen /= ANALYS ; msklen /= ANALYS
@show totlen msklen
return(allmas, allalp, allbet, allsig, allcor) # then return the average stats
end
#=
= main program
=#
const MISS = -9999.0 # generic missing value
if GLOBAL || RECALIB
const RANGE = 0.0:10.0: 0.0 # target the sampling range of the subsets
const CUTOFF = 4500000000 # and the number of collocations in each subset
else
contains(ARGS[1], "shfx") && (const RANGE = -5.0:10.0: 25.0)
contains(ARGS[1], "lhfx") && (const RANGE = 50.0:50.0:200.0)
contains(ARGS[1], "wspd") && (const RANGE = 2.5: 5.0: 12.5)
contains(ARGS[1], "airt") && (const RANGE = 10.0:10.0: 30.0)
contains(ARGS[1], "sstt") && (const RANGE = 10.0:10.0: 30.0)
contains(ARGS[1], "shum") && (const RANGE = 7.5: 5.0: 17.5)
const CUTOFF = 5000
end
const ALPH = 1 # error model x = ALPH + BETA * truth + error
const BETA = 2 # error model x = ALPH + BETA * truth + error
const SIGM = 3 # triple coll RMSE
const CORR = 4 # triple coll correlation coefficient
const MASS = 5 # center-of-mass parameter
const PARAMS = 5 # number of triple collocation parameters
contains(ARGS[1], "calib.shfx") && (alph = SHFAV ; beta = SHFBV ; alpl = SHLAV ; betl = SHLBV ; varname = "SHF.C")
contains(ARGS[1], "calib.lhfx") && (alph = LHFAV ; beta = LHFBV ; alpl = LHLAV ; betl = LHLBV ; varname = "LHF.C")
contains(ARGS[1], "calib.wspd") && (alph = WSPAV ; beta = WSPBV ; alpl = WSLAV ; betl = WSLBV ; varname = "WSP.C")
contains(ARGS[1], "calib.airt") && (alph = AIRAV ; beta = AIRBV ; alpl = AILAV ; betl = AILBV ; varname = "AIR.C")
contains(ARGS[1], "calib.sstt") && (alph = SSTAV ; beta = SSTBV ; alpl = SSLAV ; betl = SSLBV ; varname = "SST.C")
contains(ARGS[1], "calib.shum") && (alph = SUHAV ; beta = SUHBV ; alpl = SULAV ; betl = SULBV ; varname = "SUH.C")
contains(ARGS[1], "valid.shfx") && (alph = SHFAC ; beta = SHFBC ; alpl = SHLAC ; betl = SHLBC ; varname = "SHF.V")
contains(ARGS[1], "valid.lhfx") && (alph = LHFAC ; beta = LHFBC ; alpl = LHLAC ; betl = LHLBC ; varname = "LHF.V")
contains(ARGS[1], "valid.wspd") && (alph = WSPAC ; beta = WSPBC ; alpl = WSLAC ; betl = WSLBC ; varname = "WSP.V")
contains(ARGS[1], "valid.airt") && (alph = AIRAC ; beta = AIRBC ; alpl = AILAC ; betl = AILBC ; varname = "AIR.V")
contains(ARGS[1], "valid.sstt") && (alph = SSTAC ; beta = SSTBC ; alpl = SSLAC ; betl = SSLBC ; varname = "SST.V")
contains(ARGS[1], "valid.shum") && (alph = SUHAC ; beta = SUHBC ; alpl = SULAC ; betl = SULBC ; varname = "SUH.V")
#=
contains(ARGS[1], "calib.shfx") && (alph = SHFAC ; beta = SHFBC ; alpl = SHLAC ; betl = SHLBC ; varname = "SHF.C")
contains(ARGS[1], "calib.lhfx") && (alph = LHFAC ; beta = LHFBC ; alpl = LHLAC ; betl = LHLBC ; varname = "LHF.C")
contains(ARGS[1], "calib.wspd") && (alph = WSPAC ; beta = WSPBC ; alpl = WSLAC ; betl = WSLBC ; varname = "WSP.C")
contains(ARGS[1], "calib.airt") && (alph = AIRAC ; beta = AIRBC ; alpl = AILAC ; betl = AILBC ; varname = "AIR.C")
contains(ARGS[1], "calib.sstt") && (alph = SSTAC ; beta = SSTBC ; alpl = SSLAC ; betl = SSLBC ; varname = "SST.C")
contains(ARGS[1], "calib.shum") && (alph = SUHAC ; beta = SUHBC ; alpl = SULAC ; betl = SULBC ; varname = "SUH.C")
contains(ARGS[1], "valid.shfx") && exit(0)
contains(ARGS[1], "valid.lhfx") && exit(0)
contains(ARGS[1], "valid.wspd") && exit(0)
contains(ARGS[1], "valid.airt") && exit(0)
contains(ARGS[1], "valid.sstt") && exit(0)
contains(ARGS[1], "valid.shum") && exit(0)
=#
fpa = My.ouvre(ARGS[1], "r") ; lines = readlines(fpa) ; close(fpa) # read and count all triple collocations
linum = length(lines) # allocate for the target parameters and
dist = zeros(linum) # distance to them, the resulting mean params,
chnk = linum < CUTOFF ? linum : CUTOFF # and the triple collocation cal/val estimates
coll = zeros(2, chnk, ANALYS + 1)
calval = [MISS for a = RANGE, b = 1:ANALYS, c = 1:PARAMS]
if !(GLOBAL || RECALIB)
fpa = My.ouvre(ARGS[1] * ".cali.locl", "w")
form = @sprintf("%33s %15s %15s %8s %8s %8s %8s\n",
"analysis", "target value", "mean value", "allalp", "allbet", "allsig", "allcor")
write(fpa, form)
close(fpa)
end
for (a, rana) in enumerate(RANGE) # loop through the target parameters and
for d = 1:linum # isolate the nearest CUTOFF set of obs
vals = float(split(lines[d]))
dist[d] = abs(rana - vals[OFLX])
end
lims = sort(dist)[chnk]
e = 1 # get cal/val parameters for this subset
for d = 1:linum # (possibly after recalibrating)
if dist[d] <= lims && e <= chnk
vals = float(split(lines[d]))
coll[1,e,:] = [vals[ERAB] vals[HOAB] vals[IFRB] vals[JOFB] vals[MERB] vals[OAFB] vals[SEAB] vals[OFLX]]
coll[2,e,:] = [vals[ERAA] vals[HOAA] vals[IFRA] vals[JOFA] vals[MERA] vals[OAFA] vals[SEAA] vals[OFLX]]
if RECALIB
if GLOBAL
for f = 1:ANALYS
coll[1,e,f] = (coll[1,e,f] - alph[f]) / beta[f]
coll[2,e,f] = (coll[2,e,f] - alph[f]) / beta[f]
end
else # for the local calibration, choose local
for f = 1:ANALYS # alpha and beta based on a global
fshift = (f-1) * 3 # recalibration (rather than the raw value)
ctmp = (coll[1,e,f] - alph[f]) / beta[f]
# ctmp = vals[OFLX]
localph = alpl[fshift+1] * ctmp^2 + alpl[fshift+2] * ctmp + alpl[fshift+3]
locbeta = betl[fshift+1] * ctmp^2 + betl[fshift+2] * ctmp + betl[fshift+3]
coll[1,e,f] = (coll[1,e,f] - localph) / locbeta
ctmp = (coll[2,e,f] - alph[f]) / beta[f]
# ctmp = vals[OFLX]
localph = alpl[fshift+1] * ctmp^2 + alpl[fshift+2] * ctmp + alpl[fshift+3]
locbeta = betl[fshift+1] * ctmp^2 + betl[fshift+2] * ctmp + betl[fshift+3]
coll[2,e,f] = (coll[2,e,f] - localph) / locbeta
end
end
end
e += 1
end
end
#@show coll
(allmas, allalp, allbet, allsig, allcor) = triple(coll)
calval[a,:,ALPH] = allalp
calval[a,:,BETA] = allbet
calval[a,:,SIGM] = allsig
calval[a,:,CORR] = allcor
calval[a,:,MASS] = allmas[:,1]
if GLOBAL || RECALIB
fpb = My.ouvre(ARGS[1] * ".cali.glob", "w")
form = @sprintf("const %sA%c = [%15.8lf, %15.8lf, %15.8lf, %15.8lf, %15.8lf, %15.8lf, %15.8lf]\n",
varname[1:3], varname[5], allalp[1], allalp[2], allalp[3], allalp[4], allalp[5], allalp[6], allalp[7])
write(fpb, form)
form = @sprintf("const %sB%c = [%15.8lf, %15.8lf, %15.8lf, %15.8lf, %15.8lf, %15.8lf, %15.8lf]\n",
varname[1:3], varname[5], allbet[1], allbet[2], allbet[3], allbet[4], allbet[5], allbet[6], allbet[7])
write(fpb, form)
form = @sprintf("\ntarget param MASS is %6.2f\n", rana)
write(fpb, form)
form = @sprintf(" mean param MASS is %6.2f\n\n", mean(allmas[:,1]))
write(fpb, form)
form = @sprintf("%33s %8s %8s %8s %8s\n", " ", "allalp", "allbet", "allsig", "allcor")
write(fpb, form)
for d = 1:ANALYS
form = @sprintf("%33s %8.3f %8.3f %8.3f %8.3f\n", DIRS[d], allalp[d], allbet[d], allsig[d], allcor[d])
write(fpb, form)
end
close(fpb)
else
for d = 1:ANALYS
fpb = My.ouvre(ARGS[1] * ".cali.locl", "a")
form = @sprintf("%33s %15.3f %15.3f %8.3f %8.3f %8.3f %8.3f\n",
DIRS[d], rana, mean(allmas[d,1]), allalp[d], allbet[d], allsig[d], allcor[d])
write(fpb, form)
close(fpb)
end
end
@printf("cala = [%15.8lf, %15.8lf, %15.8lf, %15.8lf, %15.8lf, %15.8lf, %15.8lf]\n",
allalp[1], allalp[2], allalp[3], allalp[4], allalp[5], allalp[6], allalp[7])
@printf("calb = [%15.8lf, %15.8lf, %15.8lf, %15.8lf, %15.8lf, %15.8lf, %15.8lf]\n",
allbet[1], allbet[2], allbet[3], allbet[4], allbet[5], allbet[6], allbet[7])
@printf("\ntarget param MASS is %6.2f\n", rana)
@printf(" mean param MASS is %6.2f\n\n", mean(allmas[:,1]))
@printf("%33s %8s %8s %8s %8s\n", " ", "allalp", "allbet", "allsig", "allcor")
for d = 1:ANALYS
@printf("%33s %8.3f %8.3f %8.3f %8.3f\n", DIRS[d], allalp[d], allbet[d], allsig[d], allcor[d])
end
end
if GLOBAL || RECALIB exit(0) end
varmas = Array(Float64, 0) # the sqerror closure requires data
varcol = Array(Float64, 0) # arrays in global scope
function sqerror(coef::Array{Float64,1}) # define the least squares metric
err = 0.0
for i in 1:length(varmas)
res = coef[1] * varmas[i]^2 + coef[2] * varmas[i] + coef[3]
err += (varcol[i] - res)^2
end
return err
end
coeff = Array(Float64, 3, 4, ANALYS) # for each analysis, store the dependence
for a = 1:ANALYS # of the four triple-collocation statistics
for b = 1:4 # in terms of second-order poynomial coeffs
varmas = Array(Float64, 0) # obtained using unconstrained optimization
varcol = Array(Float64, 0) # (so the stats' "hypercubes" are reduced to
for (c, ranc) in enumerate(RANGE) # variations on a multidimensional curve, but
push!(varmas, calval[c,a,MASS]) # with values everywhere in parameter space)
push!(varcol, calval[c,a,b])
end
res = optimize(sqerror, [0.0, 0.0, 0.0], iterations = 10000)
coeff[1,b,a] = res.minimum[1]
coeff[2,b,a] = res.minimum[2]
coeff[3,b,a] = res.minimum[3]
print("$(DIRS[a]) $b $(show(res)) \n")
end
end
fpa = My.ouvre(ARGS[1] * ".cali.locl", "a")
for a = 1:4
a == 1 && (stra = "const" ; strb = "A")
a == 2 && (stra = "const" ; strb = "B")
a == 3 && (stra = " " ; strb = "S")
a == 4 && (stra = " " ; strb = "C")
line = @sprintf("%s %sL%s%c = [", stra, varname[1:2], strb, varname[5])
for b = 1:ANALYS
line *= @sprintf(" %15.8f %15.8f %15.8f", coeff[1,a,b], coeff[2,a,b], coeff[3,a,b])
end
line *= "]\n"
write(fpa, line)
print(line)
end
close(fpa)
exit(0)