-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathstereo.py
599 lines (440 loc) · 17.6 KB
/
stereo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
"""
stereo matching tools
Copyright (C) 2017-2018, Gabriele Facciolo <[email protected]>
"""
from __future__ import print_function
import numpy as np
# in case numba jit is not installed
try:
from numba import jit
except:
print('WARNING: numba package is not installed')
def jit(x):
return x
# cost volume functions
def censustransform_64(img, cw=5, cp=None, sep=1):
'''
Efficiently compute the census transform (CT) of img
using windows limited to 8 * 8 (cw<=8)
Args:
img: numpy array containing the input image
cw: size of the census window cw*cw-1 <= 64
cp: optional image with centralpixel values of all pixels,
useful for implementing the modified census transform
sep: optional control the spacing of the CT samples (default 1)
Returns:
a numpy array containing the CT at each pixel packed as a uint64 image
derived from: http://stackoverflow.com/questions/38265364/census-transform-in-python-opencv
'''
if cw > 8:
printf('census window cannot be larger than 8x8')
cw = min(cw,8)
hcw = int(cw/2)
# Initialize 64bit output array
census = np.zeros(img.shape, dtype='uint64')
# Center pixels
if cp is None:
cp = img
# Offsets of non-central pixels
offsets = [(u-hcw, v-hcw) for v in range(cw)
for u in range(cw)
if not u == hcw == v]
# Fill census bitstring
for u,v in offsets:
census = (census << 1) | (np.roll(img,(-v*sep,-u*sep), axis=(0,1)) >= cp)
return census
def censustransform(img, cw=5, cp=None, sep=1):
'''
Efficiently compute the census transform (CT) of img
sing windows of size cw * cw
Args:
img: numpy array containing the input image
cw: size of the census window, the transform will have cw*cw-1 bits
cp: optional image with centralpixel values of all pixels,
useful for implementing the modified census transform
sep: optional control the spacing of the CT samples (default 1)
Returns:
a numpy array containing the CT at each pixel packed as as many
uint64 image planes as needed to represent the (cw*cw-1) bits
derived from: http://stackoverflow.com/questions/38265364/census-transform-in-python-opencv
'''
hcw = int(cw/2)
# Initialize 64bit output array
census = None
# Center pixel values
if cp is None:
cp = img
# Offsets of non-central pixels
offsets = [(u-hcw, v-hcw) for v in range(cw)
for u in range(cw)
if not u == hcw == v]
def chunks(l, n):
"""Yield successive n-sized chunks from l."""
for i in range(0, len(l), n):
yield l[i:i + n]
for Loffsets in chunks(offsets,64):
# Initialize 64bit output array
Lcensus = np.zeros(img.shape, dtype='uint64')
# Fill census bitstring
for u,v in Loffsets:
Lcensus = (Lcensus << 1) | (np.roll(img,(-v*sep,-u*sep), axis=(0,1)) >= cp)
if census is None:
census = Lcensus
else: # concatenate along third axis if more than 64 bits are needed
if Lcensus.ndim==2:
Lcensus = np.expand_dims(Lcensus,axis=2)
if census.ndim==2:
census = np.expand_dims(census,axis=2)
census = np.dstack((census,Lcensus))
return census
def countbits(n):
'''
Count the number of bits set for all the elements of the numpy array up to uint64
https://stackoverflow.com/questions/9829578/fast-way-of-counting-non-zero-bits-in-positive-integer
Args:
n: numpy array of integer type (interpreted as uint64)
Returns:
numpy array with the number of bits for each element of n
'''
import numpy as np
if type(n) == np.ndarray: # force type in case of np.uint32
n = n.astype(np.uint64)
else: # in case of python number
n = int(n)
n = (n & 0x5555555555555555) + ((n & 0xAAAAAAAAAAAAAAAA) >> 1)
n = (n & 0x3333333333333333) + ((n & 0xCCCCCCCCCCCCCCCC) >> 2)
n = (n & 0x0F0F0F0F0F0F0F0F) + ((n & 0xF0F0F0F0F0F0F0F0) >> 4)
n = (n & 0x00FF00FF00FF00FF) + ((n & 0xFF00FF00FF00FF00) >> 8)
n = (n & 0x0000FFFF0000FFFF) + ((n & 0xFFFF0000FFFF0000) >> 16)
n = (n & 0x00000000FFFFFFFF) + ((n & 0xFFFFFFFF00000000) >> 32) # This last & isn't strictly necessary.
return n
def costvolumeSD(im1, im2, dmin=-20, dmax=20):
'''
creates a Squared Difference stereo cost volume
Args:
im1,im2: numpy arrays containing the stereo pair (im1 is reference)
dmin,dmax: minimum and maximum disparity to be explored
Returns:
numpy array containing cost volume of size [im1.shape[0], im1.shape[0], dmax+1 - dmin]
'''
imshape = im1.shape
CV = np.zeros((imshape[0], imshape[1], dmax+1-dmin))
offsets = range(dmin,dmax+1)
for i in range(len(offsets)):
sd = (im1 - np.roll(im2,(0,offsets[i]), axis=(0,1)))**2
if sd.ndim == 3: # in case of color images
sd = np.sum(sd, axis=2)/sd.shape[2]
CV[:,:,i] = sd
return CV
def costvolumeCT(im1, im2, dmin=-20, dmax=20, cw=7):
'''
creates a stereo cost volume for the Census cost: the Hamming
distance between the census transformed patches of the two images
Args:
im1,im2: numpy arrays containing the stereo pair (im1 is reference)
dmin,dmax: minimum and maximum disparity to be explored
cw: size of the census transform window (default 7x7)
Returns:
numpy array containing cost volume of size [im1.shape[0], im1.shape[0], dmax+1 - dmin]
'''
imshape = im1.shape
CV = np.zeros((imshape[0], imshape[1], dmax+1-dmin))
# this creates multi-channel images containing the census bitstrings for each pixel
im1ct = censustransform(im1, cw)
im2ct = censustransform(im2, cw)
offsets = range(dmin,dmax+1)
for i in range(len(offsets)):
# XOR the bitstrings and count the bits
xorbits = im1ct ^ np.roll(im2ct,(0,offsets[i]), axis=(0,1))
hamming = countbits(xorbits)
if hamming.ndim == 3: # in case of multiple bitplanes
hamming = np.sum(hamming.astype(float), axis=2)
CV[:,:,i] = hamming
return CV
def aggregateCV(CV, win_w, win_h):
'''
filters the cost volume with a rectangular spatial window of
size win_w * win_h and uniform weight = 1.0/(win_w * win_h)
Args:
CV: numpy array containing the cost volume
win_w,win_h: width and height of the rectangular window
Returns:
numpy array containing the filtered cost volume
'''
import scipy.signal
K = np.ones((win_h,win_w))/(win_w*win_h)
for i in range(CV.shape[2]):
CV[:,:,i] = scipy.signal.convolve2d(CV[:,:,i], K, mode='same', boundary='symm')
return CV
def leftright(offL, offR, maxdiff=1):
'''
Filters the disparity maps applying the left-right consistency test
| offR(round(x - offL(x))) + offR(x)| <= maxdiff
Args:
offL, offR: numpy arrays containing the Left and Right disparity maps
maxdiff: threshold for the uniqueness constraint
Returns:
numpy array containing the offL disparity map,
where the rejected pixels are set to np.inf
'''
sh = offL.shape
X, Y = np.meshgrid(range(sh[1]), range(sh[0]))
X = np.minimum(np.maximum(X - offL.astype(int), 0), sh[1]-1)
m = np.abs(offL + offR[Y,X] ) > maxdiff
out = offL.copy()
out[m] = np.Infinity
return out
def specklefilter(off, area=25, th=0):
'''
speckle filter of dispairt map off
Args:
off: numpy array with the input disparity map
area: the surface (in pixels) of the smallest allowed connected component of disparity
th: similarity threshold used to determin if two neighboring pixels have the same value
Returns:
numpy array with the filtered disparity map, removed points are set to infinity
'''
@jit(nopython=True)
def find(i,idx): # finds the root of a dsf
if idx.flat[i] == i:
return i
else:
ret = find(idx.flat[i],idx)
#idx.flat[i] = ret // path compression is useles with idx passed by value
return ret
@jit(nopython=True)
def dsf(D, th=0): # builds a dsf
h,w = D.shape[0],D.shape[1]
idx = np.zeros((h,w),dtype=np.int64)
for j in range(h):
for i in range(w):
idx[j,i] = j*w + i
for j in range(h):
for i in range(w):
if(i>0):
if( abs(D[j,i] - D[j,i-1])<= th ):
a = find(idx[j,i],idx)
b = find(idx[j,i-1],idx)
idx[j,i] = idx[j,i-1]
idx.flat[a] = b
if(j>0):
if( abs(D[j,i] - D[j-1,i])<= th ):
a = find(idx[j,i],idx)
b = find(idx[j-1,i],idx)
idx[j,i] = idx[j-1,i]
idx.flat[a] = b
return idx
@jit(nopython=True)
def labels(idx):
h,w=idx.shape[0],idx.shape[1]
lab = idx*0
for i in range(h*w):
ind = find(i,idx)
lab.flat[i] = ind
return lab
@jit(nopython=True)
def areas(lab):
h,w=lab.shape[0],lab.shape[1]
area = np.zeros((h,w),dtype=np.int64)
LL = np.zeros((h,w),dtype=np.int64)
for i in range(w*h):
area.flat[lab.flat[i]] += 1
for i in range(w*h):
LL.flat[i] = area.flat[lab.flat[i]]
return LL
# build the dsf
ind = dsf(off, th=th)
# extract the labels of all the regions
lab = labels(ind)
# creat e map where all the regions are tagged with their area
are = areas(lab)
# filter the disparity map
filtered = np.where((are>area), off, np.inf)
return filtered
def mismatchFiltering(dL,dR, area=50, tau=1):
'''
Applies left-right and speckle filter
Args:
dL,dR: are numpy arrays containing the left and right disparity maps
area: is the minimum area parameter of the speckle filter
tau: maximum left-right disparity difference
Returns:
numpy array containing a filtered version of the left disparity map,
where rejected pixels are set to infinity
'''
dLR = leftright(dL, dR, tau)
dLRspeckle = specklefilter(dLR,area=area,th=1)
return dLRspeckle
### SGM RELATED FUNCTIONS ###
#@jit(nopython=True)
def filterViterbiV(c, lam=8):
'''
The function filters the cost volume by computing
L_+(p,d) = C_{p}(d) + \min_{d'}(L_+(p-1,d') + \lambda V(d, d'))
| 0 , if x=y
with V(x,y) = | P1, if |x-y|=1
| P2, otherwise
and parameters P1=1 and P2=4.
Args:
cv: numpy array of shape [nodes M, disparities L] containing a cost volume slice
lam: lambda parameter of the energy
Returns:
numpy array containing the filtered costvolume slice
'''
P1=1.0
P2=4.0
sh = c.shape
M = sh[0]
L = sh[1]
S = c.copy().astype(np.float64)
for i in range(1,M): # loop over the nodes
### YOUR CODE HERE ###
minSim1 = np.min(S[i-1,:]) # precompute min of the previous node
for l in range(L):
minS = lam * P2 + minSim1 # 0 because of the normalization of the previous node
for lp in (l-1, l, l+1):
if lp>=0 and lp<L:
newS = S[i-1,lp] + lam * P1 * np.abs(l-lp)
if minS > newS:
minS = newS
S[i,l] = S[i,l] + minS
# this normalization removes the min of the previous node
#S[i,:] = S[i,:] - np.min(S[i,:]) #normalize
return S
def sgmfilter(CV, lam=8):
'''
SGM cost volume filtering along 4 directions
using the truncated regularity term V(with parameters P1=1,P2=4)
Args:
CV: numpy array of size [width, height, disparity] containing the costvolume
lam: lambda regularity parameter
Returns:
numpy array containing the filtered costvolume
'''
# compile the filterViterbiV function
viterbiV = jit(filterViterbiV, nopython=True)
S = np.zeros(CV.shape)
### YOUR CODE HERE ###
for i in range(CV.shape[0]):
fw = viterbiV(CV[i,:,:],lam)
bw = viterbiV(CV[i,::-1,:],lam)
S[i,:,:] += fw + bw[::-1]
for i in range(CV.shape[1]):
fw = viterbiV(CV[:,i,:],lam)
bw = viterbiV(CV[::-1,i,:],lam)
S[:,i,:] += fw + bw[::-1]
return S - 3*CV
@jit(nopython=True)
def VfitMinimum(v):
'''
interpolates the position of the subpixel minimum
given the samples (v) around the discrete minimum
according to the Vfit method illustrated below
v[0] *
\
\
\ * v[2]
\ /
v[1] * /
\/
v_min
^
____|____|_|__|____|_
-1 0 xm 1
Returns:
position of the minimum x_min (xm) and its value v_min
'''
# if we can't fit a V in the range [-1,1] then we leave the center
if( (v[1] > v[0]) and (v[1] > v[2]) ) :
v_min = v[1]
x_min = 0
return x_min, v_min
# select the maximum slope
slope = v[2] - v[1]
if ( slope < (v[0] - v[1]) ):
slope = v[0] - v[1]
# interpolate
x_min = (v[0] - v[2]) / (2*slope);
v_min = v[2] + (x_min - 1) * slope;
return x_min, v_min
# define the trivial winner-takes-all function
def WTA(CV):
'''computes the winner takes all of the cost volume CV'''
return np.argmin(CV,axis=2)
@jit(nopython=True)
def VfitWTA(CV, min_disp, min_cost):
'''computes the subpixel refined winner takes all of the cost volume CV'''
sh = CV.shape
for y in range(sh[0]):
for x in range(sh[1]):
md = int(min_disp[y,x])
#can only interpolate if the neighboring disparities are available
if md > 0 and md < sh[2]-1:
dmd, mc = VfitMinimum([float(CV[y,x,md-1]), float(CV[y,x,md]), float(CV[y,x,md+1])])
min_disp[y,x] = dmd +md
min_cost[y,x] = mc
return min_disp, min_cost
def stereoSGM(im1,im2,dmin,dmax,lam=10,cost='census',cw=3, win=1, subpix_refine=False):
'''
computes the disparity map from im1 to im2 using SGM
and optionally post filters the CV with a window of size (win X win).
cost selects the matching cots: sd, or census
Args:
im1,im2: numpy arrays containing the stereo pair (im1 is reference)
dmin,dmax: minimum and maximum disparity to be explored
lam:
cost: type of cost volume can be: 'sd' or 'census'
cw: census window size, used when cost='census'
win: aggregateCV window size (set to 1 to disable)
subpix_refine: activates the Vfit subpixel refinement (default False)
Returns:
numpy array containing the disparity map
'''
import time
start_time = time.time()
# generate the cost volume
if cost=='sd':
CV=costvolumeSD(im1, im2, dmin, dmax)
else:
CV=costvolumeCT(im1, im2, dmin, dmax, cw=7)
print ('t={:2.4f} done building CV'.format(time.time() - start_time))
CV = sgmfilter(CV,lam) # SGM
print ('t={:2.4f} done sgmfilter'.format(time.time() - start_time))
if win>1:
CV = aggregateCV(CV,win,win)
if subpix_refine: # WTA
d,_ = VfitWTA(CV, np.argmin(CV,axis=2).astype(np.float32), np.min(CV,axis=2).astype(np.float32))
else:
d = WTA(CV).astype(np.float32) # i.e. # d= np.argmin(CV,axis=2)
print ('t={:2.4f} done aggregation and WTA refinement'.format(time.time() - start_time))
# map from idx to disparity
## drange = np.array(range(dmin, dmax+1), dtype=float) # old code
## return drange[d]
return d+dmin
# a generic function to compute disparity maps from two rectified images using SGM
def compute_disparity_map(rect1, rect2, dmin, dmax, cost='census', lam=10):
'''
computes and filters the disparity map from im1 to im2 using SGM
cost selects the matching cots: sd, or census
Args:
im1,im2: numpy arrays containing the stereo pair (im1 is reference)
dmin,dmax: minimum and maximum disparity to be explored
cost: type of cost volume can be: 'sd' or 'census'
lam: lambda is the regularity parameter of SGM
Returns:
numpy array containing the filtered disparity map
'''
im1 , im2 = rect1, rect2
dmin, dmax = int(np.floor(-dmax)), int(np.ceil(-dmin))
# some reasonable parameters
#lam = 10 # lambda is a regularity parameter
cw = 5 # small census windows are good
win = 1 # this removes some streaking artifacts
subpix_refine = True
# compute left and right disparity maps
dL = stereoSGM(im1,im2,dmin,dmax,lam=lam,cost=cost, cw=cw, win=win, subpix_refine=subpix_refine)
dR = stereoSGM(im2,im1,-dmax,-dmin,lam=lam,cost=cost, cw=cw, win=win, subpix_refine=subpix_refine)
# apply mismatch filtering
LRS = mismatchFiltering(dL, dR, 50)
# minus sign here (due to different disparity conventions)
return -LRS, -dL, -dR