-
Notifications
You must be signed in to change notification settings - Fork 0
/
bilinear_sampler.py
131 lines (109 loc) · 5.24 KB
/
bilinear_sampler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
# Copyright 2017 Modifications Clement Godard.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import, division, print_function
import tensorflow as tf
def bilinear_sampler_1d_h(input_images, x_offset, wrap_mode='border', name='bilinear_sampler', **kwargs):
    """Resample images along the horizontal axis using linear interpolation.

    Every output pixel (row, col) is read from the input at the same row and
    at horizontal position ``col + x_offset * width``; the value is linearly
    interpolated between the two nearest columns.

    Args:
        input_images: Rank-4 tensor indexed as [batch, height, width,
            channels].
        x_offset: Horizontal offsets expressed as a fraction of the image
            width (they are multiplied by the width before use). Flattened
            internally, so it must hold batch * height * width values,
            e.g. shape [batch, height, width, 1].
        wrap_mode: How samples falling outside the image are handled.
            ``'border'`` zero-pads the image by one pixel on each spatial
            side, so out-of-range samples read zeros; ``'edge'`` clamps
            the sampling position to the first/last column.
        name: Name of the enclosing variable scope.
        **kwargs: Accepted for call-site compatibility and ignored.

    Returns:
        Tensor shaped [batch, height, width, channels] holding the
        resampled images.

    Raises:
        ValueError: If ``wrap_mode`` is neither ``'border'`` nor ``'edge'``.
    """
    # Fail early with a clear message. Previously an unknown mode made
    # _interpolate return None, which only blew up later inside tf.reshape
    # with an error unrelated to wrap_mode.
    if wrap_mode not in ('border', 'edge'):
        raise ValueError(
            "wrap_mode must be 'border' or 'edge', got {!r}".format(wrap_mode))

    def _repeat(x, n_repeats):
        """Repeat each element of 1-D `x` `n_repeats` times, consecutively."""
        with tf.variable_scope('_repeat'):
            rep = tf.tile(tf.expand_dims(x, 1), [1, n_repeats])
            return tf.reshape(rep, [-1])

    def _interpolate(im, x, y):
        """Gather and blend the left/right neighbours of each (x, y) sample."""
        with tf.variable_scope('_interpolate'):
            # Handle both texture border types.
            if _wrap_mode == 'border':
                # Pad one row/column of zeros on each side of height and
                # width, then shift the coordinates by one to compensate.
                _edge_size = 1
                im = tf.pad(im, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='CONSTANT')
                x = x + _edge_size
                y = y + _edge_size
            else:
                # 'edge': no padding; clamping below keeps x inside the image.
                _edge_size = 0

            # Clamp x to the valid column range of the (possibly padded)
            # image: [0, width - 1 + 2 * edge]. y is never shifted by the
            # offsets, so it is not clamped.
            x = tf.clip_by_value(x, 0.0, _width_f - 1 + 2 * _edge_size)

            # Left neighbour is floor(x); right neighbour is one column on.
            x0_f = tf.floor(x)
            y0_f = tf.floor(y)
            x1_f = x0_f + 1

            x0 = tf.cast(x0_f, tf.int32)
            y0 = tf.cast(y0_f, tf.int32)
            # The right neighbour index must not run past the last column.
            x1 = tf.cast(tf.minimum(x1_f, _width_f - 1 + 2 * _edge_size), tf.int32)

            # Row stride and per-image stride of the (padded) image once it
            # is flattened to [pixels, channels].
            dim2 = (_width + 2 * _edge_size)
            dim1 = (_width + 2 * _edge_size) * (_height + 2 * _edge_size)

            # Flat index of the first pixel of each image in the batch,
            # [0, dim1, 2 * dim1, ...], with every entry repeated once per
            # output pixel (height * width times).
            base = _repeat(tf.range(_num_batch) * dim1, _height * _width)
            base_y0 = base + y0 * dim2
            idx_l = base_y0 + x0
            idx_r = base_y0 + x1

            im_flat = tf.reshape(im, tf.stack([-1, _num_channels]))

            pix_l = tf.gather(im_flat, idx_l)
            pix_r = tf.gather(im_flat, idx_r)

            # Linear weights: each neighbour is weighted by the distance of
            # the sample from the *other* neighbour.
            weight_l = tf.expand_dims(x1_f - x, 1)
            weight_r = tf.expand_dims(x - x0_f, 1)

            return weight_l * pix_l + weight_r * pix_r

    def _transform(input_images, x_offset):
        """Build the sampling grid, apply the offsets and interpolate."""
        with tf.variable_scope('transform'):
            # grid of (x_t, y_t, 1), eq (1) in ref [1]
            # Pixel-coordinate grids for the x and y axes.
            x_t, y_t = tf.meshgrid(tf.linspace(0.0, _width_f - 1.0, _width),
                                   tf.linspace(0.0, _height_f - 1.0, _height))

            # Flatten to a single row:
            #   x_t_flat: [0 1 2 .. 0 1 2 ..]
            #   y_t_flat: [0 0 0 .. 1 1 1 ..]
            x_t_flat = tf.reshape(x_t, (1, -1))
            y_t_flat = tf.reshape(y_t, (1, -1))

            # Replicate the row once per batch element.
            x_t_flat = tf.tile(x_t_flat, tf.stack([_num_batch, 1]))
            y_t_flat = tf.tile(y_t_flat, tf.stack([_num_batch, 1]))

            # Flatten to 1-D.
            x_t_flat = tf.reshape(x_t_flat, [-1])
            y_t_flat = tf.reshape(y_t_flat, [-1])

            # Offsets are fractions of the image width, so scale them to
            # pixels before adding them to the x coordinates.
            x_t_flat = x_t_flat + tf.reshape(x_offset, [-1]) * _width_f

            input_transformed = _interpolate(input_images, x_t_flat, y_t_flat)

            output = tf.reshape(
                input_transformed, tf.stack([_num_batch, _height, _width, _num_channels]))
            return output

    with tf.variable_scope(name):
        # Dynamic shape components; the nested helpers read these through
        # their closure when they are called below.
        _num_batch = tf.shape(input_images)[0]
        _height = tf.shape(input_images)[1]
        _width = tf.shape(input_images)[2]
        _num_channels = tf.shape(input_images)[3]

        _height_f = tf.cast(_height, tf.float32)
        _width_f = tf.cast(_width, tf.float32)

        _wrap_mode = wrap_mode

        output = _transform(input_images, x_offset)
        return output