# bilinear_sampler.py

# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
# Copyright 2017 Modifications Clement Godard.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import, division, print_function
import tensorflow as tf

def bilinear_sampler_1d_h(input_images, x_offset, wrap_mode='border', name='bilinear_sampler', **kwargs):
    """Resample images along the horizontal axis with linear interpolation.

    The output pixel at row ``y``, column ``x`` is read from the input at
    column ``x + x_offset * width`` of the same row, linearly blending the two
    nearest integer columns.

    Args:
        input_images: 4-D tensor indexed as [batch, height, width, channels].
            The interpolation weights are float32, so a float32 image is
            expected.
        x_offset: float32 horizontal offsets expressed as a fraction of the
            image width (they are multiplied by the width before use). The
            tensor is flattened and added element-wise to the pixel grid, so
            it must hold batch * height * width values, e.g. a
            [batch, height, width, 1] tensor.
        wrap_mode: how samples outside the image are handled.
            'border' pads the image with a one-pixel ring of zeros, so samples
            falling outside the image fade to zero; 'edge' clamps the sample
            position to the first/last column.
        name: name of the enclosing variable scope.
        **kwargs: accepted for call compatibility and ignored.

    Returns:
        Tensor of shape [batch, height, width, channels] holding the
        resampled images.

    Raises:
        ValueError: if ``wrap_mode`` is neither 'border' nor 'edge'.
    """
    # Fail fast with a clear message. Previously an unknown mode made
    # _interpolate return None, which only surfaced later as an obscure
    # error when that None was handed to tf.reshape.
    if wrap_mode not in ('border', 'edge'):
        raise ValueError(
            "wrap_mode must be 'border' or 'edge', got {!r}".format(wrap_mode))

    def _repeat(x, n_repeats):
        # Repeat each element of the 1-D tensor x n_repeats times in a row:
        # [a, b] -> [a, a, ..., b, b, ...].
        with tf.variable_scope('_repeat'):
            rep = tf.tile(tf.expand_dims(x, 1), [1, n_repeats])
            return tf.reshape(rep, [-1])

    def _interpolate(im, x, y):
        # Sample im at the flat float coordinates (x, y), interpolating
        # linearly between the left and right neighbouring columns.
        with tf.variable_scope('_interpolate'):

            # Handle both texture border types.
            if wrap_mode == 'border':
                # Pad one row/column of zeros on every side of height and
                # width; coordinates shift by one to stay aligned.
                _edge_size = 1
                im = tf.pad(im, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='CONSTANT')
                x = x + _edge_size
                y = y + _edge_size
            else:  # 'edge': no padding, positions are clamped to the image.
                _edge_size = 0

            # Largest valid column index of the (possibly padded) image.
            max_x = _width_f - 1 + 2 * _edge_size

            # Keep the sample position inside the (possibly padded) image.
            x = tf.clip_by_value(x, 0.0, max_x)

            # Left neighbour is floor(x); right neighbour is one column over.
            x0_f = tf.floor(x)
            y0_f = tf.floor(y)
            x1_f = x0_f + 1

            x0 = tf.cast(x0_f, tf.int32)
            y0 = tf.cast(y0_f, tf.int32)
            # The right neighbour index is clamped so the gather stays in
            # bounds; x1_f itself stays unclamped so the weights below still
            # sum to one.
            x1 = tf.cast(tf.minimum(x1_f, max_x), tf.int32)

            dim2 = (_width + 2 * _edge_size)  # padded row length
            dim1 = (_width + 2 * _edge_size) * (_height + 2 * _edge_size)  # padded pixels per image

            # Flat index of the first pixel of each image in the batch
            # ([0, dim1, 2*dim1, ...]), repeated once per output pixel so it
            # lines up with the flattened coordinates.
            base = _repeat(tf.range(_num_batch) * dim1, _height * _width)
            base_y0 = base + y0 * dim2
            idx_l = base_y0 + x0
            idx_r = base_y0 + x1

            # View the image as a list of pixels so one gather fetches all
            # channels of a neighbour at once.
            im_flat = tf.reshape(im, tf.stack([-1, _num_channels]))

            pix_l = tf.gather(im_flat, idx_l)
            pix_r = tf.gather(im_flat, idx_r)

            # Linear weights: the closer neighbour contributes more.
            weight_l = tf.expand_dims(x1_f - x, 1)
            weight_r = tf.expand_dims(x - x0_f, 1)

            return weight_l * pix_l + weight_r * pix_r

    def _transform(input_images, x_offset):
        # Build the sampling grid, shift it horizontally by x_offset and
        # resample input_images at the shifted positions.
        with tf.variable_scope('transform'):
            # Pixel-coordinate grids: x_t holds column indices, y_t row indices.
            x_t, y_t = tf.meshgrid(tf.linspace(0.0, _width_f - 1.0, _width),
                                   tf.linspace(0.0, _height_f - 1.0, _height))

            # Flatten each grid to a single row:
            #   x_t_flat: [0 1 2 .. 0 1 2 ..]
            #   y_t_flat: [0 0 0 .. 1 1 1 ..]
            x_t_flat = tf.reshape(x_t, (1, -1))
            y_t_flat = tf.reshape(y_t, (1, -1))

            # Replicate the grid once per image in the batch.
            x_t_flat = tf.tile(x_t_flat, tf.stack([_num_batch, 1]))
            y_t_flat = tf.tile(y_t_flat, tf.stack([_num_batch, 1]))

            # Flatten to 1-D so each entry corresponds to one output pixel.
            x_t_flat = tf.reshape(x_t_flat, [-1])
            y_t_flat = tf.reshape(y_t_flat, [-1])

            # Offsets are fractions of the image width; scale them to pixels
            # and shift the x coordinates.
            x_t_flat = x_t_flat + tf.reshape(x_offset, [-1]) * _width_f

            input_transformed = _interpolate(input_images, x_t_flat, y_t_flat)

            output = tf.reshape(
                input_transformed, tf.stack([_num_batch, _height, _width, _num_channels]))
            return output

    with tf.variable_scope(name):
        # Dynamic shapes, so the sampler works for any batch/image size.
        _num_batch    = tf.shape(input_images)[0]
        _height       = tf.shape(input_images)[1]
        _width        = tf.shape(input_images)[2]
        _num_channels = tf.shape(input_images)[3]

        _height_f = tf.cast(_height, tf.float32)
        _width_f  = tf.cast(_width,  tf.float32)

        output = _transform(input_images, x_offset)
        return output