# dense_image_warp.py

# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Modified from the TensorFlow bilinear interpolation code to add
# Catmull-Rom (bicubic) interpolation.
# ==============================================================================
"""Image warping using per-pixel flow vectors."""

from re import X
import tensorflow as tf

from tensorflow_addons.utils import types
from typing import Optional


def interpolate_catmull_rom(
    grid: types.TensorLike,
    query_points: types.TensorLike,
    indexing: str = "ij",
    name: Optional[str] = None,
) -> tf.Tensor:
    """Finds values for query points on a grid using Catmull-Rom splines.

    Every query point is evaluated from the 4x4 block of grid samples that
    surrounds it, combined with separable cubic Catmull-Rom weights. Taps of
    that block which would fall outside the grid are replaced by the nearest
    border sample, and query coordinates outside the grid are clamped to the
    border. Because two of the four weights are negative inside a cell, the
    result may slightly overshoot the range of the input values.

    Args:
      grid: a 4-D float `Tensor` of shape `[batch, height, width, channels]`.
      query_points: a 3-D float `Tensor` of N points with shape
        `[batch, N, 2]`.
      indexing: whether the query points are specified as row and column (ij),
        or Cartesian coordinates (xy).
      name: a name for the operation (optional).

    Returns:
      values: a 3-D `Tensor` with shape `[batch, N, channels]` and the dtype
        of `grid`.

    Raises:
      ValueError: if the indexing mode is invalid, or if the statically known
        shape of the inputs is invalid (grid not 4-D, height or width below 2,
        query points not 3-D, or last query dimension not 2).
    """
    if indexing != "ij" and indexing != "xy":
        raise ValueError("Indexing mode must be 'ij' or 'xy'")

    with tf.name_scope(name or "interpolate_catmull_rom"):
        grid = tf.convert_to_tensor(grid)
        query_points = tf.convert_to_tensor(query_points)

        # grid shape checks (only possible when the static shape is known)
        grid_static_shape = grid.shape
        grid_shape = tf.shape(grid)
        if grid_static_shape.dims is not None:
            if len(grid_static_shape) != 4:
                raise ValueError("Grid must be 4D Tensor")
            if grid_static_shape[1] is not None and grid_static_shape[1] < 2:
                raise ValueError("Grid height must be at least 2.")
            if grid_static_shape[2] is not None and grid_static_shape[2] < 2:
                raise ValueError("Grid width must be at least 2.")

        # query_points shape checks
        query_static_shape = query_points.shape
        query_shape = tf.shape(query_points)
        if query_static_shape.dims is not None:
            if len(query_static_shape) != 3:
                raise ValueError("Query points must be 3 dimensional.")
            query_hw = query_static_shape[2]
            if query_hw is not None and query_hw != 2:
                raise ValueError("Query points last dimension must be 2.")

        batch_size, height, width, channels = (
            grid_shape[0],
            grid_shape[1],
            grid_shape[2],
            grid_shape[3],
        )

        num_queries = query_shape[1]

        query_type = query_points.dtype
        grid_type = grid.dtype

        # After this loop, index 0 of `floors`/`alphas` always refers to the
        # row (height) axis and index 1 to the column (width) axis, whatever
        # the requested indexing mode is.
        floors = []
        alphas = []
        index_order = [0, 1] if indexing == "ij" else [1, 0]
        unstacked_query_points = tf.unstack(query_points, axis=2, num=2)

        for i, dim in enumerate(index_order):
            with tf.name_scope("dim-" + str(dim)):
                queries = unstacked_query_points[dim]

                size_in_indexing_dimension = grid_shape[i + 1]

                # max_floor is size_in_indexing_dimension - 2 so that
                # max_floor + 1 is still a valid index into the grid.
                max_floor = tf.cast(size_in_indexing_dimension - 2, query_type)
                min_floor = tf.constant(0.0, dtype=query_type)
                floor = tf.math.minimum(
                    tf.math.maximum(min_floor, tf.math.floor(queries)), max_floor
                )
                floors.append(tf.cast(floor, tf.dtypes.int32))

                # alpha has the same type as the grid, as we will directly use
                # alpha when taking linear combinations of pixel values.
                alpha = tf.cast(queries - floor, grid_type)
                min_alpha = tf.constant(0.0, dtype=grid_type)
                max_alpha = tf.constant(1.0, dtype=grid_type)
                alpha = tf.math.minimum(tf.math.maximum(min_alpha, alpha), max_alpha)

                # Expand alpha to [b, n, 1] so we can use broadcasting
                # (since the alpha values don't depend on the channel).
                alphas.append(tf.expand_dims(alpha, 2))

        flattened_grid = tf.reshape(grid, [batch_size * height * width, channels])
        batch_offsets = tf.reshape(
            tf.range(batch_size) * height * width, [batch_size, 1]
        )

        # This wraps tf.gather. We reshape the image data such that the
        # batch, y, and x coordinates are pulled into the first dimension.
        # Then we gather. Finally, we reshape the output back.
        def gather(y_coords, x_coords, scope):
            with tf.name_scope("gather-" + scope):
                linear_coordinates = batch_offsets + y_coords * width + x_coords
                gathered = tf.gather(flattened_grid, linear_coordinates)
                return tf.reshape(gathered, [batch_size, num_queries, channels])

        # Cubic Catmull-Rom weights for the taps at offsets -1, 0, +1, +2
        # relative to the floor sample. The four weights sum to one.
        def catmull_rom_weights(alpha):
            with tf.name_scope("alphas"):
                alpha2 = alpha * alpha
                alpha3 = alpha2 * alpha
                w0 = alpha2 - 0.5 * (alpha3 + alpha)
                w1 = 1.5 * alpha3 - 2.5 * alpha2 + 1.0
                w3 = 0.5 * (alpha3 - alpha2)
                w2 = 1.0 - w0 - w1 - w3
                return [w0, w1, w2, w3]

        # Integer coordinates of the four taps along one axis. `floor` lies in
        # [0, size - 2], so floor - 1 and floor + 2 can leave the grid; without
        # clamping, the flattened index would address a neighbouring row or
        # batch element, or fall outside the tensor altogether.
        def tap_coords(floor, size):
            last = size - 1
            return [
                tf.math.minimum(tf.math.maximum(floor + offset, 0), last)
                for offset in (-1, 0, 1, 2)
            ]

        y_taps = tap_coords(floors[0], height)
        x_taps = tap_coords(floors[1], width)
        y_weights = catmull_rom_weights(alphas[0])
        x_weights = catmull_rom_weights(alphas[1])

        # Separable kernel: each of the 16 samples is scaled by the product of
        # its row weight and its column weight.
        terms = []
        for j in range(4):
            for i in range(4):
                sample = gather(y_taps[i], x_taps[j], "%di%dj" % (i, j))
                terms.append(sample * y_weights[i] * x_weights[j])

        return tf.math.reduce_sum(tf.stack(terms, axis=0), axis=0)
        
def _get_dim(x, idx):
    """Returns the size of axis `idx` of `x`.

    The static size is preferred; the dynamic `tf.shape` value is used when
    the rank is unknown or the static entry is falsy (unknown or zero).
    """
    static_shape = x.shape
    if static_shape.ndims is not None:
        static_dim = static_shape[idx]
        if static_dim:
            return static_dim
    return tf.shape(x)[idx]

@tf.function
def interpolate_bilinear(
    grid: types.TensorLike,
    query_points: types.TensorLike,
    indexing: str = "ij",
    name: Optional[str] = None,
) -> tf.Tensor:
    """Finds values for query points on a grid using bilinear interpolation.

    Each query point is evaluated from the 2x2 block of grid samples that
    surrounds it. Query coordinates outside the grid are clamped to the
    border.

    Args:
      grid: a 4-D float `Tensor` of shape `[batch, height, width, channels]`.
      query_points: a 3-D float `Tensor` of N points with shape
        `[batch, N, 2]`.
      indexing: whether the query points are specified as row and column (ij),
        or Cartesian coordinates (xy).
      name: a name for the operation (optional).

    Returns:
      values: a 3-D `Tensor` with shape `[batch, N, channels]` and the dtype
        of `grid`.

    Raises:
      ValueError: if the indexing mode is invalid, or if the statically known
        shape of the inputs is invalid (grid not 4-D, height or width below 2,
        query points not 3-D, or last query dimension not 2).
    """
    if indexing != "ij" and indexing != "xy":
        raise ValueError("Indexing mode must be 'ij' or 'xy'")

    with tf.name_scope(name or "interpolate_bilinear"):
        grid = tf.convert_to_tensor(grid)
        query_points = tf.convert_to_tensor(query_points)

        # grid shape checks (only possible when the static shape is known)
        grid_static_shape = grid.shape
        grid_shape = tf.shape(grid)
        if grid_static_shape.dims is not None:
            if len(grid_static_shape) != 4:
                raise ValueError("Grid must be 4D Tensor")
            if grid_static_shape[1] is not None and grid_static_shape[1] < 2:
                raise ValueError("Grid height must be at least 2.")
            if grid_static_shape[2] is not None and grid_static_shape[2] < 2:
                raise ValueError("Grid width must be at least 2.")

        # query_points shape checks
        query_static_shape = query_points.shape
        query_shape = tf.shape(query_points)
        if query_static_shape.dims is not None:
            if len(query_static_shape) != 3:
                raise ValueError("Query points must be 3 dimensional.")
            query_hw = query_static_shape[2]
            if query_hw is not None and query_hw != 2:
                raise ValueError("Query points last dimension must be 2.")

        batch_size, height, width, channels = (
            grid_shape[0],
            grid_shape[1],
            grid_shape[2],
            grid_shape[3],
        )

        num_queries = query_shape[1]

        query_type = query_points.dtype
        grid_type = grid.dtype

        # After this loop, index 0 of each list always refers to the row
        # (height) axis and index 1 to the column (width) axis, whatever the
        # requested indexing mode is.
        alphas = []
        floors = []
        ceils = []
        index_order = [0, 1] if indexing == "ij" else [1, 0]
        unstacked_query_points = tf.unstack(query_points, axis=2, num=2)

        for i, dim in enumerate(index_order):
            with tf.name_scope("dim-" + str(dim)):
                queries = unstacked_query_points[dim]

                size_in_indexing_dimension = grid_shape[i + 1]

                # max_floor is size_in_indexing_dimension - 2 so that max_floor + 1
                # is still a valid index into the grid.
                max_floor = tf.cast(size_in_indexing_dimension - 2, query_type)
                min_floor = tf.constant(0.0, dtype=query_type)
                floor = tf.math.minimum(
                    tf.math.maximum(min_floor, tf.math.floor(queries)), max_floor
                )
                int_floor = tf.cast(floor, tf.dtypes.int32)
                floors.append(int_floor)
                ceils.append(int_floor + 1)

                # alpha has the same type as the grid, as we will directly use alpha
                # when taking linear combinations of pixel values from the image.
                alpha = tf.cast(queries - floor, grid_type)
                min_alpha = tf.constant(0.0, dtype=grid_type)
                max_alpha = tf.constant(1.0, dtype=grid_type)
                alpha = tf.math.minimum(tf.math.maximum(min_alpha, alpha), max_alpha)

                # Expand alpha to [b, n, 1] so we can use broadcasting
                # (since the alpha values don't depend on the channel).
                alphas.append(tf.expand_dims(alpha, 2))

        # These do not depend on the dimension being processed, so they are
        # built once, after the per-dimension loop.
        flattened_grid = tf.reshape(grid, [batch_size * height * width, channels])
        batch_offsets = tf.reshape(
            tf.range(batch_size) * height * width, [batch_size, 1]
        )

        # This wraps tf.gather. We reshape the image data such that the
        # batch, y, and x coordinates are pulled into the first dimension.
        # Then we gather. Finally, we reshape the output back. It's possible this
        # code would be made simpler by using tf.gather_nd.
        def gather(y_coords, x_coords, scope):
            with tf.name_scope("gather-" + scope):
                linear_coordinates = batch_offsets + y_coords * width + x_coords
                gathered_values = tf.gather(flattened_grid, linear_coordinates)
                return tf.reshape(gathered_values, [batch_size, num_queries, channels])

        # grab the pixel values in the 4 corners around each query point
        top_left = gather(floors[0], floors[1], "top_left")
        top_right = gather(floors[0], ceils[1], "top_right")
        bottom_left = gather(ceils[0], floors[1], "bottom_left")
        bottom_right = gather(ceils[0], ceils[1], "bottom_right")

        # now, do the actual interpolation: first along the columns of the top
        # and bottom rows, then between those two rows
        with tf.name_scope("interpolate"):
            interp_top = alphas[1] * (top_right - top_left) + top_left
            interp_bottom = alphas[1] * (bottom_right - bottom_left) + bottom_left
            interp = alphas[0] * (interp_bottom - interp_top) + interp_top

        return interp

# NOTE: this re-defines the identical `_get_dim` helper that already appears
# earlier in this module; the later definition is the one that stays bound.
def _get_dim(x, idx):
    """Returns the static size of axis `idx` of `x`, else the dynamic one."""
    rank_is_known = x.shape.ndims is not None
    static_dim = x.shape[idx] if rank_is_known else None
    # A falsy static entry (unknown or zero) defers to the runtime shape.
    return static_dim if static_dim else tf.shape(x)[idx]


@tf.function
def dense_image_warp_catmull(
    image: types.TensorLike, flow: types.TensorLike, name: Optional[str] = None
) -> tf.Tensor:
    """Warps an image with a dense flow field using Catmull-Rom sampling.

    The flow field holds one offset vector per output pixel; each vector
    points from the output pixel back to the location in the source image
    whose value it should take. Concretely, `output[b, j, i, c]` is sampled
    from `images[b, j - flow[b, j, i, 0], i - flow[b, j, i, 1], c]`.

    Those source locations are generally not integer indices, so the value
    is obtained by Catmull-Rom interpolation over the 4x4 neighbourhood of
    pixels around the location (see `interpolate_catmull_rom`).

    NOTE: This flow convention differs from optical flow. The function
    expects the negative forward flow from output image to source image.
    Given two images `I_1` and `I_2` and the optical flow `F_12` from `I_1`
    to `I_2`, `I_1` can be reconstructed as
    `I_1_rec = dense_image_warp_catmull(I_2, -F_12)`.

    Args:
      image: 4-D float `Tensor` with shape `[batch, height, width, channels]`.
      flow: A 4-D float `Tensor` with shape `[batch, height, width, 2]`.
      name: A name for the operation (optional).

      `image` and `flow` do not have to share a dtype; the pixel grid is
      cast to the dtype of `flow` before the offsets are applied.

    Returns:
      A 4-D float `Tensor` with shape `[batch, height, width, channels]`
        and the same type as the input image.

    Raises:
      ValueError: if the statically known `height` or `width` is below 2, or
        the inputs have the wrong number of dimensions.
    """
    with tf.name_scope(name or "dense_image_warp"):
        image = tf.convert_to_tensor(image)
        flow = tf.convert_to_tensor(flow)
        batch_size, height, width, channels = [
            _get_dim(image, axis) for axis in range(4)
        ]

        # Build the (row, col) coordinate of every output pixel, then subtract
        # the flow to obtain where each pixel samples the source image.
        cols, rows = tf.meshgrid(tf.range(width), tf.range(height))
        pixel_coords = tf.stack([rows, cols], axis=2)
        pixel_coords = tf.cast(pixel_coords, flow.dtype)
        sample_coords = tf.expand_dims(pixel_coords, axis=0) - flow

        # The interpolator takes a flat list of points per batch element, so
        # flatten the spatial axes and restore them on the result.
        flat_coords = tf.reshape(sample_coords, [batch_size, height * width, 2])
        warped = interpolate_catmull_rom(image, flat_coords)
        return tf.reshape(warped, [batch_size, height, width, channels])