# yigitcolakoglu / MyCity

# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Keypoint box coder.

The keypoint box coder follows the coding schema described below (this is
similar to the FasterRcnnBoxCoder, except that it encodes keypoints in addition
to box coordinates):
  ty = (y - ya) / ha
  tx = (x - xa) / wa
  th = log(h / ha)
  tw = log(w / wa)
  tky0 = (ky0 - ya) / ha
  tkx0 = (kx0 - xa) / wa
  tky1 = (ky1 - ya) / ha
  tkx1 = (kx1 - xa) / wa
  ...
  where x, y, w, h denote the box's center coordinates, width and height
  respectively. Similarly, xa, ya, wa, ha denote the anchor's center
  coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
  center, width and height respectively. ky0, kx0, ky1, kx1, ... denote the
  keypoints' coordinates, and tky0, tkx0, tky1, tkx1, ... denote the
  anchor-encoded keypoint coordinates.
"""

import tensorflow as tf

from object_detection.core import box_coder
from object_detection.core import box_list
from object_detection.core import standard_fields as fields

# Small constant added to box/anchor heights and widths before they are used
# as divisors or inside a logarithm, so degenerate (zero-sized) boxes do not
# produce NaN/Inf values.
EPSILON = 1e-8

class KeypointBoxCoder(box_coder.BoxCoder):
  """Keypoint box coder.

  Encodes boxes together with their keypoints relative to a set of anchors,
  and decodes such relative codes back into boxes and keypoints.
  """

  def __init__(self, num_keypoints, scale_factors=None):
    """Constructor for KeypointBoxCoder.

    Args:
      num_keypoints: Number of keypoints to encode/decode.
      scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
        In addition to scaling ty and tx, the first 2 scalars are used to scale
        the y and x coordinates of the keypoints as well. If set to None, does
        not perform scaling.

    Raises:
      AssertionError: if scale_factors is not None and does not hold exactly
        4 positive scalars.
    """
    self._num_keypoints = num_keypoints

    # Validate whenever a value is supplied (not only when it is truthy), so
    # that an empty sequence is rejected here instead of failing later with an
    # unrelated IndexError while building the keypoint scale tensor.
    if scale_factors is not None:
      assert len(scale_factors) == 4
      for scalar in scale_factors:
        assert scalar > 0
    self._scale_factors = scale_factors
    self._keypoint_scale_factors = None
    if scale_factors is not None:
      # Column of shape [2 * num_keypoints, 1] holding the y/x scale factors
      # repeated once per keypoint: [sy, sx, sy, sx, ...].
      self._keypoint_scale_factors = tf.expand_dims(tf.tile(
          [tf.to_float(scale_factors[0]), tf.to_float(scale_factors[1])],
          [num_keypoints]), 1)

  @property
  def code_size(self):
    """Length of one code: 4 box values plus 2 values per keypoint."""
    return 4 + self._num_keypoints * 2

  def _encode(self, boxes, anchors):
    """Encode a box and keypoint collection with respect to anchor collection.

    Args:
      boxes: BoxList holding N boxes and keypoints to be encoded. Boxes are
        tensors with the shape [N, 4], and keypoints are tensors with the shape
        [N, num_keypoints, 2].
      anchors: BoxList of anchors.

    Returns:
      a tensor representing N anchor-encoded boxes of the format
      [ty, tx, th, tw, tky0, tkx0, tky1, tkx1, ...] where tky0 and tkx0
      represent the y and x coordinates of the first keypoint, tky1 and tkx1
      represent the y and x coordinates of the second keypoint, and so on.
    """
    # Convert anchors to the center coordinate representation.
    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
    ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
    keypoints = boxes.get_field(fields.BoxListFields.keypoints)
    # Flatten each box's keypoints to one row of [ky0, kx0, ky1, kx1, ...] and
    # transpose so that coordinates run along axis 0 and boxes along axis 1.
    keypoints = tf.transpose(tf.reshape(keypoints,
                                        [-1, self._num_keypoints * 2]))
    num_boxes = boxes.num_boxes()

    # Avoid NaN in division and log below.
    ha += EPSILON
    wa += EPSILON
    h += EPSILON
    w += EPSILON

    tx = (xcenter - xcenter_a) / wa
    ty = (ycenter - ycenter_a) / ha
    tw = tf.log(w / wa)
    th = tf.log(h / ha)

    # Repeat the anchor (y, x) centers and (h, w) sizes once per keypoint so
    # they line up row-by-row with the transposed keypoint coordinates.
    tiled_anchor_centers = tf.tile(
        tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
    tiled_anchor_sizes = tf.tile(
        tf.stack([ha, wa]), [self._num_keypoints, 1])
    tkeypoints = (keypoints - tiled_anchor_centers) / tiled_anchor_sizes

    # Scales location targets as used in paper for joint training.
    if self._scale_factors is not None:
      ty *= self._scale_factors[0]
      tx *= self._scale_factors[1]
      th *= self._scale_factors[2]
      tw *= self._scale_factors[3]
      tkeypoints *= tf.tile(self._keypoint_scale_factors, [1, num_boxes])

    tboxes = tf.stack([ty, tx, th, tw])
    # Concatenate box and keypoint rows, then transpose back to one code per
    # row.
    return tf.transpose(tf.concat([tboxes, tkeypoints], 0))

  def _decode(self, rel_codes, anchors):
    """Decode relative codes to boxes and keypoints.

    Args:
      rel_codes: a tensor with shape [N, 4 + 2 * num_keypoints] representing N
        anchor-encoded boxes and keypoints
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes and keypoints.
    """
    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()

    num_codes = tf.shape(rel_codes)[0]
    # One tensor per code component: the first four are the box terms, the
    # rest are the keypoint terms in (y, x) order.
    result = tf.unstack(tf.transpose(rel_codes))
    ty, tx, th, tw = result[:4]
    # NOTE(review): tkeypoints is a Python list of tensors here; the
    # arithmetic below presumably relies on TensorFlow's operator overloads
    # packing the list into a single tensor -- confirm before refactoring.
    tkeypoints = result[4:]
    if self._scale_factors is not None:
      ty /= self._scale_factors[0]
      tx /= self._scale_factors[1]
      th /= self._scale_factors[2]
      tw /= self._scale_factors[3]
      tkeypoints /= tf.tile(self._keypoint_scale_factors, [1, num_codes])

    # Invert the box encoding, then convert center/size to corner form.
    w = tf.exp(tw) * wa
    h = tf.exp(th) * ha
    ycenter = ty * ha + ycenter_a
    xcenter = tx * wa + xcenter_a
    ymin = ycenter - h / 2.
    xmin = xcenter - w / 2.
    ymax = ycenter + h / 2.
    xmax = xcenter + w / 2.
    decoded_boxes_keypoints = box_list.BoxList(
        tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))

    # Invert the keypoint encoding using the same per-keypoint tiling of the
    # anchor centers and sizes that _encode uses.
    tiled_anchor_centers = tf.tile(
        tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
    tiled_anchor_sizes = tf.tile(
        tf.stack([ha, wa]), [self._num_keypoints, 1])
    keypoints = tkeypoints * tiled_anchor_sizes + tiled_anchor_centers
    keypoints = tf.reshape(tf.transpose(keypoints),
                           [-1, self._num_keypoints, 2])
    decoded_boxes_keypoints.add_field(fields.BoxListFields.keypoints, keypoints)
    return decoded_boxes_keypoints