|
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ==============================================================================
|
|
|
|
"""Square box coder.
|
|
|
|
Square box coder follows the coding schema described below:
|
|
l = sqrt(h * w)
|
|
la = sqrt(ha * wa)
|
|
ty = (y - ya) / la
|
|
tx = (x - xa) / la
|
|
tl = log(l / la)
|
|
where x, y, w, h denote the box's center coordinates, width, and height,
|
|
respectively. Similarly, xa, ya, wa, ha denote the anchor's center
|
|
coordinates, width and height. tx, ty, tl denote the anchor-encoded
|
|
center, and length, respectively. Because the encoded box is a square, only
|
|
one length is encoded.
|
|
|
|
This has been shown to improve performance over the Faster RCNN box
|
|
coder when the objects being detected tend to be square (e.g. faces) and when
|
|
the input images are not distorted via resizing.
|
|
"""
|
|
|
|
import tensorflow as tf
|
|
|
|
from object_detection.core import box_coder
|
|
from object_detection.core import box_list
|
|
|
|
# Small constant added to the box and anchor side lengths during encoding so
# that the division and the log never see an exact zero (which would yield NaN).
EPSILON = 1e-8
|
|
|
|
|
|
class SquareBoxCoder(box_coder.BoxCoder):
  """Encodes a 3-scalar representation of a square box.

  Each box is represented relative to its anchor by a center offset (ty, tx)
  and a single log-scale side length tl, where the side length of a box is
  taken to be sqrt(height * width).
  """

  def __init__(self, scale_factors=None):
    """Constructor for SquareBoxCoder.

    Args:
      scale_factors: List of 3 positive scalars to scale ty, tx, and tl.
        If set to None, does not perform scaling. For faster RCNN,
        the open-source implementation recommends using [10.0, 10.0, 5.0].

    Raises:
      ValueError: If scale_factors is not length 3 or contains values less than
        or equal to 0.
    """
    # Compare against None instead of relying on truthiness: an empty sequence
    # is falsy and would otherwise bypass validation even though it is not of
    # length 3, and array-like inputs do not have a well-defined truth value.
    if scale_factors is not None:
      if len(scale_factors) != 3:
        raise ValueError('The argument scale_factors must be a list of length '
                         '3.')
      if any(scalar <= 0 for scalar in scale_factors):
        raise ValueError('The values in scale_factors must all be greater '
                         'than 0.')
    self._scale_factors = scale_factors

  @property
  def code_size(self):
    """Number of scalars in one encoded box: ty, tx and tl."""
    return 3

  def _encode(self, boxes, anchors):
    """Encodes a box collection with respect to an anchor collection.

    Args:
      boxes: BoxList holding N boxes to be encoded.
      anchors: BoxList of anchors.

    Returns:
      a tensor representing N anchor-encoded boxes of the format
      [ty, tx, tl].
    """
    # Convert anchors to the center coordinate representation.
    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
    la = tf.sqrt(ha * wa)
    ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
    l = tf.sqrt(h * w)
    # Avoid NaN in division and log below.
    la += EPSILON
    l += EPSILON

    tx = (xcenter - xcenter_a) / la
    ty = (ycenter - ycenter_a) / la
    tl = tf.log(l / la)
    # Scales location targets for joint training.
    if self._scale_factors is not None:
      ty *= self._scale_factors[0]
      tx *= self._scale_factors[1]
      tl *= self._scale_factors[2]
    # Stacking places the three components along the first axis; the transpose
    # puts them last so each row is one encoded box (assuming the per-box
    # quantities above are rank-1 tensors -- TODO confirm against BoxList).
    return tf.transpose(tf.stack([ty, tx, tl]))

  def _decode(self, rel_codes, anchors):
    """Decodes relative codes to boxes.

    Args:
      rel_codes: a tensor representing N anchor-encoded boxes.
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes.
    """
    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
    la = tf.sqrt(ha * wa)

    ty, tx, tl = tf.unstack(tf.transpose(rel_codes))
    # Undo the scaling applied by _encode before inverting the encoding.
    if self._scale_factors is not None:
      ty /= self._scale_factors[0]
      tx /= self._scale_factors[1]
      tl /= self._scale_factors[2]
    l = tf.exp(tl) * la
    ycenter = ty * la + ycenter_a
    xcenter = tx * la + xcenter_a
    # The decoded box is a square of side l centered at (ycenter, xcenter).
    ymin = ycenter - l / 2.
    xmin = xcenter - l / 2.
    ymax = ycenter + l / 2.
    xmax = xcenter + l / 2.
    return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
|