# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Box Head.

Contains Box prediction head classes for different meta architectures.
All the box prediction heads have a predict function that receives the
`features` as the first argument and returns `box_encodings`.
"""
import functools
import tensorflow as tf

from object_detection.predictors.heads import head

slim = tf.contrib.slim


class MaskRCNNBoxHead(head.Head):
  """Box prediction head.

  Please refer to Mask RCNN paper:
  https://arxiv.org/abs/1703.06870
  """

  def __init__(self,
               is_training,
               num_classes,
               fc_hyperparams_fn,
               use_dropout,
               dropout_keep_prob,
               box_code_size,
               share_box_across_classes=False):
    """Constructor.

    Args:
      is_training: Indicates whether the BoxPredictor is in training mode.
      num_classes: Number of classes. Note that num_classes *does not*
        include the background category, so if groundtruth labels take values
        in {0, 1, ..., K-1}, num_classes=K (and not K+1, even though the
        assigned classification targets can range over {0, ..., K}).
      fc_hyperparams_fn: A function to generate tf-slim arg_scope with
        hyperparameters for fully connected ops.
      use_dropout: Option to use dropout or not. Note that a single dropout
        op is applied here prior to both box and class predictions, which
        stands in contrast to the ConvolutionalBoxPredictor below.
      dropout_keep_prob: Keep probability for dropout. This is only used if
        use_dropout is True.
      box_code_size: Size of encoding for each box.
      share_box_across_classes: Whether to share boxes across classes rather
        than use a different box for each class.
    """
    super(MaskRCNNBoxHead, self).__init__()
    self._is_training = is_training
    self._num_classes = num_classes
    self._fc_hyperparams_fn = fc_hyperparams_fn
    self._use_dropout = use_dropout
    self._dropout_keep_prob = dropout_keep_prob
    self._box_code_size = box_code_size
    self._share_box_across_classes = share_box_across_classes

  def predict(self, features, num_predictions_per_location=1):
    """Predicts boxes.

    Args:
      features: A float tensor of shape [batch_size, height, width,
        channels] containing features for a batch of images.
      num_predictions_per_location: Int containing number of predictions per
        location.

    Returns:
      box_encodings: A float tensor of shape
        [batch_size, 1, num_classes, code_size] representing the location of
        the objects.

    Raises:
      ValueError: If num_predictions_per_location is not 1.
    """
    if num_predictions_per_location != 1:
      raise ValueError('Only num_predictions_per_location=1 is supported')
    spatial_averaged_roi_pooled_features = tf.reduce_mean(
        features, [1, 2], keep_dims=True, name='AvgPool')
    flattened_roi_pooled_features = slim.flatten(
        spatial_averaged_roi_pooled_features)
    if self._use_dropout:
      flattened_roi_pooled_features = slim.dropout(
          flattened_roi_pooled_features,
          keep_prob=self._dropout_keep_prob,
          is_training=self._is_training)
    number_of_boxes = 1
    if not self._share_box_across_classes:
      number_of_boxes = self._num_classes

    with slim.arg_scope(self._fc_hyperparams_fn()):
      box_encodings = slim.fully_connected(
          flattened_roi_pooled_features,
          number_of_boxes * self._box_code_size,
          activation_fn=None,
          scope='BoxEncodingPredictor')
    box_encodings = tf.reshape(box_encodings,
                               [-1, 1, number_of_boxes, self._box_code_size])
    return box_encodings
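

# Illustrative usage sketch for MaskRCNNBoxHead, assuming TF 1.x graph mode
# with tf.contrib.slim. `_example_fc_hyperparams_fn` is a hypothetical
# stand-in for the arg_scope function normally produced by
# object_detection.builders.hyperparams_builder; it only exists to make the
# sketch self-contained.


def _example_fc_hyperparams_fn():
  # Hypothetical: capture and return a minimal arg_scope for fully
  # connected ops, mirroring the scope_fn pattern used by the builders.
  with slim.arg_scope(
      [slim.fully_connected],
      weights_initializer=tf.truncated_normal_initializer(stddev=0.01)) as sc:
    return sc


def _example_mask_rcnn_box_head_usage():
  # Predict 4-d box codes for 8 ROI-pooled regions over 90 classes.
  box_head_instance = MaskRCNNBoxHead(
      is_training=False,
      num_classes=90,
      fc_hyperparams_fn=_example_fc_hyperparams_fn,
      use_dropout=False,
      dropout_keep_prob=0.5,
      box_code_size=4)
  roi_features = tf.zeros([8, 7, 7, 1024], dtype=tf.float32)
  # The head spatially averages the 7x7 features before the fully connected
  # layer, so the result has shape [8, 1, 90, 4].
  return box_head_instance.predict(roi_features)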


class ConvolutionalBoxHead(head.Head):
  """Convolutional box prediction head."""

  def __init__(self,
               is_training,
               box_code_size,
               kernel_size,
               use_depthwise=False):
    """Constructor.

    Args:
      is_training: Indicates whether the BoxPredictor is in training mode.
      box_code_size: Size of encoding for each box.
      kernel_size: Size of final convolution kernel. If the spatial
        resolution of the feature map is smaller than the kernel size, then
        the kernel size is automatically set to
        min(feature_width, feature_height).
      use_depthwise: Whether to use depthwise convolutions for prediction
        steps. Default is False.
    """
    super(ConvolutionalBoxHead, self).__init__()
    self._is_training = is_training
    self._box_code_size = box_code_size
    self._kernel_size = kernel_size
    self._use_depthwise = use_depthwise

  def predict(self, features, num_predictions_per_location):
    """Predicts boxes.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing image features.
      num_predictions_per_location: Int specifying the number of box
        predictions to be made per spatial location.

    Returns:
      box_encodings: A float tensor of shape
        [batch_size, num_anchors, q, code_size] representing the location of
        the objects, where q is 1 or the number of classes.
    """
    net = features
    if self._use_depthwise:
      box_encodings = slim.separable_conv2d(
          net, None, [self._kernel_size, self._kernel_size],
          padding='SAME', depth_multiplier=1, stride=1,
          rate=1, scope='BoxEncodingPredictor_depthwise')
      box_encodings = slim.conv2d(
          box_encodings,
          num_predictions_per_location * self._box_code_size, [1, 1],
          activation_fn=None,
          normalizer_fn=None,
          normalizer_params=None,
          scope='BoxEncodingPredictor')
    else:
      box_encodings = slim.conv2d(
          net, num_predictions_per_location * self._box_code_size,
          [self._kernel_size, self._kernel_size],
          activation_fn=None,
          normalizer_fn=None,
          normalizer_params=None,
          scope='BoxEncodingPredictor')
    batch_size = features.get_shape().as_list()[0]
    if batch_size is None:
      batch_size = tf.shape(features)[0]
    box_encodings = tf.reshape(box_encodings,
                               [batch_size, -1, 1, self._box_code_size])
    return box_encodings
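

# Illustrative usage sketch for ConvolutionalBoxHead, assuming TF 1.x graph
# mode with tf.contrib.slim. The zero-filled feature map is a stand-in for
# a real backbone output: on a 19x19 map with 6 anchors per location, the
# head emits 19 * 19 * 6 = 2166 anchor encodings.


def _example_convolutional_box_head_usage():
  box_head_instance = ConvolutionalBoxHead(
      is_training=False,
      box_code_size=4,
      kernel_size=3)
  feature_map = tf.zeros([2, 19, 19, 512], dtype=tf.float32)
  # Resulting shape: [2, 2166, 1, 4]; q=1 because this head predicts
  # class-agnostic boxes.
  return box_head_instance.predict(
      feature_map, num_predictions_per_location=6)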


# TODO(alirezafathi): See if possible to unify Weight Shared with regular
# convolutional box head.
class WeightSharedConvolutionalBoxHead(head.Head):
  """Weight shared convolutional box prediction head.

  This head allows sharing the same set of parameters (weights) when called
  more than once on different feature maps.
  """

  def __init__(self,
               box_code_size,
               kernel_size=3,
               use_depthwise=False,
               box_encodings_clip_range=None):
    """Constructor.

    Args:
      box_code_size: Size of encoding for each box.
      kernel_size: Size of final convolution kernel.
      use_depthwise: Whether to use depthwise convolutions for prediction
        steps. Default is False.
      box_encodings_clip_range: Min and max values for clipping box_encodings.
    """
    super(WeightSharedConvolutionalBoxHead, self).__init__()
    self._box_code_size = box_code_size
    self._kernel_size = kernel_size
    self._use_depthwise = use_depthwise
    self._box_encodings_clip_range = box_encodings_clip_range

  def predict(self, features, num_predictions_per_location):
    """Predicts boxes.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing image features.
      num_predictions_per_location: Number of box predictions to be made per
        spatial location.

    Returns:
      box_encodings: A float tensor of shape
        [batch_size, num_anchors, code_size] representing the location of
        the objects.
    """
    box_encodings_net = features
    if self._use_depthwise:
      conv_op = functools.partial(slim.separable_conv2d, depth_multiplier=1)
    else:
      conv_op = slim.conv2d
    box_encodings = conv_op(
        box_encodings_net,
        num_predictions_per_location * self._box_code_size,
        [self._kernel_size, self._kernel_size],
        activation_fn=None, stride=1, padding='SAME',
        normalizer_fn=None,
        scope='BoxPredictor')
    batch_size = features.get_shape().as_list()[0]
    if batch_size is None:
      batch_size = tf.shape(features)[0]
    # Clipping the box encodings to make the inference graph TPU friendly.
    if self._box_encodings_clip_range is not None:
      box_encodings = tf.clip_by_value(
          box_encodings, self._box_encodings_clip_range.min,
          self._box_encodings_clip_range.max)
    box_encodings = tf.reshape(box_encodings,
                               [batch_size, -1, self._box_code_size])
    return box_encodings
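

# Illustrative usage sketch for WeightSharedConvolutionalBoxHead, assuming
# TF 1.x graph mode with tf.contrib.slim. The weight sharing comes from
# calling predict on several feature maps under one reused variable scope,
# as a feature-pyramid predictor would; the 'SharedBoxHead' scope below is
# a hypothetical minimal version of that wiring.


def _example_weight_shared_box_head_usage():
  box_head_instance = WeightSharedConvolutionalBoxHead(box_code_size=4)
  feature_maps = [
      tf.zeros([2, 19, 19, 256], dtype=tf.float32),
      tf.zeros([2, 10, 10, 256], dtype=tf.float32),
  ]
  box_encodings_list = []
  for index, feature_map in enumerate(feature_maps):
    # Reusing the scope on every call after the first makes the conv
    # weights shared across all feature map levels.
    with tf.variable_scope('SharedBoxHead', reuse=(index > 0)):
      box_encodings_list.append(
          box_head_instance.predict(
              feature_map, num_predictions_per_location=6))
  # Shapes [2, 2166, 4] and [2, 600, 4]; concatenate along the anchor axis.
  return tf.concat(box_encodings_list, axis=1)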