|
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ==============================================================================
|
|
|
|
"""Class Head.
|
|
|
|
Contains Class prediction head classes for different meta architectures.
|
|
All the class prediction heads have a predict function that receives the
|
|
`features` as the first argument and returns class predictions with background.
|
|
"""
|
|
import functools
|
|
import tensorflow as tf
|
|
|
|
from object_detection.predictors.heads import head
|
|
|
|
slim = tf.contrib.slim
|
|
|
|
|
|
class MaskRCNNClassHead(head.Head):
  """Class prediction head for Mask R-CNN style predictors.

  The head averages the input features over their spatial dimensions,
  optionally applies dropout, and maps the result to class predictions with a
  single fully connected layer.

  Please refer to the Mask RCNN paper:
  https://arxiv.org/abs/1703.06870
  """

  def __init__(self,
               is_training,
               num_class_slots,
               fc_hyperparams_fn,
               use_dropout,
               dropout_keep_prob):
    """Stores the configuration of the head.

    Args:
      is_training: Indicates whether the head is in training mode. Forwarded
        to the dropout op.
      num_class_slots: Number of class slots, used as the output width of the
        fully connected layer. Note that num_class_slots may or may not
        include an implicit background category.
      fc_hyperparams_fn: A function, called without arguments, that generates
        a tf-slim arg_scope with hyperparameters for fully connected ops.
      use_dropout: Whether to apply dropout to the flattened features before
        the fully connected layer.
      dropout_keep_prob: Keep probability for dropout.
        This is only used if use_dropout is True.
    """
    super(MaskRCNNClassHead, self).__init__()
    self._is_training = is_training
    self._num_class_slots = num_class_slots
    self._fc_hyperparams_fn = fc_hyperparams_fn
    self._use_dropout = use_dropout
    self._dropout_keep_prob = dropout_keep_prob

  def predict(self, features, num_predictions_per_location=1):
    """Predicts class scores from the given features.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing features for a batch of images.
      num_predictions_per_location: Int containing number of predictions per
        location. Only the value 1 is accepted.

    Returns:
      class_predictions_with_background: A float tensor of shape
        [batch_size, 1, num_class_slots] representing the class predictions for
        the proposals.

    Raises:
      ValueError: If num_predictions_per_location is not 1.
    """
    if num_predictions_per_location != 1:
      raise ValueError('Only num_predictions_per_location=1 is supported')

    # Average over the height and width axes, then drop the singleton
    # spatial dimensions so the fully connected layer sees a 2-D input.
    pooled = tf.reduce_mean(features, [1, 2], keep_dims=True, name='AvgPool')
    net = slim.flatten(pooled)

    if self._use_dropout:
      net = slim.dropout(
          net,
          keep_prob=self._dropout_keep_prob,
          is_training=self._is_training)

    with slim.arg_scope(self._fc_hyperparams_fn()):
      # activation_fn=None: the layer emits raw, un-activated predictions.
      scores = slim.fully_connected(
          net,
          self._num_class_slots,
          activation_fn=None,
          scope='ClassPredictor')

    output_shape = [-1, 1, self._num_class_slots]
    return tf.reshape(scores, output_shape)
|
|
|
|
|
|
class ConvolutionalClassHead(head.Head):
  """Convolutional class prediction head."""

  def __init__(self,
               is_training,
               num_class_slots,
               use_dropout,
               dropout_keep_prob,
               kernel_size,
               apply_sigmoid_to_scores=False,
               class_prediction_bias_init=0.0,
               use_depthwise=False):
    """Constructor.

    Args:
      is_training: Indicates whether the head is in training mode. Forwarded
        to the dropout op so that dropout is only active while training.
      num_class_slots: number of class slots. Note that num_class_slots may or
        may not include an implicit background category.
      use_dropout: Whether to apply dropout to the input features before the
        prediction convolution(s).
      dropout_keep_prob: Keep probability for dropout.
        This is only used if use_dropout is True.
      kernel_size: Size of the final convolution kernel. When use_depthwise is
        True this is the kernel size of the depthwise convolution, which is
        followed by a 1x1 convolution.
      apply_sigmoid_to_scores: if True, apply the sigmoid on the output
        class_predictions.
      class_prediction_bias_init: constant value to initialize bias of the last
        conv2d layer before class prediction.
      use_depthwise: Whether to use depthwise convolutions for prediction
        steps. Default is False.
    """
    super(ConvolutionalClassHead, self).__init__()
    self._is_training = is_training
    self._num_class_slots = num_class_slots
    self._use_dropout = use_dropout
    self._dropout_keep_prob = dropout_keep_prob
    self._kernel_size = kernel_size
    self._apply_sigmoid_to_scores = apply_sigmoid_to_scores
    self._class_prediction_bias_init = class_prediction_bias_init
    self._use_depthwise = use_depthwise

  def predict(self, features, num_predictions_per_location):
    """Predicts class scores.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing image features.
      num_predictions_per_location: Number of predictions to be made per
        spatial location.

    Returns:
      class_predictions_with_background: A float tensor of shape
        [batch_size, num_anchors, num_class_slots] representing the class
        predictions for the proposals.
    """
    net = features
    if self._use_dropout:
      # Pass is_training explicitly: slim.dropout defaults to
      # is_training=True, which would keep dropout active at inference unless
      # the caller happened to wrap this call in a matching arg_scope.
      net = slim.dropout(
          net,
          keep_prob=self._dropout_keep_prob,
          is_training=self._is_training)

    if self._use_depthwise:
      # num_outputs=None makes separable_conv2d run only the depthwise step;
      # the 1x1 convolution below then acts as the pointwise projection.
      net = slim.separable_conv2d(
          net, None, [self._kernel_size, self._kernel_size],
          padding='SAME', depth_multiplier=1, stride=1,
          rate=1, scope='ClassPredictor_depthwise')
      final_kernel = [1, 1]
    else:
      final_kernel = [self._kernel_size, self._kernel_size]

    # Both branches share the final prediction layer so that
    # class_prediction_bias_init is honored with and without depthwise
    # convolutions.
    class_predictions_with_background = slim.conv2d(
        net,
        num_predictions_per_location * self._num_class_slots,
        final_kernel,
        activation_fn=None,
        normalizer_fn=None,
        normalizer_params=None,
        scope='ClassPredictor',
        biases_initializer=tf.constant_initializer(
            self._class_prediction_bias_init))

    if self._apply_sigmoid_to_scores:
      class_predictions_with_background = tf.sigmoid(
          class_predictions_with_background)

    # Prefer the static batch size; fall back to the dynamic one when it is
    # not known at graph-construction time.
    batch_size = features.get_shape().as_list()[0]
    if batch_size is None:
      batch_size = tf.shape(features)[0]
    class_predictions_with_background = tf.reshape(
        class_predictions_with_background,
        [batch_size, -1, self._num_class_slots])
    return class_predictions_with_background
|
|
|
|
|
|
# TODO(alirezafathi): See if possible to unify Weight Shared with regular
|
|
# convolutional class head.
|
|
class WeightSharedConvolutionalClassHead(head.Head):
  """Weight shared convolutional class prediction head.

  This head allows sharing the same set of parameters (weights) when called
  more than once on different feature maps.
  """

  def __init__(self,
               num_class_slots,
               kernel_size=3,
               class_prediction_bias_init=0.0,
               use_dropout=False,
               dropout_keep_prob=0.8,
               use_depthwise=False,
               score_converter_fn=tf.identity):
    """Stores the configuration of the head.

    Args:
      num_class_slots: number of class slots. Note that num_class_slots may or
        may not include an implicit background category.
      kernel_size: Size of the final convolution kernel.
      class_prediction_bias_init: constant value used to initialize the bias
        of the class prediction convolution.
      use_dropout: Whether to apply dropout to the input features of the
        class prediction head.
      dropout_keep_prob: Probability of keeping activations.
      use_depthwise: Whether to use a separable (depthwise) convolution for
        the prediction step. Default is False.
      score_converter_fn: Callable elementwise nonlinearity (that takes tensors
        as inputs and returns tensors), applied to the convolution output.
    """
    super(WeightSharedConvolutionalClassHead, self).__init__()
    self._num_class_slots = num_class_slots
    self._kernel_size = kernel_size
    self._class_prediction_bias_init = class_prediction_bias_init
    self._use_dropout = use_dropout
    self._dropout_keep_prob = dropout_keep_prob
    self._use_depthwise = use_depthwise
    self._score_converter_fn = score_converter_fn

  def predict(self, features, num_predictions_per_location):
    """Predicts class scores.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing image features.
      num_predictions_per_location: Number of predictions to be made per
        spatial location.

    Returns:
      class_predictions_with_background: A tensor of shape
        [batch_size, num_anchors, num_class_slots] representing the class
        predictions for the proposals.
    """
    net = features
    if self._use_dropout:
      net = slim.dropout(net, keep_prob=self._dropout_keep_prob)

    # Pick the convolution flavor once; both accept the same arguments below.
    conv_op = (
        functools.partial(slim.separable_conv2d, depth_multiplier=1)
        if self._use_depthwise else slim.conv2d)

    num_outputs = num_predictions_per_location * self._num_class_slots
    kernel = [self._kernel_size, self._kernel_size]
    scores = conv_op(
        net,
        num_outputs,
        kernel,
        activation_fn=None, stride=1, padding='SAME',
        normalizer_fn=None,
        biases_initializer=tf.constant_initializer(
            self._class_prediction_bias_init),
        scope='ClassPredictor')

    # Use the static batch size when it is known, otherwise the dynamic one.
    static_batch_size = features.get_shape().as_list()[0]
    batch_size = (
        tf.shape(features)[0]
        if static_batch_size is None else static_batch_size)

    scores = self._score_converter_fn(scores)
    return tf.reshape(scores, [batch_size, -1, self._num_class_slots])
|