You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

284 lines
11 KiB

6 years ago
  1. # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """Class Head.
  16. Contains Class prediction head classes for different meta architectures.
  17. All the class prediction heads have a predict function that receives the
  18. `features` as the first argument and returns class predictions with background.
  19. """
  20. import functools
  21. import tensorflow as tf
  22. from object_detection.predictors.heads import head
  23. slim = tf.contrib.slim
  class MaskRCNNClassHead(head.Head):
    """Class prediction head for Mask R-CNN.

    See the Mask R-CNN paper for background:
    https://arxiv.org/abs/1703.06870
    """

    def __init__(self,
                 is_training,
                 num_class_slots,
                 fc_hyperparams_fn,
                 use_dropout,
                 dropout_keep_prob):
      """Stores the configuration of the head.

      Args:
        is_training: Whether the head is built in training mode. Forwarded to
          the dropout op.
        num_class_slots: Number of class slots, i.e. the size of the last
          dimension of the predictions. May or may not include an implicit
          background category.
        fc_hyperparams_fn: Callable returning a tf-slim arg_scope that holds
          the hyperparameters of the fully connected layer.
        use_dropout: Whether to apply dropout to the flattened features before
          the fully connected layer.
        dropout_keep_prob: Keep probability for dropout. Only used when
          use_dropout is True.
      """
      super(MaskRCNNClassHead, self).__init__()
      self._is_training = is_training
      self._num_class_slots = num_class_slots
      self._fc_hyperparams_fn = fc_hyperparams_fn
      self._use_dropout = use_dropout
      self._dropout_keep_prob = dropout_keep_prob

    def predict(self, features, num_predictions_per_location=1):
      """Predicts class scores for a batch of ROI-pooled features.

      Args:
        features: A float tensor of shape [batch_size, height, width, channels]
          containing features for a batch of images.
        num_predictions_per_location: Int number of predictions per location.
          Must be 1.

      Returns:
        class_predictions_with_background: A float tensor of shape
          [batch_size, 1, num_class_slots] representing the class predictions
          for the proposals.

      Raises:
        ValueError: If num_predictions_per_location is not 1.
      """
      if num_predictions_per_location != 1:
        raise ValueError('Only num_predictions_per_location=1 is supported')

      # Average over the two spatial axes, then drop them by flattening.
      pooled = tf.reduce_mean(features, [1, 2], keep_dims=True, name='AvgPool')
      net = slim.flatten(pooled)

      if self._use_dropout:
        net = slim.dropout(
            net,
            keep_prob=self._dropout_keep_prob,
            is_training=self._is_training)

      # A single linear layer (no activation) produces one score per slot.
      with slim.arg_scope(self._fc_hyperparams_fn()):
        scores = slim.fully_connected(
            net,
            self._num_class_slots,
            activation_fn=None,
            scope='ClassPredictor')

      # Insert a singleton axis so the output is [batch, 1, num_class_slots].
      return tf.reshape(scores, [-1, 1, self._num_class_slots])
  class ConvolutionalClassHead(head.Head):
    """Convolutional class prediction head."""

    def __init__(self,
                 is_training,
                 num_class_slots,
                 use_dropout,
                 dropout_keep_prob,
                 kernel_size,
                 apply_sigmoid_to_scores=False,
                 class_prediction_bias_init=0.0,
                 use_depthwise=False):
      """Stores the configuration of the head.

      Args:
        is_training: Whether the head is built in training mode. Forwarded to
          the dropout op.
        num_class_slots: Number of class slots. May or may not include an
          implicit background category.
        use_dropout: Whether to apply dropout to the input features before the
          prediction convolution(s).
        dropout_keep_prob: Keep probability for dropout. Only used when
          use_dropout is True.
        kernel_size: Size of the final convolution kernel, used for both
          height and width. The value is passed to the convolution as is; this
          class does not clamp it to the feature-map size.
        apply_sigmoid_to_scores: If True, apply a sigmoid to the output class
          predictions.
        class_prediction_bias_init: Constant value used to initialize the bias
          of the last conv2d layer before class prediction.
        use_depthwise: Whether to use a depthwise separable convolution
          followed by a 1x1 convolution for prediction. Default is False.
      """
      super(ConvolutionalClassHead, self).__init__()
      self._is_training = is_training
      self._num_class_slots = num_class_slots
      self._use_dropout = use_dropout
      self._dropout_keep_prob = dropout_keep_prob
      self._kernel_size = kernel_size
      self._apply_sigmoid_to_scores = apply_sigmoid_to_scores
      self._class_prediction_bias_init = class_prediction_bias_init
      self._use_depthwise = use_depthwise

    def predict(self, features, num_predictions_per_location):
      """Predicts class scores for every anchor of a feature map.

      Args:
        features: A float tensor of shape [batch_size, height, width, channels]
          containing image features.
        num_predictions_per_location: Number of predictions to be made per
          spatial location.

      Returns:
        class_predictions_with_background: A float tensor of shape
          [batch_size, num_anchors, num_class_slots] representing the class
          predictions for the proposals.
      """
      net = features
      if self._use_dropout:
        # Pass is_training explicitly so dropout is disabled at inference even
        # when the caller has not wrapped this call in a slim.dropout
        # arg_scope.
        net = slim.dropout(
            net,
            keep_prob=self._dropout_keep_prob,
            is_training=self._is_training)

      if self._use_depthwise:
        # Depthwise-only spatial convolution (num_outputs=None skips the
        # pointwise step); the 1x1 convolution below does the channel mixing.
        net = slim.separable_conv2d(
            net, None, [self._kernel_size, self._kernel_size],
            padding='SAME', depth_multiplier=1, stride=1,
            rate=1, scope='ClassPredictor_depthwise')
        final_kernel = [1, 1]
      else:
        final_kernel = [self._kernel_size, self._kernel_size]

      # The final prediction layer is identical on both paths, including the
      # bias initializer, so class_prediction_bias_init is honored with and
      # without depthwise convolutions.
      class_predictions_with_background = slim.conv2d(
          net,
          num_predictions_per_location * self._num_class_slots,
          final_kernel,
          activation_fn=None,
          normalizer_fn=None,
          normalizer_params=None,
          scope='ClassPredictor',
          biases_initializer=tf.constant_initializer(
              self._class_prediction_bias_init))

      if self._apply_sigmoid_to_scores:
        class_predictions_with_background = tf.sigmoid(
            class_predictions_with_background)

      # Prefer the static batch size; fall back to the dynamic one when it is
      # unknown at graph construction time.
      batch_size = features.get_shape().as_list()[0]
      if batch_size is None:
        batch_size = tf.shape(features)[0]

      # Collapse the spatial and per-location axes into a single anchor axis.
      class_predictions_with_background = tf.reshape(
          class_predictions_with_background,
          [batch_size, -1, self._num_class_slots])
      return class_predictions_with_background
  180. # TODO(alirezafathi): See if possible to unify Weight Shared with regular
  181. # convolutional class head.
  class WeightSharedConvolutionalClassHead(head.Head):
    """Weight shared convolutional class prediction head.

    This head allows sharing the same set of parameters (weights) when called
    more than once on different feature maps.
    """

    def __init__(self,
                 num_class_slots,
                 kernel_size=3,
                 class_prediction_bias_init=0.0,
                 use_dropout=False,
                 dropout_keep_prob=0.8,
                 use_depthwise=False,
                 score_converter_fn=tf.identity):
      """Stores the configuration of the head.

      Args:
        num_class_slots: Number of class slots. May or may not include an
          implicit background category.
        kernel_size: Size of the final convolution kernel, used for both
          height and width.
        class_prediction_bias_init: Constant value used to initialize the bias
          of the last conv2d layer before class prediction.
        use_dropout: Whether to apply dropout to the class prediction head.
        dropout_keep_prob: Probability of keeping activations.
        use_depthwise: Whether to use depthwise separable convolutions for the
          prediction step. Default is False.
        score_converter_fn: Callable elementwise nonlinearity (takes a tensor
          and returns a tensor) applied to the raw predictions.
      """
      super(WeightSharedConvolutionalClassHead, self).__init__()
      self._num_class_slots = num_class_slots
      self._kernel_size = kernel_size
      self._class_prediction_bias_init = class_prediction_bias_init
      self._use_dropout = use_dropout
      self._dropout_keep_prob = dropout_keep_prob
      self._use_depthwise = use_depthwise
      self._score_converter_fn = score_converter_fn

    def predict(self, features, num_predictions_per_location):
      """Predicts class scores for every anchor of a feature map.

      Args:
        features: A float tensor of shape [batch_size, height, width, channels]
          containing image features.
        num_predictions_per_location: Number of predictions to be made per
          spatial location.

      Returns:
        class_predictions_with_background: A tensor of shape
          [batch_size, num_anchors, num_class_slots] representing the class
          predictions for the proposals.
      """
      net = features
      if self._use_dropout:
        # NOTE(review): no is_training is passed here, so the train/inference
        # switch presumably comes from an enclosing slim.arg_scope -- confirm
        # against callers.
        net = slim.dropout(net, keep_prob=self._dropout_keep_prob)

      conv_op = (
          functools.partial(slim.separable_conv2d, depth_multiplier=1)
          if self._use_depthwise else slim.conv2d)
      kernel = [self._kernel_size, self._kernel_size]
      num_outputs = num_predictions_per_location * self._num_class_slots
      raw_scores = conv_op(
          net,
          num_outputs,
          kernel,
          activation_fn=None, stride=1, padding='SAME',
          normalizer_fn=None,
          biases_initializer=tf.constant_initializer(
              self._class_prediction_bias_init),
          scope='ClassPredictor')

      # Prefer the static batch size; fall back to the dynamic one when it is
      # unknown at graph construction time.
      static_batch_size = features.get_shape().as_list()[0]
      batch_size = (
          tf.shape(features)[0] if static_batch_size is None
          else static_batch_size)

      converted_scores = self._score_converter_fn(raw_scores)
      # Collapse the spatial and per-location axes into a single anchor axis.
      return tf.reshape(
          converted_scores, [batch_size, -1, self._num_class_slots])
  254. return class_predictions_with_background