You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

328 lines
13 KiB

6 years ago
  1. # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """A wrapper around the Keras MobilenetV1 models for object detection."""
  16. from __future__ import absolute_import
  17. from __future__ import division
  18. from __future__ import print_function
  19. import tensorflow as tf
  20. from object_detection.core import freezable_batch_norm
  21. def _fixed_padding(inputs, kernel_size, rate=1): # pylint: disable=invalid-name
  22. """Pads the input along the spatial dimensions independently of input size.
  23. Pads the input such that if it was used in a convolution with 'VALID' padding,
  24. the output would have the same dimensions as if the unpadded input was used
  25. in a convolution with 'SAME' padding.
  26. Args:
  27. inputs: A tensor of size [batch, height_in, width_in, channels].
  28. kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
  29. rate: An integer, rate for atrous convolution.
  30. Returns:
  31. output: A tensor of size [batch, height_out, width_out, channels] with the
  32. input, either intact (if kernel_size == 1) or padded (if kernel_size > 1).
  33. """
  34. kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1),
  35. kernel_size[0] + (kernel_size[0] - 1) * (rate - 1)]
  36. pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1]
  37. pad_beg = [pad_total[0] // 2, pad_total[1] // 2]
  38. pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]]
  39. padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]],
  40. [pad_beg[1], pad_end[1]], [0, 0]])
  41. return padded_inputs
  42. class _LayersOverride(object):
  43. """Alternative Keras layers interface for the Keras MobileNetV1."""
  44. def __init__(self,
  45. batchnorm_training,
  46. default_batchnorm_momentum=0.999,
  47. conv_hyperparams=None,
  48. use_explicit_padding=False,
  49. alpha=1.0,
  50. min_depth=None):
  51. """Alternative tf.keras.layers interface, for use by the Keras MobileNetV1.
  52. It is used by the Keras applications kwargs injection API to
  53. modify the MobilenetV1 Keras application with changes required by
  54. the Object Detection API.
  55. These injected interfaces make the following changes to the network:
  56. - Applies the Object Detection hyperparameter configuration
  57. - Supports FreezableBatchNorms
  58. - Adds support for a min number of filters for each layer
  59. - Makes the `alpha` parameter affect the final convolution block even if it
  60. is less than 1.0
  61. - Adds support for explicit padding of convolutions
  62. Args:
  63. batchnorm_training: Bool. Assigned to Batch norm layer `training` param
  64. when constructing `freezable_batch_norm.FreezableBatchNorm` layers.
  65. default_batchnorm_momentum: Float. When 'conv_hyperparams' is None,
  66. batch norm layers will be constructed using this value as the momentum.
  67. conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
  68. containing hyperparameters for convolution ops. Optionally set to `None`
  69. to use default mobilenet_v1 layer builders.
  70. use_explicit_padding: If True, use 'valid' padding for convolutions,
  71. but explicitly pre-pads inputs so that the output dimensions are the
  72. same as if 'same' padding were used. Off by default.
  73. alpha: The width multiplier referenced in the MobileNetV1 paper. It
  74. modifies the number of filters in each convolutional layer. It's called
  75. depth multiplier in Keras application MobilenetV1.
  76. min_depth: Minimum number of filters in the convolutional layers.
  77. """
  78. self._alpha = alpha
  79. self._batchnorm_training = batchnorm_training
  80. self._default_batchnorm_momentum = default_batchnorm_momentum
  81. self._conv_hyperparams = conv_hyperparams
  82. self._use_explicit_padding = use_explicit_padding
  83. self._min_depth = min_depth
  84. self.regularizer = tf.keras.regularizers.l2(0.00004 * 0.5)
  85. self.initializer = tf.truncated_normal_initializer(stddev=0.09)
  86. def _FixedPaddingLayer(self, kernel_size, rate=1):
  87. return tf.keras.layers.Lambda(
  88. lambda x: _fixed_padding(x, kernel_size, rate))
  89. def Conv2D(self, filters, kernel_size, **kwargs):
  90. """Builds a Conv2D layer according to the current Object Detection config.
  91. Overrides the Keras MobileNetV1 application's convolutions with ones that
  92. follow the spec specified by the Object Detection hyperparameters.
  93. Args:
  94. filters: The number of filters to use for the convolution.
  95. kernel_size: The kernel size to specify the height and width of the 2D
  96. convolution window.
  97. **kwargs: Keyword args specified by the Keras application for
  98. constructing the convolution.
  99. Returns:
  100. A one-arg callable that will either directly apply a Keras Conv2D layer to
  101. the input argument, or that will first pad the input then apply a Conv2D
  102. layer.
  103. """
  104. # Apply the width multiplier and the minimum depth to the convolution layers
  105. filters = int(filters * self._alpha)
  106. if self._min_depth and filters < self._min_depth:
  107. filters = self._min_depth
  108. if self._conv_hyperparams:
  109. kwargs = self._conv_hyperparams.params(**kwargs)
  110. else:
  111. kwargs['kernel_regularizer'] = self.regularizer
  112. kwargs['kernel_initializer'] = self.initializer
  113. kwargs['padding'] = 'same'
  114. if self._use_explicit_padding and kernel_size > 1:
  115. kwargs['padding'] = 'valid'
  116. def padded_conv(features): # pylint: disable=invalid-name
  117. padded_features = self._FixedPaddingLayer(kernel_size)(features)
  118. return tf.keras.layers.Conv2D(
  119. filters, kernel_size, **kwargs)(padded_features)
  120. return padded_conv
  121. else:
  122. return tf.keras.layers.Conv2D(filters, kernel_size, **kwargs)
  123. def DepthwiseConv2D(self, kernel_size, **kwargs):
  124. """Builds a DepthwiseConv2D according to the Object Detection config.
  125. Overrides the Keras MobileNetV2 application's convolutions with ones that
  126. follow the spec specified by the Object Detection hyperparameters.
  127. Args:
  128. kernel_size: The kernel size to specify the height and width of the 2D
  129. convolution window.
  130. **kwargs: Keyword args specified by the Keras application for
  131. constructing the convolution.
  132. Returns:
  133. A one-arg callable that will either directly apply a Keras DepthwiseConv2D
  134. layer to the input argument, or that will first pad the input then apply
  135. the depthwise convolution.
  136. """
  137. if self._conv_hyperparams:
  138. kwargs = self._conv_hyperparams.params(**kwargs)
  139. else:
  140. kwargs['depthwise_initializer'] = self.initializer
  141. kwargs['padding'] = 'same'
  142. if self._use_explicit_padding:
  143. kwargs['padding'] = 'valid'
  144. def padded_depthwise_conv(features): # pylint: disable=invalid-name
  145. padded_features = self._FixedPaddingLayer(kernel_size)(features)
  146. return tf.keras.layers.DepthwiseConv2D(
  147. kernel_size, **kwargs)(padded_features)
  148. return padded_depthwise_conv
  149. else:
  150. return tf.keras.layers.DepthwiseConv2D(kernel_size, **kwargs)
  151. def BatchNormalization(self, **kwargs):
  152. """Builds a normalization layer.
  153. Overrides the Keras application batch norm with the norm specified by the
  154. Object Detection configuration.
  155. Args:
  156. **kwargs: Only the name is used, all other params ignored.
  157. Required for matching `layers.BatchNormalization` calls in the Keras
  158. application.
  159. Returns:
  160. A normalization layer specified by the Object Detection hyperparameter
  161. configurations.
  162. """
  163. name = kwargs.get('name')
  164. if self._conv_hyperparams:
  165. return self._conv_hyperparams.build_batch_norm(
  166. training=self._batchnorm_training,
  167. name=name)
  168. else:
  169. return freezable_batch_norm.FreezableBatchNorm(
  170. training=self._batchnorm_training,
  171. epsilon=1e-3,
  172. momentum=self._default_batchnorm_momentum,
  173. name=name)
  174. def Input(self, shape):
  175. """Builds an Input layer.
  176. Overrides the Keras application Input layer with one that uses a
  177. tf.placeholder_with_default instead of a tf.placeholder. This is necessary
  178. to ensure the application works when run on a TPU.
  179. Args:
  180. shape: The shape for the input layer to use. (Does not include a dimension
  181. for the batch size).
  182. Returns:
  183. An input layer for the specified shape that internally uses a
  184. placeholder_with_default.
  185. """
  186. default_size = 224
  187. default_batch_size = 1
  188. shape = list(shape)
  189. default_shape = [default_size if dim is None else dim for dim in shape]
  190. input_tensor = tf.constant(0.0, shape=[default_batch_size] + default_shape)
  191. placeholder_with_default = tf.placeholder_with_default(
  192. input=input_tensor, shape=[None] + shape)
  193. return tf.keras.layers.Input(tensor=placeholder_with_default)
  194. # pylint: disable=unused-argument
  195. def ReLU(self, *args, **kwargs):
  196. """Builds an activation layer.
  197. Overrides the Keras application ReLU with the activation specified by the
  198. Object Detection configuration.
  199. Args:
  200. *args: Ignored, required to match the `tf.keras.ReLU` interface
  201. **kwargs: Only the name is used,
  202. required to match `tf.keras.ReLU` interface
  203. Returns:
  204. An activation layer specified by the Object Detection hyperparameter
  205. configurations.
  206. """
  207. name = kwargs.get('name')
  208. if self._conv_hyperparams:
  209. return self._conv_hyperparams.build_activation_layer(name=name)
  210. else:
  211. return tf.keras.layers.Lambda(tf.nn.relu6, name=name)
  212. # pylint: enable=unused-argument
  213. # pylint: disable=unused-argument
  214. def ZeroPadding2D(self, padding, **kwargs):
  215. """Replaces explicit padding in the Keras application with a no-op.
  216. Args:
  217. padding: The padding values for image height and width.
  218. **kwargs: Ignored, required to match the Keras applications usage.
  219. Returns:
  220. A no-op identity lambda.
  221. """
  222. return lambda x: x
  223. # pylint: enable=unused-argument
  224. # Forward all non-overridden methods to the keras layers
  225. def __getattr__(self, item):
  226. return getattr(tf.keras.layers, item)
  227. # pylint: disable=invalid-name
  228. def mobilenet_v1(batchnorm_training,
  229. default_batchnorm_momentum=0.9997,
  230. conv_hyperparams=None,
  231. use_explicit_padding=False,
  232. alpha=1.0,
  233. min_depth=None,
  234. **kwargs):
  235. """Instantiates the MobileNetV1 architecture, modified for object detection.
  236. This wraps the MobileNetV1 tensorflow Keras application, but uses the
  237. Keras application's kwargs-based monkey-patching API to override the Keras
  238. architecture with the following changes:
  239. - Changes the default batchnorm momentum to 0.9997
  240. - Applies the Object Detection hyperparameter configuration
  241. - Supports FreezableBatchNorms
  242. - Adds support for a min number of filters for each layer
  243. - Makes the `alpha` parameter affect the final convolution block even if it
  244. is less than 1.0
  245. - Adds support for explicit padding of convolutions
  246. - Makes the Input layer use a tf.placeholder_with_default instead of a
  247. tf.placeholder, to work on TPUs.
  248. Args:
  249. batchnorm_training: Bool. Assigned to Batch norm layer `training` param
  250. when constructing `freezable_batch_norm.FreezableBatchNorm` layers.
  251. default_batchnorm_momentum: Float. When 'conv_hyperparams' is None,
  252. batch norm layers will be constructed using this value as the momentum.
  253. conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
  254. containing hyperparameters for convolution ops. Optionally set to `None`
  255. to use default mobilenet_v1 layer builders.
  256. use_explicit_padding: If True, use 'valid' padding for convolutions,
  257. but explicitly pre-pads inputs so that the output dimensions are the
  258. same as if 'same' padding were used. Off by default.
  259. alpha: The width multiplier referenced in the MobileNetV1 paper. It
  260. modifies the number of filters in each convolutional layer.
  261. min_depth: Minimum number of filters in the convolutional layers.
  262. **kwargs: Keyword arguments forwarded directly to the
  263. `tf.keras.applications.Mobilenet` method that constructs the Keras
  264. model.
  265. Returns:
  266. A Keras model instance.
  267. """
  268. layers_override = _LayersOverride(
  269. batchnorm_training,
  270. default_batchnorm_momentum=default_batchnorm_momentum,
  271. conv_hyperparams=conv_hyperparams,
  272. use_explicit_padding=use_explicit_padding,
  273. min_depth=min_depth,
  274. alpha=alpha)
  275. return tf.keras.applications.MobileNet(
  276. alpha=alpha, layers=layers_override, **kwargs)
  277. # pylint: enable=invalid-name