# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Builder function to construct tf-slim arg_scope for convolution, fc ops."""
import tensorflow as tf

from object_detection.core import freezable_batch_norm
from object_detection.protos import hyperparams_pb2
from object_detection.utils import context_manager

slim = tf.contrib.slim


class KerasLayerHyperparams(object):
  """
  A hyperparameter configuration object for Keras layers used in
  Object Detection models.
  """

  def __init__(self, hyperparams_config):
    """Builds Keras hyperparameter config for layers based on the proto config.

    It automatically converts from Slim layer hyperparameter configs to
    Keras layer hyperparameters. Namely, it:
    - builds Keras initializers/regularizers instead of Slim ones
    - sets weights_regularizer/initializer to kernel_regularizer/initializer
    - converts batchnorm decay to momentum
    - converts Slim l2 regularizer weights to the equivalent Keras l2 weights

    The resulting object contains a hyperparameter configuration for ops that
    specifies the kernel initializer, kernel regularizer, and activation. It
    also contains parameters for batch norm operators based on the
    configuration.

    Note that if the batch_norm parameters are not specified in the config
    (i.e. left to default) then batch norm is excluded from the config.

    Args:
      hyperparams_config: hyperparams.proto object containing
        hyperparameters.

    Raises:
      ValueError: if hyperparams_config is not of type hyperparams.Hyperparams.
    """
    if not isinstance(hyperparams_config,
                      hyperparams_pb2.Hyperparams):
      raise ValueError('hyperparams_config not of type '
                       'hyperparams_pb.Hyperparams.')

    self._batch_norm_params = None
    if hyperparams_config.HasField('batch_norm'):
      self._batch_norm_params = _build_keras_batch_norm_params(
          hyperparams_config.batch_norm)

    self._activation_fn = _build_activation_fn(hyperparams_config.activation)
    # TODO(kaftan): Unclear if these kwargs apply to separable & depthwise conv
    # (Those might use depthwise_* instead of kernel_*)
    # We should probably switch to using build_conv2d_layer and
    # build_depthwise_conv2d_layer methods instead.
    self._op_params = {
        'kernel_regularizer': _build_keras_regularizer(
            hyperparams_config.regularizer),
        'kernel_initializer': _build_initializer(
            hyperparams_config.initializer, build_for_keras=True),
        'activation': _build_activation_fn(hyperparams_config.activation)
    }

  def use_batch_norm(self):
    return self._batch_norm_params is not None

  def batch_norm_params(self, **overrides):
    """Returns a dict containing batchnorm layer construction hyperparameters.

    Optionally overrides values in the batchnorm hyperparam dict. Overrides
    only apply to individual calls of this method, and do not affect
    future calls.

    Args:
      **overrides: keyword arguments to override in the hyperparams dictionary.

    Returns: dict containing the layer construction keyword arguments, with
      values overridden by the `overrides` keyword arguments.
    """
    if self._batch_norm_params is None:
      new_batch_norm_params = dict()
    else:
      new_batch_norm_params = self._batch_norm_params.copy()
    new_batch_norm_params.update(overrides)
    return new_batch_norm_params

  def build_batch_norm(self, training=None, **overrides):
    """Returns a Batch Normalization layer with the appropriate hyperparams.

    If the hyperparams are configured to not use batch normalization,
    this will return a Keras Lambda layer that only applies tf.identity,
    without doing any normalization.

    Optionally overrides values in the batch_norm hyperparam dict. Overrides
    only apply to individual calls of this method, and do not affect
    future calls.

    Args:
      training: if True, the normalization layer will normalize using the batch
        statistics. If False, the normalization layer will be frozen and will
        act as if it is being used for inference. If None, the layer
        will look up the Keras learning phase at `call` time to decide what to
        do.
      **overrides: batch normalization construction args to override from the
        batch_norm hyperparams dictionary.

    Returns: Either a FreezableBatchNorm layer (if use_batch_norm() is True),
      or a Keras Lambda layer that applies the identity (if use_batch_norm()
      is False).
    """
    if self.use_batch_norm():
      return freezable_batch_norm.FreezableBatchNorm(
          training=training,
          **self.batch_norm_params(**overrides)
      )
    else:
      return tf.keras.layers.Lambda(tf.identity)

  def build_activation_layer(self, name='activation'):
    """Returns a Keras layer that applies the desired activation function.

    Args:
      name: The name to assign the Keras layer.

    Returns: A Keras lambda layer that applies the activation function
      specified in the hyperparam config, or applies the identity if the
      activation function is None.
    """
    if self._activation_fn:
      return tf.keras.layers.Lambda(self._activation_fn, name=name)
    else:
      return tf.keras.layers.Lambda(tf.identity, name=name)

  def params(self, include_activation=False, **overrides):
    """Returns a dict containing the layer construction hyperparameters to use.

    Optionally overrides values in the returned dict. Overrides
    only apply to individual calls of this method, and do not affect
    future calls.

    Args:
      include_activation: If False, activation in the returned dictionary will
        be set to `None`, and the activation must be applied via a separate
        layer created by `build_activation_layer`. If True, `activation` in the
        output param dictionary will be set to the activation function
        specified in the hyperparams config.
      **overrides: keyword arguments to override in the hyperparams dictionary.

    Returns: dict containing the layer construction keyword arguments, with
      values overridden by the `overrides` keyword arguments.
    """
    new_params = self._op_params.copy()
    new_params['activation'] = None
    if include_activation:
      new_params['activation'] = self._activation_fn
    if self.use_batch_norm() and self.batch_norm_params()['center']:
      new_params['use_bias'] = False
    else:
      new_params['use_bias'] = True
    new_params.update(**overrides)
    return new_params
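
# Example usage of KerasLayerHyperparams (an illustrative sketch kept as a
# comment so it is not executed on import; `hyperparams_proto` stands in for
# a parsed hyperparams_pb2.Hyperparams message):
#
#   hyperparams = KerasLayerHyperparams(hyperparams_proto)
#   conv = tf.keras.layers.Conv2D(32, [3, 3], **hyperparams.params())
#   batch_norm = hyperparams.build_batch_norm(training=True)
#   activation = hyperparams.build_activation_layer()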


def build(hyperparams_config, is_training):
  """Builds tf-slim arg_scope for convolution ops based on the config.

  Returns an arg_scope to use for convolution ops containing weights
  initializer, weights regularizer, activation function, batch norm function
  and batch norm parameters based on the configuration.

  Note that if no normalization parameters are specified in the config
  (i.e. left to default), then both batch norm and group norm are excluded
  from the arg_scope.

  The batch norm parameters are set for updates based on the `is_training`
  argument and the hyperparams_config.batch_norm.train parameter. During
  training, they are updated only if the batch_norm.train parameter is true.
  However, during eval, no updates are made to the batch norm variables. In
  both cases, their current values are used during the forward pass.

  Args:
    hyperparams_config: hyperparams.proto object containing
      hyperparameters.
    is_training: Whether the network is in training mode.

  Returns:
    arg_scope_fn: A function to construct tf-slim arg_scope containing
      hyperparameters for ops.

  Raises:
    ValueError: if hyperparams_config is not of type hyperparams.Hyperparams.
  """
  if not isinstance(hyperparams_config,
                    hyperparams_pb2.Hyperparams):
    raise ValueError('hyperparams_config not of type '
                     'hyperparams_pb.Hyperparams.')

  normalizer_fn = None
  batch_norm_params = None
  if hyperparams_config.HasField('batch_norm'):
    normalizer_fn = slim.batch_norm
    batch_norm_params = _build_batch_norm_params(
        hyperparams_config.batch_norm, is_training)
  if hyperparams_config.HasField('group_norm'):
    normalizer_fn = tf.contrib.layers.group_norm
  affected_ops = [slim.conv2d, slim.separable_conv2d, slim.conv2d_transpose]
  if hyperparams_config.HasField('op') and (
      hyperparams_config.op == hyperparams_pb2.Hyperparams.FC):
    affected_ops = [slim.fully_connected]
  def scope_fn():
    with (slim.arg_scope([slim.batch_norm], **batch_norm_params)
          if batch_norm_params is not None else
          context_manager.IdentityContextManager()):
      with slim.arg_scope(
          affected_ops,
          weights_regularizer=_build_slim_regularizer(
              hyperparams_config.regularizer),
          weights_initializer=_build_initializer(
              hyperparams_config.initializer),
          activation_fn=_build_activation_fn(hyperparams_config.activation),
          normalizer_fn=normalizer_fn) as sc:
        return sc

  return scope_fn
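
# Example usage of build() (an illustrative sketch kept as a comment so it is
# not executed on import; `hyperparams_proto` stands in for a parsed
# hyperparams_pb2.Hyperparams message and `inputs` for an image tensor):
#
#   arg_scope_fn = build(hyperparams_proto, is_training=True)
#   with slim.arg_scope(arg_scope_fn()):
#     net = slim.conv2d(inputs, 32, [3, 3])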


def _build_activation_fn(activation_fn):
  """Builds a callable activation from config.

  Args:
    activation_fn: hyperparams_pb2.Hyperparams.activation

  Returns:
    Callable activation function.

  Raises:
    ValueError: On unknown activation function.
  """
  if activation_fn == hyperparams_pb2.Hyperparams.NONE:
    return None
  if activation_fn == hyperparams_pb2.Hyperparams.RELU:
    return tf.nn.relu
  if activation_fn == hyperparams_pb2.Hyperparams.RELU_6:
    return tf.nn.relu6
  raise ValueError('Unknown activation function: {}'.format(activation_fn))
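
# For example, an `activation: RELU_6` entry in the hyperparams text proto is
# parsed to hyperparams_pb2.Hyperparams.RELU_6 and maps to tf.nn.relu6 here,
# while `activation: NONE` maps to None (no activation).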


def _build_slim_regularizer(regularizer):
  """Builds a tf-slim regularizer from config.

  Args:
    regularizer: hyperparams_pb2.Hyperparams.regularizer proto.

  Returns:
    tf-slim regularizer.

  Raises:
    ValueError: On unknown regularizer.
  """
  regularizer_oneof = regularizer.WhichOneof('regularizer_oneof')
  if regularizer_oneof == 'l1_regularizer':
    return slim.l1_regularizer(scale=float(regularizer.l1_regularizer.weight))
  if regularizer_oneof == 'l2_regularizer':
    return slim.l2_regularizer(scale=float(regularizer.l2_regularizer.weight))
  if regularizer_oneof is None:
    return None
  raise ValueError('Unknown regularizer function: {}'.format(regularizer_oneof))
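
# For example, a config containing
#   regularizer { l2_regularizer { weight: 0.0004 } }
# yields slim.l2_regularizer(scale=0.0004) from this builder.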


def _build_keras_regularizer(regularizer):
  """Builds a keras regularizer from config.

  Args:
    regularizer: hyperparams_pb2.Hyperparams.regularizer proto.

  Returns:
    Keras regularizer.

  Raises:
    ValueError: On unknown regularizer.
  """
  regularizer_oneof = regularizer.WhichOneof('regularizer_oneof')
  if regularizer_oneof == 'l1_regularizer':
    return tf.keras.regularizers.l1(float(regularizer.l1_regularizer.weight))
  if regularizer_oneof == 'l2_regularizer':
    # The Keras L2 regularizer weight differs from the Slim L2 regularizer
    # weight by a factor of 2.
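    # Informally: slim.l2_regularizer(scale) adds scale * sum(w**2) / 2 to the
    # loss, while tf.keras.regularizers.l2(l) adds l * sum(w**2), so using
    # l = 0.5 * scale keeps the penalty identical.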
    return tf.keras.regularizers.l2(
        float(regularizer.l2_regularizer.weight * 0.5))
  raise ValueError('Unknown regularizer function: {}'.format(regularizer_oneof))


def _build_initializer(initializer, build_for_keras=False):
  """Build a tf initializer from config.

  Args:
    initializer: hyperparams_pb2.Hyperparams.initializer proto.
    build_for_keras: Whether the initializers should be built for Keras
      operators. If False, builds for Slim.

  Returns:
    tf initializer.

  Raises:
    ValueError: On unknown initializer.
  """
  initializer_oneof = initializer.WhichOneof('initializer_oneof')
  if initializer_oneof == 'truncated_normal_initializer':
    return tf.truncated_normal_initializer(
        mean=initializer.truncated_normal_initializer.mean,
        stddev=initializer.truncated_normal_initializer.stddev)
  if initializer_oneof == 'random_normal_initializer':
    return tf.random_normal_initializer(
        mean=initializer.random_normal_initializer.mean,
        stddev=initializer.random_normal_initializer.stddev)
  if initializer_oneof == 'variance_scaling_initializer':
    enum_descriptor = (hyperparams_pb2.VarianceScalingInitializer.
                       DESCRIPTOR.enum_types_by_name['Mode'])
    mode = enum_descriptor.values_by_number[initializer.
                                            variance_scaling_initializer.
                                            mode].name
    if build_for_keras:
      if initializer.variance_scaling_initializer.uniform:
        return tf.variance_scaling_initializer(
            scale=initializer.variance_scaling_initializer.factor,
            mode=mode.lower(),
            distribution='uniform')
      else:
        # In the TF 1.9 release and earlier, the truncated_normal distribution
        # was not supported correctly. So, in those earlier versions of
        # TensorFlow, a ValueError will be raised, and we manually truncate
        # the distribution scale.
        #
        # It is insufficient to just set distribution to `normal` from the
        # start, because the `normal` distribution in newer TensorFlow
        # versions creates a truncated distribution, whereas it created
        # untruncated distributions in older versions.
        try:
          return tf.variance_scaling_initializer(
              scale=initializer.variance_scaling_initializer.factor,
              mode=mode.lower(),
              distribution='truncated_normal')
        except ValueError:
          truncate_constant = 0.87962566103423978
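          # This constant is (approximately) the standard deviation of a unit
          # normal truncated to (-2, 2); dividing the scale by its square
          # compensates for the variance lost to truncation, mirroring what
          # newer TF versions do internally for 'truncated_normal'.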
          truncated_scale = initializer.variance_scaling_initializer.factor / (
              truncate_constant * truncate_constant
          )
          return tf.variance_scaling_initializer(
              scale=truncated_scale,
              mode=mode.lower(),
              distribution='normal')

    else:
      return slim.variance_scaling_initializer(
          factor=initializer.variance_scaling_initializer.factor,
          mode=mode,
          uniform=initializer.variance_scaling_initializer.uniform)
  raise ValueError('Unknown initializer function: {}'.format(
      initializer_oneof))
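
# For example, a config containing
#   initializer { truncated_normal_initializer { mean: 0.0 stddev: 0.03 } }
# yields tf.truncated_normal_initializer(mean=0.0, stddev=0.03), regardless of
# the build_for_keras flag.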


def _build_batch_norm_params(batch_norm, is_training):
  """Build a dictionary of batch_norm params from config.

  Args:
    batch_norm: hyperparams_pb2.Hyperparams.batch_norm proto.
    is_training: Whether the model is in training mode.

  Returns:
    A dictionary containing batch_norm parameters.
  """
  batch_norm_params = {
      'decay': batch_norm.decay,
      'center': batch_norm.center,
      'scale': batch_norm.scale,
      'epsilon': batch_norm.epsilon,
      # Remove is_training parameter from here and deprecate it in the proto
      # once we refactor Faster RCNN models to set is_training through an outer
      # arg_scope in the meta architecture.
      'is_training': is_training and batch_norm.train,
  }
  return batch_norm_params
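
# For example, with is_training=True, a config containing
#   batch_norm { decay: 0.999 center: true scale: true epsilon: 0.001
#                train: true }
# produces {'decay': 0.999, 'center': True, 'scale': True, 'epsilon': 0.001,
#           'is_training': True}.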


def _build_keras_batch_norm_params(batch_norm):
  """Build a dictionary of Keras BatchNormalization params from config.

  Args:
    batch_norm: hyperparams_pb2.Hyperparams.batch_norm proto.

  Returns:
    A dictionary containing Keras BatchNormalization parameters.
  """
  # Note: Although decay is defined to be 1 - momentum in batch_norm,
  # decay in the slim batch_norm layers was erroneously defined and is
  # actually the same as momentum in the Keras batch_norm layers.
  # For context, see: github.com/keras-team/keras/issues/6839
  batch_norm_params = {
      'momentum': batch_norm.decay,
      'center': batch_norm.center,
      'scale': batch_norm.scale,
      'epsilon': batch_norm.epsilon,
  }
  return batch_norm_params
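
# For example, batch_norm { decay: 0.997 } produces a params dict with
# momentum=0.997 for tf.keras.layers.BatchNormalization, since Slim's `decay`
# plays the role of Keras's `momentum` (see the note above).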