|
|
- syntax = "proto2";
-
- package object_detection.protos;
-
- import "object_detection/protos/hyperparams.proto";
-
- // Configuration proto selecting the box predictor. See core/box_predictor.py.
- message BoxPredictor {
- oneof box_predictor_oneof {  // At most one of the fields below can be set.
- ConvolutionalBoxPredictor convolutional_box_predictor = 1;
- MaskRCNNBoxPredictor mask_rcnn_box_predictor = 2;
- RfcnBoxPredictor rfcn_box_predictor = 3;
- WeightSharedConvolutionalBoxPredictor
- weight_shared_convolutional_box_predictor = 4;
- }
- }
-
- // Configuration proto for the convolutional box predictor.
- // Next id: 13 (field numbers 1 through 12 are in use).
- message ConvolutionalBoxPredictor {
- // Hyperparameters for convolution ops used in the box predictor.
- optional Hyperparams conv_hyperparams = 1;
-
- // Minimum feature depth prior to predicting box encodings and class
- // predictions.
- optional int32 min_depth = 2 [default = 0];
-
- // Maximum feature depth prior to predicting box encodings and class
- // predictions. If max_depth is set to 0, no additional feature map will be
- // inserted before location and class predictions.
- optional int32 max_depth = 3 [default = 0];
-
- // Number of additional conv layers before the predictor.
- optional int32 num_layers_before_predictor = 4 [default = 0];
-
- // Whether to use dropout for class prediction.
- optional bool use_dropout = 5 [default = true];
-
- // Keep probability for dropout.
- optional float dropout_keep_probability = 6 [default = 0.8];
-
- // Size of final convolution kernel. If the spatial resolution of the feature
- // map is smaller than the kernel size, then the kernel size is set to
- // min(feature_width, feature_height).
- optional int32 kernel_size = 7 [default = 1];
-
- // Size of the encoding for boxes.
- optional int32 box_code_size = 8 [default = 4];
-
- // Whether to apply sigmoid to the output of class predictions.
- // TODO(jonathanhuang): Do we need this since we have a post-processing
- // module?
- optional bool apply_sigmoid_to_scores = 9 [default = false];
-
- optional float class_prediction_bias_init = 10 [default = 0.0];  // Bias initialization for class prediction.
-
- // Whether to use depthwise separable convolution for box predictor layers.
- optional bool use_depthwise = 11 [default = false];
-
- // If specified, apply clipping to box encodings.
- message BoxEncodingsClipRange {
- optional float min = 1;  // Lower clipping bound; no explicit default declared.
- optional float max = 2;  // Upper clipping bound; no explicit default declared.
- }
- optional BoxEncodingsClipRange box_encodings_clip_range = 12;
- }
-
- // Configuration proto for weight shared convolutional box predictor.
- // Next id: 18 (3, 5, 6, 9, 15 are unused here; confirm they are free before reuse)
- message WeightSharedConvolutionalBoxPredictor {
- // Hyperparameters for convolution ops used in the box predictor.
- optional Hyperparams conv_hyperparams = 1;
-
- // Number of additional conv layers before the predictor.
- optional int32 num_layers_before_predictor = 4 [default = 0];
-
- // Output depth for the convolution ops prior to predicting box encodings
- // and class predictions.
- optional int32 depth = 2 [default = 0];
-
- // Size of final convolution kernel. If the spatial resolution of the feature
- // map is smaller than the kernel size, then the kernel size is set to
- // min(feature_width, feature_height).
- optional int32 kernel_size = 7 [default = 3];
-
- // Size of the encoding for boxes.
- optional int32 box_code_size = 8 [default = 4];
-
- // Bias initialization for class prediction. It has been shown to stabilize
- // training when there is a large number of negative boxes. See
- // https://arxiv.org/abs/1708.02002 for details.
- optional float class_prediction_bias_init = 10 [default = 0.0];
-
- // Whether to use dropout for class prediction.
- optional bool use_dropout = 11 [default = false];
-
- // Keep probability for dropout.
- optional float dropout_keep_probability = 12 [default = 0.8];
-
- // Whether to share the multi-layer tower between box prediction and class
- // prediction heads.
- optional bool share_prediction_tower = 13 [default = false];
-
- // Whether to use depthwise separable convolution for box predictor layers.
- optional bool use_depthwise = 14 [default = false];
-
- // Enum to specify how to convert the detection scores at inference time.
- enum ScoreConverter {
- // Output scores equal the input scores (no conversion).
- IDENTITY = 0;
-
- // Applies a sigmoid to the input scores.
- SIGMOID = 1;
- }
-
- // Elementwise score converter applied at inference time.
- optional ScoreConverter score_converter = 16 [default = IDENTITY];
-
- // If specified, apply clipping to box encodings.
- message BoxEncodingsClipRange {
- optional float min = 1;  // Lower clipping bound; no explicit default declared.
- optional float max = 2;  // Upper clipping bound; no explicit default declared.
- }
- optional BoxEncodingsClipRange box_encodings_clip_range = 17;
- }
-
-
-
- // Configuration proto for the Mask RCNN box predictor. Next id: 15
- // TODO(alirezafathi): Refactor the proto file to be able to configure mask rcnn
- // head easily.
- message MaskRCNNBoxPredictor {
- // Hyperparameters for fully connected ops used in the box predictor.
- optional Hyperparams fc_hyperparams = 1;
-
- // Whether to use a dropout op prior to both the box and class predictions.
- optional bool use_dropout = 2 [default = false];
-
- // Keep probability for dropout. This is only used if use_dropout is true.
- optional float dropout_keep_probability = 3 [default = 0.5];
-
- // Size of the encoding for the boxes.
- optional int32 box_code_size = 4 [default = 4];
-
- // Hyperparameters for convolution ops used in the box predictor.
- optional Hyperparams conv_hyperparams = 5;
-
- // Whether to predict instance masks inside detection boxes.
- optional bool predict_instance_masks = 6 [default = false];
-
- // The depth for the first conv2d_transpose op applied to the
- // image_features in the mask prediction branch. If set to 0, the value
- // will be set automatically based on the number of channels in the image
- // features and the number of classes.
- optional int32 mask_prediction_conv_depth = 7 [default = 256];
-
- // Whether to predict keypoints inside detection boxes.
- optional bool predict_keypoints = 8 [default = false];
-
- // The height and the width of the predicted mask.
- optional int32 mask_height = 9 [default = 15];
- optional int32 mask_width = 10 [default = 15];
-
- // The number of convolutions applied to image_features in the mask prediction
- // branch.
- optional int32 mask_prediction_num_conv_layers = 11 [default = 2];
- optional bool masks_are_class_agnostic = 12 [default = false];  // NOTE(review): undocumented; presumably one mask shared by all classes - confirm.
-
- // Whether to use one box for all classes rather than a different box for each
- // class.
- optional bool share_box_across_classes = 13 [default = false];
-
- // Whether to apply convolutions on mask features before upsampling using
- // nearest neighbor resizing.
- // By default, mask features are resized to [`mask_height`, `mask_width`]
- // before applying convolutions and predicting masks.
- optional bool convolve_then_upsample_masks = 14 [default = false];
- }
-
- message RfcnBoxPredictor {  // Configuration proto for the RFCN box predictor.
- // Hyperparameters for convolution ops used in the box predictor.
- optional Hyperparams conv_hyperparams = 1;
-
- // Number of spatial bins for RFCN crops along the height.
- optional int32 num_spatial_bins_height = 2 [default = 3];
-
- optional int32 num_spatial_bins_width = 3 [default = 3];  // Same, along the width.
-
- // Target depth to reduce the input image features to.
- optional int32 depth = 4 [default = 1024];
-
- // Size of the encoding for the boxes.
- optional int32 box_code_size = 5 [default = 4];
-
- // Height to resize the RFCN crops to.
- optional int32 crop_height = 6 [default = 12];
-
- optional int32 crop_width = 7 [default = 12];  // Width to resize the RFCN crops to.
- }
|