You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

199 lines
7.2 KiB

  1. syntax = "proto2";
  2. package object_detection.protos;
  3. import "object_detection/protos/hyperparams.proto";
  // Configuration proto for box predictor. See core/box_predictor.py for
  // details.
  message BoxPredictor {
    // The predictor variant to configure. As a oneof, at most one of these
    // fields is set at a time; setting one clears the others.
    oneof box_predictor_oneof {
      ConvolutionalBoxPredictor convolutional_box_predictor = 1;
      MaskRCNNBoxPredictor mask_rcnn_box_predictor = 2;
      RfcnBoxPredictor rfcn_box_predictor = 3;
      WeightSharedConvolutionalBoxPredictor
          weight_shared_convolutional_box_predictor = 4;
    }
  }
  // Configuration proto for Convolutional box predictor.
  // Next id: 13
  message ConvolutionalBoxPredictor {
    // Range used when clipping box encodings (see box_encodings_clip_range).
    message BoxEncodingsClipRange {
      optional float min = 1;
      optional float max = 2;
    }

    // Hyperparameters for the convolution ops used in the box predictor.
    optional Hyperparams conv_hyperparams = 1;

    // Minimum feature depth prior to predicting box encodings and class
    // predictions.
    optional int32 min_depth = 2 [default = 0];

    // Maximum feature depth prior to predicting box encodings and class
    // predictions. When max_depth is 0, no additional feature map is inserted
    // before the location and class predictions.
    optional int32 max_depth = 3 [default = 0];

    // Number of additional conv layers before the predictor.
    optional int32 num_layers_before_predictor = 4 [default = 0];

    // Whether to use dropout for class prediction.
    optional bool use_dropout = 5 [default = true];

    // Keep probability for dropout.
    optional float dropout_keep_probability = 6 [default = 0.8];

    // Size of the final convolution kernel. If the spatial resolution of the
    // feature map is smaller than the kernel size, the kernel size is set to
    // min(feature_width, feature_height).
    optional int32 kernel_size = 7 [default = 1];

    // Size of the encoding for boxes.
    optional int32 box_code_size = 8 [default = 4];

    // Whether to apply sigmoid to the output of class predictions.
    // TODO(jonathanhuang): Do we need this since we have a post processing
    // module?
    optional bool apply_sigmoid_to_scores = 9 [default = false];

    // Bias initialization for class prediction.
    // NOTE(review): this field was undocumented; the description mirrors the
    // same-named field of WeightSharedConvolutionalBoxPredictor -- confirm.
    optional float class_prediction_bias_init = 10 [default = 0.0];

    // Whether to use depthwise separable convolution for box predictor layers.
    optional bool use_depthwise = 11 [default = false];

    // If specified, apply clipping to box encodings.
    optional BoxEncodingsClipRange box_encodings_clip_range = 12;
  }
  // Configuration proto for weight shared convolutional box predictor.
  // Next id: 18
  //
  // NOTE(review): field numbers 3, 5, 6, 9 and 15 are not assigned in this
  // message. If any of them belonged to a since-deleted field, it should be
  // listed in a `reserved` statement -- confirm against the file history.
  message WeightSharedConvolutionalBoxPredictor {
    // Enum to specify how to convert the detection scores at inference time.
    enum ScoreConverter {
      // Input scores equals output scores.
      IDENTITY = 0;

      // Applies a sigmoid on input scores.
      SIGMOID = 1;
    }

    // Range used when clipping box encodings (see box_encodings_clip_range).
    message BoxEncodingsClipRange {
      optional float min = 1;
      optional float max = 2;
    }

    // Hyperparameters for the convolution ops used in the box predictor.
    optional Hyperparams conv_hyperparams = 1;

    // Number of additional conv layers before the predictor.
    optional int32 num_layers_before_predictor = 4 [default = 0];

    // Output depth for the convolution ops prior to predicting box encodings
    // and class predictions.
    optional int32 depth = 2 [default = 0];

    // Size of the final convolution kernel. If the spatial resolution of the
    // feature map is smaller than the kernel size, the kernel size is set to
    // min(feature_width, feature_height).
    optional int32 kernel_size = 7 [default = 3];

    // Size of the encoding for boxes.
    optional int32 box_code_size = 8 [default = 4];

    // Bias initialization for class prediction. It has been shown to stabilize
    // training where there are a large number of negative boxes. See
    // https://arxiv.org/abs/1708.02002 for details.
    optional float class_prediction_bias_init = 10 [default = 0.0];

    // Whether to use dropout for class prediction.
    optional bool use_dropout = 11 [default = false];

    // Keep probability for dropout.
    optional float dropout_keep_probability = 12 [default = 0.8];

    // Whether to share the multi-layer tower between the box prediction and
    // class prediction heads.
    optional bool share_prediction_tower = 13 [default = false];

    // Whether to use depthwise separable convolution for box predictor layers.
    optional bool use_depthwise = 14 [default = false];

    // Callable elementwise score converter at inference time.
    optional ScoreConverter score_converter = 16 [default = IDENTITY];

    // If specified, apply clipping to box encodings.
    optional BoxEncodingsClipRange box_encodings_clip_range = 17;
  }
  // Configuration proto for the Mask R-CNN box predictor.
  // TODO(alirezafathi): Refactor the proto file to be able to configure mask
  // rcnn head easily.
  // Next id: 15
  message MaskRCNNBoxPredictor {
    // Hyperparameters for the fully connected ops used in the box predictor.
    optional Hyperparams fc_hyperparams = 1;

    // Whether to use a dropout op prior to both the box and class predictions.
    optional bool use_dropout = 2 [default = false];

    // Keep probability for dropout. Only used if use_dropout is true.
    optional float dropout_keep_probability = 3 [default = 0.5];

    // Size of the encoding for the boxes.
    optional int32 box_code_size = 4 [default = 4];

    // Hyperparameters for the convolution ops used in the box predictor.
    optional Hyperparams conv_hyperparams = 5;

    // Whether to predict instance masks inside detection boxes.
    optional bool predict_instance_masks = 6 [default = false];

    // The depth for the first conv2d_transpose op applied to the
    // image_features in the mask prediction branch. If set to 0, the value is
    // chosen automatically based on the number of channels in the image
    // features and the number of classes.
    optional int32 mask_prediction_conv_depth = 7 [default = 256];

    // Whether to predict keypoints inside detection boxes.
    optional bool predict_keypoints = 8 [default = false];

    // The height and the width of the predicted mask.
    optional int32 mask_height = 9 [default = 15];
    optional int32 mask_width = 10 [default = 15];

    // The number of convolutions applied to image_features in the mask
    // prediction branch.
    optional int32 mask_prediction_num_conv_layers = 11 [default = 2];

    // NOTE(review): undocumented in the original. Presumably selects a single
    // class-independent mask per box instead of one mask per class -- confirm.
    optional bool masks_are_class_agnostic = 12 [default = false];

    // Whether to use one box for all classes rather than a different box for
    // each class.
    optional bool share_box_across_classes = 13 [default = false];

    // Whether to apply convolutions on mask features before upsampling using
    // nearest neighbor resizing.
    // By default, mask features are resized to [`mask_height`, `mask_width`]
    // before applying convolutions and predicting masks.
    optional bool convolve_then_upsample_masks = 14 [default = false];
  }
  // Configuration proto for the RFCN box predictor.
  message RfcnBoxPredictor {
    // Hyperparameters for the convolution ops used in the box predictor.
    optional Hyperparams conv_hyperparams = 1;

    // Bin sizes for RFCN crops.
    optional int32 num_spatial_bins_height = 2 [default = 3];
    optional int32 num_spatial_bins_width = 3 [default = 3];

    // Target depth to reduce the input image features to.
    optional int32 depth = 4 [default = 1024];

    // Size of the encoding for the boxes.
    optional int32 box_code_size = 5 [default = 4];

    // Size to resize the RFCN crops to.
    optional int32 crop_height = 6 [default = 12];
    optional int32 crop_width = 7 [default = 12];
  }