You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

219 lines
9.0 KiB

  // Schema definitions for Single Shot Detection (SSD) model configuration.
  syntax = "proto2";

  package object_detection.protos;

  import "object_detection/protos/anchor_generator.proto";
  import "object_detection/protos/box_coder.proto";
  import "object_detection/protos/box_predictor.proto";
  import "object_detection/protos/hyperparams.proto";
  import "object_detection/protos/image_resizer.proto";
  import "object_detection/protos/losses.proto";
  import "object_detection/protos/matcher.proto";
  import "object_detection/protos/post_processing.proto";
  import "object_detection/protos/region_similarity_calculator.proto";

  // Configuration for Single Shot Detection (SSD) models.
  // Next id: 26
  //
  // NOTE(review): field numbers 17-20 are not used in this message. If they
  // belonged to removed fields they should be declared `reserved` so that
  // they are never reused -- confirm against the schema history.
  message Ssd {
    // Number of classes to predict.
    optional int32 num_classes = 1;

    // Image resizer for preprocessing the input image.
    optional ImageResizer image_resizer = 2;

    // Feature extractor config.
    optional SsdFeatureExtractor feature_extractor = 3;

    // Box coder to encode the boxes.
    optional BoxCoder box_coder = 4;

    // Matcher to match groundtruth with anchors.
    optional Matcher matcher = 5;

    // Region similarity calculator to compute similarity of boxes.
    optional RegionSimilarityCalculator similarity_calculator = 6;

    // Whether background targets are to be encoded as an all-zeros vector or
    // a one-hot vector (where background is the 0th class).
    optional bool encode_background_as_zeros = 12 [default = false];

    // Classification weight to be associated with negative anchors
    // (default: 1.0). The weight must be in [0., 1.].
    optional float negative_class_weight = 13 [default = 1.0];

    // Box predictor to attach to the features.
    optional BoxPredictor box_predictor = 7;

    // Anchor generator to compute anchors.
    optional AnchorGenerator anchor_generator = 8;

    // Post processing to apply on the predictions.
    optional PostProcessing post_processing = 9;

    // Whether to normalize the loss by the number of groundtruth boxes that
    // match to the anchors.
    optional bool normalize_loss_by_num_matches = 10 [default = true];

    // Whether to normalize the localization loss by the code size of the box
    // encodings. This is applied along with other normalization factors.
    optional bool normalize_loc_loss_by_codesize = 14 [default = false];

    // Loss configuration for training.
    optional Loss loss = 11;

    // Whether to update batch norm parameters during training or not.
    // When training with a relatively small batch size (e.g. 1), it is
    // desirable to disable batch norm update and use pretrained batch norm
    // params.
    //
    // NOTE(review): the field name suggests that `true` disables batch norm
    // updates, which is the opposite sense of the summary sentence above --
    // confirm against the code that reads this field.
    //
    // Note: Some feature extractors are used with canned arg_scopes
    // (e.g. resnet arg scopes). In these cases training behavior of batch
    // norm variables may depend on both values of `batch_norm_trainable` and
    // `is_training`.
    //
    // When canned arg_scopes are used with feature extractors,
    // `conv_hyperparams` will apply only to the additional layers that are
    // added and are outside the canned arg_scope.
    optional bool freeze_batchnorm = 16 [default = false];

    // Whether to update batch_norm inplace during training. This is required
    // for batch norm to work correctly on TPUs. When this is false, user must
    // add a control dependency on tf.GraphKeys.UPDATE_OPS for train/loss op
    // in order to update the batch norm moving average parameters.
    optional bool inplace_batchnorm_update = 15 [default = false];

    // Whether to add an implicit background class to one-hot encodings of
    // groundtruth labels. Set to false if training a single class model or
    // using an explicit background class.
    optional bool add_background_class = 21 [default = true];

    // Whether to use an explicit background class. Set to true if using
    // groundtruth labels with an explicit background class, as in multiclass
    // scores.
    optional bool explicit_background_class = 24 [default = false];

    // NOTE(review): undocumented in the original schema. Presumably selects
    // groundtruth confidences as the training targets -- confirm against the
    // code that reads this field.
    optional bool use_confidences_as_targets = 22 [default = false];

    // NOTE(review): undocumented in the original schema. Presumably a weight
    // applied to implicitly labeled examples -- confirm against the code
    // that reads this field.
    optional float implicit_example_weight = 23 [default = 1.0];

    // Configuration proto for MaskHead.
    // Next id: 11
    message MaskHead {
      // The height and the width of the predicted mask. Only used when
      // predict_instance_masks is true.
      optional int32 mask_height = 1 [default = 15];
      optional int32 mask_width = 2 [default = 15];

      // Whether to predict class agnostic masks. Only used when
      // predict_instance_masks is true.
      optional bool masks_are_class_agnostic = 3 [default = true];

      // The depth for the first conv2d_transpose op applied to the
      // image_features in the mask prediction branch. If set to 0, the value
      // will be set automatically based on the number of channels in the
      // image features and the number of classes.
      optional int32 mask_prediction_conv_depth = 4 [default = 256];

      // The number of convolutions applied to image_features in the mask
      // prediction branch.
      optional int32 mask_prediction_num_conv_layers = 5 [default = 2];

      // Whether to apply convolutions on mask features before upsampling
      // using nearest neighbor resizing.
      // By default, mask features are resized to
      // [`mask_height`, `mask_width`] before applying convolutions and
      // predicting masks.
      optional bool convolve_then_upsample_masks = 6 [default = false];

      // Mask loss weight.
      optional float mask_loss_weight = 7 [default = 5.0];

      // Number of boxes to be generated at training time for computing mask
      // loss.
      optional int32 mask_loss_sample_size = 8 [default = 16];

      // Hyperparameters for convolution ops used in the box predictor.
      optional Hyperparams conv_hyperparams = 9;

      // Output size (width and height are set to be the same) of the initial
      // bilinear interpolation based cropping during ROI pooling. Only used
      // when we have second stage prediction head enabled (e.g. mask head).
      optional int32 initial_crop_size = 10 [default = 15];
    }

    // Configs for mask head.
    optional MaskHead mask_head_config = 25;
  }

  // Configuration for the feature extractor used by an SSD model.
  message SsdFeatureExtractor {
    // Field number 6 is reserved and must not be assigned to a new field
    // (presumably it belonged to a removed field -- confirm against history).
    reserved 6;

    // Type of ssd feature extractor.
    optional string type = 1;

    // The factor to alter the depth of the channels in the feature extractor.
    optional float depth_multiplier = 2 [default = 1.0];

    // Minimum number of the channels in the feature extractor.
    optional int32 min_depth = 3 [default = 16];

    // Hyperparameters that affect the layers of feature extractor added on
    // top of the base feature extractor.
    optional Hyperparams conv_hyperparams = 4;

    // Normally, SSD feature extractors are constructed by reusing an existing
    // base feature extractor (that has its own hyperparams) and adding new
    // layers on top of it. `conv_hyperparams` above normally applies only to
    // the new layers while base feature extractor uses its own default
    // hyperparams. If this value is set to true, the base feature extractor's
    // hyperparams will be overridden with the `conv_hyperparams`.
    optional bool override_base_feature_extractor_hyperparams = 9
        [default = false];

    // The nearest multiple to zero-pad the input height and width dimensions
    // to. For example, if pad_to_multiple = 2, input dimensions are
    // zero-padded until the resulting dimensions are even.
    optional int32 pad_to_multiple = 5 [default = 1];

    // Whether to use explicit padding when extracting SSD multiresolution
    // features. This will also apply to the base feature extractor if a
    // MobileNet architecture is used.
    optional bool use_explicit_padding = 7 [default = false];

    // Whether to use depthwise separable convolutions to extract additional
    // feature maps added by SSD.
    optional bool use_depthwise = 8 [default = false];

    // Feature Pyramid Networks config.
    optional FeaturePyramidNetworks fpn = 10;

    // If true, replace preprocess function of feature extractor with a
    // placeholder. This should only be used if all the image preprocessing
    // steps happen outside the graph.
    optional bool replace_preprocessor_with_placeholder = 11 [default = false];
  }

  // Configuration for Feature Pyramid Networks.
  message FeaturePyramidNetworks {
    // We recommend using multi_resolution_feature_map_generator with FPN, and
    // the levels there must match the levels defined below for better
    // performance.
    //
    // Correspondence from FPN levels to Resnet/Mobilenet V1 feature maps:
    //
    //   FPN Level   Resnet Feature Map   Mobilenet-V1 Feature Map
    //   2           Block 1              Conv2d_3_pointwise
    //   3           Block 2              Conv2d_5_pointwise
    //   4           Block 3              Conv2d_11_pointwise
    //   5           Block 4              Conv2d_13_pointwise
    //   6           Bottomup_5           bottom_up_Conv2d_14
    //   7           Bottomup_6           bottom_up_Conv2d_15
    //   8           Bottomup_7           bottom_up_Conv2d_16
    //   9           Bottomup_8           bottom_up_Conv2d_17

    // Minimum level in feature pyramid.
    optional int32 min_level = 1 [default = 3];

    // Maximum level in feature pyramid.
    optional int32 max_level = 2 [default = 7];

    // Channel depth for additional coarse feature layers.
    optional int32 additional_layer_depth = 3 [default = 256];
  }