You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

186 lines
7.9 KiB

  syntax = "proto2";

  package object_detection.protos;

  import "object_detection/protos/anchor_generator.proto";
  import "object_detection/protos/box_predictor.proto";
  import "object_detection/protos/hyperparams.proto";
  import "object_detection/protos/image_resizer.proto";
  import "object_detection/protos/losses.proto";
  import "object_detection/protos/post_processing.proto";
  // Configuration for Faster R-CNN models.
  // See meta_architectures/faster_rcnn_meta_arch.py and
  // builders/model_builder.py.
  //
  // Naming conventions:
  // Faster R-CNN models have two stages: a first stage region proposal network
  // (or RPN) and a second stage box classifier. We thus use the prefixes
  // `first_stage_` and `second_stage_` to indicate the stage to which each
  // parameter pertains when relevant.
  message FasterRcnn {
    // Number of stages of the model to construct. Defaults to 2, i.e. both
    // the first stage RPN and the second stage box classifier.
    // NOTE(review): presumably a value of 1 constructs only the Region
    // Proposal Network (RPN) -- confirm against
    // meta_architectures/faster_rcnn_meta_arch.py.
    optional int32 number_of_stages = 1 [default = 2];

    // NOTE(review): field number 2 is not assigned in this message. If it
    // once belonged to a removed field, declare `reserved 2;` so that it can
    // never be reused -- confirm against the file history.

    // Number of classes to predict.
    optional int32 num_classes = 3;

    // Image resizer for preprocessing the input image.
    optional ImageResizer image_resizer = 4;

    // Feature extractor config.
    optional FasterRcnnFeatureExtractor feature_extractor = 5;

    // (First stage) region proposal network (RPN) parameters.

    // Anchor generator to compute RPN anchors.
    optional AnchorGenerator first_stage_anchor_generator = 6;

    // Atrous rate for the convolution op applied to the
    // `first_stage_features_to_crop` tensor to obtain box predictions.
    optional int32 first_stage_atrous_rate = 7 [default = 1];

    // Hyperparameters for the convolutional RPN box predictor.
    optional Hyperparams first_stage_box_predictor_conv_hyperparams = 8;

    // Kernel size to use for the convolution op just prior to RPN box
    // predictions.
    optional int32 first_stage_box_predictor_kernel_size = 9 [default = 3];

    // Output depth for the convolution op just prior to RPN box predictions.
    optional int32 first_stage_box_predictor_depth = 10 [default = 512];

    // The batch size to use for computing the first stage objectness and
    // location losses.
    optional int32 first_stage_minibatch_size = 11 [default = 256];

    // Fraction of positive examples per image for the RPN.
    optional float first_stage_positive_balance_fraction = 12 [default = 0.5];

    // Non max suppression score threshold applied to first stage RPN
    // proposals.
    optional float first_stage_nms_score_threshold = 13 [default = 0.0];

    // Non max suppression IOU threshold applied to first stage RPN proposals.
    optional float first_stage_nms_iou_threshold = 14 [default = 0.7];

    // Maximum number of RPN proposals retained after first stage
    // postprocessing.
    optional int32 first_stage_max_proposals = 15 [default = 300];

    // First stage RPN localization loss weight.
    optional float first_stage_localization_loss_weight = 16 [default = 1.0];

    // First stage RPN objectness loss weight.
    optional float first_stage_objectness_loss_weight = 17 [default = 1.0];

    // Per-region cropping parameters.
    // Note that if a R-FCN model is constructed the per region cropping
    // parameters below are ignored.

    // Output size (width and height are set to be the same) of the initial
    // bilinear interpolation based cropping during ROI pooling.
    optional int32 initial_crop_size = 18;

    // Kernel size of the max pool op on the cropped feature map during
    // ROI pooling.
    optional int32 maxpool_kernel_size = 19;

    // Stride of the max pool op on the cropped feature map during ROI
    // pooling.
    optional int32 maxpool_stride = 20;

    // (Second stage) box classifier parameters.

    // Hyperparameters for the second stage box predictor. If box predictor
    // type is set to rfcn_box_predictor, a R-FCN model is constructed,
    // otherwise a Faster R-CNN model is constructed.
    optional BoxPredictor second_stage_box_predictor = 21;

    // The batch size per image used for computing the classification and
    // refined location loss of the box classifier.
    // Note that this field is ignored if `hard_example_miner` is configured.
    optional int32 second_stage_batch_size = 22 [default = 64];

    // Fraction of positive examples to use per image for the box classifier.
    optional float second_stage_balance_fraction = 23 [default = 0.25];

    // Post processing to apply on the second stage box classifier
    // predictions.
    // Note: the `score_converter` provided to the FasterRCNNMetaArch
    // constructor is taken from this `second_stage_post_processing` proto.
    optional PostProcessing second_stage_post_processing = 24;

    // Second stage refined localization loss weight.
    optional float second_stage_localization_loss_weight = 25 [default = 1.0];

    // Second stage classification loss weight.
    optional float second_stage_classification_loss_weight = 26
        [default = 1.0];

    // Second stage instance mask loss weight. Note that this is only
    // applicable when `MaskRCNNBoxPredictor` is selected for second stage and
    // configured to predict instance masks.
    optional float second_stage_mask_prediction_loss_weight = 27
        [default = 1.0];

    // If not left to default, applies hard example mining only to the
    // classification and localization losses.
    optional HardExampleMiner hard_example_miner = 28;

    // Loss for second stage box classifiers, supports Softmax and Sigmoid.
    // Note that score converter must be consistent with loss type.
    // When there are multiple labels assigned to the same boxes, recommend
    // to use sigmoid loss and enable merge_multiple_label_boxes.
    // If not specified, Softmax loss is used as default.
    optional ClassificationLoss second_stage_classification_loss = 29;

    // Whether to update batch_norm inplace during training. This is required
    // for batch norm to work correctly on TPUs. When this is false, user must
    // add a control dependency on tf.GraphKeys.UPDATE_OPS for train/loss op
    // in order to update the batch norm moving average parameters.
    optional bool inplace_batchnorm_update = 30 [default = false];

    // Force the use of matrix multiplication based crop and resize instead of
    // standard tf.image.crop_and_resize while computing second stage input
    // feature maps.
    optional bool use_matmul_crop_and_resize = 31 [default = false];

    // Normally, anchors generated for a given image size are pruned during
    // training if they lie outside the image window. Setting this option to
    // true clips the anchors to be within the image instead of pruning.
    optional bool clip_anchors_to_image = 32 [default = false];

    // After performing matching between anchors and targets, in order to pull
    // out targets for training Faster R-CNN meta architecture we perform a
    // gather operation. This option specifies whether to use an alternate
    // implementation of tf.gather that is faster on TPUs.
    optional bool use_matmul_gather_in_matcher = 33 [default = false];

    // Whether to use the balanced positive negative sampler implementation
    // with static shape guarantees.
    optional bool use_static_balanced_label_sampler = 34 [default = false];

    // If True, uses implementation of ops with static shape guarantees.
    optional bool use_static_shapes = 35 [default = false];

    // Whether the masks present in groundtruth should be resized in the model
    // to match the image size.
    optional bool resize_masks = 36 [default = true];

    // If True, uses implementation of ops with static shape guarantees when
    // running evaluation (that is, when `is_training` is False).
    optional bool use_static_shapes_for_eval = 37 [default = false];
  }
  // Feature extractor configuration for Faster R-CNN models.
  message FasterRcnnFeatureExtractor {
    // Type of Faster R-CNN model (e.g., 'faster_rcnn_resnet101'; see
    // builders/model_builder.py for expected types).
    optional string type = 1;

    // Output stride of extracted RPN feature map.
    optional int32 first_stage_features_stride = 2 [default = 16];

    // Whether to update batch norm parameters during training or not.
    // When training with a relatively large batch size (e.g. 8), it could be
    // desirable to enable batch norm update.
    optional bool batch_norm_trainable = 3 [default = false];
  }