|
|
- syntax = "proto2";
-
- package object_detection.protos;
-
- import "object_detection/protos/anchor_generator.proto";
- import "object_detection/protos/box_coder.proto";
- import "object_detection/protos/box_predictor.proto";
- import "object_detection/protos/hyperparams.proto";
- import "object_detection/protos/image_resizer.proto";
- import "object_detection/protos/losses.proto";
- import "object_detection/protos/matcher.proto";
- import "object_detection/protos/post_processing.proto";
- import "object_detection/protos/region_similarity_calculator.proto";
-
- // Configuration for Single Shot Detection (SSD) models.
- // Next id: 26
- message Ssd {
- // Field numbers 17-20 are not assigned to any field in this message.
- // NOTE(review): presumably they belonged to fields that were removed;
- // reserving them prevents accidental reuse with a different meaning.
- // Confirm against the file history.
- reserved 17 to 20;
-
- // Number of classes to predict.
- optional int32 num_classes = 1;
-
- // Image resizer for preprocessing the input image.
- optional ImageResizer image_resizer = 2;
-
- // Feature extractor config.
- optional SsdFeatureExtractor feature_extractor = 3;
-
- // Box coder to encode the boxes.
- optional BoxCoder box_coder = 4;
-
- // Matcher to match groundtruth with anchors.
- optional Matcher matcher = 5;
-
- // Region similarity calculator to compute similarity of boxes.
- optional RegionSimilarityCalculator similarity_calculator = 6;
-
- // Whether background targets are to be encoded as an all
- // zeros vector or a one-hot vector (where background is the 0th class).
- optional bool encode_background_as_zeros = 12 [default = false];
-
- // Classification weight to be associated to negative
- // anchors (default: 1.0). The weight must be in [0., 1.].
- optional float negative_class_weight = 13 [default = 1.0];
-
- // Box predictor to attach to the features.
- optional BoxPredictor box_predictor = 7;
-
- // Anchor generator to compute anchors.
- optional AnchorGenerator anchor_generator = 8;
-
- // Post processing to apply on the predictions.
- optional PostProcessing post_processing = 9;
-
- // Whether to normalize the loss by number of groundtruth boxes that match to
- // the anchors.
- optional bool normalize_loss_by_num_matches = 10 [default = true];
-
- // Whether to normalize the localization loss by the code size of the box
- // encodings. This is applied along with other normalization factors.
- optional bool normalize_loc_loss_by_codesize = 14 [default = false];
-
- // Loss configuration for training.
- optional Loss loss = 11;
-
- // Whether to freeze batch norm parameters during training, i.e. NOT update
- // them. When training with a relatively small batch size (e.g. 1), it is
- // desirable to disable batch norm update (set this to true) and use
- // pretrained batch norm params.
- //
- // Note: Some feature extractors are used with canned arg_scopes
- // (e.g resnet arg scopes). In these cases training behavior of batch norm
- // variables may depend on both values of `batch_norm_trainable` and
- // `is_training`.
- // NOTE(review): `batch_norm_trainable` is not a field of this message;
- // presumably it is the legacy name for the inverse of `freeze_batchnorm` --
- // confirm.
- //
- // When canned arg_scopes are used with feature extractors `conv_hyperparams`
- // will apply only to the additional layers that are added and are outside the
- // canned arg_scope.
- optional bool freeze_batchnorm = 16 [default = false];
-
- // Whether to update batch_norm inplace during training. This is required
- // for batch norm to work correctly on TPUs. When this is false, user must add
- // a control dependency on tf.GraphKeys.UPDATE_OPS for train/loss op in order
- // to update the batch norm moving average parameters.
- optional bool inplace_batchnorm_update = 15 [default = false];
-
- // Whether to add an implicit background class to one-hot encodings of
- // groundtruth labels. Set to false if training a single
- // class model or using an explicit background class.
- optional bool add_background_class = 21 [default = true];
-
- // Whether to use an explicit background class. Set to true if using
- // groundtruth labels with an explicit background class, as in multiclass
- // scores.
- optional bool explicit_background_class = 24 [default = false];
-
- // NOTE(review): undocumented in the original file. Presumably, when true,
- // groundtruth confidences are used as the classification targets -- confirm
- // against the model builder.
- optional bool use_confidences_as_targets = 22 [default = false];
-
- // NOTE(review): undocumented in the original file. Presumably the weight
- // given to implicitly labeled examples -- confirm against the model builder.
- optional float implicit_example_weight = 23 [default = 1.0];
-
- // Configuration proto for MaskHead.
- // Next id: 11
- message MaskHead {
- // The height and the width of the predicted mask. Only used when
- // predict_instance_masks is true.
- // NOTE(review): `predict_instance_masks` is not a field of this message;
- // presumably it refers to a setting defined elsewhere -- confirm.
- optional int32 mask_height = 1 [default = 15];
- optional int32 mask_width = 2 [default = 15];
-
- // Whether to predict class agnostic masks. Only used when
- // predict_instance_masks is true.
- optional bool masks_are_class_agnostic = 3 [default = true];
-
- // The depth for the first conv2d_transpose op applied to the
- // image_features in the mask prediction branch. If set to 0, the value
- // will be set automatically based on the number of channels in the image
- // features and the number of classes.
- optional int32 mask_prediction_conv_depth = 4 [default = 256];
-
- // The number of convolutions applied to image_features in the mask
- // prediction branch.
- optional int32 mask_prediction_num_conv_layers = 5 [default = 2];
-
- // Whether to apply convolutions on mask features before upsampling using
- // nearest neighbor resizing.
- // By default, mask features are resized to [`mask_height`, `mask_width`]
- // before applying convolutions and predicting masks.
- optional bool convolve_then_upsample_masks = 6 [default = false];
-
- // Mask loss weight.
- optional float mask_loss_weight = 7 [default = 5.0];
-
- // Number of boxes to be generated at training time for computing mask loss.
- optional int32 mask_loss_sample_size = 8 [default = 16];
-
- // Hyperparameters for convolution ops used in the box predictor.
- // NOTE(review): this field lives in MaskHead, so "box predictor" presumably
- // should read "mask head" -- confirm.
- optional Hyperparams conv_hyperparams = 9;
-
- // Output size (width and height are set to be the same) of the initial
- // bilinear interpolation based cropping during ROI pooling. Only used when
- // we have second stage prediction head enabled (e.g. mask head).
- optional int32 initial_crop_size = 10 [default = 15];
- }
-
- // Configs for mask head.
- optional MaskHead mask_head_config = 25;
- }
-
- // Configuration for the feature extractor of an SSD model.
- message SsdFeatureExtractor {
- // Field number 6 is reserved and cannot be assigned to a field.
- reserved 6;
-
- // Type of ssd feature extractor.
- optional string type = 1;
-
- // The factor to alter the depth of the channels in the feature extractor.
- optional float depth_multiplier = 2 [default = 1.0];
-
- // Minimum number of the channels in the feature extractor.
- optional int32 min_depth = 3 [default = 16];
-
- // Hyperparameters that affect the layers of feature extractor added on top
- // of the base feature extractor.
- optional Hyperparams conv_hyperparams = 4;
-
- // Normally, SSD feature extractors are constructed by reusing an existing
- // base feature extractor (that has its own hyperparams) and adding new layers
- // on top of it. `conv_hyperparams` above normally applies only to the new
- // layers while base feature extractor uses its own default hyperparams. If
- // this value is set to true, the base feature extractor's hyperparams will be
- // overridden with the `conv_hyperparams`.
- optional bool override_base_feature_extractor_hyperparams = 9
- [default = false];
-
- // The nearest multiple to zero-pad the input height and width dimensions to.
- // For example, if pad_to_multiple = 2, input dimensions are zero-padded
- // until the resulting dimensions are even.
- optional int32 pad_to_multiple = 5 [default = 1];
-
- // Whether to use explicit padding when extracting SSD multiresolution
- // features. This will also apply to the base feature extractor if a MobileNet
- // architecture is used.
- optional bool use_explicit_padding = 7 [default = false];
-
- // Whether to use depthwise separable convolutions to extract the additional
- // feature maps added by SSD.
- optional bool use_depthwise = 8 [default = false];
-
- // Feature Pyramid Networks config.
- optional FeaturePyramidNetworks fpn = 10;
-
- // If true, replace preprocess function of feature extractor with a
- // placeholder. This should only be used if all the image preprocessing steps
- // happen outside the graph.
- optional bool replace_preprocessor_with_placeholder = 11 [default = false];
- }
-
- // Configuration for Feature Pyramid Networks.
- message FeaturePyramidNetworks {
- // We recommend using multi_resolution_feature_map_generator with FPN, and
- // the levels there must match the levels defined below for better
- // performance.
- // Correspondence from FPN levels to Resnet/Mobilenet V1 feature maps:
- // FPN Level | Resnet Feature Map | Mobilenet-V1 Feature Map
- // 2 | Block 1 | Conv2d_3_pointwise
- // 3 | Block 2 | Conv2d_5_pointwise
- // 4 | Block 3 | Conv2d_11_pointwise
- // 5 | Block 4 | Conv2d_13_pointwise
- // 6 | Bottomup_5 | bottom_up_Conv2d_14
- // 7 | Bottomup_6 | bottom_up_Conv2d_15
- // 8 | Bottomup_7 | bottom_up_Conv2d_16
- // 9 | Bottomup_8 | bottom_up_Conv2d_17
-
- // Minimum level in the feature pyramid (see the table above).
- optional int32 min_level = 1 [default = 3];
-
- // Maximum level in the feature pyramid (see the table above).
- optional int32 max_level = 2 [default = 7];
-
- // Channel depth for additional coarse feature layers.
- optional int32 additional_layer_depth = 3 [default = 256];
-
- }
|