You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

463 lines
17 KiB

  // Proto2 schema; all definitions live in the object_detection.protos package.
  syntax = "proto2";

  package object_detection.protos;
  // Message for defining a preprocessing operation on input data.
  // See: //third_party/tensorflow_models/object_detection/core/preprocessor.py
  message PreprocessingStep {
    // The operation to perform. The fields below are members of a oneof, so at
    // most one of them is set on any given PreprocessingStep.
    oneof preprocessing_step {
      NormalizeImage normalize_image = 1;
      RandomHorizontalFlip random_horizontal_flip = 2;
      RandomPixelValueScale random_pixel_value_scale = 3;
      RandomImageScale random_image_scale = 4;
      RandomRGBtoGray random_rgb_to_gray = 5;
      RandomAdjustBrightness random_adjust_brightness = 6;
      RandomAdjustContrast random_adjust_contrast = 7;
      RandomAdjustHue random_adjust_hue = 8;
      RandomAdjustSaturation random_adjust_saturation = 9;
      RandomDistortColor random_distort_color = 10;
      RandomJitterBoxes random_jitter_boxes = 11;
      RandomCropImage random_crop_image = 12;
      RandomPadImage random_pad_image = 13;
      RandomCropPadImage random_crop_pad_image = 14;
      RandomCropToAspectRatio random_crop_to_aspect_ratio = 15;
      RandomBlackPatches random_black_patches = 16;
      RandomResizeMethod random_resize_method = 17;
      ScaleBoxesToPixelCoordinates scale_boxes_to_pixel_coordinates = 18;
      ResizeImage resize_image = 19;
      SubtractChannelMean subtract_channel_mean = 20;
      SSDRandomCrop ssd_random_crop = 21;
      SSDRandomCropPad ssd_random_crop_pad = 22;
      SSDRandomCropFixedAspectRatio ssd_random_crop_fixed_aspect_ratio = 23;
      SSDRandomCropPadFixedAspectRatio ssd_random_crop_pad_fixed_aspect_ratio = 24;
      RandomVerticalFlip random_vertical_flip = 25;
      RandomRotation90 random_rotation90 = 26;
      RGBtoGray rgb_to_gray = 27;
      ConvertClassLogitsToSoftmax convert_class_logits_to_softmax = 28;
      RandomAbsolutePadImage random_absolute_pad_image = 29;
      RandomSelfConcatImage random_self_concat_image = 30;
    }
  }
  // Normalizes pixel values in an image.
  // For every channel in the image, moves the pixel values from the range
  // [original_minval, original_maxval] to [target_minval, target_maxval].
  message NormalizeImage {
    // Lower end of the source pixel-value range. No default is declared.
    optional float original_minval = 1;
    // Upper end of the source pixel-value range. No default is declared.
    optional float original_maxval = 2;
    // Lower end of the destination pixel-value range.
    optional float target_minval = 3 [default = 0];
    // Upper end of the destination pixel-value range.
    optional float target_maxval = 4 [default = 1];
  }
  // Randomly horizontally flips the image and detections 50% of the time.
  message RandomHorizontalFlip {
    // Specifies a mapping from the original keypoint indices to horizontally
    // flipped indices. This is used in the event that keypoints are specified,
    // in which case when the image is horizontally flipped the keypoints will
    // need to be permuted. E.g. for keypoints representing left_eye, right_eye,
    // nose_tip, mouth, left_ear, right_ear (in that order), one might specify
    // the keypoint_flip_permutation below:
    //   keypoint_flip_permutation: 1
    //   keypoint_flip_permutation: 0
    //   keypoint_flip_permutation: 2
    //   keypoint_flip_permutation: 3
    //   keypoint_flip_permutation: 5
    //   keypoint_flip_permutation: 4
    repeated int32 keypoint_flip_permutation = 1;
  }
  // Randomly vertically flips the image and detections 50% of the time.
  message RandomVerticalFlip {
    // Specifies a mapping from the original keypoint indices to vertically
    // flipped indices. This is used in the event that keypoints are specified,
    // in which case when the image is vertically flipped the keypoints will
    // need to be permuted. E.g. for keypoints representing left_eye, right_eye,
    // nose_tip, mouth, left_ear, right_ear (in that order), one might specify
    // the keypoint_flip_permutation below:
    //   keypoint_flip_permutation: 1
    //   keypoint_flip_permutation: 0
    //   keypoint_flip_permutation: 2
    //   keypoint_flip_permutation: 3
    //   keypoint_flip_permutation: 5
    //   keypoint_flip_permutation: 4
    repeated int32 keypoint_flip_permutation = 1;
  }
  // Randomly rotates the image and detections by 90 degrees counter-clockwise
  // 50% of the time. This message declares no configurable fields.
  message RandomRotation90 {}
  // Randomly scales the values of all pixels in the image by some constant value
  // between [minval, maxval], then clips the values to the range [0, 1.0].
  message RandomPixelValueScale {
    // Lower bound of the random scaling constant.
    optional float minval = 1 [default = 0.9];
    // Upper bound of the random scaling constant.
    optional float maxval = 2 [default = 1.1];
  }
  // Randomly enlarges or shrinks image (keeping aspect ratio).
  message RandomImageScale {
    // Lower bound of the scale ratio.
    optional float min_scale_ratio = 1 [default = 0.5];
    // Upper bound of the scale ratio.
    optional float max_scale_ratio = 2 [default = 2.0];
  }
  // Randomly converts the entire image to grayscale.
  message RandomRGBtoGray {
    // Probability associated with the conversion (presumably the chance that
    // the image is converted -- confirm against preprocessor.py).
    optional float probability = 1 [default = 0.1];
  }
  // Randomly changes image brightness by up to max_delta. Image outputs will be
  // saturated between 0 and 1.
  message RandomAdjustBrightness {
    // Largest brightness change that may be applied.
    optional float max_delta = 1 [default = 0.2];
  }
  // Randomly scales contrast by a value between [min_delta, max_delta].
  message RandomAdjustContrast {
    // Lower bound of the contrast scaling value.
    optional float min_delta = 1 [default = 0.8];
    // Upper bound of the contrast scaling value.
    optional float max_delta = 2 [default = 1.25];
  }
  // Randomly alters hue by a value of up to max_delta.
  message RandomAdjustHue {
    // Largest hue change that may be applied.
    optional float max_delta = 1 [default = 0.02];
  }
  // Randomly changes saturation by a value between [min_delta, max_delta].
  message RandomAdjustSaturation {
    // Lower bound of the saturation change value.
    optional float min_delta = 1 [default = 0.8];
    // Upper bound of the saturation change value.
    optional float max_delta = 2 [default = 1.25];
  }
  // Performs a random color distortion.
  message RandomDistortColor {
    // Selects the color ordering to use; should be either 0 or 1. No default is
    // declared.
    optional int32 color_ordering = 1;
  }
  // Randomly jitters corners of boxes in the image determined by ratio.
  // i.e., if a box is [100, 200] and ratio is 0.02, the corners can move by
  // [1, 4].
  message RandomJitterBoxes {
    // Jitter ratio described above.
    optional float ratio = 1 [default = 0.05];
  }
  // Randomly crops the image and bounding boxes.
  message RandomCropImage {
    // Cropped image must cover at least one box by this fraction.
    optional float min_object_covered = 1 [default = 1.0];
    // Aspect ratio bounds of cropped image.
    optional float min_aspect_ratio = 2 [default = 0.75];
    optional float max_aspect_ratio = 3 [default = 1.33];
    // Allowed area ratio of cropped image to original image.
    optional float min_area = 4 [default = 0.1];
    optional float max_area = 5 [default = 1.0];
    // Minimum overlap threshold of cropped boxes to keep in new image. If the
    // ratio between a cropped bounding box and the original is less than this
    // value, it is removed from the new image.
    optional float overlap_thresh = 6 [default = 0.3];
    // Whether to clip the boxes to the cropped image.
    // (Declared with field number 8, after which random_coef uses 7.)
    optional bool clip_boxes = 8 [default = true];
    // Probability of keeping the original image.
    optional float random_coef = 7 [default = 0.0];
  }
  // Randomly adds padding to the image.
  message RandomPadImage {
    // Minimum dimensions for padded image. If unset, will use original image
    // dimension as a lower bound.
    optional int32 min_image_height = 1;
    optional int32 min_image_width = 2;
    // Maximum dimensions for padded image. If unset, will use double the original
    // image dimension as an upper bound.
    // NOTE(review): the previous wording said "lower bound" here, which
    // contradicts "maximum" -- confirm against preprocessor.py.
    optional int32 max_image_height = 3;
    optional int32 max_image_width = 4;
    // Color of the padding. If unset, will pad using average color of the input
    // image.
    repeated float pad_color = 5;
  }
  // Randomly adds a padding of size [0, max_height_padding),
  // [0, max_width_padding).
  message RandomAbsolutePadImage {
    // Height will be padded uniformly at random from [0, max_height_padding).
    optional int32 max_height_padding = 1;
    // Width will be padded uniformly at random from [0, max_width_padding).
    optional int32 max_width_padding = 2;
    // Color of the padding. If unset, will pad using average color of the input
    // image.
    repeated float pad_color = 3;
  }
  // Randomly crops an image followed by a random pad.
  message RandomCropPadImage {
    // Cropping operation must cover at least one box by this fraction.
    optional float min_object_covered = 1 [default = 1.0];
    // Aspect ratio bounds of image after cropping operation.
    optional float min_aspect_ratio = 2 [default = 0.75];
    optional float max_aspect_ratio = 3 [default = 1.33];
    // Allowed area ratio of image after cropping operation.
    optional float min_area = 4 [default = 0.1];
    optional float max_area = 5 [default = 1.0];
    // Minimum overlap threshold of cropped boxes to keep in new image. If the
    // ratio between a cropped bounding box and the original is less than this
    // value, it is removed from the new image.
    optional float overlap_thresh = 6 [default = 0.3];
    // Whether to clip the boxes to the cropped image.
    optional bool clip_boxes = 11 [default = true];
    // Probability of keeping the original image during the crop operation.
    optional float random_coef = 7 [default = 0.0];
    // Minimum and maximum dimensions for the padded image. If the maximum is
    // unset, will use double the original image dimension as the upper bound.
    // Both of the following fields should be of length 2.
    // NOTE(review): the previous wording described only the maximum and called
    // it a "lower bound"; the field names suggest ratios relative to the input
    // image size -- confirm against preprocessor.py.
    repeated float min_padded_size_ratio = 8;
    repeated float max_padded_size_ratio = 9;
    // Color of the padding. If unset, will pad using average color of the input
    // image. This field should be of length 3.
    repeated float pad_color = 10;
  }
  // Randomly crops an image to a given aspect ratio.
  message RandomCropToAspectRatio {
    // Aspect ratio.
    optional float aspect_ratio = 1 [default = 1.0];
    // Minimum overlap threshold of cropped boxes to keep in new image. If the
    // ratio between a cropped bounding box and the original is less than this
    // value, it is removed from the new image.
    optional float overlap_thresh = 2 [default = 0.3];
    // Whether to clip the boxes to the cropped image.
    optional bool clip_boxes = 3 [default = true];
  }
  // Randomly adds black square patches to an image.
  message RandomBlackPatches {
    // The maximum number of black patches to add.
    optional int32 max_black_patches = 1 [default = 10];
    // The probability of a black patch being added to an image.
    optional float probability = 2 [default = 0.5];
    // Ratio between the dimension of the black patch and the minimum dimension
    // of the image, i.e. patch_width = patch_height =
    // size_to_image_ratio * min(image_height, image_width).
    optional float size_to_image_ratio = 3 [default = 0.1];
  }
  // Randomly resizes the image up to [target_height, target_width].
  message RandomResizeMethod {
    // Target height of the resized image. No default is declared.
    optional int32 target_height = 1;
    // Target width of the resized image. No default is declared.
    optional int32 target_width = 2;
  }
  // Converts the RGB image to a grayscale image. This also converts the image
  // depth from 3 to 1, unlike RandomRGBtoGray which does not change the image
  // depth. This message declares no configurable fields.
  message RGBtoGray {}
  // Scales boxes from normalized coordinates to pixel coordinates.
  // This message declares no configurable fields.
  message ScaleBoxesToPixelCoordinates {}
  // Resizes images to [new_height, new_width].
  message ResizeImage {
    // Height of the resized image. No default is declared.
    optional int32 new_height = 1;
    // Width of the resized image. No default is declared.
    optional int32 new_width = 2;
    // Resize methods that can be selected. Values start at 1; there is no
    // zero-valued entry.
    enum Method {
      AREA = 1;
      BICUBIC = 2;
      BILINEAR = 3;
      NEAREST_NEIGHBOR = 4;
    }
    // Resize method to use; BILINEAR when unset.
    optional Method method = 3 [default = BILINEAR];
  }
  // Normalizes an image by subtracting a mean from each channel.
  message SubtractChannelMean {
    // The mean to subtract from each channel. Should have the same length as
    // the number of channels in the input image.
    repeated float means = 1;
  }
  // Parameters of a single crop operation; SSDRandomCrop holds a repeated list
  // of these. None of the float fields declares a default.
  message SSDRandomCropOperation {
    // Cropped image must cover at least this fraction of one original bounding
    // box.
    optional float min_object_covered = 1;
    // The aspect ratio of the cropped image must be within the range of
    // [min_aspect_ratio, max_aspect_ratio].
    optional float min_aspect_ratio = 2;
    optional float max_aspect_ratio = 3;
    // The area of the cropped image must be within the range of
    // [min_area, max_area].
    optional float min_area = 4;
    optional float max_area = 5;
    // Cropped box area ratio must be above this threshold to be kept.
    optional float overlap_thresh = 6;
    // Whether to clip the boxes to the cropped image.
    optional bool clip_boxes = 8 [default = true];
    // Probability a crop operation is skipped.
    optional float random_coef = 7;
  }
  // Randomly crops an image according to:
  //   Liu et al., SSD: Single shot multibox detector.
  // This preprocessing step defines multiple SSDRandomCropOperations. Only one
  // operation (chosen at random) is actually performed on an image.
  message SSDRandomCrop {
    // Candidate crop operations.
    repeated SSDRandomCropOperation operations = 1;
  }
  // Parameters of a single crop-and-pad operation; SSDRandomCropPad holds a
  // repeated list of these. None of the float fields declares a default.
  message SSDRandomCropPadOperation {
    // Cropped image must cover at least this fraction of one original bounding
    // box.
    optional float min_object_covered = 1;
    // The aspect ratio of the cropped image must be within the range of
    // [min_aspect_ratio, max_aspect_ratio].
    optional float min_aspect_ratio = 2;
    optional float max_aspect_ratio = 3;
    // The area of the cropped image must be within the range of
    // [min_area, max_area].
    optional float min_area = 4;
    optional float max_area = 5;
    // Cropped box area ratio must be above this threshold to be kept.
    optional float overlap_thresh = 6;
    // Whether to clip the boxes to the cropped image.
    optional bool clip_boxes = 13 [default = true];
    // Probability a crop operation is skipped.
    optional float random_coef = 7;
    // Min ratio of padded image height and width to the input image's height and
    // width. Two entries per operation.
    repeated float min_padded_size_ratio = 8;
    // Max ratio of padded image height and width to the input image's height and
    // width. Two entries per operation.
    repeated float max_padded_size_ratio = 9;
    // Padding color, given as separate r, g and b components.
    optional float pad_color_r = 10;
    optional float pad_color_g = 11;
    optional float pad_color_b = 12;
  }
  // Randomly crops and pads an image according to:
  //   Liu et al., SSD: Single shot multibox detector.
  // This preprocessing step defines multiple SSDRandomCropPadOperations. Only one
  // operation (chosen at random) is actually performed on an image.
  message SSDRandomCropPad {
    // Candidate crop-and-pad operations.
    repeated SSDRandomCropPadOperation operations = 1;
  }
  // Parameters of a single fixed-aspect-ratio crop operation;
  // SSDRandomCropFixedAspectRatio holds a repeated list of these. None of the
  // float fields declares a default. Field numbers 2 and 3 are not used in
  // this message.
  message SSDRandomCropFixedAspectRatioOperation {
    // Cropped image must cover at least this fraction of one original bounding
    // box.
    optional float min_object_covered = 1;
    // The area of the cropped image must be within the range of
    // [min_area, max_area].
    optional float min_area = 4;
    optional float max_area = 5;
    // Cropped box area ratio must be above this threshold to be kept.
    optional float overlap_thresh = 6;
    // Whether to clip the boxes to the cropped image.
    optional bool clip_boxes = 8 [default = true];
    // Probability a crop operation is skipped.
    optional float random_coef = 7;
  }
  // Randomly crops an image to a fixed aspect ratio according to:
  //   Liu et al., SSD: Single shot multibox detector.
  // Multiple SSDRandomCropFixedAspectRatioOperations are defined by this
  // preprocessing step. Only one operation (chosen at random) is actually
  // performed on an image.
  message SSDRandomCropFixedAspectRatio {
    // Candidate crop operations.
    repeated SSDRandomCropFixedAspectRatioOperation operations = 1;
    // Aspect ratio to crop to. This value is used for all crop operations.
    optional float aspect_ratio = 2 [default = 1.0];
  }
  // Parameters of a single fixed-aspect-ratio crop-and-pad operation;
  // SSDRandomCropPadFixedAspectRatio holds a repeated list of these. None of
  // the float fields declares a default.
  message SSDRandomCropPadFixedAspectRatioOperation {
    // Cropped image must cover at least this fraction of one original bounding
    // box.
    optional float min_object_covered = 1;
    // The aspect ratio of the cropped image must be within the range of
    // [min_aspect_ratio, max_aspect_ratio].
    optional float min_aspect_ratio = 2;
    optional float max_aspect_ratio = 3;
    // The area of the cropped image must be within the range of
    // [min_area, max_area].
    optional float min_area = 4;
    optional float max_area = 5;
    // Cropped box area ratio must be above this threshold to be kept.
    optional float overlap_thresh = 6;
    // Whether to clip the boxes to the cropped image.
    optional bool clip_boxes = 8 [default = true];
    // Probability a crop operation is skipped.
    optional float random_coef = 7;
  }
  // Randomly crops and pads an image to a fixed aspect ratio according to:
  //   Liu et al., SSD: Single shot multibox detector.
  // Multiple SSDRandomCropPadFixedAspectRatioOperations are defined by this
  // preprocessing step. Only one operation (chosen at random) is actually
  // performed on an image.
  message SSDRandomCropPadFixedAspectRatio {
    // Candidate crop-and-pad operations.
    repeated SSDRandomCropPadFixedAspectRatioOperation operations = 1;
    // Aspect ratio to pad to. This value is used for all crop and pad operations.
    optional float aspect_ratio = 2 [default = 1.0];
    // Min ratio of padded image height and width to the input image's height and
    // width. Two entries per operation.
    repeated float min_padded_size_ratio = 3;
    // Max ratio of padded image height and width to the input image's height and
    // width. Two entries per operation.
    repeated float max_padded_size_ratio = 4;
  }
  // Converts class logits to softmax, optionally scaling the values by
  // temperature first.
  message ConvertClassLogitsToSoftmax {
    // Scale to use on logits before applying softmax.
    optional float temperature = 1 [default = 1.0];
  }
  // Randomly concatenates the image with itself horizontally and/or vertically.
  message RandomSelfConcatImage {
    // Probability of concatenating the image vertically.
    optional float concat_vertical_probability = 1 [default = 0.1];
    // Probability of concatenating the image horizontally.
    optional float concat_horizontal_probability = 2 [default = 0.1];
  }