You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

974 lines
39 KiB

6 years ago
  1. # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """A set of functions that are used for visualization.
  16. These functions often receive an image, perform some visualization on the image.
  17. The functions do not return a value, instead they modify the image itself.
  18. """
  19. import abc
  20. import collections
  21. import functools
  22. # Set headless-friendly backend.
  23. import matplotlib; matplotlib.use('Agg') # pylint: disable=multiple-statements
  24. import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top
  25. import numpy as np
  26. import PIL.Image as Image
  27. import PIL.ImageColor as ImageColor
  28. import PIL.ImageDraw as ImageDraw
  29. import PIL.ImageFont as ImageFont
  30. import six
  31. import tensorflow as tf
  32. from object_detection.core import standard_fields as fields
  33. from object_detection.utils import shape_utils
  34. _TITLE_LEFT_MARGIN = 10
  35. _TITLE_TOP_MARGIN = 10
  36. STANDARD_COLORS = [
  37. 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
  38. 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
  39. 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
  40. 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
  41. 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
  42. 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
  43. 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
  44. 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
  45. 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
  46. 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
  47. 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
  48. 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
  49. 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
  50. 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
  51. 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
  52. 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
  53. 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
  54. 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
  55. 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
  56. 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
  57. 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
  58. 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
  59. 'WhiteSmoke', 'Yellow', 'YellowGreen'
  60. ]
  61. def save_image_array_as_png(image, output_path):
  62. """Saves an image (represented as a numpy array) to PNG.
  63. Args:
  64. image: a numpy array with shape [height, width, 3].
  65. output_path: path to which image should be written.
  66. """
  67. image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
  68. with tf.gfile.Open(output_path, 'w') as fid:
  69. image_pil.save(fid, 'PNG')
  70. def encode_image_array_as_png_str(image):
  71. """Encodes a numpy array into a PNG string.
  72. Args:
  73. image: a numpy array with shape [height, width, 3].
  74. Returns:
  75. PNG encoded image string.
  76. """
  77. image_pil = Image.fromarray(np.uint8(image))
  78. output = six.BytesIO()
  79. image_pil.save(output, format='PNG')
  80. png_string = output.getvalue()
  81. output.close()
  82. return png_string
  83. def draw_bounding_box_on_image_array(image,
  84. ymin,
  85. xmin,
  86. ymax,
  87. xmax,
  88. color='red',
  89. thickness=4,
  90. display_str_list=(),
  91. use_normalized_coordinates=True):
  92. """Adds a bounding box to an image (numpy array).
  93. Bounding box coordinates can be specified in either absolute (pixel) or
  94. normalized coordinates by setting the use_normalized_coordinates argument.
  95. Args:
  96. image: a numpy array with shape [height, width, 3].
  97. ymin: ymin of bounding box.
  98. xmin: xmin of bounding box.
  99. ymax: ymax of bounding box.
  100. xmax: xmax of bounding box.
  101. color: color to draw bounding box. Default is red.
  102. thickness: line thickness. Default value is 4.
  103. display_str_list: list of strings to display in box
  104. (each to be shown on its own line).
  105. use_normalized_coordinates: If True (default), treat coordinates
  106. ymin, xmin, ymax, xmax as relative to the image. Otherwise treat
  107. coordinates as absolute.
  108. """
  109. image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
  110. draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color,
  111. thickness, display_str_list,
  112. use_normalized_coordinates)
  113. np.copyto(image, np.array(image_pil))
def draw_bounding_box_on_image(image,
                               ymin,
                               xmin,
                               ymax,
                               xmax,
                               color='red',
                               thickness=4,
                               display_str_list=(),
                               use_normalized_coordinates=True):
  """Adds a bounding box to an image.

  Bounding box coordinates can be specified in either absolute (pixel) or
  normalized coordinates by setting the use_normalized_coordinates argument.

  Each string in display_str_list is displayed on a separate line above the
  bounding box in black text on a rectangle filled with the input 'color'.
  If the top of the bounding box extends to the edge of the image, the strings
  are displayed below the bounding box.

  Args:
    image: a PIL.Image object.
    ymin: ymin of bounding box.
    xmin: xmin of bounding box.
    ymax: ymax of bounding box.
    xmax: xmax of bounding box.
    color: color to draw bounding box. Default is red.
    thickness: line thickness. Default value is 4.
    display_str_list: list of strings to display in box
      (each to be shown on its own line).
    use_normalized_coordinates: If True (default), treat coordinates
      ymin, xmin, ymax, xmax as relative to the image. Otherwise treat
      coordinates as absolute.
  """
  draw = ImageDraw.Draw(image)
  im_width, im_height = image.size
  if use_normalized_coordinates:
    (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                  ymin * im_height, ymax * im_height)
  else:
    (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
  # Trace the four sides of the box as a single closed polyline.
  draw.line([(left, top), (left, bottom), (right, bottom),
             (right, top), (left, top)], width=thickness, fill=color)
  try:
    font = ImageFont.truetype('arial.ttf', 24)
  except IOError:
    # Fall back to PIL's built-in bitmap font when Arial is unavailable.
    font = ImageFont.load_default()

  # If the total height of the display strings added to the top of the bounding
  # box exceeds the top of the image, stack the strings below the bounding box
  # instead of above.
  display_str_heights = [font.getsize(ds)[1] for ds in display_str_list]
  # Each display_str has a top and bottom margin of 0.05x.
  total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

  if top > total_display_str_height:
    text_bottom = top
  else:
    text_bottom = bottom + total_display_str_height
  # Reverse list and print from bottom to top.
  for display_str in display_str_list[::-1]:
    text_width, text_height = font.getsize(display_str)
    margin = np.ceil(0.05 * text_height)
    # Filled background rectangle behind the label, then the label itself.
    draw.rectangle(
        [(left, text_bottom - text_height - 2 * margin), (left + text_width,
                                                          text_bottom)],
        fill=color)
    draw.text(
        (left + margin, text_bottom - text_height - margin),
        display_str,
        fill='black',
        font=font)
    # NOTE(review): the background rectangle above is text_height + 2 * margin
    # tall, but the cursor only advances by text_height - 2 * margin here, so
    # consecutive labels overlap slightly — confirm whether the intended step
    # is `text_height + 2 * margin`.
    text_bottom -= text_height - 2 * margin
  181. def draw_bounding_boxes_on_image_array(image,
  182. boxes,
  183. color='red',
  184. thickness=4,
  185. display_str_list_list=()):
  186. """Draws bounding boxes on image (numpy array).
  187. Args:
  188. image: a numpy array object.
  189. boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax).
  190. The coordinates are in normalized format between [0, 1].
  191. color: color to draw bounding box. Default is red.
  192. thickness: line thickness. Default value is 4.
  193. display_str_list_list: list of list of strings.
  194. a list of strings for each bounding box.
  195. The reason to pass a list of strings for a
  196. bounding box is that it might contain
  197. multiple labels.
  198. Raises:
  199. ValueError: if boxes is not a [N, 4] array
  200. """
  201. image_pil = Image.fromarray(image)
  202. draw_bounding_boxes_on_image(image_pil, boxes, color, thickness,
  203. display_str_list_list)
  204. np.copyto(image, np.array(image_pil))
  205. def draw_bounding_boxes_on_image(image,
  206. boxes,
  207. color='red',
  208. thickness=4,
  209. display_str_list_list=()):
  210. """Draws bounding boxes on image.
  211. Args:
  212. image: a PIL.Image object.
  213. boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax).
  214. The coordinates are in normalized format between [0, 1].
  215. color: color to draw bounding box. Default is red.
  216. thickness: line thickness. Default value is 4.
  217. display_str_list_list: list of list of strings.
  218. a list of strings for each bounding box.
  219. The reason to pass a list of strings for a
  220. bounding box is that it might contain
  221. multiple labels.
  222. Raises:
  223. ValueError: if boxes is not a [N, 4] array
  224. """
  225. boxes_shape = boxes.shape
  226. if not boxes_shape:
  227. return
  228. if len(boxes_shape) != 2 or boxes_shape[1] != 4:
  229. raise ValueError('Input must be of size [N, 4]')
  230. for i in range(boxes_shape[0]):
  231. display_str_list = ()
  232. if display_str_list_list:
  233. display_str_list = display_str_list_list[i]
  234. draw_bounding_box_on_image(image, boxes[i, 0], boxes[i, 1], boxes[i, 2],
  235. boxes[i, 3], color, thickness, display_str_list)
  236. def _visualize_boxes(image, boxes, classes, scores, category_index, **kwargs):
  237. return visualize_boxes_and_labels_on_image_array(
  238. image, boxes, classes, scores, category_index=category_index, **kwargs)
  239. def _visualize_boxes_and_masks(image, boxes, classes, scores, masks,
  240. category_index, **kwargs):
  241. return visualize_boxes_and_labels_on_image_array(
  242. image,
  243. boxes,
  244. classes,
  245. scores,
  246. category_index=category_index,
  247. instance_masks=masks,
  248. **kwargs)
  249. def _visualize_boxes_and_keypoints(image, boxes, classes, scores, keypoints,
  250. category_index, **kwargs):
  251. return visualize_boxes_and_labels_on_image_array(
  252. image,
  253. boxes,
  254. classes,
  255. scores,
  256. category_index=category_index,
  257. keypoints=keypoints,
  258. **kwargs)
  259. def _visualize_boxes_and_masks_and_keypoints(
  260. image, boxes, classes, scores, masks, keypoints, category_index, **kwargs):
  261. return visualize_boxes_and_labels_on_image_array(
  262. image,
  263. boxes,
  264. classes,
  265. scores,
  266. category_index=category_index,
  267. instance_masks=masks,
  268. keypoints=keypoints,
  269. **kwargs)
  270. def _resize_original_image(image, image_shape):
  271. image = tf.expand_dims(image, 0)
  272. image = tf.image.resize_images(
  273. image,
  274. image_shape,
  275. method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
  276. align_corners=True)
  277. return tf.cast(tf.squeeze(image, 0), tf.uint8)
def draw_bounding_boxes_on_image_tensors(images,
                                         boxes,
                                         classes,
                                         scores,
                                         category_index,
                                         original_image_spatial_shape=None,
                                         true_image_shape=None,
                                         instance_masks=None,
                                         keypoints=None,
                                         max_boxes_to_draw=20,
                                         min_score_thresh=0.2,
                                         use_normalized_coordinates=True):
  """Draws bounding boxes, masks, and keypoints on batch of image tensors.

  Args:
    images: A 4D uint8 image tensor of shape [N, H, W, C]. If C > 3, additional
      channels will be ignored. If C = 1, then we convert the images to RGB
      images.
    boxes: [N, max_detections, 4] float32 tensor of detection boxes.
    classes: [N, max_detections] int tensor of detection classes. Note that
      classes are 1-indexed.
    scores: [N, max_detections] float32 tensor of detection scores.
    category_index: a dict that maps integer ids to category dicts. e.g.
      {1: {1: 'dog'}, 2: {2: 'cat'}, ...}
    original_image_spatial_shape: [N, 2] tensor containing the spatial size of
      the original image.
    true_image_shape: [N, 3] tensor containing the spatial size of unpadded
      original_image.
    instance_masks: A 4D uint8 tensor of shape [N, max_detection, H, W] with
      instance masks.
    keypoints: A 4D float32 tensor of shape [N, max_detection, num_keypoints, 2]
      with keypoints.
    max_boxes_to_draw: Maximum number of boxes to draw on an image. Default 20.
    min_score_thresh: Minimum score threshold for visualization. Default 0.2.
    use_normalized_coordinates: Whether to assume boxes and kepoints are in
      normalized coordinates (as opposed to absolute coordiantes).
      Default is True.

  Returns:
    4D image tensor of type uint8, with boxes drawn on top.
  """
  # Additional channels are being ignored.
  if images.shape[3] > 3:
    images = images[:, :, :, 0:3]
  elif images.shape[3] == 1:
    images = tf.image.grayscale_to_rgb(images)
  # Common kwargs forwarded to the numpy visualization function via partial.
  visualization_keyword_args = {
      'use_normalized_coordinates': use_normalized_coordinates,
      'max_boxes_to_draw': max_boxes_to_draw,
      'min_score_thresh': min_score_thresh,
      'agnostic_mode': False,
      'line_thickness': 4
  }
  # When shape info is absent, substitute -1 placeholders so that `elems`
  # still has a tensor of the right rank for tf.map_fn; the actual use of
  # these tensors below is gated on the Python-level None checks.
  if true_image_shape is None:
    true_shapes = tf.constant(-1, shape=[images.shape.as_list()[0], 3])
  else:
    true_shapes = true_image_shape
  if original_image_spatial_shape is None:
    original_shapes = tf.constant(-1, shape=[images.shape.as_list()[0], 2])
  else:
    original_shapes = original_image_spatial_shape
  # Pick the visualization function matching the optional inputs supplied,
  # and assemble the per-example element list for tf.map_fn accordingly.
  if instance_masks is not None and keypoints is None:
    visualize_boxes_fn = functools.partial(
        _visualize_boxes_and_masks,
        category_index=category_index,
        **visualization_keyword_args)
    elems = [
        true_shapes, original_shapes, images, boxes, classes, scores,
        instance_masks
    ]
  elif instance_masks is None and keypoints is not None:
    visualize_boxes_fn = functools.partial(
        _visualize_boxes_and_keypoints,
        category_index=category_index,
        **visualization_keyword_args)
    elems = [
        true_shapes, original_shapes, images, boxes, classes, scores, keypoints
    ]
  elif instance_masks is not None and keypoints is not None:
    visualize_boxes_fn = functools.partial(
        _visualize_boxes_and_masks_and_keypoints,
        category_index=category_index,
        **visualization_keyword_args)
    elems = [
        true_shapes, original_shapes, images, boxes, classes, scores,
        instance_masks, keypoints
    ]
  else:
    visualize_boxes_fn = functools.partial(
        _visualize_boxes,
        category_index=category_index,
        **visualization_keyword_args)
    elems = [
        true_shapes, original_shapes, images, boxes, classes, scores
    ]

  def draw_boxes(image_and_detections):
    """Draws boxes on image."""
    true_shape = image_and_detections[0]
    original_shape = image_and_detections[1]
    # Strip padding down to the true image extent, then resize back to the
    # original spatial shape before drawing.
    if true_image_shape is not None:
      image = shape_utils.pad_or_clip_nd(image_and_detections[2],
                                         [true_shape[0], true_shape[1], 3])
    # NOTE(review): if true_image_shape is None but
    # original_image_spatial_shape is not, `image` is referenced here without
    # being assigned — confirm callers always pass both or neither.
    if original_image_spatial_shape is not None:
      image_and_detections[2] = _resize_original_image(image, original_shape)

    # py_func runs the numpy-based visualization on the per-example slices
    # (everything after the two shape tensors).
    image_with_boxes = tf.py_func(visualize_boxes_fn, image_and_detections[2:],
                                  tf.uint8)
    return image_with_boxes

  images = tf.map_fn(draw_boxes, elems, dtype=tf.uint8, back_prop=False)
  return images
def draw_side_by_side_evaluation_image(eval_dict,
                                       category_index,
                                       max_boxes_to_draw=20,
                                       min_score_thresh=0.2,
                                       use_normalized_coordinates=True):
  """Creates a side-by-side image with detections and groundtruth.

  Bounding boxes (and instance masks, if available) are visualized on both
  subimages.

  Args:
    eval_dict: The evaluation dictionary returned by
      eval_util.result_dict_for_batched_example() or
      eval_util.result_dict_for_single_example().
    category_index: A category index (dictionary) produced from a labelmap.
    max_boxes_to_draw: The maximum number of boxes to draw for detections.
    min_score_thresh: The minimum score threshold for showing detections.
    use_normalized_coordinates: Whether to assume boxes and kepoints are in
      normalized coordinates (as opposed to absolute coordiantes).
      Default is True.

  Returns:
    A list of [1, H, 2 * W, C] uint8 tensor. The subimage on the left
    corresponds to detections, while the subimage on the right corresponds to
    groundtruth.
  """
  detection_fields = fields.DetectionResultFields()
  input_data_fields = fields.InputDataFields()
  images_with_detections_list = []

  # Add the batch dimension if the eval_dict is for single example.
  # Note: this mutates the caller's eval_dict in place.
  if len(eval_dict[detection_fields.detection_classes].shape) == 1:
    for key in eval_dict:
      if key != input_data_fields.original_image:
        eval_dict[key] = tf.expand_dims(eval_dict[key], 0)
  for indx in range(eval_dict[input_data_fields.original_image].shape[0]):
    # Optional per-example tensors, each re-batched to a leading dim of 1.
    instance_masks = None
    if detection_fields.detection_masks in eval_dict:
      instance_masks = tf.cast(
          tf.expand_dims(
              eval_dict[detection_fields.detection_masks][indx], axis=0),
          tf.uint8)
    keypoints = None
    if detection_fields.detection_keypoints in eval_dict:
      keypoints = tf.expand_dims(
          eval_dict[detection_fields.detection_keypoints][indx], axis=0)
    groundtruth_instance_masks = None
    if input_data_fields.groundtruth_instance_masks in eval_dict:
      groundtruth_instance_masks = tf.cast(
          tf.expand_dims(
              eval_dict[input_data_fields.groundtruth_instance_masks][indx],
              axis=0), tf.uint8)
    # Left subimage: model detections, thresholded by min_score_thresh.
    images_with_detections = draw_bounding_boxes_on_image_tensors(
        tf.expand_dims(
            eval_dict[input_data_fields.original_image][indx], axis=0),
        tf.expand_dims(
            eval_dict[detection_fields.detection_boxes][indx], axis=0),
        tf.expand_dims(
            eval_dict[detection_fields.detection_classes][indx], axis=0),
        tf.expand_dims(
            eval_dict[detection_fields.detection_scores][indx], axis=0),
        category_index,
        original_image_spatial_shape=tf.expand_dims(
            eval_dict[input_data_fields.original_image_spatial_shape][indx],
            axis=0),
        true_image_shape=tf.expand_dims(
            eval_dict[input_data_fields.true_image_shape][indx], axis=0),
        instance_masks=instance_masks,
        keypoints=keypoints,
        max_boxes_to_draw=max_boxes_to_draw,
        min_score_thresh=min_score_thresh,
        use_normalized_coordinates=use_normalized_coordinates)
    # Right subimage: groundtruth boxes. Scores are faked as all-ones and the
    # threshold is 0.0 so every groundtruth box is drawn.
    images_with_groundtruth = draw_bounding_boxes_on_image_tensors(
        tf.expand_dims(
            eval_dict[input_data_fields.original_image][indx], axis=0),
        tf.expand_dims(
            eval_dict[input_data_fields.groundtruth_boxes][indx], axis=0),
        tf.expand_dims(
            eval_dict[input_data_fields.groundtruth_classes][indx], axis=0),
        tf.expand_dims(
            tf.ones_like(
                eval_dict[input_data_fields.groundtruth_classes][indx],
                dtype=tf.float32),
            axis=0),
        category_index,
        original_image_spatial_shape=tf.expand_dims(
            eval_dict[input_data_fields.original_image_spatial_shape][indx],
            axis=0),
        true_image_shape=tf.expand_dims(
            eval_dict[input_data_fields.true_image_shape][indx], axis=0),
        instance_masks=groundtruth_instance_masks,
        keypoints=None,
        max_boxes_to_draw=None,
        min_score_thresh=0.0,
        use_normalized_coordinates=use_normalized_coordinates)
    # Concatenate along width: detections on the left, groundtruth right.
    images_with_detections_list.append(
        tf.concat([images_with_detections, images_with_groundtruth], axis=2))
  return images_with_detections_list
  479. def draw_keypoints_on_image_array(image,
  480. keypoints,
  481. color='red',
  482. radius=2,
  483. use_normalized_coordinates=True):
  484. """Draws keypoints on an image (numpy array).
  485. Args:
  486. image: a numpy array with shape [height, width, 3].
  487. keypoints: a numpy array with shape [num_keypoints, 2].
  488. color: color to draw the keypoints with. Default is red.
  489. radius: keypoint radius. Default value is 2.
  490. use_normalized_coordinates: if True (default), treat keypoint values as
  491. relative to the image. Otherwise treat them as absolute.
  492. """
  493. image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
  494. draw_keypoints_on_image(image_pil, keypoints, color, radius,
  495. use_normalized_coordinates)
  496. np.copyto(image, np.array(image_pil))
  497. def draw_keypoints_on_image(image,
  498. keypoints,
  499. color='red',
  500. radius=2,
  501. use_normalized_coordinates=True):
  502. """Draws keypoints on an image.
  503. Args:
  504. image: a PIL.Image object.
  505. keypoints: a numpy array with shape [num_keypoints, 2].
  506. color: color to draw the keypoints with. Default is red.
  507. radius: keypoint radius. Default value is 2.
  508. use_normalized_coordinates: if True (default), treat keypoint values as
  509. relative to the image. Otherwise treat them as absolute.
  510. """
  511. draw = ImageDraw.Draw(image)
  512. im_width, im_height = image.size
  513. keypoints_x = [k[1] for k in keypoints]
  514. keypoints_y = [k[0] for k in keypoints]
  515. if use_normalized_coordinates:
  516. keypoints_x = tuple([im_width * x for x in keypoints_x])
  517. keypoints_y = tuple([im_height * y for y in keypoints_y])
  518. for keypoint_x, keypoint_y in zip(keypoints_x, keypoints_y):
  519. draw.ellipse([(keypoint_x - radius, keypoint_y - radius),
  520. (keypoint_x + radius, keypoint_y + radius)],
  521. outline=color, fill=color)
  522. def draw_mask_on_image_array(image, mask, color='red', alpha=0.4):
  523. """Draws mask on an image.
  524. Args:
  525. image: uint8 numpy array with shape (img_height, img_height, 3)
  526. mask: a uint8 numpy array of shape (img_height, img_height) with
  527. values between either 0 or 1.
  528. color: color to draw the keypoints with. Default is red.
  529. alpha: transparency value between 0 and 1. (default: 0.4)
  530. Raises:
  531. ValueError: On incorrect data type for image or masks.
  532. """
  533. if image.dtype != np.uint8:
  534. raise ValueError('`image` not of type np.uint8')
  535. if mask.dtype != np.uint8:
  536. raise ValueError('`mask` not of type np.uint8')
  537. if np.any(np.logical_and(mask != 1, mask != 0)):
  538. raise ValueError('`mask` elements should be in [0, 1]')
  539. if image.shape[:2] != mask.shape:
  540. raise ValueError('The image has spatial dimensions %s but the mask has '
  541. 'dimensions %s' % (image.shape[:2], mask.shape))
  542. rgb = ImageColor.getrgb(color)
  543. pil_image = Image.fromarray(image)
  544. solid_color = np.expand_dims(
  545. np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3])
  546. pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA')
  547. pil_mask = Image.fromarray(np.uint8(255.0*alpha*mask)).convert('L')
  548. pil_image = Image.composite(pil_solid_color, pil_image, pil_mask)
  549. np.copyto(image, np.array(pil_image.convert('RGB')))
def visualize_boxes_and_labels_on_image_array(
    image,
    boxes,
    classes,
    scores,
    category_index,
    instance_masks=None,
    instance_boundaries=None,
    keypoints=None,
    use_normalized_coordinates=False,
    max_boxes_to_draw=20,
    min_score_thresh=.5,
    agnostic_mode=False,
    line_thickness=4,
    groundtruth_box_visualization_color='black',
    skip_scores=False,
    skip_labels=False):
  """Overlay labeled boxes on an image with formatted scores and label names.

  This function groups boxes that correspond to the same location
  and creates a display string for each detection and overlays these
  on the image. Note that this function modifies the image in place, and returns
  that same image.

  Args:
    image: uint8 numpy array with shape (img_height, img_width, 3)
    boxes: a numpy array of shape [N, 4]
    classes: a numpy array of shape [N]. Note that class indices are 1-based,
      and match the keys in the label map.
    scores: a numpy array of shape [N] or None. If scores=None, then
      this function assumes that the boxes to be plotted are groundtruth
      boxes and plot all boxes as black with no classes or scores.
    category_index: a dict containing category dictionaries (each holding
      category index `id` and category name `name`) keyed by category indices.
    instance_masks: a numpy array of shape [N, image_height, image_width] with
      values ranging between 0 and 1, can be None.
    instance_boundaries: a numpy array of shape [N, image_height, image_width]
      with values ranging between 0 and 1, can be None.
    keypoints: a numpy array of shape [N, num_keypoints, 2], can
      be None
    use_normalized_coordinates: whether boxes is to be interpreted as
      normalized coordinates or not.
    max_boxes_to_draw: maximum number of boxes to visualize. If None, draw
      all boxes.
    min_score_thresh: minimum score threshold for a box to be visualized
    agnostic_mode: boolean (default: False) controlling whether to evaluate in
      class-agnostic mode or not. This mode will display scores but ignore
      classes.
    line_thickness: integer (default: 4) controlling line width of the boxes.
    groundtruth_box_visualization_color: box color for visualizing groundtruth
      boxes
    skip_scores: whether to skip score when drawing a single detection
    skip_labels: whether to skip label when drawing a single detection

  Returns:
    uint8 numpy array with shape (img_height, img_width, 3) with overlaid boxes.
  """
  # Create a display string (and color) for every box location, group any boxes
  # that correspond to the same location.
  box_to_display_str_map = collections.defaultdict(list)
  box_to_color_map = collections.defaultdict(str)
  box_to_instance_masks_map = {}
  box_to_instance_boundaries_map = {}
  box_to_keypoints_map = collections.defaultdict(list)
  if not max_boxes_to_draw:
    max_boxes_to_draw = boxes.shape[0]
  for i in range(min(max_boxes_to_draw, boxes.shape[0])):
    if scores is None or scores[i] > min_score_thresh:
      # The box tuple serves as the grouping key for all per-box data.
      box = tuple(boxes[i].tolist())
      if instance_masks is not None:
        box_to_instance_masks_map[box] = instance_masks[i]
      if instance_boundaries is not None:
        box_to_instance_boundaries_map[box] = instance_boundaries[i]
      if keypoints is not None:
        box_to_keypoints_map[box].extend(keypoints[i])
      if scores is None:
        # Groundtruth mode: fixed color, no label/score text.
        box_to_color_map[box] = groundtruth_box_visualization_color
      else:
        # Build "label: NN%" / "NN%" / "label" depending on the skip flags.
        display_str = ''
        if not skip_labels:
          if not agnostic_mode:
            if classes[i] in category_index.keys():
              class_name = category_index[classes[i]]['name']
            else:
              class_name = 'N/A'
            display_str = str(class_name)
        if not skip_scores:
          if not display_str:
            display_str = '{}%'.format(int(100*scores[i]))
          else:
            display_str = '{}: {}%'.format(display_str, int(100*scores[i]))
        box_to_display_str_map[box].append(display_str)
        if agnostic_mode:
          box_to_color_map[box] = 'DarkOrange'
        else:
          # Deterministic per-class color from the fixed palette.
          box_to_color_map[box] = STANDARD_COLORS[
              classes[i] % len(STANDARD_COLORS)]

  # Draw all boxes onto image.
  for box, color in box_to_color_map.items():
    ymin, xmin, ymax, xmax = box
    if instance_masks is not None:
      draw_mask_on_image_array(
          image,
          box_to_instance_masks_map[box],
          color=color
      )
    if instance_boundaries is not None:
      # Boundaries are drawn fully opaque in red regardless of class color.
      draw_mask_on_image_array(
          image,
          box_to_instance_boundaries_map[box],
          color='red',
          alpha=1.0
      )
    draw_bounding_box_on_image_array(
        image,
        ymin,
        xmin,
        ymax,
        xmax,
        color=color,
        thickness=line_thickness,
        display_str_list=box_to_display_str_map[box],
        use_normalized_coordinates=use_normalized_coordinates)
    if keypoints is not None:
      draw_keypoints_on_image_array(
          image,
          box_to_keypoints_map[box],
          color=color,
          radius=line_thickness / 2,
          use_normalized_coordinates=use_normalized_coordinates)

  return image
  678. def add_cdf_image_summary(values, name):
  679. """Adds a tf.summary.image for a CDF plot of the values.
  680. Normalizes `values` such that they sum to 1, plots the cumulative distribution
  681. function and creates a tf image summary.
  682. Args:
  683. values: a 1-D float32 tensor containing the values.
  684. name: name for the image summary.
  685. """
  686. def cdf_plot(values):
  687. """Numpy function to plot CDF."""
  688. normalized_values = values / np.sum(values)
  689. sorted_values = np.sort(normalized_values)
  690. cumulative_values = np.cumsum(sorted_values)
  691. fraction_of_examples = (np.arange(cumulative_values.size, dtype=np.float32)
  692. / cumulative_values.size)
  693. fig = plt.figure(frameon=False)
  694. ax = fig.add_subplot('111')
  695. ax.plot(fraction_of_examples, cumulative_values)
  696. ax.set_ylabel('cumulative normalized values')
  697. ax.set_xlabel('fraction of examples')
  698. fig.canvas.draw()
  699. width, height = fig.get_size_inches() * fig.get_dpi()
  700. image = np.fromstring(fig.canvas.tostring_rgb(), dtype='uint8').reshape(
  701. 1, int(height), int(width), 3)
  702. return image
  703. cdf_plot = tf.py_func(cdf_plot, [values], tf.uint8)
  704. tf.summary.image(name, cdf_plot)
  705. def add_hist_image_summary(values, bins, name):
  706. """Adds a tf.summary.image for a histogram plot of the values.
  707. Plots the histogram of values and creates a tf image summary.
  708. Args:
  709. values: a 1-D float32 tensor containing the values.
  710. bins: bin edges which will be directly passed to np.histogram.
  711. name: name for the image summary.
  712. """
  713. def hist_plot(values, bins):
  714. """Numpy function to plot hist."""
  715. fig = plt.figure(frameon=False)
  716. ax = fig.add_subplot('111')
  717. y, x = np.histogram(values, bins=bins)
  718. ax.plot(x[:-1], y)
  719. ax.set_ylabel('count')
  720. ax.set_xlabel('value')
  721. fig.canvas.draw()
  722. width, height = fig.get_size_inches() * fig.get_dpi()
  723. image = np.fromstring(
  724. fig.canvas.tostring_rgb(), dtype='uint8').reshape(
  725. 1, int(height), int(width), 3)
  726. return image
  727. hist_plot = tf.py_func(hist_plot, [values, bins], tf.uint8)
  728. tf.summary.image(name, hist_plot)
  729. class EvalMetricOpsVisualization(object):
  730. """Abstract base class responsible for visualizations during evaluation.
  731. Currently, summary images are not run during evaluation. One way to produce
  732. evaluation images in Tensorboard is to provide tf.summary.image strings as
  733. `value_ops` in tf.estimator.EstimatorSpec's `eval_metric_ops`. This class is
  734. responsible for accruing images (with overlaid detections and groundtruth)
  735. and returning a dictionary that can be passed to `eval_metric_ops`.
  736. """
  737. __metaclass__ = abc.ABCMeta
  738. def __init__(self,
  739. category_index,
  740. max_examples_to_draw=5,
  741. max_boxes_to_draw=20,
  742. min_score_thresh=0.2,
  743. use_normalized_coordinates=True,
  744. summary_name_prefix='evaluation_image'):
  745. """Creates an EvalMetricOpsVisualization.
  746. Args:
  747. category_index: A category index (dictionary) produced from a labelmap.
  748. max_examples_to_draw: The maximum number of example summaries to produce.
  749. max_boxes_to_draw: The maximum number of boxes to draw for detections.
  750. min_score_thresh: The minimum score threshold for showing detections.
  751. use_normalized_coordinates: Whether to assume boxes and kepoints are in
  752. normalized coordinates (as opposed to absolute coordiantes).
  753. Default is True.
  754. summary_name_prefix: A string prefix for each image summary.
  755. """
  756. self._category_index = category_index
  757. self._max_examples_to_draw = max_examples_to_draw
  758. self._max_boxes_to_draw = max_boxes_to_draw
  759. self._min_score_thresh = min_score_thresh
  760. self._use_normalized_coordinates = use_normalized_coordinates
  761. self._summary_name_prefix = summary_name_prefix
  762. self._images = []
  763. def clear(self):
  764. self._images = []
  765. def add_images(self, images):
  766. """Store a list of images, each with shape [1, H, W, C]."""
  767. if len(self._images) >= self._max_examples_to_draw:
  768. return
  769. # Store images and clip list if necessary.
  770. self._images.extend(images)
  771. if len(self._images) > self._max_examples_to_draw:
  772. self._images[self._max_examples_to_draw:] = []
  773. def get_estimator_eval_metric_ops(self, eval_dict):
  774. """Returns metric ops for use in tf.estimator.EstimatorSpec.
  775. Args:
  776. eval_dict: A dictionary that holds an image, groundtruth, and detections
  777. for a batched example. Note that, we use only the first example for
  778. visualization. See eval_util.result_dict_for_batched_example() for a
  779. convenient method for constructing such a dictionary. The dictionary
  780. contains
  781. fields.InputDataFields.original_image: [batch_size, H, W, 3] image.
  782. fields.InputDataFields.original_image_spatial_shape: [batch_size, 2]
  783. tensor containing the size of the original image.
  784. fields.InputDataFields.true_image_shape: [batch_size, 3]
  785. tensor containing the spatial size of the upadded original image.
  786. fields.InputDataFields.groundtruth_boxes - [batch_size, num_boxes, 4]
  787. float32 tensor with groundtruth boxes in range [0.0, 1.0].
  788. fields.InputDataFields.groundtruth_classes - [batch_size, num_boxes]
  789. int64 tensor with 1-indexed groundtruth classes.
  790. fields.InputDataFields.groundtruth_instance_masks - (optional)
  791. [batch_size, num_boxes, H, W] int64 tensor with instance masks.
  792. fields.DetectionResultFields.detection_boxes - [batch_size,
  793. max_num_boxes, 4] float32 tensor with detection boxes in range [0.0,
  794. 1.0].
  795. fields.DetectionResultFields.detection_classes - [batch_size,
  796. max_num_boxes] int64 tensor with 1-indexed detection classes.
  797. fields.DetectionResultFields.detection_scores - [batch_size,
  798. max_num_boxes] float32 tensor with detection scores.
  799. fields.DetectionResultFields.detection_masks - (optional) [batch_size,
  800. max_num_boxes, H, W] float32 tensor of binarized masks.
  801. fields.DetectionResultFields.detection_keypoints - (optional)
  802. [batch_size, max_num_boxes, num_keypoints, 2] float32 tensor with
  803. keypoints.
  804. Returns:
  805. A dictionary of image summary names to tuple of (value_op, update_op). The
  806. `update_op` is the same for all items in the dictionary, and is
  807. responsible for saving a single side-by-side image with detections and
  808. groundtruth. Each `value_op` holds the tf.summary.image string for a given
  809. image.
  810. """
  811. if self._max_examples_to_draw == 0:
  812. return {}
  813. images = self.images_from_evaluation_dict(eval_dict)
  814. def get_images():
  815. """Returns a list of images, padded to self._max_images_to_draw."""
  816. images = self._images
  817. while len(images) < self._max_examples_to_draw:
  818. images.append(np.array(0, dtype=np.uint8))
  819. self.clear()
  820. return images
  821. def image_summary_or_default_string(summary_name, image):
  822. """Returns image summaries for non-padded elements."""
  823. return tf.cond(
  824. tf.equal(tf.size(tf.shape(image)), 4),
  825. lambda: tf.summary.image(summary_name, image),
  826. lambda: tf.constant(''))
  827. update_op = tf.py_func(self.add_images, [[images[0]]], [])
  828. image_tensors = tf.py_func(
  829. get_images, [], [tf.uint8] * self._max_examples_to_draw)
  830. eval_metric_ops = {}
  831. for i, image in enumerate(image_tensors):
  832. summary_name = self._summary_name_prefix + '/' + str(i)
  833. value_op = image_summary_or_default_string(summary_name, image)
  834. eval_metric_ops[summary_name] = (value_op, update_op)
  835. return eval_metric_ops
  836. @abc.abstractmethod
  837. def images_from_evaluation_dict(self, eval_dict):
  838. """Converts evaluation dictionary into a list of image tensors.
  839. To be overridden by implementations.
  840. Args:
  841. eval_dict: A dictionary with all the necessary information for producing
  842. visualizations.
  843. Returns:
  844. A list of [1, H, W, C] uint8 tensors.
  845. """
  846. raise NotImplementedError
  847. class VisualizeSingleFrameDetections(EvalMetricOpsVisualization):
  848. """Class responsible for single-frame object detection visualizations."""
  849. def __init__(self,
  850. category_index,
  851. max_examples_to_draw=5,
  852. max_boxes_to_draw=20,
  853. min_score_thresh=0.2,
  854. use_normalized_coordinates=True,
  855. summary_name_prefix='Detections_Left_Groundtruth_Right'):
  856. super(VisualizeSingleFrameDetections, self).__init__(
  857. category_index=category_index,
  858. max_examples_to_draw=max_examples_to_draw,
  859. max_boxes_to_draw=max_boxes_to_draw,
  860. min_score_thresh=min_score_thresh,
  861. use_normalized_coordinates=use_normalized_coordinates,
  862. summary_name_prefix=summary_name_prefix)
  863. def images_from_evaluation_dict(self, eval_dict):
  864. return draw_side_by_side_evaluation_image(
  865. eval_dict, self._category_index, self._max_boxes_to_draw,
  866. self._min_score_thresh, self._use_normalized_coordinates)