You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

850 lines
37 KiB

  1. # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """Wrappers for third party pycocotools to be used within object_detection.
  16. Note that nothing in this file is tensorflow related and thus cannot
  17. be called directly as a slim metric, for example.
  18. TODO(jonathanhuang): wrap as a slim metric in metrics.py
  19. Usage example: given a set of images with ids in the list image_ids
  20. and corresponding lists of numpy arrays encoding groundtruth (boxes and classes)
  21. and detections (boxes, scores and classes), where elements of each list
  22. correspond to detections/annotations of a single image,
  23. then evaluation (in multi-class mode) can be invoked as follows:
  24. groundtruth_dict = coco_tools.ExportGroundtruthToCOCO(
  25. image_ids, groundtruth_boxes_list, groundtruth_classes_list,
  26. categories, output_path=None)
  27. detections_list = coco_tools.ExportDetectionsToCOCO(
  28. image_ids, detection_boxes_list, detection_scores_list,
  29. detection_classes_list, categories, output_path=None)
  30. groundtruth = coco_tools.COCOWrapper(groundtruth_dict)
  31. detections = groundtruth.LoadAnnotations(detections_list)
  32. evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections,
  33. agnostic_mode=False)
  34. metrics = evaluator.ComputeMetrics()
  35. """
  36. from collections import OrderedDict
  37. import copy
  38. import time
  39. import numpy as np
  40. from pycocotools import coco
  41. from pycocotools import cocoeval
  42. from pycocotools import mask
  43. import tensorflow as tf
  44. from object_detection.utils import json_utils
  45. class COCOWrapper(coco.COCO):
  46. """Wrapper for the pycocotools COCO class."""
  47. def __init__(self, dataset, detection_type='bbox'):
  48. """COCOWrapper constructor.
  49. See http://mscoco.org/dataset/#format for a description of the format.
  50. By default, the coco.COCO class constructor reads from a JSON file.
  51. This function duplicates the same behavior but loads from a dictionary,
  52. allowing us to perform evaluation without writing to external storage.
  53. Args:
  54. dataset: a dictionary holding bounding box annotations in the COCO format.
  55. detection_type: type of detections being wrapped. Can be one of ['bbox',
  56. 'segmentation']
  57. Raises:
  58. ValueError: if detection_type is unsupported.
  59. """
  60. supported_detection_types = ['bbox', 'segmentation']
  61. if detection_type not in supported_detection_types:
  62. raise ValueError('Unsupported detection type: {}. '
  63. 'Supported values are: {}'.format(
  64. detection_type, supported_detection_types))
  65. self._detection_type = detection_type
  66. coco.COCO.__init__(self)
  67. self.dataset = dataset
  68. self.createIndex()
  69. def LoadAnnotations(self, annotations):
  70. """Load annotations dictionary into COCO datastructure.
  71. See http://mscoco.org/dataset/#format for a description of the annotations
  72. format. As above, this function replicates the default behavior of the API
  73. but does not require writing to external storage.
  74. Args:
  75. annotations: python list holding object detection results where each
  76. detection is encoded as a dict with required keys ['image_id',
  77. 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on
  78. `detection_type`.
  79. Returns:
  80. a coco.COCO datastructure holding object detection annotations results
  81. Raises:
  82. ValueError: if annotations is not a list
  83. ValueError: if annotations do not correspond to the images contained
  84. in self.
  85. """
  86. results = coco.COCO()
  87. results.dataset['images'] = [img for img in self.dataset['images']]
  88. tf.logging.info('Loading and preparing annotation results...')
  89. tic = time.time()
  90. if not isinstance(annotations, list):
  91. raise ValueError('annotations is not a list of objects')
  92. annotation_img_ids = [ann['image_id'] for ann in annotations]
  93. if (set(annotation_img_ids) != (set(annotation_img_ids)
  94. & set(self.getImgIds()))):
  95. raise ValueError('Results do not correspond to current coco set')
  96. results.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
  97. if self._detection_type == 'bbox':
  98. for idx, ann in enumerate(annotations):
  99. bb = ann['bbox']
  100. ann['area'] = bb[2] * bb[3]
  101. ann['id'] = idx + 1
  102. ann['iscrowd'] = 0
  103. elif self._detection_type == 'segmentation':
  104. for idx, ann in enumerate(annotations):
  105. ann['area'] = mask.area(ann['segmentation'])
  106. ann['bbox'] = mask.toBbox(ann['segmentation'])
  107. ann['id'] = idx + 1
  108. ann['iscrowd'] = 0
  109. tf.logging.info('DONE (t=%0.2fs)', (time.time() - tic))
  110. results.dataset['annotations'] = annotations
  111. results.createIndex()
  112. return results
  113. class COCOEvalWrapper(cocoeval.COCOeval):
  114. """Wrapper for the pycocotools COCOeval class.
  115. To evaluate, create two objects (groundtruth_dict and detections_list)
  116. using the conventions listed at http://mscoco.org/dataset/#format.
  117. Then call evaluation as follows:
  118. groundtruth = coco_tools.COCOWrapper(groundtruth_dict)
  119. detections = groundtruth.LoadAnnotations(detections_list)
  120. evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections,
  121. agnostic_mode=False)
  122. metrics = evaluator.ComputeMetrics()
  123. """
  124. def __init__(self, groundtruth=None, detections=None, agnostic_mode=False,
  125. iou_type='bbox'):
  126. """COCOEvalWrapper constructor.
  127. Note that for the area-based metrics to be meaningful, detection and
  128. groundtruth boxes must be in image coordinates measured in pixels.
  129. Args:
  130. groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding
  131. groundtruth annotations
  132. detections: a coco.COCO (or coco_tools.COCOWrapper) object holding
  133. detections
  134. agnostic_mode: boolean (default: False). If True, evaluation ignores
  135. class labels, treating all detections as proposals.
  136. iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`.
  137. """
  138. cocoeval.COCOeval.__init__(self, groundtruth, detections,
  139. iouType=iou_type)
  140. if agnostic_mode:
  141. self.params.useCats = 0
  142. def GetCategory(self, category_id):
  143. """Fetches dictionary holding category information given category id.
  144. Args:
  145. category_id: integer id
  146. Returns:
  147. dictionary holding 'id', 'name'.
  148. """
  149. return self.cocoGt.cats[category_id]
  150. def GetAgnosticMode(self):
  151. """Returns true if COCO Eval is configured to evaluate in agnostic mode."""
  152. return self.params.useCats == 0
  153. def GetCategoryIdList(self):
  154. """Returns list of valid category ids."""
  155. return self.params.catIds
  156. def ComputeMetrics(self,
  157. include_metrics_per_category=False,
  158. all_metrics_per_category=False):
  159. """Computes detection metrics.
  160. Args:
  161. include_metrics_per_category: If True, will include metrics per category.
  162. all_metrics_per_category: If true, include all the summery metrics for
  163. each category in per_category_ap. Be careful with setting it to true if
  164. you have more than handful of categories, because it will pollute
  165. your mldash.
  166. Returns:
  167. 1. summary_metrics: a dictionary holding:
  168. 'Precision/mAP': mean average precision over classes averaged over IOU
  169. thresholds ranging from .5 to .95 with .05 increments
  170. 'Precision/mAP@.50IOU': mean average precision at 50% IOU
  171. 'Precision/mAP@.75IOU': mean average precision at 75% IOU
  172. 'Precision/mAP (small)': mean average precision for small objects
  173. (area < 32^2 pixels)
  174. 'Precision/mAP (medium)': mean average precision for medium sized
  175. objects (32^2 pixels < area < 96^2 pixels)
  176. 'Precision/mAP (large)': mean average precision for large objects
  177. (96^2 pixels < area < 10000^2 pixels)
  178. 'Recall/AR@1': average recall with 1 detection
  179. 'Recall/AR@10': average recall with 10 detections
  180. 'Recall/AR@100': average recall with 100 detections
  181. 'Recall/AR@100 (small)': average recall for small objects with 100
  182. detections
  183. 'Recall/AR@100 (medium)': average recall for medium objects with 100
  184. detections
  185. 'Recall/AR@100 (large)': average recall for large objects with 100
  186. detections
  187. 2. per_category_ap: a dictionary holding category specific results with
  188. keys of the form: 'Precision mAP ByCategory/category'
  189. (without the supercategory part if no supercategories exist).
  190. For backward compatibility 'PerformanceByCategory' is included in the
  191. output regardless of all_metrics_per_category.
  192. If evaluating class-agnostic mode, per_category_ap is an empty
  193. dictionary.
  194. Raises:
  195. ValueError: If category_stats does not exist.
  196. """
  197. self.evaluate()
  198. self.accumulate()
  199. self.summarize()
  200. summary_metrics = OrderedDict([
  201. ('Precision/mAP', self.stats[0]),
  202. ('Precision/mAP@.50IOU', self.stats[1]),
  203. ('Precision/mAP@.75IOU', self.stats[2]),
  204. ('Precision/mAP (small)', self.stats[3]),
  205. ('Precision/mAP (medium)', self.stats[4]),
  206. ('Precision/mAP (large)', self.stats[5]),
  207. ('Recall/AR@1', self.stats[6]),
  208. ('Recall/AR@10', self.stats[7]),
  209. ('Recall/AR@100', self.stats[8]),
  210. ('Recall/AR@100 (small)', self.stats[9]),
  211. ('Recall/AR@100 (medium)', self.stats[10]),
  212. ('Recall/AR@100 (large)', self.stats[11])
  213. ])
  214. if not include_metrics_per_category:
  215. return summary_metrics, {}
  216. if not hasattr(self, 'category_stats'):
  217. raise ValueError('Category stats do not exist')
  218. per_category_ap = OrderedDict([])
  219. if self.GetAgnosticMode():
  220. return summary_metrics, per_category_ap
  221. for category_index, category_id in enumerate(self.GetCategoryIdList()):
  222. category = self.GetCategory(category_id)['name']
  223. # Kept for backward compatilbility
  224. per_category_ap['PerformanceByCategory/mAP/{}'.format(
  225. category)] = self.category_stats[0][category_index]
  226. if all_metrics_per_category:
  227. per_category_ap['Precision mAP ByCategory/{}'.format(
  228. category)] = self.category_stats[0][category_index]
  229. per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format(
  230. category)] = self.category_stats[1][category_index]
  231. per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format(
  232. category)] = self.category_stats[2][category_index]
  233. per_category_ap['Precision mAP (small) ByCategory/{}'.format(
  234. category)] = self.category_stats[3][category_index]
  235. per_category_ap['Precision mAP (medium) ByCategory/{}'.format(
  236. category)] = self.category_stats[4][category_index]
  237. per_category_ap['Precision mAP (large) ByCategory/{}'.format(
  238. category)] = self.category_stats[5][category_index]
  239. per_category_ap['Recall AR@1 ByCategory/{}'.format(
  240. category)] = self.category_stats[6][category_index]
  241. per_category_ap['Recall AR@10 ByCategory/{}'.format(
  242. category)] = self.category_stats[7][category_index]
  243. per_category_ap['Recall AR@100 ByCategory/{}'.format(
  244. category)] = self.category_stats[8][category_index]
  245. per_category_ap['Recall AR@100 (small) ByCategory/{}'.format(
  246. category)] = self.category_stats[9][category_index]
  247. per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format(
  248. category)] = self.category_stats[10][category_index]
  249. per_category_ap['Recall AR@100 (large) ByCategory/{}'.format(
  250. category)] = self.category_stats[11][category_index]
  251. return summary_metrics, per_category_ap
  252. def _ConvertBoxToCOCOFormat(box):
  253. """Converts a box in [ymin, xmin, ymax, xmax] format to COCO format.
  254. This is a utility function for converting from our internal
  255. [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API
  256. i.e., [xmin, ymin, width, height].
  257. Args:
  258. box: a [ymin, xmin, ymax, xmax] numpy array
  259. Returns:
  260. a list of floats representing [xmin, ymin, width, height]
  261. """
  262. return [float(box[1]), float(box[0]), float(box[3] - box[1]),
  263. float(box[2] - box[0])]
  264. def _RleCompress(masks):
  265. """Compresses mask using Run-length encoding provided by pycocotools.
  266. Args:
  267. masks: uint8 numpy array of shape [mask_height, mask_width] with values in
  268. {0, 1}.
  269. Returns:
  270. A pycocotools Run-length encoding of the mask.
  271. """
  272. return mask.encode(np.asfortranarray(masks))
  273. def ExportSingleImageGroundtruthToCoco(image_id,
  274. next_annotation_id,
  275. category_id_set,
  276. groundtruth_boxes,
  277. groundtruth_classes,
  278. groundtruth_masks=None,
  279. groundtruth_is_crowd=None):
  280. """Export groundtruth of a single image to COCO format.
  281. This function converts groundtruth detection annotations represented as numpy
  282. arrays to dictionaries that can be ingested by the COCO evaluation API. Note
  283. that the image_ids provided here must match the ones given to
  284. ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in
  285. correspondence - that is: groundtruth_boxes[i, :], and
  286. groundtruth_classes[i] are associated with the same groundtruth annotation.
  287. In the exported result, "area" fields are always set to the area of the
  288. groundtruth bounding box.
  289. Args:
  290. image_id: a unique image identifier either of type integer or string.
  291. next_annotation_id: integer specifying the first id to use for the
  292. groundtruth annotations. All annotations are assigned a continuous integer
  293. id starting from this value.
  294. category_id_set: A set of valid class ids. Groundtruth with classes not in
  295. category_id_set are dropped.
  296. groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4]
  297. groundtruth_classes: numpy array (int) with shape [num_gt_boxes]
  298. groundtruth_masks: optional uint8 numpy array of shape [num_detections,
  299. image_height, image_width] containing detection_masks.
  300. groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes]
  301. indicating whether groundtruth boxes are crowd.
  302. Returns:
  303. a list of groundtruth annotations for a single image in the COCO format.
  304. Raises:
  305. ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the
  306. right lengths or (2) if each of the elements inside these lists do not
  307. have the correct shapes or (3) if image_ids are not integers
  308. """
  309. if len(groundtruth_classes.shape) != 1:
  310. raise ValueError('groundtruth_classes is '
  311. 'expected to be of rank 1.')
  312. if len(groundtruth_boxes.shape) != 2:
  313. raise ValueError('groundtruth_boxes is expected to be of '
  314. 'rank 2.')
  315. if groundtruth_boxes.shape[1] != 4:
  316. raise ValueError('groundtruth_boxes should have '
  317. 'shape[1] == 4.')
  318. num_boxes = groundtruth_classes.shape[0]
  319. if num_boxes != groundtruth_boxes.shape[0]:
  320. raise ValueError('Corresponding entries in groundtruth_classes, '
  321. 'and groundtruth_boxes should have '
  322. 'compatible shapes (i.e., agree on the 0th dimension).'
  323. 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % (
  324. groundtruth_classes.shape[0],
  325. groundtruth_boxes.shape[0], image_id))
  326. has_is_crowd = groundtruth_is_crowd is not None
  327. if has_is_crowd and len(groundtruth_is_crowd.shape) != 1:
  328. raise ValueError('groundtruth_is_crowd is expected to be of rank 1.')
  329. groundtruth_list = []
  330. for i in range(num_boxes):
  331. if groundtruth_classes[i] in category_id_set:
  332. iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0
  333. export_dict = {
  334. 'id':
  335. next_annotation_id + i,
  336. 'image_id':
  337. image_id,
  338. 'category_id':
  339. int(groundtruth_classes[i]),
  340. 'bbox':
  341. list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])),
  342. 'area':
  343. float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) *
  344. (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])),
  345. 'iscrowd':
  346. iscrowd
  347. }
  348. if groundtruth_masks is not None:
  349. export_dict['segmentation'] = _RleCompress(groundtruth_masks[i])
  350. groundtruth_list.append(export_dict)
  351. return groundtruth_list
  352. def ExportGroundtruthToCOCO(image_ids,
  353. groundtruth_boxes,
  354. groundtruth_classes,
  355. categories,
  356. output_path=None):
  357. """Export groundtruth detection annotations in numpy arrays to COCO API.
  358. This function converts a set of groundtruth detection annotations represented
  359. as numpy arrays to dictionaries that can be ingested by the COCO API.
  360. Inputs to this function are three lists: image ids for each groundtruth image,
  361. groundtruth boxes for each image and groundtruth classes respectively.
  362. Note that the image_ids provided here must match the ones given to the
  363. ExportDetectionsToCOCO function in order for evaluation to work properly.
  364. We assume that for each image, boxes, scores and classes are in
  365. correspondence --- that is: image_id[i], groundtruth_boxes[i, :] and
  366. groundtruth_classes[i] are associated with the same groundtruth annotation.
  367. In the exported result, "area" fields are always set to the area of the
  368. groundtruth bounding box and "iscrowd" fields are always set to 0.
  369. TODO(jonathanhuang): pass in "iscrowd" array for evaluating on COCO dataset.
  370. Args:
  371. image_ids: a list of unique image identifier either of type integer or
  372. string.
  373. groundtruth_boxes: list of numpy arrays with shape [num_gt_boxes, 4]
  374. (note that num_gt_boxes can be different for each entry in the list)
  375. groundtruth_classes: list of numpy arrays (int) with shape [num_gt_boxes]
  376. (note that num_gt_boxes can be different for each entry in the list)
  377. categories: a list of dictionaries representing all possible categories.
  378. Each dict in this list has the following keys:
  379. 'id': (required) an integer id uniquely identifying this category
  380. 'name': (required) string representing category name
  381. e.g., 'cat', 'dog', 'pizza'
  382. 'supercategory': (optional) string representing the supercategory
  383. e.g., 'animal', 'vehicle', 'food', etc
  384. output_path: (optional) path for exporting result to JSON
  385. Returns:
  386. dictionary that can be read by COCO API
  387. Raises:
  388. ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the
  389. right lengths or (2) if each of the elements inside these lists do not
  390. have the correct shapes or (3) if image_ids are not integers
  391. """
  392. category_id_set = set([cat['id'] for cat in categories])
  393. groundtruth_export_list = []
  394. image_export_list = []
  395. if not len(image_ids) == len(groundtruth_boxes) == len(groundtruth_classes):
  396. raise ValueError('Input lists must have the same length')
  397. # For reasons internal to the COCO API, it is important that annotation ids
  398. # are not equal to zero; we thus start counting from 1.
  399. annotation_id = 1
  400. for image_id, boxes, classes in zip(image_ids, groundtruth_boxes,
  401. groundtruth_classes):
  402. image_export_list.append({'id': image_id})
  403. groundtruth_export_list.extend(ExportSingleImageGroundtruthToCoco(
  404. image_id,
  405. annotation_id,
  406. category_id_set,
  407. boxes,
  408. classes))
  409. num_boxes = classes.shape[0]
  410. annotation_id += num_boxes
  411. groundtruth_dict = {
  412. 'annotations': groundtruth_export_list,
  413. 'images': image_export_list,
  414. 'categories': categories
  415. }
  416. if output_path:
  417. with tf.gfile.GFile(output_path, 'w') as fid:
  418. json_utils.Dump(groundtruth_dict, fid, float_digits=4, indent=2)
  419. return groundtruth_dict
  420. def ExportSingleImageDetectionBoxesToCoco(image_id,
  421. category_id_set,
  422. detection_boxes,
  423. detection_scores,
  424. detection_classes):
  425. """Export detections of a single image to COCO format.
  426. This function converts detections represented as numpy arrays to dictionaries
  427. that can be ingested by the COCO evaluation API. Note that the image_ids
  428. provided here must match the ones given to the
  429. ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in
  430. correspondence - that is: boxes[i, :], and classes[i]
  431. are associated with the same groundtruth annotation.
  432. Args:
  433. image_id: unique image identifier either of type integer or string.
  434. category_id_set: A set of valid class ids. Detections with classes not in
  435. category_id_set are dropped.
  436. detection_boxes: float numpy array of shape [num_detections, 4] containing
  437. detection boxes.
  438. detection_scores: float numpy array of shape [num_detections] containing
  439. scored for the detection boxes.
  440. detection_classes: integer numpy array of shape [num_detections] containing
  441. the classes for detection boxes.
  442. Returns:
  443. a list of detection annotations for a single image in the COCO format.
  444. Raises:
  445. ValueError: if (1) detection_boxes, detection_scores and detection_classes
  446. do not have the right lengths or (2) if each of the elements inside these
  447. lists do not have the correct shapes or (3) if image_ids are not integers.
  448. """
  449. if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1:
  450. raise ValueError('All entries in detection_classes and detection_scores'
  451. 'expected to be of rank 1.')
  452. if len(detection_boxes.shape) != 2:
  453. raise ValueError('All entries in detection_boxes expected to be of '
  454. 'rank 2.')
  455. if detection_boxes.shape[1] != 4:
  456. raise ValueError('All entries in detection_boxes should have '
  457. 'shape[1] == 4.')
  458. num_boxes = detection_classes.shape[0]
  459. if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]:
  460. raise ValueError('Corresponding entries in detection_classes, '
  461. 'detection_scores and detection_boxes should have '
  462. 'compatible shapes (i.e., agree on the 0th dimension). '
  463. 'Classes shape: %d. Boxes shape: %d. '
  464. 'Scores shape: %d' % (
  465. detection_classes.shape[0], detection_boxes.shape[0],
  466. detection_scores.shape[0]
  467. ))
  468. detections_list = []
  469. for i in range(num_boxes):
  470. if detection_classes[i] in category_id_set:
  471. detections_list.append({
  472. 'image_id': image_id,
  473. 'category_id': int(detection_classes[i]),
  474. 'bbox': list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])),
  475. 'score': float(detection_scores[i])
  476. })
  477. return detections_list
  478. def ExportSingleImageDetectionMasksToCoco(image_id,
  479. category_id_set,
  480. detection_masks,
  481. detection_scores,
  482. detection_classes):
  483. """Export detection masks of a single image to COCO format.
  484. This function converts detections represented as numpy arrays to dictionaries
  485. that can be ingested by the COCO evaluation API. We assume that
  486. detection_masks, detection_scores, and detection_classes are in correspondence
  487. - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i]
  488. are associated with the same annotation.
  489. Args:
  490. image_id: unique image identifier either of type integer or string.
  491. category_id_set: A set of valid class ids. Detections with classes not in
  492. category_id_set are dropped.
  493. detection_masks: uint8 numpy array of shape [num_detections, image_height,
  494. image_width] containing detection_masks.
  495. detection_scores: float numpy array of shape [num_detections] containing
  496. scores for detection masks.
  497. detection_classes: integer numpy array of shape [num_detections] containing
  498. the classes for detection masks.
  499. Returns:
  500. a list of detection mask annotations for a single image in the COCO format.
  501. Raises:
  502. ValueError: if (1) detection_masks, detection_scores and detection_classes
  503. do not have the right lengths or (2) if each of the elements inside these
  504. lists do not have the correct shapes or (3) if image_ids are not integers.
  505. """
  506. if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1:
  507. raise ValueError('All entries in detection_classes and detection_scores'
  508. 'expected to be of rank 1.')
  509. num_boxes = detection_classes.shape[0]
  510. if not num_boxes == len(detection_masks) == detection_scores.shape[0]:
  511. raise ValueError('Corresponding entries in detection_classes, '
  512. 'detection_scores and detection_masks should have '
  513. 'compatible lengths and shapes '
  514. 'Classes length: %d. Masks length: %d. '
  515. 'Scores length: %d' % (
  516. detection_classes.shape[0], len(detection_masks),
  517. detection_scores.shape[0]
  518. ))
  519. detections_list = []
  520. for i in range(num_boxes):
  521. if detection_classes[i] in category_id_set:
  522. detections_list.append({
  523. 'image_id': image_id,
  524. 'category_id': int(detection_classes[i]),
  525. 'segmentation': _RleCompress(detection_masks[i]),
  526. 'score': float(detection_scores[i])
  527. })
  528. return detections_list
  529. def ExportDetectionsToCOCO(image_ids,
  530. detection_boxes,
  531. detection_scores,
  532. detection_classes,
  533. categories,
  534. output_path=None):
  535. """Export detection annotations in numpy arrays to COCO API.
  536. This function converts a set of predicted detections represented
  537. as numpy arrays to dictionaries that can be ingested by the COCO API.
  538. Inputs to this function are lists, consisting of boxes, scores and
  539. classes, respectively, corresponding to each image for which detections
  540. have been produced. Note that the image_ids provided here must
  541. match the ones given to the ExportGroundtruthToCOCO function in order
  542. for evaluation to work properly.
  543. We assume that for each image, boxes, scores and classes are in
  544. correspondence --- that is: detection_boxes[i, :], detection_scores[i] and
  545. detection_classes[i] are associated with the same detection.
  546. Args:
  547. image_ids: a list of unique image identifier either of type integer or
  548. string.
  549. detection_boxes: list of numpy arrays with shape [num_detection_boxes, 4]
  550. detection_scores: list of numpy arrays (float) with shape
  551. [num_detection_boxes]. Note that num_detection_boxes can be different
  552. for each entry in the list.
  553. detection_classes: list of numpy arrays (int) with shape
  554. [num_detection_boxes]. Note that num_detection_boxes can be different
  555. for each entry in the list.
  556. categories: a list of dictionaries representing all possible categories.
  557. Each dict in this list must have an integer 'id' key uniquely identifying
  558. this category.
  559. output_path: (optional) path for exporting result to JSON
  560. Returns:
  561. list of dictionaries that can be read by COCO API, where each entry
  562. corresponds to a single detection and has keys from:
  563. ['image_id', 'category_id', 'bbox', 'score'].
  564. Raises:
  565. ValueError: if (1) detection_boxes and detection_classes do not have the
  566. right lengths or (2) if each of the elements inside these lists do not
  567. have the correct shapes or (3) if image_ids are not integers.
  568. """
  569. category_id_set = set([cat['id'] for cat in categories])
  570. detections_export_list = []
  571. if not (len(image_ids) == len(detection_boxes) == len(detection_scores) ==
  572. len(detection_classes)):
  573. raise ValueError('Input lists must have the same length')
  574. for image_id, boxes, scores, classes in zip(image_ids, detection_boxes,
  575. detection_scores,
  576. detection_classes):
  577. detections_export_list.extend(ExportSingleImageDetectionBoxesToCoco(
  578. image_id,
  579. category_id_set,
  580. boxes,
  581. scores,
  582. classes))
  583. if output_path:
  584. with tf.gfile.GFile(output_path, 'w') as fid:
  585. json_utils.Dump(detections_export_list, fid, float_digits=4, indent=2)
  586. return detections_export_list
  587. def ExportSegmentsToCOCO(image_ids,
  588. detection_masks,
  589. detection_scores,
  590. detection_classes,
  591. categories,
  592. output_path=None):
  593. """Export segmentation masks in numpy arrays to COCO API.
  594. This function converts a set of predicted instance masks represented
  595. as numpy arrays to dictionaries that can be ingested by the COCO API.
  596. Inputs to this function are lists, consisting of segments, scores and
  597. classes, respectively, corresponding to each image for which detections
  598. have been produced.
  599. Note this function is recommended to use for small dataset.
  600. For large dataset, it should be used with a merge function
  601. (e.g. in map reduce), otherwise the memory consumption is large.
  602. We assume that for each image, masks, scores and classes are in
  603. correspondence --- that is: detection_masks[i, :, :, :], detection_scores[i]
  604. and detection_classes[i] are associated with the same detection.
  605. Args:
  606. image_ids: list of image ids (typically ints or strings)
  607. detection_masks: list of numpy arrays with shape [num_detection, h, w, 1]
  608. and type uint8. The height and width should match the shape of
  609. corresponding image.
  610. detection_scores: list of numpy arrays (float) with shape
  611. [num_detection]. Note that num_detection can be different
  612. for each entry in the list.
  613. detection_classes: list of numpy arrays (int) with shape
  614. [num_detection]. Note that num_detection can be different
  615. for each entry in the list.
  616. categories: a list of dictionaries representing all possible categories.
  617. Each dict in this list must have an integer 'id' key uniquely identifying
  618. this category.
  619. output_path: (optional) path for exporting result to JSON
  620. Returns:
  621. list of dictionaries that can be read by COCO API, where each entry
  622. corresponds to a single detection and has keys from:
  623. ['image_id', 'category_id', 'segmentation', 'score'].
  624. Raises:
  625. ValueError: if detection_masks and detection_classes do not have the
  626. right lengths or if each of the elements inside these lists do not
  627. have the correct shapes.
  628. """
  629. if not (len(image_ids) == len(detection_masks) == len(detection_scores) ==
  630. len(detection_classes)):
  631. raise ValueError('Input lists must have the same length')
  632. segment_export_list = []
  633. for image_id, masks, scores, classes in zip(image_ids, detection_masks,
  634. detection_scores,
  635. detection_classes):
  636. if len(classes.shape) != 1 or len(scores.shape) != 1:
  637. raise ValueError('All entries in detection_classes and detection_scores'
  638. 'expected to be of rank 1.')
  639. if len(masks.shape) != 4:
  640. raise ValueError('All entries in masks expected to be of '
  641. 'rank 4. Given {}'.format(masks.shape))
  642. num_boxes = classes.shape[0]
  643. if not num_boxes == masks.shape[0] == scores.shape[0]:
  644. raise ValueError('Corresponding entries in segment_classes, '
  645. 'detection_scores and detection_boxes should have '
  646. 'compatible shapes (i.e., agree on the 0th dimension).')
  647. category_id_set = set([cat['id'] for cat in categories])
  648. segment_export_list.extend(ExportSingleImageDetectionMasksToCoco(
  649. image_id, category_id_set, np.squeeze(masks, axis=3), scores, classes))
  650. if output_path:
  651. with tf.gfile.GFile(output_path, 'w') as fid:
  652. json_utils.Dump(segment_export_list, fid, float_digits=4, indent=2)
  653. return segment_export_list
  654. def ExportKeypointsToCOCO(image_ids,
  655. detection_keypoints,
  656. detection_scores,
  657. detection_classes,
  658. categories,
  659. output_path=None):
  660. """Exports keypoints in numpy arrays to COCO API.
  661. This function converts a set of predicted keypoints represented
  662. as numpy arrays to dictionaries that can be ingested by the COCO API.
  663. Inputs to this function are lists, consisting of keypoints, scores and
  664. classes, respectively, corresponding to each image for which detections
  665. have been produced.
  666. We assume that for each image, keypoints, scores and classes are in
  667. correspondence --- that is: detection_keypoints[i, :, :, :],
  668. detection_scores[i] and detection_classes[i] are associated with the same
  669. detection.
  670. Args:
  671. image_ids: list of image ids (typically ints or strings)
  672. detection_keypoints: list of numpy arrays with shape
  673. [num_detection, num_keypoints, 2] and type float32 in absolute
  674. x-y coordinates.
  675. detection_scores: list of numpy arrays (float) with shape
  676. [num_detection]. Note that num_detection can be different
  677. for each entry in the list.
  678. detection_classes: list of numpy arrays (int) with shape
  679. [num_detection]. Note that num_detection can be different
  680. for each entry in the list.
  681. categories: a list of dictionaries representing all possible categories.
  682. Each dict in this list must have an integer 'id' key uniquely identifying
  683. this category and an integer 'num_keypoints' key specifying the number of
  684. keypoints the category has.
  685. output_path: (optional) path for exporting result to JSON
  686. Returns:
  687. list of dictionaries that can be read by COCO API, where each entry
  688. corresponds to a single detection and has keys from:
  689. ['image_id', 'category_id', 'keypoints', 'score'].
  690. Raises:
  691. ValueError: if detection_keypoints and detection_classes do not have the
  692. right lengths or if each of the elements inside these lists do not
  693. have the correct shapes.
  694. """
  695. if not (len(image_ids) == len(detection_keypoints) ==
  696. len(detection_scores) == len(detection_classes)):
  697. raise ValueError('Input lists must have the same length')
  698. keypoints_export_list = []
  699. for image_id, keypoints, scores, classes in zip(
  700. image_ids, detection_keypoints, detection_scores, detection_classes):
  701. if len(classes.shape) != 1 or len(scores.shape) != 1:
  702. raise ValueError('All entries in detection_classes and detection_scores'
  703. 'expected to be of rank 1.')
  704. if len(keypoints.shape) != 3:
  705. raise ValueError('All entries in keypoints expected to be of '
  706. 'rank 3. Given {}'.format(keypoints.shape))
  707. num_boxes = classes.shape[0]
  708. if not num_boxes == keypoints.shape[0] == scores.shape[0]:
  709. raise ValueError('Corresponding entries in detection_classes, '
  710. 'detection_keypoints, and detection_scores should have '
  711. 'compatible shapes (i.e., agree on the 0th dimension).')
  712. category_id_set = set([cat['id'] for cat in categories])
  713. category_id_to_num_keypoints_map = {
  714. cat['id']: cat['num_keypoints'] for cat in categories
  715. if 'num_keypoints' in cat}
  716. for i in range(num_boxes):
  717. if classes[i] not in category_id_set:
  718. raise ValueError('class id should be in category_id_set\n')
  719. if classes[i] in category_id_to_num_keypoints_map:
  720. num_keypoints = category_id_to_num_keypoints_map[classes[i]]
  721. # Adds extra ones to indicate the visibility for each keypoint as is
  722. # recommended by MSCOCO.
  723. instance_keypoints = np.concatenate(
  724. [keypoints[i, 0:num_keypoints, :],
  725. np.expand_dims(np.ones(num_keypoints), axis=1)],
  726. axis=1).astype(int)
  727. instance_keypoints = instance_keypoints.flatten().tolist()
  728. keypoints_export_list.append({
  729. 'image_id': image_id,
  730. 'category_id': int(classes[i]),
  731. 'keypoints': instance_keypoints,
  732. 'score': float(scores[i])
  733. })
  734. if output_path:
  735. with tf.gfile.GFile(output_path, 'w') as fid:
  736. json_utils.Dump(keypoints_export_list, fid, float_digits=4, indent=2)
  737. return keypoints_export_list