# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Wrappers for third party pycocotools to be used within object_detection.

Note that nothing in this file is tensorflow related and thus this file
cannot be called directly as a slim metric, for example.

TODO(jonathanhuang): wrap as a slim metric in metrics.py


Usage example: given a set of images with ids in the list image_ids
and corresponding lists of numpy arrays encoding groundtruth (boxes and
classes) and detections (boxes, scores and classes), where elements of each
list correspond to detections/annotations of a single image, then evaluation
(in multi-class mode) can be invoked as follows:

  groundtruth_dict = coco_tools.ExportGroundtruthToCOCO(
      image_ids, groundtruth_boxes_list, groundtruth_classes_list,
      categories, output_path=None)
  detections_list = coco_tools.ExportDetectionsToCOCO(
      image_ids, detection_boxes_list, detection_scores_list,
      detection_classes_list, categories, output_path=None)
  groundtruth = coco_tools.COCOWrapper(groundtruth_dict)
  detections = groundtruth.LoadAnnotations(detections_list)
  evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections,
                                         agnostic_mode=False)
  metrics = evaluator.ComputeMetrics()

"""
from collections import OrderedDict
import copy
import time
import numpy as np

from pycocotools import coco
from pycocotools import cocoeval
from pycocotools import mask

import tensorflow as tf

from object_detection.utils import json_utils


class COCOWrapper(coco.COCO):
  """Wrapper for the pycocotools COCO class."""

  def __init__(self, dataset, detection_type='bbox'):
    """COCOWrapper constructor.

    See http://mscoco.org/dataset/#format for a description of the format.
    By default, the coco.COCO class constructor reads from a JSON file.
    This function duplicates the same behavior but loads from a dictionary,
    allowing us to perform evaluation without writing to external storage.

    Args:
      dataset: a dictionary holding bounding box annotations in the COCO
        format.
      detection_type: type of detections being wrapped. Can be one of ['bbox',
        'segmentation']

    Raises:
      ValueError: if detection_type is unsupported.
    """
    supported_detection_types = ['bbox', 'segmentation']
    if detection_type not in supported_detection_types:
      raise ValueError('Unsupported detection type: {}. '
                       'Supported values are: {}'.format(
                           detection_type, supported_detection_types))
    self._detection_type = detection_type
    coco.COCO.__init__(self)
    self.dataset = dataset
    self.createIndex()

  def LoadAnnotations(self, annotations):
    """Load annotations dictionary into COCO datastructure.

    See http://mscoco.org/dataset/#format for a description of the annotations
    format. As above, this function replicates the default behavior of the API
    but does not require writing to external storage.

    Args:
      annotations: python list holding object detection results where each
        detection is encoded as a dict with required keys ['image_id',
        'category_id', 'score'] and one of ['bbox', 'segmentation'] based on
        `detection_type`.

    Returns:
      a coco.COCO datastructure holding object detection results.

    Raises:
      ValueError: if annotations is not a list
      ValueError: if annotations do not correspond to the images contained
        in self.
    """
    results = coco.COCO()
    results.dataset['images'] = [img for img in self.dataset['images']]

    tf.logging.info('Loading and preparing annotation results...')
    tic = time.time()

    if not isinstance(annotations, list):
      raise ValueError('annotations is not a list of objects')
    annotation_img_ids = [ann['image_id'] for ann in annotations]
    if (set(annotation_img_ids) != (set(annotation_img_ids)
                                    & set(self.getImgIds()))):
      raise ValueError('Results do not correspond to current coco set')
    results.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
    if self._detection_type == 'bbox':
      for idx, ann in enumerate(annotations):
        bb = ann['bbox']
        ann['area'] = bb[2] * bb[3]
        ann['id'] = idx + 1
        ann['iscrowd'] = 0
    elif self._detection_type == 'segmentation':
      for idx, ann in enumerate(annotations):
        ann['area'] = mask.area(ann['segmentation'])
        ann['bbox'] = mask.toBbox(ann['segmentation'])
        ann['id'] = idx + 1
        ann['iscrowd'] = 0
    tf.logging.info('DONE (t=%0.2fs)', (time.time() - tic))

    results.dataset['annotations'] = annotations
    results.createIndex()
    return results
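
# A minimal usage sketch for COCOWrapper (kept as comments so nothing runs at
# import time); `groundtruth_dict` and `detections_list` are hypothetical
# inputs in the COCO format, e.g. built with the Export* helpers below:
#
#   groundtruth = COCOWrapper(groundtruth_dict, detection_type='bbox')
#   detections = groundtruth.LoadAnnotations(detections_list)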


class COCOEvalWrapper(cocoeval.COCOeval):
  """Wrapper for the pycocotools COCOeval class.

  To evaluate, create two objects (groundtruth_dict and detections_list)
  using the conventions listed at http://mscoco.org/dataset/#format.
  Then call evaluation as follows:

    groundtruth = coco_tools.COCOWrapper(groundtruth_dict)
    detections = groundtruth.LoadAnnotations(detections_list)
    evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections,
                                           agnostic_mode=False)
    metrics = evaluator.ComputeMetrics()
  """

  def __init__(self, groundtruth=None, detections=None, agnostic_mode=False,
               iou_type='bbox'):
    """COCOEvalWrapper constructor.

    Note that for the area-based metrics to be meaningful, detection and
    groundtruth boxes must be in image coordinates measured in pixels.

    Args:
      groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding
        groundtruth annotations
      detections: a coco.COCO (or coco_tools.COCOWrapper) object holding
        detections
      agnostic_mode: boolean (default: False). If True, evaluation ignores
        class labels, treating all detections as proposals.
      iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`.
    """
    cocoeval.COCOeval.__init__(self, groundtruth, detections,
                               iouType=iou_type)
    if agnostic_mode:
      self.params.useCats = 0

  def GetCategory(self, category_id):
    """Fetches dictionary holding category information given category id.

    Args:
      category_id: integer id
    Returns:
      dictionary holding 'id', 'name'.
    """
    return self.cocoGt.cats[category_id]

  def GetAgnosticMode(self):
    """Returns True if COCO Eval is configured to evaluate in agnostic mode."""
    return self.params.useCats == 0

  def GetCategoryIdList(self):
    """Returns the list of valid category ids."""
    return self.params.catIds

  def ComputeMetrics(self,
                     include_metrics_per_category=False,
                     all_metrics_per_category=False):
    """Computes detection metrics.

    Args:
      include_metrics_per_category: If True, will include metrics per category.
      all_metrics_per_category: If True, include all the summary metrics for
        each category in per_category_ap. Be careful with setting it to True if
        you have more than a handful of categories, because it will pollute
        your mldash.

    Returns:
      1. summary_metrics: a dictionary holding:
        'Precision/mAP': mean average precision over classes averaged over IOU
          thresholds ranging from .5 to .95 with .05 increments
        'Precision/mAP@.50IOU': mean average precision at 50% IOU
        'Precision/mAP@.75IOU': mean average precision at 75% IOU
        'Precision/mAP (small)': mean average precision for small objects
          (area < 32^2 pixels)
        'Precision/mAP (medium)': mean average precision for medium sized
          objects (32^2 pixels < area < 96^2 pixels)
        'Precision/mAP (large)': mean average precision for large objects
          (96^2 pixels < area < 10000^2 pixels)
        'Recall/AR@1': average recall with 1 detection
        'Recall/AR@10': average recall with 10 detections
        'Recall/AR@100': average recall with 100 detections
        'Recall/AR@100 (small)': average recall for small objects with 100
          detections
        'Recall/AR@100 (medium)': average recall for medium objects with 100
          detections
        'Recall/AR@100 (large)': average recall for large objects with 100
          detections
      2. per_category_ap: a dictionary holding category specific results with
        keys of the form: 'Precision mAP ByCategory/category'
        (without the supercategory part if no supercategories exist).
        For backward compatibility 'PerformanceByCategory' is included in the
        output regardless of all_metrics_per_category.
        If evaluating in class-agnostic mode, per_category_ap is an empty
        dictionary.

    Raises:
      ValueError: If category_stats does not exist.
    """
    self.evaluate()
    self.accumulate()
    self.summarize()

    summary_metrics = OrderedDict([
        ('Precision/mAP', self.stats[0]),
        ('Precision/mAP@.50IOU', self.stats[1]),
        ('Precision/mAP@.75IOU', self.stats[2]),
        ('Precision/mAP (small)', self.stats[3]),
        ('Precision/mAP (medium)', self.stats[4]),
        ('Precision/mAP (large)', self.stats[5]),
        ('Recall/AR@1', self.stats[6]),
        ('Recall/AR@10', self.stats[7]),
        ('Recall/AR@100', self.stats[8]),
        ('Recall/AR@100 (small)', self.stats[9]),
        ('Recall/AR@100 (medium)', self.stats[10]),
        ('Recall/AR@100 (large)', self.stats[11])
    ])
    if not include_metrics_per_category:
      return summary_metrics, {}
    if not hasattr(self, 'category_stats'):
      raise ValueError('Category stats do not exist')
    per_category_ap = OrderedDict([])
    if self.GetAgnosticMode():
      return summary_metrics, per_category_ap
    for category_index, category_id in enumerate(self.GetCategoryIdList()):
      category = self.GetCategory(category_id)['name']
      # Kept for backward compatibility.
      per_category_ap['PerformanceByCategory/mAP/{}'.format(
          category)] = self.category_stats[0][category_index]
      if all_metrics_per_category:
        per_category_ap['Precision mAP ByCategory/{}'.format(
            category)] = self.category_stats[0][category_index]
        per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format(
            category)] = self.category_stats[1][category_index]
        per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format(
            category)] = self.category_stats[2][category_index]
        per_category_ap['Precision mAP (small) ByCategory/{}'.format(
            category)] = self.category_stats[3][category_index]
        per_category_ap['Precision mAP (medium) ByCategory/{}'.format(
            category)] = self.category_stats[4][category_index]
        per_category_ap['Precision mAP (large) ByCategory/{}'.format(
            category)] = self.category_stats[5][category_index]
        per_category_ap['Recall AR@1 ByCategory/{}'.format(
            category)] = self.category_stats[6][category_index]
        per_category_ap['Recall AR@10 ByCategory/{}'.format(
            category)] = self.category_stats[7][category_index]
        per_category_ap['Recall AR@100 ByCategory/{}'.format(
            category)] = self.category_stats[8][category_index]
        per_category_ap['Recall AR@100 (small) ByCategory/{}'.format(
            category)] = self.category_stats[9][category_index]
        per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format(
            category)] = self.category_stats[10][category_index]
        per_category_ap['Recall AR@100 (large) ByCategory/{}'.format(
            category)] = self.category_stats[11][category_index]

    return summary_metrics, per_category_ap
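
# A sketch of consuming the two-tuple that ComputeMetrics returns;
# `groundtruth` and `detections` are hypothetical coco.COCO objects built as
# described in the class docstring:
#
#   evaluator = COCOEvalWrapper(groundtruth, detections)
#   summary_metrics, per_category_ap = evaluator.ComputeMetrics(
#       include_metrics_per_category=True)
#   # e.g. summary_metrics['Precision/mAP'] and
#   #      per_category_ap['PerformanceByCategory/mAP/cat']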


def _ConvertBoxToCOCOFormat(box):
  """Converts a box in [ymin, xmin, ymax, xmax] format to COCO format.

  This is a utility function for converting from our internal
  [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API
  i.e., [xmin, ymin, width, height].

  Args:
    box: a [ymin, xmin, ymax, xmax] numpy array

  Returns:
    a list of floats representing [xmin, ymin, width, height]
  """
  return [float(box[1]), float(box[0]), float(box[3] - box[1]),
          float(box[2] - box[0])]
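
# Worked example: a box with ymin=10., xmin=20., ymax=50., xmax=60. maps to
# COCO's [xmin, ymin, width, height] convention:
#
#   _ConvertBoxToCOCOFormat(np.array([10., 20., 50., 60.]))
#   # => [20.0, 10.0, 40.0, 40.0]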


def _RleCompress(masks):
  """Compresses mask using Run-length encoding provided by pycocotools.

  Args:
    masks: uint8 numpy array of shape [mask_height, mask_width] with values in
      {0, 1}.

  Returns:
    A pycocotools Run-length encoding of the mask.
  """
  return mask.encode(np.asfortranarray(masks))
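
# A minimal sketch of what _RleCompress consumes and produces; pycocotools'
# mask.encode returns a dict holding the mask 'size' and a compressed
# 'counts' string (the toy mask below is hypothetical):
#
#   toy_mask = np.zeros((4, 4), dtype=np.uint8)
#   toy_mask[1:3, 1:3] = 1
#   rle = _RleCompress(toy_mask)  # {'size': [4, 4], 'counts': ...}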


def ExportSingleImageGroundtruthToCoco(image_id,
                                       next_annotation_id,
                                       category_id_set,
                                       groundtruth_boxes,
                                       groundtruth_classes,
                                       groundtruth_masks=None,
                                       groundtruth_is_crowd=None):
  """Export groundtruth of a single image to COCO format.

  This function converts groundtruth detection annotations represented as numpy
  arrays to dictionaries that can be ingested by the COCO evaluation API. Note
  that the image_ids provided here must match the ones given to
  ExportSingleImageDetectionBoxesToCoco. We assume that boxes and classes are in
  correspondence - that is: groundtruth_boxes[i, :] and groundtruth_classes[i]
  are associated with the same groundtruth annotation.

  In the exported result, "area" fields are always set to the area of the
  groundtruth bounding box.

  Args:
    image_id: a unique image identifier either of type integer or string.
    next_annotation_id: integer specifying the first id to use for the
      groundtruth annotations. All annotations are assigned a continuous
      integer id starting from this value.
    category_id_set: A set of valid class ids. Groundtruth with classes not in
      category_id_set are dropped.
    groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4]
    groundtruth_classes: numpy array (int) with shape [num_gt_boxes]
    groundtruth_masks: optional uint8 numpy array of shape [num_gt_boxes,
      image_height, image_width] containing groundtruth masks.
    groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes]
      indicating whether groundtruth boxes are crowd.

  Returns:
    a list of groundtruth annotations for a single image in the COCO format.

  Raises:
    ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the
      right lengths or (2) if each of the elements inside these lists do not
      have the correct shapes or (3) if image_ids are not integers
  """
  if len(groundtruth_classes.shape) != 1:
    raise ValueError('groundtruth_classes is '
                     'expected to be of rank 1.')
  if len(groundtruth_boxes.shape) != 2:
    raise ValueError('groundtruth_boxes is expected to be of '
                     'rank 2.')
  if groundtruth_boxes.shape[1] != 4:
    raise ValueError('groundtruth_boxes should have '
                     'shape[1] == 4.')
  num_boxes = groundtruth_classes.shape[0]
  if num_boxes != groundtruth_boxes.shape[0]:
    raise ValueError('Corresponding entries in groundtruth_classes '
                     'and groundtruth_boxes should have '
                     'compatible shapes (i.e., agree on the 0th dimension). '
                     'Classes shape: %d. Boxes shape: %d. Image ID: %s' % (
                         groundtruth_classes.shape[0],
                         groundtruth_boxes.shape[0], image_id))
  has_is_crowd = groundtruth_is_crowd is not None
  if has_is_crowd and len(groundtruth_is_crowd.shape) != 1:
    raise ValueError('groundtruth_is_crowd is expected to be of rank 1.')
  groundtruth_list = []
  for i in range(num_boxes):
    if groundtruth_classes[i] in category_id_set:
      iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0
      export_dict = {
          'id':
              next_annotation_id + i,
          'image_id':
              image_id,
          'category_id':
              int(groundtruth_classes[i]),
          'bbox':
              list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])),
          'area':
              float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) *
                    (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])),
          'iscrowd':
              iscrowd
      }
      if groundtruth_masks is not None:
        export_dict['segmentation'] = _RleCompress(groundtruth_masks[i])
      groundtruth_list.append(export_dict)
  return groundtruth_list
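
# A minimal sketch of exporting one image's groundtruth; the toy arrays
# below are hypothetical:
#
#   anns = ExportSingleImageGroundtruthToCoco(
#       image_id='img_1',
#       next_annotation_id=1,
#       category_id_set=set([1, 2]),
#       groundtruth_boxes=np.array([[10., 20., 50., 60.]], dtype=np.float32),
#       groundtruth_classes=np.array([1]))
#   # anns[0]['bbox'] == [20.0, 10.0, 40.0, 40.0]; anns[0]['area'] == 1600.0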


def ExportGroundtruthToCOCO(image_ids,
                            groundtruth_boxes,
                            groundtruth_classes,
                            categories,
                            output_path=None):
  """Export groundtruth detection annotations in numpy arrays to COCO API.

  This function converts a set of groundtruth detection annotations represented
  as numpy arrays to dictionaries that can be ingested by the COCO API.
  Inputs to this function are three lists: image ids for each groundtruth
  image, groundtruth boxes for each image and groundtruth classes respectively.
  Note that the image_ids provided here must match the ones given to the
  ExportDetectionsToCOCO function in order for evaluation to work properly.
  We assume that for each image, boxes, scores and classes are in
  correspondence --- that is: image_id[i], groundtruth_boxes[i, :] and
  groundtruth_classes[i] are associated with the same groundtruth annotation.

  In the exported result, "area" fields are always set to the area of the
  groundtruth bounding box and "iscrowd" fields are always set to 0.
  TODO(jonathanhuang): pass in "iscrowd" array for evaluating on COCO dataset.

  Args:
    image_ids: a list of unique image identifiers either of type integer or
      string.
    groundtruth_boxes: list of numpy arrays with shape [num_gt_boxes, 4]
      (note that num_gt_boxes can be different for each entry in the list)
    groundtruth_classes: list of numpy arrays (int) with shape [num_gt_boxes]
      (note that num_gt_boxes can be different for each entry in the list)
    categories: a list of dictionaries representing all possible categories.
      Each dict in this list has the following keys:
        'id': (required) an integer id uniquely identifying this category
        'name': (required) string representing category name
          e.g., 'cat', 'dog', 'pizza'
        'supercategory': (optional) string representing the supercategory
          e.g., 'animal', 'vehicle', 'food', etc
    output_path: (optional) path for exporting result to JSON

  Returns:
    dictionary that can be read by COCO API

  Raises:
    ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the
      right lengths or (2) if each of the elements inside these lists do not
      have the correct shapes or (3) if image_ids are not integers
  """
  category_id_set = set([cat['id'] for cat in categories])
  groundtruth_export_list = []
  image_export_list = []
  if not len(image_ids) == len(groundtruth_boxes) == len(groundtruth_classes):
    raise ValueError('Input lists must have the same length')

  # For reasons internal to the COCO API, it is important that annotation ids
  # are not equal to zero; we thus start counting from 1.
  annotation_id = 1
  for image_id, boxes, classes in zip(image_ids, groundtruth_boxes,
                                      groundtruth_classes):
    image_export_list.append({'id': image_id})
    groundtruth_export_list.extend(ExportSingleImageGroundtruthToCoco(
        image_id,
        annotation_id,
        category_id_set,
        boxes,
        classes))
    num_boxes = classes.shape[0]
    annotation_id += num_boxes

  groundtruth_dict = {
      'annotations': groundtruth_export_list,
      'images': image_export_list,
      'categories': categories
  }
  if output_path:
    with tf.gfile.GFile(output_path, 'w') as fid:
      json_utils.Dump(groundtruth_dict, fid, float_digits=4, indent=2)
  return groundtruth_dict
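
# A minimal sketch with hypothetical toy inputs:
#
#   categories = [{'id': 1, 'name': 'cat'}, {'id': 2, 'name': 'dog'}]
#   groundtruth_dict = ExportGroundtruthToCOCO(
#       image_ids=['img_1'],
#       groundtruth_boxes=[np.array([[10., 20., 50., 60.]], np.float32)],
#       groundtruth_classes=[np.array([1])],
#       categories=categories)
#   # groundtruth_dict has keys 'annotations', 'images' and 'categories'.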


def ExportSingleImageDetectionBoxesToCoco(image_id,
                                          category_id_set,
                                          detection_boxes,
                                          detection_scores,
                                          detection_classes):
  """Export detections of a single image to COCO format.

  This function converts detections represented as numpy arrays to dictionaries
  that can be ingested by the COCO evaluation API. Note that the image_ids
  provided here must match the ones given to
  ExportSingleImageGroundtruthToCoco. We assume that boxes and classes are in
  correspondence - that is: boxes[i, :] and classes[i]
  are associated with the same detection.

  Args:
    image_id: unique image identifier either of type integer or string.
    category_id_set: A set of valid class ids. Detections with classes not in
      category_id_set are dropped.
    detection_boxes: float numpy array of shape [num_detections, 4] containing
      detection boxes.
    detection_scores: float numpy array of shape [num_detections] containing
      scores for the detection boxes.
    detection_classes: integer numpy array of shape [num_detections] containing
      the classes for detection boxes.

  Returns:
    a list of detection annotations for a single image in the COCO format.

  Raises:
    ValueError: if (1) detection_boxes, detection_scores and detection_classes
      do not have the right lengths or (2) if each of the elements inside these
      lists do not have the correct shapes or (3) if image_ids are not integers.
  """
  if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1:
    raise ValueError('All entries in detection_classes and detection_scores '
                     'expected to be of rank 1.')
  if len(detection_boxes.shape) != 2:
    raise ValueError('All entries in detection_boxes expected to be of '
                     'rank 2.')
  if detection_boxes.shape[1] != 4:
    raise ValueError('All entries in detection_boxes should have '
                     'shape[1] == 4.')
  num_boxes = detection_classes.shape[0]
  if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]:
    raise ValueError('Corresponding entries in detection_classes, '
                     'detection_scores and detection_boxes should have '
                     'compatible shapes (i.e., agree on the 0th dimension). '
                     'Classes shape: %d. Boxes shape: %d. '
                     'Scores shape: %d' % (
                         detection_classes.shape[0], detection_boxes.shape[0],
                         detection_scores.shape[0]
                     ))
  detections_list = []
  for i in range(num_boxes):
    if detection_classes[i] in category_id_set:
      detections_list.append({
          'image_id': image_id,
          'category_id': int(detection_classes[i]),
          'bbox': list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])),
          'score': float(detection_scores[i])
      })
  return detections_list
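
# A minimal sketch with hypothetical toy detections for one image:
#
#   dets = ExportSingleImageDetectionBoxesToCoco(
#       image_id='img_1',
#       category_id_set=set([1, 2]),
#       detection_boxes=np.array([[10., 20., 50., 60.]], dtype=np.float32),
#       detection_scores=np.array([0.5], dtype=np.float32),
#       detection_classes=np.array([1]))
#   # dets[0] == {'image_id': 'img_1', 'category_id': 1,
#   #             'bbox': [20.0, 10.0, 40.0, 40.0], 'score': 0.5}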


def ExportSingleImageDetectionMasksToCoco(image_id,
                                          category_id_set,
                                          detection_masks,
                                          detection_scores,
                                          detection_classes):
  """Export detection masks of a single image to COCO format.

  This function converts detections represented as numpy arrays to dictionaries
  that can be ingested by the COCO evaluation API. We assume that
  detection_masks, detection_scores and detection_classes are in correspondence
  - that is: detection_masks[i, :], detection_classes[i] and
  detection_scores[i] are associated with the same annotation.

  Args:
    image_id: unique image identifier either of type integer or string.
    category_id_set: A set of valid class ids. Detections with classes not in
      category_id_set are dropped.
    detection_masks: uint8 numpy array of shape [num_detections, image_height,
      image_width] containing detection masks.
    detection_scores: float numpy array of shape [num_detections] containing
      scores for detection masks.
    detection_classes: integer numpy array of shape [num_detections] containing
      the classes for detection masks.

  Returns:
    a list of detection mask annotations for a single image in the COCO format.

  Raises:
    ValueError: if (1) detection_masks, detection_scores and detection_classes
      do not have the right lengths or (2) if each of the elements inside these
      lists do not have the correct shapes or (3) if image_ids are not integers.
  """
  if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1:
    raise ValueError('All entries in detection_classes and detection_scores '
                     'expected to be of rank 1.')
  num_boxes = detection_classes.shape[0]
  if not num_boxes == len(detection_masks) == detection_scores.shape[0]:
    raise ValueError('Corresponding entries in detection_classes, '
                     'detection_scores and detection_masks should have '
                     'compatible lengths and shapes. '
                     'Classes length: %d. Masks length: %d. '
                     'Scores length: %d' % (
                         detection_classes.shape[0], len(detection_masks),
                         detection_scores.shape[0]
                     ))
  detections_list = []
  for i in range(num_boxes):
    if detection_classes[i] in category_id_set:
      detections_list.append({
          'image_id': image_id,
          'category_id': int(detection_classes[i]),
          'segmentation': _RleCompress(detection_masks[i]),
          'score': float(detection_scores[i])
      })
  return detections_list


def ExportDetectionsToCOCO(image_ids,
                           detection_boxes,
                           detection_scores,
                           detection_classes,
                           categories,
                           output_path=None):
  """Export detection annotations in numpy arrays to COCO API.

  This function converts a set of predicted detections represented
  as numpy arrays to dictionaries that can be ingested by the COCO API.
  Inputs to this function are lists, consisting of boxes, scores and
  classes, respectively, corresponding to each image for which detections
  have been produced. Note that the image_ids provided here must
  match the ones given to the ExportGroundtruthToCOCO function in order
  for evaluation to work properly.

  We assume that for each image, boxes, scores and classes are in
  correspondence --- that is: detection_boxes[i, :], detection_scores[i] and
  detection_classes[i] are associated with the same detection.

  Args:
    image_ids: a list of unique image identifiers either of type integer or
      string.
    detection_boxes: list of numpy arrays with shape [num_detection_boxes, 4]
    detection_scores: list of numpy arrays (float) with shape
      [num_detection_boxes]. Note that num_detection_boxes can be different
      for each entry in the list.
    detection_classes: list of numpy arrays (int) with shape
      [num_detection_boxes]. Note that num_detection_boxes can be different
      for each entry in the list.
    categories: a list of dictionaries representing all possible categories.
      Each dict in this list must have an integer 'id' key uniquely identifying
      this category.
    output_path: (optional) path for exporting result to JSON

  Returns:
    list of dictionaries that can be read by COCO API, where each entry
    corresponds to a single detection and has keys from:
    ['image_id', 'category_id', 'bbox', 'score'].

  Raises:
    ValueError: if (1) detection_boxes and detection_classes do not have the
      right lengths or (2) if each of the elements inside these lists do not
      have the correct shapes or (3) if image_ids are not integers.
  """
  category_id_set = set([cat['id'] for cat in categories])
  detections_export_list = []
  if not (len(image_ids) == len(detection_boxes) == len(detection_scores) ==
          len(detection_classes)):
    raise ValueError('Input lists must have the same length')
  for image_id, boxes, scores, classes in zip(image_ids, detection_boxes,
                                              detection_scores,
                                              detection_classes):
    detections_export_list.extend(ExportSingleImageDetectionBoxesToCoco(
        image_id,
        category_id_set,
        boxes,
        scores,
        classes))
  if output_path:
    with tf.gfile.GFile(output_path, 'w') as fid:
      json_utils.Dump(detections_export_list, fid, float_digits=4, indent=2)
  return detections_export_list
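
# A minimal sketch tying the exporters to the evaluator; all inputs below
# (image_ids, the *_list variables and categories) are hypothetical:
#
#   groundtruth_dict = ExportGroundtruthToCOCO(
#       image_ids, groundtruth_boxes_list, groundtruth_classes_list,
#       categories)
#   detections_list = ExportDetectionsToCOCO(
#       image_ids, detection_boxes_list, detection_scores_list,
#       detection_classes_list, categories)
#   groundtruth = COCOWrapper(groundtruth_dict)
#   detections = groundtruth.LoadAnnotations(detections_list)
#   metrics, _ = COCOEvalWrapper(groundtruth, detections).ComputeMetrics()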


def ExportSegmentsToCOCO(image_ids,
                         detection_masks,
                         detection_scores,
                         detection_classes,
                         categories,
                         output_path=None):
  """Export segmentation masks in numpy arrays to COCO API.

  This function converts a set of predicted instance masks represented
  as numpy arrays to dictionaries that can be ingested by the COCO API.
  Inputs to this function are lists, consisting of segments, scores and
  classes, respectively, corresponding to each image for which detections
  have been produced.

  Note that this function is only recommended for small datasets.
  For a large dataset, it should be combined with a merge function
  (e.g. in map reduce); otherwise the memory consumption is large.

  We assume that for each image, masks, scores and classes are in
  correspondence --- that is: detection_masks[i, :, :, :], detection_scores[i]
  and detection_classes[i] are associated with the same detection.

  Args:
    image_ids: list of image ids (typically ints or strings)
    detection_masks: list of numpy arrays with shape [num_detection, h, w, 1]
      and type uint8. The height and width should match the shape of the
      corresponding image.
    detection_scores: list of numpy arrays (float) with shape
      [num_detection]. Note that num_detection can be different
      for each entry in the list.
    detection_classes: list of numpy arrays (int) with shape
      [num_detection]. Note that num_detection can be different
      for each entry in the list.
    categories: a list of dictionaries representing all possible categories.
      Each dict in this list must have an integer 'id' key uniquely identifying
      this category.
    output_path: (optional) path for exporting result to JSON

  Returns:
    list of dictionaries that can be read by COCO API, where each entry
    corresponds to a single detection and has keys from:
    ['image_id', 'category_id', 'segmentation', 'score'].

  Raises:
    ValueError: if detection_masks and detection_classes do not have the
      right lengths or if each of the elements inside these lists do not
      have the correct shapes.
  """
  if not (len(image_ids) == len(detection_masks) == len(detection_scores) ==
          len(detection_classes)):
    raise ValueError('Input lists must have the same length')

  segment_export_list = []
  for image_id, masks, scores, classes in zip(image_ids, detection_masks,
                                              detection_scores,
                                              detection_classes):

    if len(classes.shape) != 1 or len(scores.shape) != 1:
      raise ValueError('All entries in detection_classes and detection_scores '
                       'expected to be of rank 1.')
    if len(masks.shape) != 4:
      raise ValueError('All entries in masks expected to be of '
                       'rank 4. Given {}'.format(masks.shape))

    num_boxes = classes.shape[0]
    if not num_boxes == masks.shape[0] == scores.shape[0]:
      raise ValueError('Corresponding entries in detection_classes, '
                       'detection_scores and detection_masks should have '
                       'compatible shapes (i.e., agree on the 0th dimension).')

    category_id_set = set([cat['id'] for cat in categories])
    segment_export_list.extend(ExportSingleImageDetectionMasksToCoco(
        image_id, category_id_set, np.squeeze(masks, axis=3), scores, classes))

  if output_path:
    with tf.gfile.GFile(output_path, 'w') as fid:
      json_utils.Dump(segment_export_list, fid, float_digits=4, indent=2)
  return segment_export_list


def ExportKeypointsToCOCO(image_ids,
                          detection_keypoints,
                          detection_scores,
                          detection_classes,
                          categories,
                          output_path=None):
  """Exports keypoints in numpy arrays to COCO API.

  This function converts a set of predicted keypoints represented
  as numpy arrays to dictionaries that can be ingested by the COCO API.
  Inputs to this function are lists, consisting of keypoints, scores and
  classes, respectively, corresponding to each image for which detections
  have been produced.

  We assume that for each image, keypoints, scores and classes are in
  correspondence --- that is: detection_keypoints[i, :, :],
  detection_scores[i] and detection_classes[i] are associated with the same
  detection.

  Args:
    image_ids: list of image ids (typically ints or strings)
    detection_keypoints: list of numpy arrays with shape
      [num_detection, num_keypoints, 2] and type float32 in absolute
      x-y coordinates.
    detection_scores: list of numpy arrays (float) with shape
      [num_detection]. Note that num_detection can be different
      for each entry in the list.
    detection_classes: list of numpy arrays (int) with shape
      [num_detection]. Note that num_detection can be different
      for each entry in the list.
    categories: a list of dictionaries representing all possible categories.
      Each dict in this list must have an integer 'id' key uniquely identifying
      this category and an integer 'num_keypoints' key specifying the number of
      keypoints the category has.
    output_path: (optional) path for exporting result to JSON

  Returns:
    list of dictionaries that can be read by COCO API, where each entry
    corresponds to a single detection and has keys from:
    ['image_id', 'category_id', 'keypoints', 'score'].

  Raises:
    ValueError: if detection_keypoints and detection_classes do not have the
      right lengths or if each of the elements inside these lists do not
      have the correct shapes.
  """
  if not (len(image_ids) == len(detection_keypoints) ==
          len(detection_scores) == len(detection_classes)):
    raise ValueError('Input lists must have the same length')

  keypoints_export_list = []
  for image_id, keypoints, scores, classes in zip(
      image_ids, detection_keypoints, detection_scores, detection_classes):

    if len(classes.shape) != 1 or len(scores.shape) != 1:
      raise ValueError('All entries in detection_classes and detection_scores '
                       'expected to be of rank 1.')
    if len(keypoints.shape) != 3:
      raise ValueError('All entries in keypoints expected to be of '
                       'rank 3. Given {}'.format(keypoints.shape))

    num_boxes = classes.shape[0]
    if not num_boxes == keypoints.shape[0] == scores.shape[0]:
      raise ValueError('Corresponding entries in detection_classes, '
                       'detection_keypoints and detection_scores should have '
                       'compatible shapes (i.e., agree on the 0th dimension).')

    category_id_set = set([cat['id'] for cat in categories])
    category_id_to_num_keypoints_map = {
        cat['id']: cat['num_keypoints'] for cat in categories
        if 'num_keypoints' in cat}

    for i in range(num_boxes):
      if classes[i] not in category_id_set:
        raise ValueError('class id should be in category_id_set\n')

      if classes[i] in category_id_to_num_keypoints_map:
        num_keypoints = category_id_to_num_keypoints_map[classes[i]]
        # Adds extra ones to indicate the visibility for each keypoint as is
        # recommended by MSCOCO.
        instance_keypoints = np.concatenate(
            [keypoints[i, 0:num_keypoints, :],
             np.expand_dims(np.ones(num_keypoints), axis=1)],
            axis=1).astype(int)

        instance_keypoints = instance_keypoints.flatten().tolist()
        keypoints_export_list.append({
            'image_id': image_id,
            'category_id': int(classes[i]),
            'keypoints': instance_keypoints,
            'score': float(scores[i])
        })

  if output_path:
    with tf.gfile.GFile(output_path, 'w') as fid:
      json_utils.Dump(keypoints_export_list, fid, float_digits=4, indent=2)
  return keypoints_export_list
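
# Worked example of the keypoint flattening above (hypothetical values): two
# keypoints at (x=10, y=20) and (x=30, y=40), each with an appended
# visibility flag of 1, become the flat COCO list [10, 20, 1, 30, 40, 1].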