# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Tests for object_detection.utils.ops."""
import numpy as np
import tensorflow as tf

from object_detection.core import standard_fields as fields
from object_detection.utils import ops
from object_detection.utils import test_case

slim = tf.contrib.slim


class NormalizedToImageCoordinatesTest(tf.test.TestCase):

  def test_normalized_to_image_coordinates(self):
    normalized_boxes = tf.placeholder(tf.float32, shape=(None, 1, 4))
    normalized_boxes_np = np.array([[[0.0, 0.0, 1.0, 1.0]],
                                    [[0.5, 0.5, 1.0, 1.0]]])
    image_shape = tf.convert_to_tensor([1, 4, 4, 3], dtype=tf.int32)
    absolute_boxes = ops.normalized_to_image_coordinates(normalized_boxes,
                                                         image_shape,
                                                         parallel_iterations=2)

    expected_boxes = np.array([[[0, 0, 4, 4]],
                               [[2, 2, 4, 4]]])
    with self.test_session() as sess:
      absolute_boxes = sess.run(absolute_boxes,
                                feed_dict={normalized_boxes:
                                           normalized_boxes_np})

    self.assertAllEqual(absolute_boxes, expected_boxes)


class ReduceSumTrailingDimensions(tf.test.TestCase):

  def test_reduce_sum_trailing_dimensions(self):
    input_tensor = tf.placeholder(tf.float32, shape=[None, None, None])
    reduced_tensor = ops.reduce_sum_trailing_dimensions(input_tensor, ndims=2)
    with self.test_session() as sess:
      reduced_np = sess.run(reduced_tensor,
                            feed_dict={input_tensor: np.ones((2, 2, 2),
                                                             np.float32)})
      self.assertAllClose(reduced_np, 2 * np.ones((2, 2), np.float32))


class MeshgridTest(tf.test.TestCase):

  def test_meshgrid_numpy_comparison(self):
    """Tests meshgrid op with vectors, for which it should match numpy."""
    x = np.arange(4)
    y = np.arange(6)
    exp_xgrid, exp_ygrid = np.meshgrid(x, y)
    xgrid, ygrid = ops.meshgrid(x, y)
    with self.test_session() as sess:
      xgrid_output, ygrid_output = sess.run([xgrid, ygrid])
      self.assertAllEqual(xgrid_output, exp_xgrid)
      self.assertAllEqual(ygrid_output, exp_ygrid)

  def test_meshgrid_multidimensional(self):
    np.random.seed(18)
    x = np.random.rand(4, 1, 2).astype(np.float32)
    y = np.random.rand(2, 3).astype(np.float32)

    xgrid, ygrid = ops.meshgrid(x, y)

    grid_shape = list(y.shape) + list(x.shape)
    self.assertEqual(xgrid.get_shape().as_list(), grid_shape)
    self.assertEqual(ygrid.get_shape().as_list(), grid_shape)
    with self.test_session() as sess:
      xgrid_output, ygrid_output = sess.run([xgrid, ygrid])

    # Check the shape of the output grids
    self.assertEqual(xgrid_output.shape, tuple(grid_shape))
    self.assertEqual(ygrid_output.shape, tuple(grid_shape))

    # Check a few elements
    test_elements = [((3, 0, 0), (1, 2)),
                     ((2, 0, 1), (0, 0)),
                     ((0, 0, 0), (1, 1))]
    for xind, yind in test_elements:
      # These are float equality tests, but the meshgrid op should not
      # introduce rounding.
      self.assertEqual(xgrid_output[yind + xind], x[xind])
      self.assertEqual(ygrid_output[yind + xind], y[yind])


class OpsTestFixedPadding(tf.test.TestCase):

  def test_3x3_kernel(self):
    tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
    padded_tensor = ops.fixed_padding(tensor, 3)
    with self.test_session() as sess:
      padded_tensor_out = sess.run(padded_tensor)
    self.assertEqual((1, 4, 4, 1), padded_tensor_out.shape)

  def test_5x5_kernel(self):
    tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
    padded_tensor = ops.fixed_padding(tensor, 5)
    with self.test_session() as sess:
      padded_tensor_out = sess.run(padded_tensor)
    self.assertEqual((1, 6, 6, 1), padded_tensor_out.shape)

  def test_3x3_atrous_kernel(self):
    tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
    padded_tensor = ops.fixed_padding(tensor, 3, 2)
    with self.test_session() as sess:
      padded_tensor_out = sess.run(padded_tensor)
    self.assertEqual((1, 6, 6, 1), padded_tensor_out.shape)


class OpsTestPadToMultiple(tf.test.TestCase):

  def test_zero_padding(self):
    tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
    padded_tensor = ops.pad_to_multiple(tensor, 1)
    with self.test_session() as sess:
      padded_tensor_out = sess.run(padded_tensor)
    self.assertEqual((1, 2, 2, 1), padded_tensor_out.shape)

  def test_no_padding(self):
    tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
    padded_tensor = ops.pad_to_multiple(tensor, 2)
    with self.test_session() as sess:
      padded_tensor_out = sess.run(padded_tensor)
    self.assertEqual((1, 2, 2, 1), padded_tensor_out.shape)

  def test_non_square_padding(self):
    tensor = tf.constant([[[[0.], [0.]]]])
    padded_tensor = ops.pad_to_multiple(tensor, 2)
    with self.test_session() as sess:
      padded_tensor_out = sess.run(padded_tensor)
    self.assertEqual((1, 2, 2, 1), padded_tensor_out.shape)

  def test_padding(self):
    tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
    padded_tensor = ops.pad_to_multiple(tensor, 4)
    with self.test_session() as sess:
      padded_tensor_out = sess.run(padded_tensor)
    self.assertEqual((1, 4, 4, 1), padded_tensor_out.shape)


class OpsTestPaddedOneHotEncoding(tf.test.TestCase):

  def test_correct_one_hot_tensor_with_no_pad(self):
    indices = tf.constant([1, 2, 3, 5])
    one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=0)
    expected_tensor = np.array([[0, 1, 0, 0, 0, 0],
                                [0, 0, 1, 0, 0, 0],
                                [0, 0, 0, 1, 0, 0],
                                [0, 0, 0, 0, 0, 1]], np.float32)
    with self.test_session() as sess:
      out_one_hot_tensor = sess.run(one_hot_tensor)
      self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
                          atol=1e-10)

  def test_correct_one_hot_tensor_with_pad_one(self):
    indices = tf.constant([1, 2, 3, 5])
    one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=1)
    expected_tensor = np.array([[0, 0, 1, 0, 0, 0, 0],
                                [0, 0, 0, 1, 0, 0, 0],
                                [0, 0, 0, 0, 1, 0, 0],
                                [0, 0, 0, 0, 0, 0, 1]], np.float32)
    with self.test_session() as sess:
      out_one_hot_tensor = sess.run(one_hot_tensor)
      self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
                          atol=1e-10)

  def test_correct_one_hot_tensor_with_pad_three(self):
    indices = tf.constant([1, 2, 3, 5])
    one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=3)
    expected_tensor = np.array([[0, 0, 0, 0, 1, 0, 0, 0, 0],
                                [0, 0, 0, 0, 0, 1, 0, 0, 0],
                                [0, 0, 0, 0, 0, 0, 1, 0, 0],
                                [0, 0, 0, 0, 0, 0, 0, 0, 1]], np.float32)
    with self.test_session() as sess:
      out_one_hot_tensor = sess.run(one_hot_tensor)
      self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
                          atol=1e-10)

  def test_correct_padded_one_hot_tensor_with_empty_indices(self):
    depth = 6
    pad = 2
    indices = tf.constant([])
    one_hot_tensor = ops.padded_one_hot_encoding(
        indices, depth=depth, left_pad=pad)
    expected_tensor = np.zeros((0, depth + pad))
    with self.test_session() as sess:
      out_one_hot_tensor = sess.run(one_hot_tensor)
      self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
                          atol=1e-10)

  def test_return_none_on_zero_depth(self):
    indices = tf.constant([1, 2, 3, 4, 5])
    one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=0, left_pad=2)
    self.assertEqual(one_hot_tensor, None)

  def test_raise_value_error_on_rank_two_input(self):
    indices = tf.constant(1.0, shape=(2, 3))
    with self.assertRaises(ValueError):
      ops.padded_one_hot_encoding(indices, depth=6, left_pad=2)

  def test_raise_value_error_on_negative_pad(self):
    indices = tf.constant(1.0, shape=(2, 3))
    with self.assertRaises(ValueError):
      ops.padded_one_hot_encoding(indices, depth=6, left_pad=-1)

  def test_raise_value_error_on_float_pad(self):
    indices = tf.constant(1.0, shape=(2, 3))
    with self.assertRaises(ValueError):
      ops.padded_one_hot_encoding(indices, depth=6, left_pad=0.1)

  def test_raise_value_error_on_float_depth(self):
    indices = tf.constant(1.0, shape=(2, 3))
    with self.assertRaises(ValueError):
      ops.padded_one_hot_encoding(indices, depth=0.1, left_pad=2)


class OpsDenseToSparseBoxesTest(tf.test.TestCase):

  def test_return_all_boxes_when_all_input_boxes_are_valid(self):
    num_classes = 4
    num_valid_boxes = 3
    code_size = 4
    dense_location_placeholder = tf.placeholder(tf.float32,
                                                shape=(num_valid_boxes,
                                                       code_size))
    dense_num_boxes_placeholder = tf.placeholder(tf.int32, shape=(num_classes))
    box_locations, box_classes = ops.dense_to_sparse_boxes(
        dense_location_placeholder, dense_num_boxes_placeholder, num_classes)
    feed_dict = {dense_location_placeholder: np.random.uniform(
        size=[num_valid_boxes, code_size]),
                 dense_num_boxes_placeholder: np.array([1, 0, 0, 2],
                                                       dtype=np.int32)}
    expected_box_locations = feed_dict[dense_location_placeholder]
    expected_box_classes = np.array([0, 3, 3])
    with self.test_session() as sess:
      box_locations, box_classes = sess.run([box_locations, box_classes],
                                            feed_dict=feed_dict)
      self.assertAllClose(box_locations, expected_box_locations, rtol=1e-6,
                          atol=1e-6)
      self.assertAllEqual(box_classes, expected_box_classes)

  def test_return_only_valid_boxes_when_input_contains_invalid_boxes(self):
    num_classes = 4
    num_valid_boxes = 3
    num_boxes = 10
    code_size = 4
    dense_location_placeholder = tf.placeholder(tf.float32, shape=(num_boxes,
                                                                   code_size))
    dense_num_boxes_placeholder = tf.placeholder(tf.int32, shape=(num_classes))
    box_locations, box_classes = ops.dense_to_sparse_boxes(
        dense_location_placeholder, dense_num_boxes_placeholder, num_classes)
    feed_dict = {dense_location_placeholder: np.random.uniform(
        size=[num_boxes, code_size]),
                 dense_num_boxes_placeholder: np.array([1, 0, 0, 2],
                                                       dtype=np.int32)}
    expected_box_locations = (feed_dict[dense_location_placeholder]
                              [:num_valid_boxes])
    expected_box_classes = np.array([0, 3, 3])
    with self.test_session() as sess:
      box_locations, box_classes = sess.run([box_locations, box_classes],
                                            feed_dict=feed_dict)
      self.assertAllClose(box_locations, expected_box_locations, rtol=1e-6,
                          atol=1e-6)
      self.assertAllEqual(box_classes, expected_box_classes)


class OpsTestIndicesToDenseVector(tf.test.TestCase):

  def test_indices_to_dense_vector(self):
    size = 10000
    num_indices = np.random.randint(size)
    rand_indices = np.random.permutation(np.arange(size))[0:num_indices]

    expected_output = np.zeros(size, dtype=np.float32)
    expected_output[rand_indices] = 1.

    tf_rand_indices = tf.constant(rand_indices)
    indicator = ops.indices_to_dense_vector(tf_rand_indices, size)

    with self.test_session() as sess:
      output = sess.run(indicator)
      self.assertAllEqual(output, expected_output)
      self.assertEqual(output.dtype, expected_output.dtype)

  def test_indices_to_dense_vector_size_at_inference(self):
    size = 5000
    num_indices = 250
    all_indices = np.arange(size)
    rand_indices = np.random.permutation(all_indices)[0:num_indices]

    expected_output = np.zeros(size, dtype=np.float32)
    expected_output[rand_indices] = 1.

    tf_all_indices = tf.placeholder(tf.int32)
    tf_rand_indices = tf.constant(rand_indices)
    indicator = ops.indices_to_dense_vector(tf_rand_indices,
                                            tf.shape(tf_all_indices)[0])
    feed_dict = {tf_all_indices: all_indices}

    with self.test_session() as sess:
      output = sess.run(indicator, feed_dict=feed_dict)
      self.assertAllEqual(output, expected_output)
      self.assertEqual(output.dtype, expected_output.dtype)

  def test_indices_to_dense_vector_int(self):
    size = 500
    num_indices = 25
    rand_indices = np.random.permutation(np.arange(size))[0:num_indices]

    expected_output = np.zeros(size, dtype=np.int64)
    expected_output[rand_indices] = 1

    tf_rand_indices = tf.constant(rand_indices)
    indicator = ops.indices_to_dense_vector(
        tf_rand_indices, size, 1, dtype=tf.int64)

    with self.test_session() as sess:
      output = sess.run(indicator)
      self.assertAllEqual(output, expected_output)
      self.assertEqual(output.dtype, expected_output.dtype)

  def test_indices_to_dense_vector_custom_values(self):
    size = 100
    num_indices = 10
    rand_indices = np.random.permutation(np.arange(size))[0:num_indices]
    indices_value = np.random.rand(1)
    default_value = np.random.rand(1)

    expected_output = np.float32(np.ones(size) * default_value)
    expected_output[rand_indices] = indices_value

    tf_rand_indices = tf.constant(rand_indices)
    indicator = ops.indices_to_dense_vector(
        tf_rand_indices,
        size,
        indices_value=indices_value,
        default_value=default_value)

    with self.test_session() as sess:
      output = sess.run(indicator)
      self.assertAllClose(output, expected_output)
      self.assertEqual(output.dtype, expected_output.dtype)

  def test_indices_to_dense_vector_all_indices_as_input(self):
    size = 500
    num_indices = 500
    rand_indices = np.random.permutation(np.arange(size))[0:num_indices]

    expected_output = np.ones(size, dtype=np.float32)

    tf_rand_indices = tf.constant(rand_indices)
    indicator = ops.indices_to_dense_vector(tf_rand_indices, size)

    with self.test_session() as sess:
      output = sess.run(indicator)
      self.assertAllEqual(output, expected_output)
      self.assertEqual(output.dtype, expected_output.dtype)

  def test_indices_to_dense_vector_empty_indices_as_input(self):
    size = 500
    rand_indices = []

    expected_output = np.zeros(size, dtype=np.float32)

    tf_rand_indices = tf.constant(rand_indices)
    indicator = ops.indices_to_dense_vector(tf_rand_indices, size)

    with self.test_session() as sess:
      output = sess.run(indicator)
      self.assertAllEqual(output, expected_output)
      self.assertEqual(output.dtype, expected_output.dtype)


class GroundtruthFilterTest(tf.test.TestCase):

  def test_filter_groundtruth(self):
    input_image = tf.placeholder(tf.float32, shape=(None, None, 3))
    input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
    input_classes = tf.placeholder(tf.int32, shape=(None,))
    input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
    input_area = tf.placeholder(tf.float32, shape=(None,))
    input_difficult = tf.placeholder(tf.float32, shape=(None,))
    input_label_types = tf.placeholder(tf.string, shape=(None,))
    input_confidences = tf.placeholder(tf.float32, shape=(None,))
    valid_indices = tf.placeholder(tf.int32, shape=(None,))
    input_tensors = {
        fields.InputDataFields.image: input_image,
        fields.InputDataFields.groundtruth_boxes: input_boxes,
        fields.InputDataFields.groundtruth_classes: input_classes,
        fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
        fields.InputDataFields.groundtruth_area: input_area,
        fields.InputDataFields.groundtruth_difficult: input_difficult,
        fields.InputDataFields.groundtruth_label_types: input_label_types,
        fields.InputDataFields.groundtruth_confidences: input_confidences,
    }
    output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)

    image_tensor = np.random.rand(224, 224, 3)
    feed_dict = {
        input_image: image_tensor,
        input_boxes:
            np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]],
                     dtype=np.float),
        input_classes: np.array([1, 2], dtype=np.int32),
        input_is_crowd: np.array([False, True], dtype=np.bool),
        input_area: np.array([32, 48], dtype=np.float32),
        input_difficult: np.array([True, False], dtype=np.bool),
        input_label_types:
            np.array(['APPROPRIATE', 'INCORRECT'], dtype=np.string_),
        input_confidences: np.array([0.99, 0.5], dtype=np.float32),
        valid_indices: np.array([0], dtype=np.int32),
    }
    expected_tensors = {
        fields.InputDataFields.image: image_tensor,
        fields.InputDataFields.groundtruth_boxes: [[0.2, 0.4, 0.1, 0.8]],
        fields.InputDataFields.groundtruth_classes: [1],
        fields.InputDataFields.groundtruth_is_crowd: [False],
        fields.InputDataFields.groundtruth_area: [32],
        fields.InputDataFields.groundtruth_difficult: [True],
        fields.InputDataFields.groundtruth_label_types: ['APPROPRIATE'],
        fields.InputDataFields.groundtruth_confidences: [0.99],
    }
    with self.test_session() as sess:
      output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
      for key in [fields.InputDataFields.image,
                  fields.InputDataFields.groundtruth_boxes,
                  fields.InputDataFields.groundtruth_area,
                  fields.InputDataFields.groundtruth_confidences]:
        self.assertAllClose(expected_tensors[key], output_tensors[key])
      for key in [fields.InputDataFields.groundtruth_classes,
                  fields.InputDataFields.groundtruth_is_crowd,
                  fields.InputDataFields.groundtruth_label_types]:
        self.assertAllEqual(expected_tensors[key], output_tensors[key])

  def test_filter_with_missing_fields(self):
    input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
    input_classes = tf.placeholder(tf.int32, shape=(None,))
    input_tensors = {
        fields.InputDataFields.groundtruth_boxes: input_boxes,
        fields.InputDataFields.groundtruth_classes: input_classes
    }
    valid_indices = tf.placeholder(tf.int32, shape=(None,))

    feed_dict = {
        input_boxes:
            np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]],
                     dtype=np.float),
        input_classes:
            np.array([1, 2], dtype=np.int32),
        valid_indices:
            np.array([0], dtype=np.int32)
    }
    expected_tensors = {
        fields.InputDataFields.groundtruth_boxes:
            [[0.2, 0.4, 0.1, 0.8]],
        fields.InputDataFields.groundtruth_classes:
            [1]
    }
    output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
    with self.test_session() as sess:
      output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
      for key in [fields.InputDataFields.groundtruth_boxes]:
        self.assertAllClose(expected_tensors[key], output_tensors[key])
      for key in [fields.InputDataFields.groundtruth_classes]:
        self.assertAllEqual(expected_tensors[key], output_tensors[key])

  def test_filter_with_empty_fields(self):
    input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
    input_classes = tf.placeholder(tf.int32, shape=(None,))
    input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
    input_area = tf.placeholder(tf.float32, shape=(None,))
    input_difficult = tf.placeholder(tf.float32, shape=(None,))
    input_confidences = tf.placeholder(tf.float32, shape=(None,))
    valid_indices = tf.placeholder(tf.int32, shape=(None,))
    input_tensors = {
        fields.InputDataFields.groundtruth_boxes: input_boxes,
        fields.InputDataFields.groundtruth_classes: input_classes,
        fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
        fields.InputDataFields.groundtruth_area: input_area,
        fields.InputDataFields.groundtruth_difficult: input_difficult,
        fields.InputDataFields.groundtruth_confidences: input_confidences,
    }
    output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)

    feed_dict = {
        input_boxes:
            np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]],
                     dtype=np.float),
        input_classes: np.array([1, 2], dtype=np.int32),
        input_is_crowd: np.array([False, True], dtype=np.bool),
        input_area: np.array([], dtype=np.float32),
        input_difficult: np.array([], dtype=np.float32),
        input_confidences: np.array([0.99, 0.5], dtype=np.float32),
        valid_indices: np.array([0], dtype=np.int32)
    }
    expected_tensors = {
        fields.InputDataFields.groundtruth_boxes: [[0.2, 0.4, 0.1, 0.8]],
        fields.InputDataFields.groundtruth_classes: [1],
        fields.InputDataFields.groundtruth_is_crowd: [False],
        fields.InputDataFields.groundtruth_area: [],
        fields.InputDataFields.groundtruth_difficult: [],
        fields.InputDataFields.groundtruth_confidences: [0.99],
    }
    with self.test_session() as sess:
      output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
      for key in [fields.InputDataFields.groundtruth_boxes,
                  fields.InputDataFields.groundtruth_area,
                  fields.InputDataFields.groundtruth_confidences]:
        self.assertAllClose(expected_tensors[key], output_tensors[key])
      for key in [fields.InputDataFields.groundtruth_classes,
                  fields.InputDataFields.groundtruth_is_crowd]:
        self.assertAllEqual(expected_tensors[key], output_tensors[key])

  def test_filter_with_empty_groundtruth_boxes(self):
    input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
    input_classes = tf.placeholder(tf.int32, shape=(None,))
    input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
    input_area = tf.placeholder(tf.float32, shape=(None,))
    input_difficult = tf.placeholder(tf.float32, shape=(None,))
    input_confidences = tf.placeholder(tf.float32, shape=(None,))
    valid_indices = tf.placeholder(tf.int32, shape=(None,))
    input_tensors = {
        fields.InputDataFields.groundtruth_boxes: input_boxes,
        fields.InputDataFields.groundtruth_classes: input_classes,
        fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
        fields.InputDataFields.groundtruth_area: input_area,
        fields.InputDataFields.groundtruth_difficult: input_difficult,
        fields.InputDataFields.groundtruth_confidences: input_confidences,
    }
    output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)

    feed_dict = {
        input_boxes: np.array([], dtype=np.float).reshape(0, 4),
        input_classes: np.array([], dtype=np.int32),
        input_is_crowd: np.array([], dtype=np.bool),
        input_area: np.array([], dtype=np.float32),
        input_difficult: np.array([], dtype=np.float32),
        input_confidences: np.array([], dtype=np.float32),
        valid_indices: np.array([], dtype=np.int32),
    }
    with self.test_session() as sess:
      output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
      for key in input_tensors:
        if key == fields.InputDataFields.groundtruth_boxes:
          self.assertAllEqual([0, 4], output_tensors[key].shape)
        else:
          self.assertAllEqual([0], output_tensors[key].shape)


class RetainGroundTruthWithPositiveClasses(tf.test.TestCase):

  def test_filter_groundtruth_with_positive_classes(self):
    input_image = tf.placeholder(tf.float32, shape=(None, None, 3))
    input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
    input_classes = tf.placeholder(tf.int32, shape=(None,))
    input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
    input_area = tf.placeholder(tf.float32, shape=(None,))
    input_difficult = tf.placeholder(tf.float32, shape=(None,))
    input_label_types = tf.placeholder(tf.string, shape=(None,))
    input_confidences = tf.placeholder(tf.float32, shape=(None,))
    valid_indices = tf.placeholder(tf.int32, shape=(None,))
    input_tensors = {
        fields.InputDataFields.image: input_image,
        fields.InputDataFields.groundtruth_boxes: input_boxes,
        fields.InputDataFields.groundtruth_classes: input_classes,
        fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
        fields.InputDataFields.groundtruth_area: input_area,
        fields.InputDataFields.groundtruth_difficult: input_difficult,
        fields.InputDataFields.groundtruth_label_types: input_label_types,
        fields.InputDataFields.groundtruth_confidences: input_confidences,
    }
    output_tensors = ops.retain_groundtruth_with_positive_classes(input_tensors)

    image_tensor = np.random.rand(224, 224, 3)
    feed_dict = {
        input_image: image_tensor,
        input_boxes:
            np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]],
                     dtype=np.float),
        input_classes: np.array([1, 0], dtype=np.int32),
        input_is_crowd: np.array([False, True], dtype=np.bool),
        input_area: np.array([32, 48], dtype=np.float32),
        input_difficult: np.array([True, False], dtype=np.bool),
        input_label_types:
            np.array(['APPROPRIATE', 'INCORRECT'], dtype=np.string_),
        input_confidences: np.array([0.99, 0.5], dtype=np.float32),
        valid_indices: np.array([0], dtype=np.int32),
    }
    expected_tensors = {
        fields.InputDataFields.image: image_tensor,
        fields.InputDataFields.groundtruth_boxes: [[0.2, 0.4, 0.1, 0.8]],
        fields.InputDataFields.groundtruth_classes: [1],
        fields.InputDataFields.groundtruth_is_crowd: [False],
        fields.InputDataFields.groundtruth_area: [32],
        fields.InputDataFields.groundtruth_difficult: [True],
        fields.InputDataFields.groundtruth_label_types: ['APPROPRIATE'],
        fields.InputDataFields.groundtruth_confidences: [0.99],
    }
    with self.test_session() as sess:
      output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
      for key in [fields.InputDataFields.image,
                  fields.InputDataFields.groundtruth_boxes,
                  fields.InputDataFields.groundtruth_area,
                  fields.InputDataFields.groundtruth_confidences]:
        self.assertAllClose(expected_tensors[key], output_tensors[key])
      for key in [fields.InputDataFields.groundtruth_classes,
                  fields.InputDataFields.groundtruth_is_crowd,
                  fields.InputDataFields.groundtruth_label_types]:
        self.assertAllEqual(expected_tensors[key], output_tensors[key])


class ReplaceNaNGroundtruthLabelScoresWithOnes(tf.test.TestCase):

  def test_replace_nan_groundtruth_label_scores_with_ones(self):
    label_scores = tf.constant([np.nan, 1.0, np.nan])
    output_tensor = ops.replace_nan_groundtruth_label_scores_with_ones(
        label_scores)
    expected_tensor = [1.0, 1.0, 1.0]
    with self.test_session():
      output_tensor = output_tensor.eval()
      self.assertAllClose(expected_tensor, output_tensor)

  def test_input_equals_output_when_no_nans(self):
    input_label_scores = [0.5, 1.0, 1.0]
    label_scores_tensor = tf.constant(input_label_scores)
    output_label_scores = ops.replace_nan_groundtruth_label_scores_with_ones(
        label_scores_tensor)
    with self.test_session():
      output_label_scores = output_label_scores.eval()
      self.assertAllClose(input_label_scores, output_label_scores)


class GroundtruthFilterWithCrowdBoxesTest(tf.test.TestCase):

  def test_filter_groundtruth_with_crowd_boxes(self):
    input_tensors = {
        fields.InputDataFields.groundtruth_boxes:
            [[0.1, 0.2, 0.6, 0.8], [0.2, 0.4, 0.1, 0.8]],
        fields.InputDataFields.groundtruth_classes: [1, 2],
        fields.InputDataFields.groundtruth_is_crowd: [True, False],
        fields.InputDataFields.groundtruth_area: [100.0, 238.7],
        fields.InputDataFields.groundtruth_confidences: [0.5, 0.99],
    }

    expected_tensors = {
        fields.InputDataFields.groundtruth_boxes: [[0.2, 0.4, 0.1, 0.8]],
        fields.InputDataFields.groundtruth_classes: [2],
        fields.InputDataFields.groundtruth_is_crowd: [False],
        fields.InputDataFields.groundtruth_area: [238.7],
        fields.InputDataFields.groundtruth_confidences: [0.99],
    }

    output_tensors = ops.filter_groundtruth_with_crowd_boxes(
        input_tensors)
    with self.test_session() as sess:
      output_tensors = sess.run(output_tensors)
      for key in [fields.InputDataFields.groundtruth_boxes,
                  fields.InputDataFields.groundtruth_area,
                  fields.InputDataFields.groundtruth_confidences]:
        self.assertAllClose(expected_tensors[key], output_tensors[key])
      for key in [fields.InputDataFields.groundtruth_classes,
                  fields.InputDataFields.groundtruth_is_crowd]:
        self.assertAllEqual(expected_tensors[key], output_tensors[key])


class GroundtruthFilterWithNanBoxTest(tf.test.TestCase):

  def test_filter_groundtruth_with_nan_box_coordinates(self):
    input_tensors = {
        fields.InputDataFields.groundtruth_boxes:
            [[np.nan, np.nan, np.nan, np.nan], [0.2, 0.4, 0.1, 0.8]],
        fields.InputDataFields.groundtruth_classes: [1, 2],
        fields.InputDataFields.groundtruth_is_crowd: [False, True],
        fields.InputDataFields.groundtruth_area: [100.0, 238.7],
        fields.InputDataFields.groundtruth_confidences: [0.5, 0.99],
    }

    expected_tensors = {
        fields.InputDataFields.groundtruth_boxes: [[0.2, 0.4, 0.1, 0.8]],
        fields.InputDataFields.groundtruth_classes: [2],
        fields.InputDataFields.groundtruth_is_crowd: [True],
        fields.InputDataFields.groundtruth_area: [238.7],
        fields.InputDataFields.groundtruth_confidences: [0.99],
    }

    output_tensors = ops.filter_groundtruth_with_nan_box_coordinates(
        input_tensors)
    with self.test_session() as sess:
      output_tensors = sess.run(output_tensors)
      for key in [fields.InputDataFields.groundtruth_boxes,
                  fields.InputDataFields.groundtruth_area,
                  fields.InputDataFields.groundtruth_confidences]:
        self.assertAllClose(expected_tensors[key], output_tensors[key])
      for key in [fields.InputDataFields.groundtruth_classes,
                  fields.InputDataFields.groundtruth_is_crowd]:
        self.assertAllEqual(expected_tensors[key], output_tensors[key])


class GroundtruthFilterWithUnrecognizedClassesTest(tf.test.TestCase):

  def test_filter_unrecognized_classes(self):
    input_tensors = {
        fields.InputDataFields.groundtruth_boxes:
            [[.3, .3, .5, .7], [0.2, 0.4, 0.1, 0.8]],
        fields.InputDataFields.groundtruth_classes: [-1, 2],
        fields.InputDataFields.groundtruth_is_crowd: [False, True],
        fields.InputDataFields.groundtruth_area: [100.0, 238.7],
        fields.InputDataFields.groundtruth_confidences: [0.5, 0.99],
    }

    expected_tensors = {
        fields.InputDataFields.groundtruth_boxes: [[0.2, 0.4, 0.1, 0.8]],
        fields.InputDataFields.groundtruth_classes: [2],
        fields.InputDataFields.groundtruth_is_crowd: [True],
        fields.InputDataFields.groundtruth_area: [238.7],
        fields.InputDataFields.groundtruth_confidences: [0.99],
    }

    output_tensors = ops.filter_unrecognized_classes(input_tensors)
    with self.test_session() as sess:
      output_tensors = sess.run(output_tensors)
      for key in [fields.InputDataFields.groundtruth_boxes,
                  fields.InputDataFields.groundtruth_area,
                  fields.InputDataFields.groundtruth_confidences]:
        self.assertAllClose(expected_tensors[key], output_tensors[key])
      for key in [fields.InputDataFields.groundtruth_classes,
                  fields.InputDataFields.groundtruth_is_crowd]:
        self.assertAllEqual(expected_tensors[key], output_tensors[key])


class OpsTestNormalizeToTarget(tf.test.TestCase):

  def test_create_normalize_to_target(self):
    inputs = tf.random_uniform([5, 10, 12, 3])
    target_norm_value = 4.0
    dim = 3
    with self.test_session():
      output = ops.normalize_to_target(inputs, target_norm_value, dim)
      self.assertEqual(output.op.name, 'NormalizeToTarget/mul')
      var_name = tf.contrib.framework.get_variables()[0].name
      self.assertEqual(var_name, 'NormalizeToTarget/weights:0')

  def test_invalid_dim(self):
    inputs = tf.random_uniform([5, 10, 12, 3])
    target_norm_value = 4.0
    dim = 10
    with self.assertRaisesRegexp(
        ValueError,
        'dim must be non-negative but smaller than the input rank.'):
      ops.normalize_to_target(inputs, target_norm_value, dim)

  def test_invalid_target_norm_values(self):
    inputs = tf.random_uniform([5, 10, 12, 3])
    target_norm_value = [4.0, 4.0]
    dim = 3
    with self.assertRaisesRegexp(
        ValueError, 'target_norm_value must be a float or a list of floats'):
      ops.normalize_to_target(inputs, target_norm_value, dim)

  def test_correct_output_shape(self):
    inputs = tf.random_uniform([5, 10, 12, 3])
    target_norm_value = 4.0
    dim = 3
    with self.test_session():
      output = ops.normalize_to_target(inputs, target_norm_value, dim)
      self.assertEqual(output.get_shape().as_list(),
                       inputs.get_shape().as_list())

  def test_correct_initial_output_values(self):
    inputs = tf.constant([[[[3, 4], [7, 24]],
                           [[5, -12], [-1, 0]]]], tf.float32)
    target_norm_value = 10.0
    dim = 3
    expected_output = [[[[30/5.0, 40/5.0], [70/25.0, 240/25.0]],
                        [[50/13.0, -120/13.0], [-10, 0]]]]
    with self.test_session() as sess:
      normalized_inputs = ops.normalize_to_target(inputs, target_norm_value,
                                                  dim)
      sess.run(tf.global_variables_initializer())
      output = normalized_inputs.eval()
      self.assertAllClose(output, expected_output)

  def test_multiple_target_norm_values(self):
    inputs = tf.constant([[[[3, 4], [7, 24]],
                           [[5, -12], [-1, 0]]]], tf.float32)
    target_norm_value = [10.0, 20.0]
    dim = 3
    expected_output = [[[[30/5.0, 80/5.0], [70/25.0, 480/25.0]],
                        [[50/13.0, -240/13.0], [-10, 0]]]]
    with self.test_session() as sess:
      normalized_inputs = ops.normalize_to_target(inputs, target_norm_value,
                                                  dim)
      sess.run(tf.global_variables_initializer())
      output = normalized_inputs.eval()
      self.assertAllClose(output, expected_output)


class OpsTestPositionSensitiveCropRegions(tf.test.TestCase):

  def test_position_sensitive(self):
    num_spatial_bins = [3, 2]
    image_shape = [3, 2, 6]

    # First channel is 1's, second channel is 2's, etc.
    image = tf.constant(list(range(1, 3 * 2 + 1)) * 6, dtype=tf.float32,
                        shape=image_shape)
    boxes = tf.random_uniform((2, 4))

    # The result for both boxes should be [[1, 2], [3, 4], [5, 6]]
    # before averaging.
    expected_output = np.array([3.5, 3.5]).reshape([2, 1, 1, 1])

    for crop_size_mult in range(1, 3):
      crop_size = [3 * crop_size_mult, 2 * crop_size_mult]
      ps_crop_and_pool = ops.position_sensitive_crop_regions(
          image, boxes, crop_size, num_spatial_bins, global_pool=True)

      with self.test_session() as sess:
        output = sess.run(ps_crop_and_pool)
        self.assertAllClose(output, expected_output)

  def test_position_sensitive_with_equal_channels(self):
    num_spatial_bins = [2, 2]
    image_shape = [3, 3, 4]
    crop_size = [2, 2]

    image = tf.constant(list(range(1, 3 * 3 + 1)), dtype=tf.float32,
                        shape=[3, 3, 1])
    tiled_image = tf.tile(image, [1, 1, image_shape[2]])
    boxes = tf.random_uniform((3, 4))
    box_ind = tf.constant([0, 0, 0], dtype=tf.int32)

    # All channels are equal so position-sensitive crop and resize should
    # work as the usual crop and resize for just one channel.
    crop = tf.image.crop_and_resize(tf.expand_dims(image, axis=0), boxes,
                                    box_ind, crop_size)
    crop_and_pool = tf.reduce_mean(crop, [1, 2], keep_dims=True)

    ps_crop_and_pool = ops.position_sensitive_crop_regions(
        tiled_image,
        boxes,
        crop_size,
        num_spatial_bins,
        global_pool=True)

    with self.test_session() as sess:
      expected_output, output = sess.run((crop_and_pool, ps_crop_and_pool))
      self.assertAllClose(output, expected_output)

  def test_raise_value_error_on_num_bins_less_than_one(self):
    num_spatial_bins = [1, -1]
    image_shape = [1, 1, 2]
    crop_size = [2, 2]

    image = tf.constant(1, dtype=tf.float32, shape=image_shape)
    boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)

    with self.assertRaisesRegexp(ValueError, 'num_spatial_bins should be >= 1'):
      ops.position_sensitive_crop_regions(
          image, boxes, crop_size, num_spatial_bins, global_pool=True)

  def test_raise_value_error_on_non_divisible_crop_size(self):
    num_spatial_bins = [2, 3]
    image_shape = [1, 1, 6]
    crop_size = [3, 2]

    image = tf.constant(1, dtype=tf.float32, shape=image_shape)
    boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)

    with self.assertRaisesRegexp(
        ValueError, 'crop_size should be divisible by num_spatial_bins'):
      ops.position_sensitive_crop_regions(
          image, boxes, crop_size, num_spatial_bins, global_pool=True)

  def test_raise_value_error_on_non_divisible_num_channels(self):
    num_spatial_bins = [2, 2]
    image_shape = [1, 1, 5]
    crop_size = [2, 2]

    image = tf.constant(1, dtype=tf.float32, shape=image_shape)
    boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)

    with self.assertRaisesRegexp(
        ValueError, 'Dimension size must be evenly divisible by 4 but is 5'):
      ops.position_sensitive_crop_regions(
          image, boxes, crop_size, num_spatial_bins, global_pool=True)

  def test_position_sensitive_with_global_pool_false(self):
    num_spatial_bins = [3, 2]
    image_shape = [3, 2, 6]
    num_boxes = 2

    # First channel is 1's, second channel is 2's, etc.
    image = tf.constant(list(range(1, 3 * 2 + 1)) * 6, dtype=tf.float32,
                        shape=image_shape)
    boxes = tf.random_uniform((num_boxes, 4))

    expected_output = []

    # Expected output, when crop_size = [3, 2].
    expected_output.append(np.expand_dims(
        np.tile(np.array([[1, 2],
                          [3, 4],
                          [5, 6]]), (num_boxes, 1, 1)),
        axis=-1))

    # Expected output, when crop_size = [6, 4].
    expected_output.append(np.expand_dims(
        np.tile(np.array([[1, 1, 2, 2],
                          [1, 1, 2, 2],
                          [3, 3, 4, 4],
                          [3, 3, 4, 4],
                          [5, 5, 6, 6],
                          [5, 5, 6, 6]]), (num_boxes, 1, 1)),
        axis=-1))

    for crop_size_mult in range(1, 3):
      crop_size = [3 * crop_size_mult, 2 * crop_size_mult]
      ps_crop = ops.position_sensitive_crop_regions(
          image, boxes, crop_size, num_spatial_bins, global_pool=False)
      with self.test_session() as sess:
        output = sess.run(ps_crop)
      self.assertAllEqual(output, expected_output[crop_size_mult - 1])

  def test_position_sensitive_with_global_pool_false_and_do_global_pool(self):
    num_spatial_bins = [3, 2]
    image_shape = [3, 2, 6]
    num_boxes = 2

    # First channel is 1's, second channel is 2's, etc.
    image = tf.constant(list(range(1, 3 * 2 + 1)) * 6, dtype=tf.float32,
                        shape=image_shape)
    boxes = tf.random_uniform((num_boxes, 4))

    expected_output = []

    # Expected output, when crop_size = [3, 2].
    expected_output.append(np.mean(
        np.expand_dims(
            np.tile(np.array([[1, 2],
                              [3, 4],
                              [5, 6]]), (num_boxes, 1, 1)),
            axis=-1),
        axis=(1, 2), keepdims=True))

    # Expected output, when crop_size = [6, 4].
    expected_output.append(np.mean(
        np.expand_dims(
            np.tile(np.array([[1, 1, 2, 2],
                              [1, 1, 2, 2],
                              [3, 3, 4, 4],
                              [3, 3, 4, 4],
                              [5, 5, 6, 6],
                              [5, 5, 6, 6]]), (num_boxes, 1, 1)),
            axis=-1),
        axis=(1, 2), keepdims=True))

    for crop_size_mult in range(1, 3):
      crop_size = [3 * crop_size_mult, 2 * crop_size_mult]

      # Perform global_pooling after running the function with
      # global_pool=False.
      ps_crop = ops.position_sensitive_crop_regions(
          image, boxes, crop_size, num_spatial_bins, global_pool=False)
      ps_crop_and_pool = tf.reduce_mean(
          ps_crop, reduction_indices=(1, 2), keep_dims=True)

      with self.test_session() as sess:
        output = sess.run(ps_crop_and_pool)

      self.assertAllEqual(output, expected_output[crop_size_mult - 1])

  def test_raise_value_error_on_non_square_block_size(self):
    num_spatial_bins = [3, 2]
    image_shape = [3, 2, 6]
    crop_size = [6, 2]

    image = tf.constant(1, dtype=tf.float32, shape=image_shape)
    boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)

    with self.assertRaisesRegexp(
        ValueError, 'Only support square bin crop size for now.'):
      ops.position_sensitive_crop_regions(
          image, boxes, crop_size, num_spatial_bins, global_pool=False)


class OpsTestBatchPositionSensitiveCropRegions(tf.test.TestCase):

  def test_position_sensitive_with_single_bin(self):
    num_spatial_bins = [1, 1]
    image_shape = [2, 3, 3, 4]
    crop_size = [2, 2]

    image = tf.random_uniform(image_shape)
    boxes = tf.random_uniform((2, 3, 4))
    box_ind = tf.constant([0, 0, 0, 1, 1, 1], dtype=tf.int32)

    # When a single bin is used, position-sensitive crop and pool should be
    # the same as non-position sensitive crop and pool.
    crop = tf.image.crop_and_resize(image, tf.reshape(boxes, [-1, 4]), box_ind,
                                    crop_size)
    crop_and_pool = tf.reduce_mean(crop, [1, 2], keepdims=True)
    crop_and_pool = tf.reshape(crop_and_pool, [2, 3, 1, 1, 4])

    ps_crop_and_pool = ops.batch_position_sensitive_crop_regions(
        image, boxes, crop_size, num_spatial_bins, global_pool=True)

    with self.test_session() as sess:
      expected_output, output = sess.run((crop_and_pool, ps_crop_and_pool))
      self.assertAllClose(output, expected_output)

  def test_position_sensitive_with_global_pool_false_and_known_boxes(self):
    num_spatial_bins = [2, 2]
    image_shape = [2, 2, 2, 4]
    crop_size = [2, 2]

    images = tf.constant(list(range(1, 2 * 2 * 4 + 1)) * 2, dtype=tf.float32,
                         shape=image_shape)

    # First box contains whole image, and second box contains only first row.
    boxes = tf.constant(np.array([[[0., 0., 1., 1.]],
                                  [[0., 0., 0.5, 1.]]]), dtype=tf.float32)
    # box_ind = tf.constant([0, 1], dtype=tf.int32)

    expected_output = []

    # Expected output, when the box containing whole image.
    expected_output.append(
        np.reshape(np.array([[4, 7],
                             [10, 13]]),
                   (1, 2, 2, 1))
    )

    # Expected output, when the box containing only first row.
    expected_output.append(
        np.reshape(np.array([[3, 6],
                             [7, 10]]),
                   (1, 2, 2, 1))
    )
    expected_output = np.stack(expected_output, axis=0)

    ps_crop = ops.batch_position_sensitive_crop_regions(
        images, boxes, crop_size, num_spatial_bins, global_pool=False)

    with self.test_session() as sess:
      output = sess.run(ps_crop)
      self.assertAllEqual(output, expected_output)

  def test_position_sensitive_with_global_pool_false_and_single_bin(self):
    num_spatial_bins = [1, 1]
    image_shape = [2, 3, 3, 4]
    crop_size = [1, 1]

    images = tf.random_uniform(image_shape)
    boxes = tf.random_uniform((2, 3, 4))
    # box_ind = tf.constant([0, 0, 0, 1, 1, 1], dtype=tf.int32)

    # Since single_bin is used and crop_size = [1, 1] (i.e., no crop resize),
    # the outputs are the same whatever the global_pool value is.
    ps_crop_and_pool = ops.batch_position_sensitive_crop_regions(
        images, boxes, crop_size, num_spatial_bins, global_pool=True)
    ps_crop = ops.batch_position_sensitive_crop_regions(
        images, boxes, crop_size, num_spatial_bins, global_pool=False)

    with self.test_session() as sess:
      pooled_output, unpooled_output = sess.run((ps_crop_and_pool, ps_crop))
      self.assertAllClose(pooled_output, unpooled_output)


class ReframeBoxMasksToImageMasksTest(tf.test.TestCase):

  def testZeroImageOnEmptyMask(self):
    box_masks = tf.constant([[[0, 0],
                              [0, 0]]], dtype=tf.float32)
    boxes = tf.constant([[0.0, 0.0, 1.0, 1.0]], dtype=tf.float32)
    image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
                                                       image_height=4,
                                                       image_width=4)
    np_expected_image_masks = np.array([[[0, 0, 0, 0],
                                         [0, 0, 0, 0],
                                         [0, 0, 0, 0],
                                         [0, 0, 0, 0]]], dtype=np.float32)
    with self.test_session() as sess:
      np_image_masks = sess.run(image_masks)
      self.assertAllClose(np_image_masks, np_expected_image_masks)

  def testZeroBoxMasks(self):
    box_masks = tf.zeros([0, 3, 3], dtype=tf.float32)
    boxes = tf.zeros([0, 4], dtype=tf.float32)
    image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
                                                       image_height=4,
                                                       image_width=4)
    with self.test_session() as sess:
      np_image_masks = sess.run(image_masks)
      self.assertAllEqual(np_image_masks.shape, np.array([0, 4, 4]))

  def testMaskIsCenteredInImageWhenBoxIsCentered(self):
    box_masks = tf.constant([[[1, 1],
                              [1, 1]]], dtype=tf.float32)
    boxes = tf.constant([[0.25, 0.25, 0.75, 0.75]], dtype=tf.float32)
    image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
                                                       image_height=4,
                                                       image_width=4)
    np_expected_image_masks = np.array([[[0, 0, 0, 0],
                                         [0, 1, 1, 0],
                                         [0, 1, 1, 0],
                                         [0, 0, 0, 0]]], dtype=np.float32)
    with self.test_session() as sess:
      np_image_masks = sess.run(image_masks)
      self.assertAllClose(np_image_masks, np_expected_image_masks)

  def testMaskOffCenterRemainsOffCenterInImage(self):
    box_masks = tf.constant([[[1, 0],
                              [0, 1]]], dtype=tf.float32)
    boxes = tf.constant([[0.25, 0.5, 0.75, 1.0]], dtype=tf.float32)
    image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
                                                       image_height=4,
                                                       image_width=4)
    np_expected_image_masks = np.array([[[0, 0, 0, 0],
                                         [0, 0, 0.6111111, 0.16666669],
                                         [0, 0, 0.3888889, 0.83333337],
                                         [0, 0, 0, 0]]], dtype=np.float32)
    with self.test_session() as sess:
      np_image_masks = sess.run(image_masks)
      self.assertAllClose(np_image_masks, np_expected_image_masks)


class MergeBoxesWithMultipleLabelsTest(tf.test.TestCase):

  def testMergeBoxesWithMultipleLabels(self):
    boxes = tf.constant(
        [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
         [0.25, 0.25, 0.75, 0.75]],
        dtype=tf.float32)
    class_indices = tf.constant([0, 4, 2], dtype=tf.int32)
    class_confidences = tf.constant([0.8, 0.2, 0.1], dtype=tf.float32)
    num_classes = 5
    merged_boxes, merged_classes, merged_confidences, merged_box_indices = (
        ops.merge_boxes_with_multiple_labels(
            boxes, class_indices, class_confidences, num_classes))
    expected_merged_boxes = np.array(
        [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=np.float32)
    expected_merged_classes = np.array(
        [[1, 0, 1, 0, 0], [0, 0, 0, 0, 1]], dtype=np.int32)
    expected_merged_confidences = np.array(
        [[0.8, 0, 0.1, 0, 0], [0, 0, 0, 0, 0.2]], dtype=np.float32)
    expected_merged_box_indices = np.array([0, 1], dtype=np.int32)
    with self.test_session() as sess:
      (np_merged_boxes, np_merged_classes, np_merged_confidences,
       np_merged_box_indices) = sess.run(
           [merged_boxes, merged_classes, merged_confidences,
            merged_box_indices])
      self.assertAllClose(np_merged_boxes, expected_merged_boxes)
      self.assertAllClose(np_merged_classes, expected_merged_classes)
      self.assertAllClose(np_merged_confidences, expected_merged_confidences)
      self.assertAllClose(np_merged_box_indices, expected_merged_box_indices)

  def testMergeBoxesWithMultipleLabelsCornerCase(self):
    boxes = tf.constant(
        [[0, 0, 1, 1], [0, 1, 1, 1], [1, 0, 1, 1], [1, 1, 1, 1],
         [1, 1, 1, 1], [1, 0, 1, 1], [0, 1, 1, 1], [0, 0, 1, 1]],
        dtype=tf.float32)
    class_indices = tf.constant([0, 1, 2, 3, 2, 1, 0, 3], dtype=tf.int32)
    class_confidences = tf.constant([0.1, 0.9, 0.2, 0.8, 0.3, 0.7, 0.4, 0.6],
                                    dtype=tf.float32)
    num_classes = 4
    merged_boxes, merged_classes, merged_confidences, merged_box_indices = (
        ops.merge_boxes_with_multiple_labels(
            boxes, class_indices, class_confidences, num_classes))
    expected_merged_boxes = np.array(
        [[0, 0, 1, 1], [0, 1, 1, 1], [1, 0, 1, 1], [1, 1, 1, 1]],
        dtype=np.float32)
    expected_merged_classes = np.array(
        [[1, 0, 0, 1], [1, 1, 0, 0], [0, 1, 1, 0], [0, 0, 1, 1]],
        dtype=np.int32)
    expected_merged_confidences = np.array(
        [[0.1, 0, 0, 0.6], [0.4, 0.9, 0, 0],
         [0, 0.7, 0.2, 0], [0, 0, 0.3, 0.8]], dtype=np.float32)
    expected_merged_box_indices = np.array([0, 1, 2, 3], dtype=np.int32)
    with self.test_session() as sess:
      (np_merged_boxes, np_merged_classes, np_merged_confidences,
       np_merged_box_indices) = sess.run(
           [merged_boxes, merged_classes, merged_confidences,
            merged_box_indices])
      self.assertAllClose(np_merged_boxes, expected_merged_boxes)
      self.assertAllClose(np_merged_classes, expected_merged_classes)
      self.assertAllClose(np_merged_confidences, expected_merged_confidences)
      self.assertAllClose(np_merged_box_indices, expected_merged_box_indices)

  def testMergeBoxesWithEmptyInputs(self):
    boxes = tf.zeros([0, 4], dtype=tf.float32)
    class_indices = tf.constant([], dtype=tf.int32)
    class_confidences = tf.constant([], dtype=tf.float32)
    num_classes = 5
    merged_boxes, merged_classes, merged_confidences, merged_box_indices = (
        ops.merge_boxes_with_multiple_labels(
            boxes, class_indices, class_confidences, num_classes))
    with self.test_session() as sess:
      (np_merged_boxes, np_merged_classes, np_merged_confidences,
       np_merged_box_indices) = sess.run(
           [merged_boxes, merged_classes, merged_confidences,
            merged_box_indices])
      self.assertAllEqual(np_merged_boxes.shape, [0, 4])
      self.assertAllEqual(np_merged_classes.shape, [0, 5])
      self.assertAllEqual(np_merged_confidences.shape, [0, 5])
      self.assertAllEqual(np_merged_box_indices.shape, [0])


class NearestNeighborUpsamplingTest(test_case.TestCase):

  def test_upsampling_with_single_scale(self):

    def graph_fn(inputs):
      custom_op_output = ops.nearest_neighbor_upsampling(inputs, scale=2)
      return custom_op_output

    inputs = np.reshape(np.arange(4).astype(np.float32), [1, 2, 2, 1])
    custom_op_output = self.execute(graph_fn, [inputs])

    expected_output = [[[[0], [0], [1], [1]],
                        [[0], [0], [1], [1]],
                        [[2], [2], [3], [3]],
                        [[2], [2], [3], [3]]]]
    self.assertAllClose(custom_op_output, expected_output)

  def test_upsampling_with_separate_height_width_scales(self):

    def graph_fn(inputs):
      custom_op_output = ops.nearest_neighbor_upsampling(inputs,
                                                         height_scale=2,
                                                         width_scale=3)
      return custom_op_output

    inputs = np.reshape(np.arange(4).astype(np.float32), [1, 2, 2, 1])
    custom_op_output = self.execute(graph_fn, [inputs])

    expected_output = [[[[0], [0], [0], [1], [1], [1]],
                        [[0], [0], [0], [1], [1], [1]],
                        [[2], [2], [2], [3], [3], [3]],
                        [[2], [2], [2], [3], [3], [3]]]]
    self.assertAllClose(custom_op_output, expected_output)


class MatmulGatherOnZerothAxis(test_case.TestCase):

  def test_gather_2d(self):

    def graph_fn(params, indices):
      return ops.matmul_gather_on_zeroth_axis(params, indices)

    params = np.array([[1, 2, 3, 4],
                       [5, 6, 7, 8],
                       [9, 10, 11, 12],
                       [0, 1, 0, 0]], dtype=np.float32)
    indices = np.array([2, 2, 1], dtype=np.int32)
    expected_output = np.array([[9, 10, 11, 12], [9, 10, 11, 12], [5, 6, 7, 8]])
    gather_output = self.execute(graph_fn, [params, indices])
    self.assertAllClose(gather_output, expected_output)

  def test_gather_3d(self):

    def graph_fn(params, indices):
      return ops.matmul_gather_on_zeroth_axis(params, indices)

    params = np.array([[[1, 2], [3, 4]],
                       [[5, 6], [7, 8]],
                       [[9, 10], [11, 12]],
                       [[0, 1], [0, 0]]], dtype=np.float32)
    indices = np.array([0, 3, 1], dtype=np.int32)
    expected_output = np.array([[[1, 2], [3, 4]],
                                [[0, 1], [0, 0]],
                                [[5, 6], [7, 8]]])
    gather_output = self.execute(graph_fn, [params, indices])
    self.assertAllClose(gather_output, expected_output)

  def test_gather_with_many_indices(self):

    def graph_fn(params, indices):
      return ops.matmul_gather_on_zeroth_axis(params, indices)

    params = np.array([[1, 2, 3, 4],
                       [5, 6, 7, 8],
                       [9, 10, 11, 12],
                       [0, 1, 0, 0]], dtype=np.float32)
    indices = np.array([0, 0, 0, 0, 0, 0], dtype=np.int32)
    expected_output = np.array(6*[[1, 2, 3, 4]])
    gather_output = self.execute(graph_fn, [params, indices])
    self.assertAllClose(gather_output, expected_output)

  def test_gather_with_dynamic_shape_input(self):
    params_placeholder = tf.placeholder(tf.float32, shape=[None, 4])
    indices_placeholder = tf.placeholder(tf.int32, shape=[None])
    gather_result = ops.matmul_gather_on_zeroth_axis(
        params_placeholder, indices_placeholder)
    params = np.array([[1, 2, 3, 4],
                       [5, 6, 7, 8],
                       [9, 10, 11, 12],
                       [0, 1, 0, 0]], dtype=np.float32)
    indices = np.array([0, 0, 0, 0, 0, 0])
    expected_output = np.array(6*[[1, 2, 3, 4]])
    with self.test_session() as sess:
      gather_output = sess.run(gather_result, feed_dict={
          params_placeholder: params, indices_placeholder: indices})
      self.assertAllClose(gather_output, expected_output)


class OpsTestMatMulCropAndResize(test_case.TestCase):

  def testMatMulCropAndResize2x2To1x1(self):

    def graph_fn(image, boxes):
      return ops.matmul_crop_and_resize(image, boxes, crop_size=[1, 1])

    image = np.array([[[[1], [2]], [[3], [4]]]], dtype=np.float32)
    boxes = np.array([[[0, 0, 1, 1]]], dtype=np.float32)
    expected_output = [[[[[2.5]]]]]
    crop_output = self.execute(graph_fn, [image, boxes])
    self.assertAllClose(crop_output, expected_output)

  def testMatMulCropAndResize2x2To1x1Flipped(self):

    def graph_fn(image, boxes):
      return ops.matmul_crop_and_resize(image, boxes, crop_size=[1, 1])

    image = np.array([[[[1], [2]], [[3], [4]]]], dtype=np.float32)
    boxes = np.array([[[1, 1, 0, 0]]], dtype=np.float32)
    expected_output = [[[[[2.5]]]]]
    crop_output = self.execute(graph_fn, [image, boxes])
    self.assertAllClose(crop_output, expected_output)

  def testMatMulCropAndResize2x2To3x3(self):

    def graph_fn(image, boxes):
      return ops.matmul_crop_and_resize(image, boxes, crop_size=[3, 3])

    image = np.array([[[[1], [2]], [[3], [4]]]], dtype=np.float32)
    boxes = np.array([[[0, 0, 1, 1]]], dtype=np.float32)
    expected_output = [[[[[1.0], [1.5], [2.0]],
                         [[2.0], [2.5], [3.0]],
                         [[3.0], [3.5], [4.0]]]]]
    crop_output = self.execute(graph_fn, [image, boxes])
    self.assertAllClose(crop_output, expected_output)

  def testMatMulCropAndResize2x2To3x3Flipped(self):

    def graph_fn(image, boxes):
      return ops.matmul_crop_and_resize(image, boxes, crop_size=[3, 3])

    image = np.array([[[[1], [2]], [[3], [4]]]], dtype=np.float32)
    boxes = np.array([[[1, 1, 0, 0]]], dtype=np.float32)
    expected_output = [[[[[4.0], [3.5], [3.0]],
                         [[3.0], [2.5], [2.0]],
                         [[2.0], [1.5], [1.0]]]]]
    crop_output = self.execute(graph_fn, [image, boxes])
    self.assertAllClose(crop_output, expected_output)

  def testMatMulCropAndResize3x3To2x2(self):

    def graph_fn(image, boxes):
      return ops.matmul_crop_and_resize(image, boxes, crop_size=[2, 2])

    image = np.array([[[[1], [2], [3]],
                       [[4], [5], [6]],
                       [[7], [8], [9]]]], dtype=np.float32)
    boxes = np.array([[[0, 0, 1, 1],
                       [0, 0, .5, .5]]], dtype=np.float32)
    expected_output = [[[[[1], [3]], [[7], [9]]],
                        [[[1], [2]], [[4], [5]]]]]
    crop_output = self.execute(graph_fn, [image, boxes])
    self.assertAllClose(crop_output, expected_output)

  def testMatMulCropAndResize3x3To2x2_2Channels(self):

    def graph_fn(image, boxes):
      return ops.matmul_crop_and_resize(image, boxes, crop_size=[2, 2])

    image = np.array([[[[1, 0], [2, 1], [3, 2]],
                       [[4, 3], [5, 4], [6, 5]],
                       [[7, 6], [8, 7], [9, 8]]]], dtype=np.float32)
    boxes = np.array([[[0, 0, 1, 1],
                       [0, 0, .5, .5]]], dtype=np.float32)
    expected_output = [[[[[1, 0], [3, 2]], [[7, 6], [9, 8]]],
                        [[[1, 0], [2, 1]], [[4, 3], [5, 4]]]]]
    crop_output = self.execute(graph_fn, [image, boxes])
    self.assertAllClose(crop_output, expected_output)

  def testBatchMatMulCropAndResize3x3To2x2_2Channels(self):

    def graph_fn(image, boxes):
      return ops.matmul_crop_and_resize(image, boxes, crop_size=[2, 2])

    image = np.array([[[[1, 0], [2, 1], [3, 2]],
                       [[4, 3], [5, 4], [6, 5]],
                       [[7, 6], [8, 7], [9, 8]]],
                      [[[1, 0], [2, 1], [3, 2]],
                       [[4, 3], [5, 4], [6, 5]],
                       [[7, 6], [8, 7], [9, 8]]]], dtype=np.float32)
    boxes = np.array([[[0, 0, 1, 1],
                       [0, 0, .5, .5]],
                      [[1, 1, 0, 0],
                       [.5, .5, 0, 0]]], dtype=np.float32)
    expected_output = [[[[[1, 0], [3, 2]], [[7, 6], [9, 8]]],
                        [[[1, 0], [2, 1]], [[4, 3], [5, 4]]]],
                       [[[[9, 8], [7, 6]], [[3, 2], [1, 0]]],
                        [[[5, 4], [4, 3]], [[2, 1], [1, 0]]]]]
    crop_output = self.execute(graph_fn, [image, boxes])
    self.assertAllClose(crop_output, expected_output)

  def testMatMulCropAndResize3x3To2x2Flipped(self):

    def graph_fn(image, boxes):
      return ops.matmul_crop_and_resize(image, boxes, crop_size=[2, 2])

    image = np.array([[[[1], [2], [3]],
                       [[4], [5], [6]],
                       [[7], [8], [9]]]], dtype=np.float32)
    boxes = np.array([[[1, 1, 0, 0],
                       [.5, .5, 0, 0]]], dtype=np.float32)
    expected_output = [[[[[9], [7]], [[3], [1]]],
                        [[[5], [4]], [[2], [1]]]]]
    crop_output = self.execute(graph_fn, [image, boxes])
    self.assertAllClose(crop_output, expected_output)

  def testInvalidInputShape(self):
    image = tf.constant([[[1], [2]], [[3], [4]]], dtype=tf.float32)
    boxes = tf.constant([[-1, -1, 1, 1]], dtype=tf.float32)
    crop_size = [4, 4]
    with self.assertRaises(ValueError):
      _ = ops.matmul_crop_and_resize(image, boxes, crop_size)


class OpsTestCropAndResize(test_case.TestCase):

  def testBatchCropAndResize3x3To2x2_2Channels(self):

    def graph_fn(image, boxes):
      return ops.native_crop_and_resize(image, boxes, crop_size=[2, 2])

    image = np.array([[[[1, 0], [2, 1], [3, 2]],
                       [[4, 3], [5, 4], [6, 5]],
                       [[7, 6], [8, 7], [9, 8]]],
                      [[[1, 0], [2, 1], [3, 2]],
                       [[4, 3], [5, 4], [6, 5]],
                       [[7, 6], [8, 7], [9, 8]]]], dtype=np.float32)
    boxes = np.array([[[0, 0, 1, 1],
                       [0, 0, .5, .5]],
                      [[1, 1, 0, 0],
                       [.5, .5, 0, 0]]], dtype=np.float32)
    expected_output = [[[[[1, 0], [3, 2]], [[7, 6], [9, 8]]],
                        [[[1, 0], [2, 1]], [[4, 3], [5, 4]]]],
                       [[[[9, 8], [7, 6]], [[3, 2], [1, 0]]],
                        [[[5, 4], [4, 3]], [[2, 1], [1, 0]]]]]
    crop_output = self.execute_cpu(graph_fn, [image, boxes])
    self.assertAllClose(crop_output, expected_output)


if __name__ == '__main__':
  tf.test.main()