Commit b4499b90 authored by Yoni Ben-Meshulam, committed by TF Object Detection Team
Browse files

Improve testing and documentation for weighted dataset sampling.

PiperOrigin-RevId: 345586199
parent 101282ad
...@@ -577,6 +577,28 @@ class ReadDatasetTest(test_case.TestCase): ...@@ -577,6 +577,28 @@ class ReadDatasetTest(test_case.TestCase):
self._assert_item_count(data, 2, 0.25) self._assert_item_count(data, 2, 0.25)
self._assert_item_count(data, 20, 0.25) self._assert_item_count(data, 20, 0.25)
def test_read_dataset_sample_from_datasets_weights_non_normalized(self):
  """Checks that weights which do not sum to 1 still weight inputs equally."""
  reader_config = input_reader_pb2.InputReader()
  reader_config.num_readers = 2
  reader_config.shuffle = False
  # The weights [1, 1] sum to 2 rather than 1. This is expected to behave as
  # a 50/50 split, with each dataset carrying a weight of 1.
  reader_config.sample_from_datasets_weights.extend([1, 1])

  def fetch_batch():
    # One input path per configured weight.
    input_paths = [self._path_template % shard for shard in ('0', '1')]
    return self._get_dataset_next(
        input_paths, reader_config, batch_size=1000)

  samples = list(self.execute(fetch_batch, []))
  self.assertEqual(len(samples), 1000)
  # Every expected value is checked against the same ratio of 0.25.
  for expected_value in (1, 10, 2, 20):
    self._assert_item_count(samples, expected_value, 0.25)
def test_read_dataset_sample_from_datasets_weights_zero_weight(self): def test_read_dataset_sample_from_datasets_weights_zero_weight(self):
"""Ensure that the files' values are equally-weighted.""" """Ensure that the files' values are equally-weighted."""
config = input_reader_pb2.InputReader() config = input_reader_pb2.InputReader()
......
...@@ -159,7 +159,22 @@ message InputReader { ...@@ -159,7 +159,22 @@ message InputReader {
// applied individually to each dataset. // applied individually to each dataset.
// //
// Implementation follows tf.data.experimental.sample_from_datasets sampling // Implementation follows tf.data.experimental.sample_from_datasets sampling
// strategy. // strategy. Weights may take any non-negative value - only relative weights matter.
// Zero weights will result in a dataset not being sampled.
//
// Examples, assuming two input files configured:
//
// Equal weighting:
// sample_from_datasets_weights: 0.5
// sample_from_datasets_weights: 0.5
//
// 2:1 weighting:
// sample_from_datasets_weights: 2
// sample_from_datasets_weights: 1
//
// Exclude the second dataset:
// sample_from_datasets_weights: 1
// sample_from_datasets_weights: 0
repeated float sample_from_datasets_weights = 34; repeated float sample_from_datasets_weights = 34;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment