Commit 6fe50699 authored by Marianne Linhares Monteiro, committed by GitHub
Browse files

Generating a single file for each mode: train, validation, eval

parent 7269b862
...@@ -46,9 +46,12 @@ def _bytes_feature(value): ...@@ -46,9 +46,12 @@ def _bytes_feature(value):
def _get_file_names(): def _get_file_names():
"""Returns the file names expected to exist in the input_dir.""" """Returns the file names expected to exist for training, validation
file_names = ['data_batch_%d' % i for i in xrange(1, 6)] and evaluation in the input_dir."""
file_names.append('test_batch') file_names = {}
file_names['train'] = ['data_batch_%d' % i for i in xrange(1, 5)]
file_names['validation'] = ['data_batch_5']
file_names['eval'] = ['test_batch']
return file_names return file_names
...@@ -58,35 +61,36 @@ def read_pickle_from_file(filename): ...@@ -58,35 +61,36 @@ def read_pickle_from_file(filename):
return data_dict return data_dict
def convert_to_tfrecord(input_files, output_file):
    """Converts a list of input files into a single tfrecords file.

    Each input file is loaded with read_pickle_from_file, and every entry in
    it is written, in order, as one tf.train.Example carrying an 'image'
    bytes feature and a 'label' int64 feature.

    Args:
      input_files: iterable of paths of the input files to read.
      output_file: path of the tfrecords file to write.
    """
    print('Generating %s' % output_file)
    record_writer = tf.python_io.TFRecordWriter(output_file)
    # try/finally so the writer is always closed, even when loading or
    # serializing one of the inputs fails part-way through.
    try:
        for input_file in input_files:
            data_dict = read_pickle_from_file(input_file)
            data = data_dict['data']
            labels = data_dict['labels']
            # The number of labels drives the loop; data is indexed in step.
            for i, label in enumerate(labels):
                example = tf.train.Example(
                    features=tf.train.Features(feature={
                        'image': _bytes_feature(data[i].tobytes()),
                        'label': _int64_feature(label),
                    }))
                record_writer.write(example.SerializeToString())
    finally:
        record_writer.close()
def main(argv):
    """Writes one '<mode>.tfrecords' file per mode from _get_file_names().

    Input files are looked up under FLAGS.input_dir and the outputs are
    written under FLAGS.output_dir.
    """
    del argv  # Unused.
    for mode, files in _get_file_names().items():
        input_files = []
        for f in files:
            input_files.append(os.path.join(FLAGS.input_dir, f))
        output_file = os.path.join(FLAGS.output_dir, mode + '.tfrecords')
        # Convert to Examples and write the result to TFRecords.
        convert_to_tfrecord(input_files, output_file)
    print('Done!')
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment