Commit a0494c94 authored by Vincent Dumoulin's avatar Vincent Dumoulin Committed by A. Unique TensorFlower
Browse files

Internal change

PiperOrigin-RevId: 381089283
parent 0a9026e4
......@@ -3,11 +3,13 @@
# Processes the COCO few-shot benchmark into TFRecord files. Requires `wget`.
tmp_dir=$(mktemp -d -t coco-XXXXXXXXXX)
base_image_dir="/tmp/coco_images"
output_dir="/tmp/coco_few_shot"
while getopts "o:" o; do
while getopts ":i:o:" o; do
case "${o}" in
o) output_dir=${OPTARG} ;;
*) echo "Usage: ${0} [-o <output_dir>]" 1>&2; exit 1 ;;
i) base_image_dir=${OPTARG} ;;
*) echo "Usage: ${0} [-i <base_image_dir>] [-o <output_dir>]" 1>&2; exit 1 ;;
esac
done
......@@ -25,8 +27,8 @@ for seed in {0..9}; do
for shots in 10 30; do
python create_coco_tf_record.py \
--logtostderr \
--image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \
--image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \
--image_dir="${base_image_dir}/train2014" \
--image_dir="${base_image_dir}/val2014" \
--image_info_file="${tmp_dir}/${shots}shot_seed${seed}.json" \
--object_annotations_file="${tmp_dir}/${shots}shot_seed${seed}.json" \
--caption_annotations_file="" \
......@@ -37,8 +39,8 @@ done
python create_coco_tf_record.py \
--logtostderr \
--image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \
--image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \
--image_dir="${base_image_dir}/train2014" \
--image_dir="${base_image_dir}/val2014" \
--image_info_file="${tmp_dir}/datasplit/5k.json" \
--object_annotations_file="${tmp_dir}/datasplit/5k.json" \
--caption_annotations_file="" \
......@@ -47,12 +49,22 @@ python create_coco_tf_record.py \
python create_coco_tf_record.py \
--logtostderr \
--image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \
--image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \
--image_dir="${base_image_dir}/train2014" \
--image_dir="${base_image_dir}/val2014" \
--image_info_file="${tmp_dir}/datasplit/trainvalno5k_base.json" \
--object_annotations_file="${tmp_dir}/datasplit/trainvalno5k_base.json" \
--caption_annotations_file="" \
--output_file_prefix="${output_dir}/trainvalno5k_base" \
--num_shards=200
python create_coco_tf_record.py \
--logtostderr \
--image_dir="${base_image_dir}/train2014" \
--image_dir="${base_image_dir}/val2014" \
--image_info_file="${tmp_dir}/datasplit/5k_base.json" \
--object_annotations_file="${tmp_dir}/datasplit/5k_base.json" \
--caption_annotations_file="" \
--output_file_prefix="${output_dir}/5k_base" \
--num_shards=10
rm -rf "${tmp_dir}"
......@@ -87,14 +87,16 @@ BASE_CLASS_IDS = [8, 10, 11, 13, 14, 15, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
def main(unused_argv):
workdir = FLAGS.workdir
# Filter novel class annotations from the training set.
file_path = os.path.join(workdir, 'datasplit', 'trainvalno5k.json')
# Filter novel class annotations from the training and validation sets.
for name in ('trainvalno5k', '5k'):
file_path = os.path.join(workdir, 'datasplit', '{}.json'.format(name))
with tf.io.gfile.GFile(file_path, 'r') as f:
json_dict = json.load(f)
json_dict['annotations'] = [a for a in json_dict['annotations']
if a['category_id'] in BASE_CLASS_IDS]
output_path = os.path.join(workdir, 'datasplit', 'trainvalno5k_base.json')
output_path = os.path.join(
workdir, 'datasplit', '{}_base.json'.format(name))
with tf.io.gfile.GFile(output_path, 'w') as f:
json.dump(json_dict, f)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment