Commit 4c99ab71, authored by Vincent Dumoulin and committed by A. Unique TensorFlower
Browse files

Internal change

PiperOrigin-RevId: 381089283
parent 8b47c484
...@@ -3,11 +3,13 @@ ...@@ -3,11 +3,13 @@
# Processes the COCO few-shot benchmark into TFRecord files. Requires `wget`. # Processes the COCO few-shot benchmark into TFRecord files. Requires `wget`.
tmp_dir=$(mktemp -d -t coco-XXXXXXXXXX) tmp_dir=$(mktemp -d -t coco-XXXXXXXXXX)
base_image_dir="/tmp/coco_images"
output_dir="/tmp/coco_few_shot" output_dir="/tmp/coco_few_shot"
while getopts "o:" o; do while getopts ":i:o:" o; do
case "${o}" in case "${o}" in
o) output_dir=${OPTARG} ;; o) output_dir=${OPTARG} ;;
*) echo "Usage: ${0} [-o <output_dir>]" 1>&2; exit 1 ;; i) base_image_dir=${OPTARG} ;;
*) echo "Usage: ${0} [-i <base_image_dir>] [-o <output_dir>]" 1>&2; exit 1 ;;
esac esac
done done
...@@ -25,8 +27,8 @@ for seed in {0..9}; do ...@@ -25,8 +27,8 @@ for seed in {0..9}; do
for shots in 10 30; do for shots in 10 30; do
python create_coco_tf_record.py \ python create_coco_tf_record.py \
--logtostderr \ --logtostderr \
--image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \ --image_dir="${base_image_dir}/train2014" \
--image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \ --image_dir="${base_image_dir}/val2014" \
--image_info_file="${tmp_dir}/${shots}shot_seed${seed}.json" \ --image_info_file="${tmp_dir}/${shots}shot_seed${seed}.json" \
--object_annotations_file="${tmp_dir}/${shots}shot_seed${seed}.json" \ --object_annotations_file="${tmp_dir}/${shots}shot_seed${seed}.json" \
--caption_annotations_file="" \ --caption_annotations_file="" \
...@@ -37,8 +39,8 @@ done ...@@ -37,8 +39,8 @@ done
python create_coco_tf_record.py \ python create_coco_tf_record.py \
--logtostderr \ --logtostderr \
--image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \ --image_dir="${base_image_dir}/train2014" \
--image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \ --image_dir="${base_image_dir}/val2014" \
--image_info_file="${tmp_dir}/datasplit/5k.json" \ --image_info_file="${tmp_dir}/datasplit/5k.json" \
--object_annotations_file="${tmp_dir}/datasplit/5k.json" \ --object_annotations_file="${tmp_dir}/datasplit/5k.json" \
--caption_annotations_file="" \ --caption_annotations_file="" \
...@@ -47,12 +49,22 @@ python create_coco_tf_record.py \ ...@@ -47,12 +49,22 @@ python create_coco_tf_record.py \
python create_coco_tf_record.py \ python create_coco_tf_record.py \
--logtostderr \ --logtostderr \
--image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \ --image_dir="${base_image_dir}/train2014" \
--image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \ --image_dir="${base_image_dir}/val2014" \
--image_info_file="${tmp_dir}/datasplit/trainvalno5k_base.json" \ --image_info_file="${tmp_dir}/datasplit/trainvalno5k_base.json" \
--object_annotations_file="${tmp_dir}/datasplit/trainvalno5k_base.json" \ --object_annotations_file="${tmp_dir}/datasplit/trainvalno5k_base.json" \
--caption_annotations_file="" \ --caption_annotations_file="" \
--output_file_prefix="${output_dir}/trainvalno5k_base" \ --output_file_prefix="${output_dir}/trainvalno5k_base" \
--num_shards=200 --num_shards=200
python create_coco_tf_record.py \
--logtostderr \
--image_dir="${base_image_dir}/train2014" \
--image_dir="${base_image_dir}/val2014" \
--image_info_file="${tmp_dir}/datasplit/5k_base.json" \
--object_annotations_file="${tmp_dir}/datasplit/5k_base.json" \
--caption_annotations_file="" \
--output_file_prefix="${output_dir}/5k_base" \
--num_shards=10
rm -rf "${tmp_dir}" rm -rf "${tmp_dir}"
...@@ -87,16 +87,18 @@ BASE_CLASS_IDS = [8, 10, 11, 13, 14, 15, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, ...@@ -87,16 +87,18 @@ BASE_CLASS_IDS = [8, 10, 11, 13, 14, 15, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
def main(unused_argv): def main(unused_argv):
workdir = FLAGS.workdir workdir = FLAGS.workdir
# Filter novel class annotations from the training set. # Filter novel class annotations from the training and validation sets.
file_path = os.path.join(workdir, 'datasplit', 'trainvalno5k.json') for name in ('trainvalno5k', '5k'):
with tf.io.gfile.GFile(file_path, 'r') as f: file_path = os.path.join(workdir, 'datasplit', '{}.json'.format(name))
json_dict = json.load(f) with tf.io.gfile.GFile(file_path, 'r') as f:
json_dict = json.load(f)
json_dict['annotations'] = [a for a in json_dict['annotations']
if a['category_id'] in BASE_CLASS_IDS] json_dict['annotations'] = [a for a in json_dict['annotations']
output_path = os.path.join(workdir, 'datasplit', 'trainvalno5k_base.json') if a['category_id'] in BASE_CLASS_IDS]
with tf.io.gfile.GFile(output_path, 'w') as f: output_path = os.path.join(
json.dump(json_dict, f) workdir, 'datasplit', '{}_base.json'.format(name))
with tf.io.gfile.GFile(output_path, 'w') as f:
json.dump(json_dict, f)
for seed, shots in itertools.product(SEEDS, SHOTS): for seed, shots in itertools.product(SEEDS, SHOTS):
# Retrieve all examples for a given seed and shots setting. # Retrieve all examples for a given seed and shots setting.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment