Internal change

PiperOrigin-RevId: 381089283

Internal change
PiperOrigin-RevId: 381089283
4c99ab71 · Vincent Dumoulin · A. Unique TensorFlower · 8b47c484 · 4c99ab71 · 4c99ab71
Commit 4c99ab71, authored Jun 23, 2021 by Vincent Dumoulin; committed by A. Unique TensorFlower on Jun 23, 2021
2 changed files
--- a/official/vision/beta/data/process_coco_few_shot.sh
+++ b/official/vision/beta/data/process_coco_few_shot.sh
@@ -3,11 +3,13 @@
 # Processes the COCO few-shot benchmark into TFRecord files. Requires `wget`.
 tmp_dir=$(mktemp -d -t coco-XXXXXXXXXX)
+base_image_dir="/tmp/coco_images"
 output_dir="/tmp/coco_few_shot"
-while getopts "o:" o; do
+while getopts ":i:o:" o; do
  case "${o}" in
    o) output_dir=${OPTARG} ;;
-    *) echo "Usage: ${0} [-o <output_dir>]" 1>&2; exit 1 ;;
+    i) base_image_dir=${OPTARG} ;;
+    *) echo "Usage: ${0} [-i <base_image_dir>] [-o <output_dir>]" 1>&2; exit 1 ;;
  esac
 done
@@ -25,8 +27,8 @@ for seed in {0..9}; do
  for shots in 10 30; do
    python create_coco_tf_record.py \
        --logtostderr \
-        --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \
+        --image_dir="${base_image_dir}/train2014" \
-        --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \
+        --image_dir="${base_image_dir}/val2014" \
        --image_info_file="${tmp_dir}/${shots}shot_seed${seed}.json" \
        --object_annotations_file="${tmp_dir}/${shots}shot_seed${seed}.json" \
        --caption_annotations_file="" \
@@ -37,8 +39,8 @@ done
 python create_coco_tf_record.py \
    --logtostderr \
-    --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \
+    --image_dir="${base_image_dir}/train2014" \
-    --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \
+    --image_dir="${base_image_dir}/val2014" \
    --image_info_file="${tmp_dir}/datasplit/5k.json" \
    --object_annotations_file="${tmp_dir}/datasplit/5k.json" \
    --caption_annotations_file="" \
@@ -47,12 +49,22 @@ python create_coco_tf_record.py \
 python create_coco_tf_record.py \
    --logtostderr \
-    --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \
+    --image_dir="${base_image_dir}/train2014" \
-    --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \
+    --image_dir="${base_image_dir}/val2014" \
    --image_info_file="${tmp_dir}/datasplit/trainvalno5k_base.json" \
    --object_annotations_file="${tmp_dir}/datasplit/trainvalno5k_base.json" \
    --caption_annotations_file="" \
    --output_file_prefix="${output_dir}/trainvalno5k_base" \
    --num_shards=200
+python create_coco_tf_record.py \
+    --logtostderr \
+    --image_dir="${base_image_dir}/train2014" \
+    --image_dir="${base_image_dir}/val2014" \
+    --image_info_file="${tmp_dir}/datasplit/5k_base.json" \
+    --object_annotations_file="${tmp_dir}/datasplit/5k_base.json" \
+    --caption_annotations_file="" \
+    --output_file_prefix="${output_dir}/5k_base" \
+    --num_shards=10
 rm -rf "${tmp_dir}"
--- a/official/vision/beta/data/process_coco_few_shot_json_files.py
+++ b/official/vision/beta/data/process_coco_few_shot_json_files.py
@@ -87,16 +87,18 @@ BASE_CLASS_IDS = [8, 10, 11, 13, 14, 15, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
 def main(unused_argv):
  workdir = FLAGS.workdir
-  # Filter novel class annotations from the training set.
+  # Filter novel class annotations from the training and validation sets.
-  file_path = os.path.join(workdir, 'datasplit', 'trainvalno5k.json')
+  for name in ('trainvalno5k', '5k'):
-  with tf.io.gfile.GFile(file_path, 'r') as f:
+    file_path = os.path.join(workdir, 'datasplit', '{}.json'.format(name))
-    json_dict = json.load(f)
+    with tf.io.gfile.GFile(file_path, 'r') as f:
+      json_dict = json.load(f)
-  json_dict['annotations'] = [a for a in json_dict['annotations']
-                              if a['category_id'] in BASE_CLASS_IDS]
+    json_dict['annotations'] = [a for a in json_dict['annotations']
-  output_path = os.path.join(workdir, 'datasplit', 'trainvalno5k_base.json')
+                                if a['category_id'] in BASE_CLASS_IDS]
-  with tf.io.gfile.GFile(output_path, 'w') as f:
+    output_path = os.path.join(
-    json.dump(json_dict, f)
+        workdir, 'datasplit', '{}_base.json'.format(name))
+    with tf.io.gfile.GFile(output_path, 'w') as f:
+      json.dump(json_dict, f)
  for seed, shots in itertools.product(SEEDS, SHOTS):
    # Retrieve all examples for a given seed and shots setting.