Commit 2b676a9b authored by Gunho Park's avatar Gunho Park
Browse files

Merge remote-tracking branch 'upstream/master'

parents 6ddd627a bcbce005
...@@ -246,10 +246,11 @@ class Trainer(_AsyncTrainer): ...@@ -246,10 +246,11 @@ class Trainer(_AsyncTrainer):
self._train_loss = tf.keras.metrics.Mean("training_loss", dtype=tf.float32) self._train_loss = tf.keras.metrics.Mean("training_loss", dtype=tf.float32)
self._validation_loss = tf.keras.metrics.Mean( self._validation_loss = tf.keras.metrics.Mean(
"validation_loss", dtype=tf.float32) "validation_loss", dtype=tf.float32)
model_metrics = model.metrics if hasattr(model, "metrics") else []
self._train_metrics = self.task.build_metrics( self._train_metrics = self.task.build_metrics(
training=True) + self.model.metrics training=True) + model_metrics
self._validation_metrics = self.task.build_metrics( self._validation_metrics = self.task.build_metrics(
training=False) + self.model.metrics training=False) + model_metrics
self.init_async() self.init_async()
......
...@@ -181,20 +181,21 @@ class AxProcessor(DataProcessor): ...@@ -181,20 +181,21 @@ class AxProcessor(DataProcessor):
class ColaProcessor(DataProcessor): class ColaProcessor(DataProcessor):
"""Processor for the CoLA data set (GLUE version).""" """Processor for the CoLA data set (GLUE version)."""
def __init__(self, process_text_fn=tokenization.convert_to_unicode):
super(ColaProcessor, self).__init__(process_text_fn)
self.dataset = tfds.load("glue/cola", try_gcs=True)
def get_train_examples(self, data_dir): def get_train_examples(self, data_dir):
"""See base class.""" """See base class."""
return self._create_examples( return self._create_examples_tfds("train")
self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")
def get_dev_examples(self, data_dir): def get_dev_examples(self, data_dir):
"""See base class.""" """See base class."""
return self._create_examples( return self._create_examples_tfds("validation")
self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")
def get_test_examples(self, data_dir): def get_test_examples(self, data_dir):
"""See base class.""" """See base class."""
return self._create_examples( return self._create_examples_tfds("test")
self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")
def get_labels(self): def get_labels(self):
"""See base class.""" """See base class."""
...@@ -205,22 +206,19 @@ class ColaProcessor(DataProcessor): ...@@ -205,22 +206,19 @@ class ColaProcessor(DataProcessor):
"""See base class.""" """See base class."""
return "COLA" return "COLA"
def _create_examples(self, lines, set_type): def _create_examples_tfds(self, set_type):
"""Creates examples for the training/dev/test sets.""" """Creates examples for the training/dev/test sets."""
dataset = self.dataset[set_type].as_numpy_iterator()
examples = [] examples = []
for i, line in enumerate(lines): for i, example in enumerate(dataset):
# Only the test set has a header.
if set_type == "test" and i == 0:
continue
guid = "%s-%s" % (set_type, i) guid = "%s-%s" % (set_type, i)
if set_type == "test": label = "0"
text_a = self.process_text_fn(line[1]) text_a = self.process_text_fn(example["sentence"])
label = "0" if set_type != "test":
else: label = str(example["label"])
text_a = self.process_text_fn(line[3])
label = self.process_text_fn(line[1])
examples.append( examples.append(
InputExample(guid=guid, text_a=text_a, text_b=None, label=label)) InputExample(
guid=guid, text_a=text_a, text_b=None, label=label, weight=None))
return examples return examples
......
...@@ -40,6 +40,7 @@ class SentencePredictionDataConfig(cfg.DataConfig): ...@@ -40,6 +40,7 @@ class SentencePredictionDataConfig(cfg.DataConfig):
label_type: str = 'int' label_type: str = 'int'
# Whether to include the example id number. # Whether to include the example id number.
include_example_id: bool = False include_example_id: bool = False
label_field: str = 'label_ids'
# Maps the key in TfExample to feature name. # Maps the key in TfExample to feature name.
# E.g 'label_ids' to 'next_sentence_labels' # E.g 'label_ids' to 'next_sentence_labels'
label_name: Optional[Tuple[str, str]] = None label_name: Optional[Tuple[str, str]] = None
...@@ -53,6 +54,7 @@ class SentencePredictionDataLoader(data_loader.DataLoader): ...@@ -53,6 +54,7 @@ class SentencePredictionDataLoader(data_loader.DataLoader):
self._params = params self._params = params
self._seq_length = params.seq_length self._seq_length = params.seq_length
self._include_example_id = params.include_example_id self._include_example_id = params.include_example_id
self._label_field = params.label_field
if params.label_name: if params.label_name:
self._label_name_mapping = dict([params.label_name]) self._label_name_mapping = dict([params.label_name])
else: else:
...@@ -65,7 +67,7 @@ class SentencePredictionDataLoader(data_loader.DataLoader): ...@@ -65,7 +67,7 @@ class SentencePredictionDataLoader(data_loader.DataLoader):
'input_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64), 'input_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64),
'input_mask': tf.io.FixedLenFeature([self._seq_length], tf.int64), 'input_mask': tf.io.FixedLenFeature([self._seq_length], tf.int64),
'segment_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64), 'segment_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64),
'label_ids': tf.io.FixedLenFeature([], label_type), self._label_field: tf.io.FixedLenFeature([], label_type),
} }
if self._include_example_id: if self._include_example_id:
name_to_features['example_id'] = tf.io.FixedLenFeature([], tf.int64) name_to_features['example_id'] = tf.io.FixedLenFeature([], tf.int64)
...@@ -92,10 +94,10 @@ class SentencePredictionDataLoader(data_loader.DataLoader): ...@@ -92,10 +94,10 @@ class SentencePredictionDataLoader(data_loader.DataLoader):
if self._include_example_id: if self._include_example_id:
x['example_id'] = record['example_id'] x['example_id'] = record['example_id']
x['label_ids'] = record['label_ids'] x[self._label_field] = record[self._label_field]
if 'label_ids' in self._label_name_mapping: if self._label_field in self._label_name_mapping:
x[self._label_name_mapping['label_ids']] = record['label_ids'] x[self._label_name_mapping[self._label_field]] = record[self._label_field]
return x return x
...@@ -215,7 +217,7 @@ class SentencePredictionTextDataLoader(data_loader.DataLoader): ...@@ -215,7 +217,7 @@ class SentencePredictionTextDataLoader(data_loader.DataLoader):
model_inputs = self._text_processor(segments) model_inputs = self._text_processor(segments)
if self._include_example_id: if self._include_example_id:
model_inputs['example_id'] = record['example_id'] model_inputs['example_id'] = record['example_id']
model_inputs['label_ids'] = record[self._label_field] model_inputs[self._label_field] = record[self._label_field]
return model_inputs return model_inputs
def _decode(self, record: tf.Tensor): def _decode(self, record: tf.Tensor):
......
...@@ -197,13 +197,14 @@ class SentencePredictionTfdsDataLoaderTest(tf.test.TestCase, ...@@ -197,13 +197,14 @@ class SentencePredictionTfdsDataLoaderTest(tf.test.TestCase,
vocab_file=vocab_file_path) vocab_file=vocab_file_path)
dataset = loader.SentencePredictionTextDataLoader(data_config).load() dataset = loader.SentencePredictionTextDataLoader(data_config).load()
features = next(iter(dataset)) features = next(iter(dataset))
label_field = data_config.label_field
self.assertCountEqual( self.assertCountEqual(
['input_word_ids', 'input_type_ids', 'input_mask', 'label_ids'], ['input_word_ids', 'input_type_ids', 'input_mask', label_field],
features.keys()) features.keys())
self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length)) self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length))
self.assertEqual(features['input_mask'].shape, (batch_size, seq_length)) self.assertEqual(features['input_mask'].shape, (batch_size, seq_length))
self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length)) self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length))
self.assertEqual(features['label_ids'].shape, (batch_size,)) self.assertEqual(features[label_field].shape, (batch_size,))
@parameterized.parameters(True, False) @parameterized.parameters(True, False)
def test_python_sentencepiece_preprocessing(self, use_tfds): def test_python_sentencepiece_preprocessing(self, use_tfds):
...@@ -231,13 +232,14 @@ class SentencePredictionTfdsDataLoaderTest(tf.test.TestCase, ...@@ -231,13 +232,14 @@ class SentencePredictionTfdsDataLoaderTest(tf.test.TestCase,
) )
dataset = loader.SentencePredictionTextDataLoader(data_config).load() dataset = loader.SentencePredictionTextDataLoader(data_config).load()
features = next(iter(dataset)) features = next(iter(dataset))
label_field = data_config.label_field
self.assertCountEqual( self.assertCountEqual(
['input_word_ids', 'input_type_ids', 'input_mask', 'label_ids'], ['input_word_ids', 'input_type_ids', 'input_mask', label_field],
features.keys()) features.keys())
self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length)) self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length))
self.assertEqual(features['input_mask'].shape, (batch_size, seq_length)) self.assertEqual(features['input_mask'].shape, (batch_size, seq_length))
self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length)) self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length))
self.assertEqual(features['label_ids'].shape, (batch_size,)) self.assertEqual(features[label_field].shape, (batch_size,))
@parameterized.parameters(True, False) @parameterized.parameters(True, False)
def test_saved_model_preprocessing(self, use_tfds): def test_saved_model_preprocessing(self, use_tfds):
...@@ -265,13 +267,14 @@ class SentencePredictionTfdsDataLoaderTest(tf.test.TestCase, ...@@ -265,13 +267,14 @@ class SentencePredictionTfdsDataLoaderTest(tf.test.TestCase,
) )
dataset = loader.SentencePredictionTextDataLoader(data_config).load() dataset = loader.SentencePredictionTextDataLoader(data_config).load()
features = next(iter(dataset)) features = next(iter(dataset))
label_field = data_config.label_field
self.assertCountEqual( self.assertCountEqual(
['input_word_ids', 'input_type_ids', 'input_mask', 'label_ids'], ['input_word_ids', 'input_type_ids', 'input_mask', label_field],
features.keys()) features.keys())
self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length)) self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length))
self.assertEqual(features['input_mask'].shape, (batch_size, seq_length)) self.assertEqual(features['input_mask'].shape, (batch_size, seq_length))
self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length)) self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length))
self.assertEqual(features['label_ids'].shape, (batch_size,)) self.assertEqual(features[label_field].shape, (batch_size,))
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -22,7 +22,7 @@ modeling library: ...@@ -22,7 +22,7 @@ modeling library:
* [mobile_bert_encoder.py](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/mobile_bert_encoder.py) * [mobile_bert_encoder.py](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/mobile_bert_encoder.py)
contains `MobileBERTEncoder` implementation. contains `MobileBERTEncoder` implementation.
* [mobile_bert_layers.py](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/mobile_bert_layers.py) * [mobile_bert_layers.py](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/mobile_bert_layers.py)
contains `MobileBertEmbedding`, `MobileBertMaskedLM` and `MobileBertMaskedLM` contains `MobileBertEmbedding`, `MobileBertTransformer` and `MobileBertMaskedLM`
implementation. implementation.
## Pre-trained Models ## Pre-trained Models
......
...@@ -69,6 +69,10 @@ class SentencePredictionTask(base_task.Task): ...@@ -69,6 +69,10 @@ class SentencePredictionTask(base_task.Task):
if params.metric_type not in METRIC_TYPES: if params.metric_type not in METRIC_TYPES:
raise ValueError('Invalid metric_type: {}'.format(params.metric_type)) raise ValueError('Invalid metric_type: {}'.format(params.metric_type))
self.metric_type = params.metric_type self.metric_type = params.metric_type
if hasattr(params.train_data, 'label_field'):
self.label_field = params.train_data.label_field
else:
self.label_field = 'label_ids'
def build_model(self): def build_model(self):
if self.task_config.hub_module_url and self.task_config.init_checkpoint: if self.task_config.hub_module_url and self.task_config.init_checkpoint:
...@@ -95,7 +99,7 @@ class SentencePredictionTask(base_task.Task): ...@@ -95,7 +99,7 @@ class SentencePredictionTask(base_task.Task):
use_encoder_pooler=self.task_config.model.use_encoder_pooler) use_encoder_pooler=self.task_config.model.use_encoder_pooler)
def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor: def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor:
label_ids = labels['label_ids'] label_ids = labels[self.label_field]
if self.task_config.model.num_classes == 1: if self.task_config.model.num_classes == 1:
loss = tf.keras.losses.mean_squared_error(label_ids, model_outputs) loss = tf.keras.losses.mean_squared_error(label_ids, model_outputs)
else: else:
...@@ -121,7 +125,7 @@ class SentencePredictionTask(base_task.Task): ...@@ -121,7 +125,7 @@ class SentencePredictionTask(base_task.Task):
y = tf.zeros((1,), dtype=tf.float32) y = tf.zeros((1,), dtype=tf.float32)
else: else:
y = tf.zeros((1, 1), dtype=tf.int32) y = tf.zeros((1, 1), dtype=tf.int32)
x['label_ids'] = y x[self.label_field] = y
return x return x
dataset = tf.data.Dataset.range(1) dataset = tf.data.Dataset.range(1)
...@@ -144,10 +148,10 @@ class SentencePredictionTask(base_task.Task): ...@@ -144,10 +148,10 @@ class SentencePredictionTask(base_task.Task):
def process_metrics(self, metrics, labels, model_outputs): def process_metrics(self, metrics, labels, model_outputs):
for metric in metrics: for metric in metrics:
metric.update_state(labels['label_ids'], model_outputs) metric.update_state(labels[self.label_field], model_outputs)
def process_compiled_metrics(self, compiled_metrics, labels, model_outputs): def process_compiled_metrics(self, compiled_metrics, labels, model_outputs):
compiled_metrics.update_state(labels, model_outputs) compiled_metrics.update_state(labels[self.label_field], model_outputs)
def validation_step(self, inputs, model: tf.keras.Model, metrics=None): def validation_step(self, inputs, model: tf.keras.Model, metrics=None):
if self.metric_type == 'accuracy': if self.metric_type == 'accuracy':
...@@ -163,12 +167,12 @@ class SentencePredictionTask(base_task.Task): ...@@ -163,12 +167,12 @@ class SentencePredictionTask(base_task.Task):
'sentence_prediction': # Ensure one prediction along batch dimension. 'sentence_prediction': # Ensure one prediction along batch dimension.
tf.expand_dims(tf.math.argmax(outputs, axis=1), axis=1), tf.expand_dims(tf.math.argmax(outputs, axis=1), axis=1),
'labels': 'labels':
labels['label_ids'], labels[self.label_field],
}) })
if self.metric_type == 'pearson_spearman_corr': if self.metric_type == 'pearson_spearman_corr':
logs.update({ logs.update({
'sentence_prediction': outputs, 'sentence_prediction': outputs,
'labels': labels['label_ids'], 'labels': labels[self.label_field],
}) })
return logs return logs
......
This directory contains projects using TensorFlow Model Garden Modeling
libraries.
...@@ -46,7 +46,7 @@ from official.vision.beta.data import tfrecord_lib ...@@ -46,7 +46,7 @@ from official.vision.beta.data import tfrecord_lib
flags.DEFINE_boolean( flags.DEFINE_boolean(
'include_masks', False, 'Whether to include instance segmentations masks ' 'include_masks', False, 'Whether to include instance segmentations masks '
'(PNG encoded) in the result. default: False.') '(PNG encoded) in the result. default: False.')
flags.DEFINE_string('image_dir', '', 'Directory containing images.') flags.DEFINE_multi_string('image_dir', '', 'Directory containing images.')
flags.DEFINE_string( flags.DEFINE_string(
'image_info_file', '', 'File containing image information. ' 'image_info_file', '', 'File containing image information. '
'Tf Examples in the output files correspond to the image ' 'Tf Examples in the output files correspond to the image '
...@@ -159,7 +159,7 @@ def encode_caption_annotations(caption_annotations): ...@@ -159,7 +159,7 @@ def encode_caption_annotations(caption_annotations):
def create_tf_example(image, def create_tf_example(image,
image_dir, image_dirs,
bbox_annotations=None, bbox_annotations=None,
id_to_name_map=None, id_to_name_map=None,
caption_annotations=None, caption_annotations=None,
...@@ -169,7 +169,7 @@ def create_tf_example(image, ...@@ -169,7 +169,7 @@ def create_tf_example(image,
Args: Args:
image: dict with keys: [u'license', u'file_name', u'coco_url', u'height', image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
u'width', u'date_captured', u'flickr_url', u'id'] u'width', u'date_captured', u'flickr_url', u'id']
image_dir: directory containing the image files. image_dirs: list of directories containing the image files.
bbox_annotations: bbox_annotations:
list of dicts with keys: [u'segmentation', u'area', u'iscrowd', list of dicts with keys: [u'segmentation', u'area', u'iscrowd',
u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box
...@@ -190,14 +190,31 @@ def create_tf_example(image, ...@@ -190,14 +190,31 @@ def create_tf_example(image,
num_annotations_skipped: Number of (invalid) annotations that were ignored. num_annotations_skipped: Number of (invalid) annotations that were ignored.
Raises: Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG ValueError: if the image pointed to by data['filename'] is not a valid JPEG,
does not exist, or is not unique across image directories.
""" """
image_height = image['height'] image_height = image['height']
image_width = image['width'] image_width = image['width']
filename = image['file_name'] filename = image['file_name']
image_id = image['id'] image_id = image['id']
full_path = os.path.join(image_dir, filename) if len(image_dirs) > 1:
full_paths = [os.path.join(image_dir, filename) for image_dir in image_dirs]
full_existing_paths = [p for p in full_paths if tf.io.gfile.exists(p)]
if not full_existing_paths:
raise ValueError(
'{} does not exist across image directories.'.format(filename))
if len(full_existing_paths) > 1:
raise ValueError(
'{} is not unique across image directories'.format(filename))
full_path, = full_existing_paths
# If there is only one image directory, it's not worth checking for existence,
# since trying to open the file will raise an informative error message if it
# does not exist.
else:
image_dir, = image_dirs
full_path = os.path.join(image_dir, filename)
with tf.io.gfile.GFile(full_path, 'rb') as fid: with tf.io.gfile.GFile(full_path, 'rb') as fid:
encoded_jpg = fid.read() encoded_jpg = fid.read()
...@@ -276,7 +293,7 @@ def _load_images_info(images_info_file): ...@@ -276,7 +293,7 @@ def _load_images_info(images_info_file):
return info_dict['images'] return info_dict['images']
def generate_annotations(images, image_dir, def generate_annotations(images, image_dirs,
img_to_obj_annotation=None, img_to_obj_annotation=None,
img_to_caption_annotation=None, id_to_name_map=None, img_to_caption_annotation=None, id_to_name_map=None,
include_masks=False): include_masks=False):
...@@ -289,12 +306,12 @@ def generate_annotations(images, image_dir, ...@@ -289,12 +306,12 @@ def generate_annotations(images, image_dir,
caption_annotaion = (img_to_caption_annotation.get(image['id'], None) if caption_annotaion = (img_to_caption_annotation.get(image['id'], None) if
img_to_caption_annotation else None) img_to_caption_annotation else None)
yield (image, image_dir, object_annotation, id_to_name_map, yield (image, image_dirs, object_annotation, id_to_name_map,
caption_annotaion, include_masks) caption_annotaion, include_masks)
def _create_tf_record_from_coco_annotations(images_info_file, def _create_tf_record_from_coco_annotations(images_info_file,
image_dir, image_dirs,
output_path, output_path,
num_shards, num_shards,
object_annotations_file=None, object_annotations_file=None,
...@@ -309,7 +326,7 @@ def _create_tf_record_from_coco_annotations(images_info_file, ...@@ -309,7 +326,7 @@ def _create_tf_record_from_coco_annotations(images_info_file,
files Eg. 'image_info_test-dev2017.json', files Eg. 'image_info_test-dev2017.json',
'instance_annotations_train2017.json', 'instance_annotations_train2017.json',
'caption_annotations_train2017.json', etc. 'caption_annotations_train2017.json', etc.
image_dir: Directory containing the image files. image_dirs: List of directories containing the image files.
output_path: Path to output tf.Record file. output_path: Path to output tf.Record file.
num_shards: Number of output files to create. num_shards: Number of output files to create.
object_annotations_file: JSON file containing bounding box annotations. object_annotations_file: JSON file containing bounding box annotations.
...@@ -333,7 +350,7 @@ def _create_tf_record_from_coco_annotations(images_info_file, ...@@ -333,7 +350,7 @@ def _create_tf_record_from_coco_annotations(images_info_file,
_load_caption_annotations(caption_annotations_file)) _load_caption_annotations(caption_annotations_file))
coco_annotations_iter = generate_annotations( coco_annotations_iter = generate_annotations(
images, image_dir, img_to_obj_annotation, img_to_caption_annotation, images, image_dirs, img_to_obj_annotation, img_to_caption_annotation,
id_to_name_map=id_to_name_map, include_masks=include_masks) id_to_name_map=id_to_name_map, include_masks=include_masks)
num_skipped = tfrecord_lib.write_tf_record_dataset( num_skipped = tfrecord_lib.write_tf_record_dataset(
......
#!/bin/bash
#
# Processes the COCO few-shot benchmark into TFRecord files. Requires `wget`.
#
# Usage: process_coco_few_shot.sh [-o <output_dir>]
#
# Pipeline:
#   1. Download the official few-shot COCO split JSON files from
#      dl.yf.io/fs-det/datasets/cocosplit (Wang et al., 2020).
#   2. Fuse the per-category files with process_coco_few_shot_json_files.py.
#   3. Convert each fused {10,30}-shot x seed{0..9} split, plus the standard
#      5k validation split, into TFRecord shards via create_coco_tf_record.py.

# Scratch space for downloaded and fused JSON files; removed at the end.
tmp_dir=$(mktemp -d -t coco-XXXXXXXXXX)
output_dir="/tmp/coco_few_shot"

# Parse options: -o overrides the default output directory.
while getopts "o:" o; do
  case "${o}" in
    o) output_dir=${OPTARG} ;;
    *) echo "Usage: ${0} [-o <output_dir>]" 1>&2; exit 1 ;;
  esac
done

cocosplit_url="dl.yf.io/fs-det/datasets/cocosplit"
# Mirror only the JSON files we need: the 5k validation split and the
# 10/30-shot per-category files (-A filters by filename pattern).
wget --recursive --no-parent -q --show-progress --progress=bar:force:noscroll \
  -P "${tmp_dir}" -A "5k.json,*10shot*.json,*30shot*.json" \
  "http://${cocosplit_url}/"
# Flatten the host/path directory structure wget created.
mv "${tmp_dir}/${cocosplit_url}/"* "${tmp_dir}"
rm -rf "${tmp_dir}/${cocosplit_url}/"

# Fuse the per-category JSON files into one {shots}shot_seed{seed}.json each.
python process_coco_few_shot_json_files.py \
  --logtostderr --workdir="${tmp_dir}"

# Convert each few-shot split to TFRecord. --image_dir is repeated because
# file names must be resolved across both the train2014 and val2014 images.
for seed in {0..9}; do
  for shots in 10 30; do
    python create_coco_tf_record.py \
      --logtostderr \
      --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \
      --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \
      --image_info_file="${tmp_dir}/${shots}shot_seed${seed}.json" \
      --object_annotations_file="${tmp_dir}/${shots}shot_seed${seed}.json" \
      --caption_annotations_file="" \
      --output_file_prefix="${output_dir}/${shots}shot_seed${seed}" \
      --num_shards=4
  done
done

# Convert the standard 5k validation split as well.
python create_coco_tf_record.py \
  --logtostderr \
  --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \
  --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \
  --image_info_file="${tmp_dir}/datasplit/5k.json" \
  --object_annotations_file="${tmp_dir}/datasplit/5k.json" \
  --caption_annotations_file="" \
  --output_file_prefix="${output_dir}/5k" \
  --num_shards=10

# Clean up the scratch directory.
rm -rf "${tmp_dir}"
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Processes the JSON files for COCO few-shot.
We assume that `workdir` mirrors the contents of
http://dl.yf.io/fs-det/datasets/cocosplit/, which contains the official JSON
files for the few-shot COCO evaluation procedure that Wang et al. (2020)'s
"Frustratingly Simple Few-Shot Object Detection" paper uses.
"""
import collections
import itertools
import json
import logging
import os
from absl import app
from absl import flags
import tensorflow as tf
logger = tf.get_logger()
logger.setLevel(logging.INFO)
flags.DEFINE_string('workdir', None, 'Working directory.')
FLAGS = flags.FLAGS
# The 80 COCO object categories; each has its own per-category JSON file.
CATEGORIES = ['airplane', 'apple', 'backpack', 'banana', 'baseball bat',
              'baseball glove', 'bear', 'bed', 'bench', 'bicycle', 'bird',
              'boat', 'book', 'bottle', 'bowl', 'broccoli', 'bus', 'cake',
              'car', 'carrot', 'cat', 'cell phone', 'chair', 'clock', 'couch',
              'cow', 'cup', 'dining table', 'dog', 'donut', 'elephant',
              'fire hydrant', 'fork', 'frisbee', 'giraffe', 'hair drier',
              'handbag', 'horse', 'hot dog', 'keyboard', 'kite', 'knife',
              'laptop', 'microwave', 'motorcycle', 'mouse', 'orange', 'oven',
              'parking meter', 'person', 'pizza', 'potted plant',
              'refrigerator', 'remote', 'sandwich', 'scissors', 'sheep',
              'sink', 'skateboard', 'skis', 'snowboard', 'spoon', 'sports ball',
              'stop sign', 'suitcase', 'surfboard', 'teddy bear',
              'tennis racket', 'tie', 'toaster', 'toilet', 'toothbrush',
              'traffic light', 'train', 'truck', 'tv', 'umbrella', 'vase',
              'wine glass', 'zebra']
# Few-shot evaluation settings: ten random seeds, 10- and 30-shot regimes.
SEEDS = list(range(10))
SHOTS = [10, 30]

# Maps a (seed, shots) pair to the list of per-category JSON file suffixes
# that together make up that evaluation setting.
#
# http://dl.yf.io/fs-det/datasets/cocosplit/ is organized like so:
#
#   datasplit/
#     trainvalno5k.json
#     5k.json
#   full_box_{1,2,3,5,10,30}shot_{category}_trainval.json
#   seed{1-9}/
#     full_box_{1,2,3,5,10,30}shot_{category}_trainval.json
#
# The JSON files for seed 0 are located in the root directory rather than in
# a `seed?/` subdirectory, hence the conditional prefix below.
FILE_SUFFIXES = collections.defaultdict(list)
for _seed, _shots in itertools.product(SEEDS, SHOTS):
  _prefix = '' if _seed == 0 else 'seed{}/'.format(_seed)
  FILE_SUFFIXES[(_seed, _shots)] = [
      '{}full_box_{}shot_{}_trainval.json'.format(_prefix, _shots, _category)
      for _category in CATEGORIES
  ]
def main(unused_argv):
  """Fuses the per-category few-shot JSON files into one file per setting.

  For every (seed, shots) pair, reads all 80 per-category COCO-format JSON
  files from FLAGS.workdir, verifies they share identical metadata, merges
  their images (deduplicated by id) and annotations, and writes the result
  to `{shots}shot_seed{seed}.json` in the same directory.

  Raises:
    RuntimeError: if the JSON files for a given setting disagree on their
      info, licenses, or categories metadata.
  """
  workdir = FLAGS.workdir
  for seed, shots in itertools.product(SEEDS, SHOTS):
    # Retrieve all examples for a given seed and shots setting.
    file_paths = [os.path.join(workdir, suffix)
                  for suffix in FILE_SUFFIXES[(seed, shots)]]
    json_dicts = []
    for file_path in file_paths:
      with tf.io.gfile.GFile(file_path, 'r') as f:
        json_dicts.append(json.load(f))

    # Make sure that all JSON files for a given seed and shots setting have the
    # same metadata. We count on this to fuse them later on.
    metadata_dicts = [{'info': d['info'], 'licenses': d['licenses'],
                       'categories': d['categories']} for d in json_dicts]
    if not all(d == metadata_dicts[0] for d in metadata_dicts[1:]):
      raise RuntimeError(
          'JSON files for {} shots (seed {}) '.format(shots, seed) +
          'have different info, licenses, or categories fields')

    # Retrieve images across all JSON files. chain.from_iterable avoids the
    # quadratic re-allocation of sum(..., []) list concatenation.
    images = list(itertools.chain.from_iterable(
        d['images'] for d in json_dicts))
    # Remove duplicate image entries (dict keeps one entry per image id).
    images = list({image['id']: image for image in images}.values())

    output_dict = {
        'info': json_dicts[0]['info'],
        'licenses': json_dicts[0]['licenses'],
        'categories': json_dicts[0]['categories'],
        'images': images,
        'annotations': list(itertools.chain.from_iterable(
            d['annotations'] for d in json_dicts)),
    }

    output_path = os.path.join(workdir,
                               '{}shot_seed{}.json'.format(shots, seed))
    with tf.io.gfile.GFile(output_path, 'w') as f:
      json.dump(output_dict, f)
    logger.info('Processed %d shots (seed %d) and saved to %s',
                shots, seed, output_path)
if __name__ == '__main__':
  # `workdir` has no sensible default; fail fast if it is not provided.
  flags.mark_flag_as_required('workdir')
  app.run(main)
...@@ -525,7 +525,6 @@ class Movinet(tf.keras.Model): ...@@ -525,7 +525,6 @@ class Movinet(tf.keras.Model):
Returns: Returns:
A dict mapping state names to state shapes. A dict mapping state names to state shapes.
""" """
def divide_resolution(shape, num_downsamples): def divide_resolution(shape, num_downsamples):
"""Downsamples the dimension to calculate strided convolution shape.""" """Downsamples the dimension to calculate strided convolution shape."""
if shape is None: if shape is None:
...@@ -564,6 +563,12 @@ class Movinet(tf.keras.Model): ...@@ -564,6 +563,12 @@ class Movinet(tf.keras.Model):
for layer_idx, layer in enumerate(params): for layer_idx, layer in enumerate(params):
expand_filters, kernel_size, strides = layer expand_filters, kernel_size, strides = layer
# If we use a 2D kernel, we apply spatial downsampling
# before the buffer.
if (tuple(strides[1:3]) != (1, 1) and
self._conv_type in ['2plus1d', '3d_2plus1d']):
num_downsamples += 1
if kernel_size[0] > 1: if kernel_size[0] > 1:
states[f'state/b{block_idx}/l{layer_idx}/stream_buffer'] = ( states[f'state/b{block_idx}/l{layer_idx}/stream_buffer'] = (
input_shape[0], input_shape[0],
...@@ -585,7 +590,11 @@ class Movinet(tf.keras.Model): ...@@ -585,7 +590,11 @@ class Movinet(tf.keras.Model):
if strides[1] != strides[2]: if strides[1] != strides[2]:
raise ValueError('Strides must match in the spatial dimensions, ' raise ValueError('Strides must match in the spatial dimensions, '
'got {}'.format(strides)) 'got {}'.format(strides))
if strides[1] != 1 or strides[2] != 1:
# If we use a 3D kernel, we apply spatial downsampling
# after the buffer.
if (tuple(strides[1:3]) != (1, 1) and
self._conv_type not in ['2plus1d', '3d_2plus1d']):
num_downsamples += 1 num_downsamples += 1
elif isinstance(block, HeadSpec): elif isinstance(block, HeadSpec):
states['state/head/pool_buffer'] = ( states['state/head/pool_buffer'] = (
......
...@@ -633,9 +633,28 @@ class StreamConvBlock(ConvBlock): ...@@ -633,9 +633,28 @@ class StreamConvBlock(ConvBlock):
states = dict(states) if states is not None else {} states = dict(states) if states is not None else {}
x = inputs x = inputs
if self._stream_buffer is not None:
# If we have no separate temporal conv, use the buffer before the 3D conv.
if self._conv_temporal is None and self._stream_buffer is not None:
x, states = self._stream_buffer(x, states=states) x, states = self._stream_buffer(x, states=states)
x = super(StreamConvBlock, self).call(x)
x = self._conv(x)
if self._batch_norm is not None:
x = self._batch_norm(x)
if self._activation_layer is not None:
x = self._activation_layer(x)
if self._conv_temporal is not None:
if self._stream_buffer is not None:
# If we have a separate temporal conv, use the buffer before the
# 1D conv instead (otherwise, we may waste computation on the 2D conv).
x, states = self._stream_buffer(x, states=states)
x = self._conv_temporal(x)
if self._batch_norm_temporal is not None:
x = self._batch_norm_temporal(x)
if self._activation_layer is not None:
x = self._activation_layer(x)
return x, states return x, states
......
...@@ -115,15 +115,31 @@ class MovinetClassifier(tf.keras.Model): ...@@ -115,15 +115,31 @@ class MovinetClassifier(tf.keras.Model):
inputs = {**states, 'image': image} inputs = {**states, 'image': image}
if backbone.use_external_states: if backbone.use_external_states:
before_states = set(states) before_states = states
endpoints, states = backbone(inputs) endpoints, states = backbone(inputs)
after_states = set(states) after_states = states
new_states = after_states - before_states new_states = set(after_states) - set(before_states)
if new_states: if new_states:
raise AttributeError('Expected input and output states to be the same. ' raise ValueError(
'Got extra states {}, expected {}'.format( 'Expected input and output states to be the same. Got extra states '
new_states, before_states)) '{}, expected {}'.format(new_states, set(before_states)))
mismatched_shapes = {}
for name in after_states:
before_shape = before_states[name].shape
after_shape = after_states[name].shape
if len(before_shape) != len(after_shape):
mismatched_shapes[name] = (before_shape, after_shape)
continue
for before, after in zip(before_shape, after_shape):
if before is not None and after is not None and before != after:
mismatched_shapes[name] = (before_shape, after_shape)
break
if mismatched_shapes:
raise ValueError(
'Got mismatched input and output state shapes: {}'.format(
mismatched_shapes))
else: else:
endpoints, states = backbone(inputs) endpoints, states = backbone(inputs)
......
DISCLAIMER: this YOLO implementation is still under development. No support will
be provided during the development phase.
# YOLO Object Detectors, You Only Look Once # YOLO Object Detectors, You Only Look Once
[![Paper](http://img.shields.io/badge/Paper-arXiv.1804.02767-B3181B?logo=arXiv)](https://arxiv.org/abs/1804.02767) [![Paper](http://img.shields.io/badge/Paper-arXiv.1804.02767-B3181B?logo=arXiv)](https://arxiv.org/abs/1804.02767)
...@@ -74,3 +77,5 @@ head could be connected to a new, more powerful backbone if a person chose to. ...@@ -74,3 +77,5 @@ head could be connected to a new, more powerful backbone if a person chose to.
[![TensorFlow 2.2](https://img.shields.io/badge/TensorFlow-2.2-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v2.2.0) [![TensorFlow 2.2](https://img.shields.io/badge/TensorFlow-2.2-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v2.2.0)
[![Python 3.8](https://img.shields.io/badge/Python-3.8-3776AB)](https://www.python.org/downloads/release/python-380/) [![Python 3.8](https://img.shields.io/badge/Python-3.8-3776AB)](https://www.python.org/downloads/release/python-380/)
...@@ -24,11 +24,14 @@ from official.vision.beta.configs import backbones ...@@ -24,11 +24,14 @@ from official.vision.beta.configs import backbones
@dataclasses.dataclass @dataclasses.dataclass
class DarkNet(hyperparams.Config): class Darknet(hyperparams.Config):
"""DarkNet config.""" """Darknet config."""
model_id: str = "darknet53" model_id: str = 'darknet53'
width_scale: float = 1.0
depth_scale: float = 1.0
dilate: bool = False
@dataclasses.dataclass @dataclasses.dataclass
class Backbone(backbones.Backbone): class Backbone(backbones.Backbone):
darknet: DarkNet = DarkNet() darknet: Darknet = Darknet()
...@@ -32,7 +32,7 @@ class ImageClassificationModel(hyperparams.Config): ...@@ -32,7 +32,7 @@ class ImageClassificationModel(hyperparams.Config):
num_classes: int = 0 num_classes: int = 0
input_size: List[int] = dataclasses.field(default_factory=list) input_size: List[int] = dataclasses.field(default_factory=list)
backbone: backbones.Backbone = backbones.Backbone( backbone: backbones.Backbone = backbones.Backbone(
type='darknet', resnet=backbones.DarkNet()) type='darknet', darknet=backbones.Darknet())
dropout_rate: float = 0.0 dropout_rate: float = 0.0
norm_activation: common.NormActivation = common.NormActivation() norm_activation: common.NormActivation = common.NormActivation()
# Adds a BatchNormalization layer pre-GlobalAveragePooling in classification # Adds a BatchNormalization layer pre-GlobalAveragePooling in classification
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
# Lint as: python3 # Lint as: python3
"""Tests for resnet.""" """Tests for yolo."""
from absl.testing import parameterized from absl.testing import parameterized
import numpy as np import numpy as np
...@@ -24,35 +24,48 @@ from tensorflow.python.distribute import strategy_combinations ...@@ -24,35 +24,48 @@ from tensorflow.python.distribute import strategy_combinations
from official.vision.beta.projects.yolo.modeling.backbones import darknet from official.vision.beta.projects.yolo.modeling.backbones import darknet
class DarkNetTest(parameterized.TestCase, tf.test.TestCase): class DarknetTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters( @parameterized.parameters(
(224, "darknet53", 2, 1), (224, 'darknet53', 2, 1, True),
(224, "darknettiny", 1, 2), (224, 'darknettiny', 1, 2, False),
(224, "cspdarknettiny", 1, 1), (224, 'cspdarknettiny', 1, 1, False),
(224, "cspdarknet53", 2, 1), (224, 'cspdarknet53', 2, 1, True),
) )
def test_network_creation(self, input_size, model_id, def test_network_creation(self, input_size, model_id, endpoint_filter_scale,
endpoint_filter_scale, scale_final): scale_final, dilate):
"""Test creation of ResNet family models.""" """Test creation of ResNet family models."""
tf.keras.backend.set_image_data_format("channels_last") tf.keras.backend.set_image_data_format('channels_last')
network = darknet.Darknet(model_id=model_id, min_level=3, max_level=5) network = darknet.Darknet(
model_id=model_id, min_level=3, max_level=5, dilate=dilate)
self.assertEqual(network.model_id, model_id) self.assertEqual(network.model_id, model_id)
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1) inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
endpoints = network(inputs) endpoints = network(inputs)
self.assertAllEqual( if dilate:
[1, input_size / 2**3, input_size / 2**3, 128 * endpoint_filter_scale], self.assertAllEqual([
endpoints["3"].shape.as_list()) 1, input_size / 2**3, input_size / 2**3, 128 * endpoint_filter_scale
self.assertAllEqual( ], endpoints['3'].shape.as_list())
[1, input_size / 2**4, input_size / 2**4, 256 * endpoint_filter_scale], self.assertAllEqual([
endpoints["4"].shape.as_list()) 1, input_size / 2**3, input_size / 2**3, 256 * endpoint_filter_scale
self.assertAllEqual([ ], endpoints['4'].shape.as_list())
1, input_size / 2**5, input_size / 2**5, self.assertAllEqual([
512 * endpoint_filter_scale * scale_final 1, input_size / 2**3, input_size / 2**3,
], endpoints["5"].shape.as_list()) 512 * endpoint_filter_scale * scale_final
], endpoints['5'].shape.as_list())
else:
self.assertAllEqual([
1, input_size / 2**3, input_size / 2**3, 128 * endpoint_filter_scale
], endpoints['3'].shape.as_list())
self.assertAllEqual([
1, input_size / 2**4, input_size / 2**4, 256 * endpoint_filter_scale
], endpoints['4'].shape.as_list())
self.assertAllEqual([
1, input_size / 2**5, input_size / 2**5,
512 * endpoint_filter_scale * scale_final
], endpoints['5'].shape.as_list())
@combinations.generate( @combinations.generate(
combinations.combine( combinations.combine(
...@@ -66,20 +79,20 @@ class DarkNetTest(parameterized.TestCase, tf.test.TestCase): ...@@ -66,20 +79,20 @@ class DarkNetTest(parameterized.TestCase, tf.test.TestCase):
"""Test for sync bn on TPU and GPU devices.""" """Test for sync bn on TPU and GPU devices."""
inputs = np.random.rand(1, 224, 224, 3) inputs = np.random.rand(1, 224, 224, 3)
tf.keras.backend.set_image_data_format("channels_last") tf.keras.backend.set_image_data_format('channels_last')
with strategy.scope(): with strategy.scope():
network = darknet.Darknet(model_id="darknet53", min_size=3, max_size=5) network = darknet.Darknet(model_id='darknet53', min_size=3, max_size=5)
_ = network(inputs) _ = network(inputs)
@parameterized.parameters(1, 3, 4) @parameterized.parameters(1, 3, 4)
def test_input_specs(self, input_dim): def test_input_specs(self, input_dim):
"""Test different input feature dimensions.""" """Test different input feature dimensions."""
tf.keras.backend.set_image_data_format("channels_last") tf.keras.backend.set_image_data_format('channels_last')
input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim]) input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim])
network = darknet.Darknet( network = darknet.Darknet(
model_id="darknet53", min_level=3, max_level=5, input_specs=input_specs) model_id='darknet53', min_level=3, max_level=5, input_specs=input_specs)
inputs = tf.keras.Input(shape=(224, 224, input_dim), batch_size=1) inputs = tf.keras.Input(shape=(224, 224, input_dim), batch_size=1)
_ = network(inputs) _ = network(inputs)
...@@ -87,14 +100,14 @@ class DarkNetTest(parameterized.TestCase, tf.test.TestCase): ...@@ -87,14 +100,14 @@ class DarkNetTest(parameterized.TestCase, tf.test.TestCase):
def test_serialize_deserialize(self): def test_serialize_deserialize(self):
# Create a network object that sets all of its config options. # Create a network object that sets all of its config options.
kwargs = dict( kwargs = dict(
model_id="darknet53", model_id='darknet53',
min_level=3, min_level=3,
max_level=5, max_level=5,
use_sync_bn=False, use_sync_bn=False,
activation="relu", activation='relu',
norm_momentum=0.99, norm_momentum=0.99,
norm_epsilon=0.001, norm_epsilon=0.001,
kernel_initializer="VarianceScaling", kernel_initializer='VarianceScaling',
kernel_regularizer=None, kernel_regularizer=None,
bias_regularizer=None, bias_regularizer=None,
) )
...@@ -113,5 +126,5 @@ class DarkNetTest(parameterized.TestCase, tf.test.TestCase): ...@@ -113,5 +126,5 @@ class DarkNetTest(parameterized.TestCase, tf.test.TestCase):
self.assertAllEqual(network.get_config(), new_network.get_config()) self.assertAllEqual(network.get_config(), new_network.get_config())
if __name__ == "__main__": if __name__ == '__main__':
tf.test.main() tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Feature Pyramid Network and Path Aggregation variants used in YOLO."""
import tensorflow as tf
from official.vision.beta.projects.yolo.modeling.layers import nn_blocks
@tf.keras.utils.register_keras_serializable(package='yolo')
class _IdentityRoute(tf.keras.layers.Layer):
  """Pass-through layer returning `(None, inputs)`.

  Used where the decoder expects a `(route, output)` pair from a
  preprocessor but no actual processing is required; the `None` stands in
  for the absent route tensor.
  """

  def call(self, inputs):
    # No transformation: emit the input unchanged, with no route tensor.
    return (None, inputs)
@tf.keras.utils.register_keras_serializable(package='yolo')
class YoloFPN(tf.keras.layers.Layer):
  """YOLO Feature pyramid network.

  Consumes a dict of backbone endpoints keyed by string level (e.g.
  `{'3': ..., '4': ..., '5': ...}`) and produces a dict of processed
  features over the same keys. Levels are traversed top-down (highest
  level first); each level's output is upsampled and merged into the
  next-lower level via `PathAggregationBlock`.
  """

  def __init__(self,
               fpn_depth=4,
               use_spatial_attention=False,
               csp_stack=False,
               activation='leaky',
               fpn_filter_scale=1,
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               kernel_initializer='glorot_uniform',
               kernel_regularizer=None,
               bias_regularizer=None,
               **kwargs):
    """Yolo FPN initialization function (Yolo V4).

    Args:
      fpn_depth: `int`, number of layers to use in each FPN path
        if you choose to use an FPN.
      use_spatial_attention: `bool`, use the spatial attention module.
      csp_stack: `bool`, CSPize the FPN.
      activation: `str`, the activation function to use typically leaky or
        mish.
      fpn_filter_scale: `int`, scaling factor for the FPN filters.
      use_sync_bn: if True, use synchronized batch normalization.
      norm_momentum: `float`, normalization momentum for the moving average.
      norm_epsilon: `float`, small float added to variance to avoid dividing
        by zero.
      kernel_initializer: kernel_initializer for convolutional layers.
      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
      **kwargs: keyword arguments to be passed.
    """
    super().__init__(**kwargs)
    self._fpn_depth = fpn_depth

    self._activation = activation
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    # NOTE(review): `_use_spatial_attention` is stored but never passed to any
    # sub-layer in `build` below (unlike YoloPAN) — confirm whether the FPN is
    # intentionally built without the spatial attention module.
    self._use_spatial_attention = use_spatial_attention
    self._filter_scale = fpn_filter_scale
    self._csp_stack = csp_stack

    # Shared keyword arguments forwarded to every nn_blocks sub-layer.
    self._base_config = dict(
        activation=self._activation,
        use_sync_bn=self._use_sync_bn,
        kernel_regularizer=self._kernel_regularizer,
        kernel_initializer=self._kernel_initializer,
        bias_regularizer=self._bias_regularizer,
        norm_epsilon=self._norm_epsilon,
        norm_momentum=self._norm_momentum)

  def get_raw_depths(self, minimum_depth, inputs):
    """Calculates the unscaled depths of the FPN branches.

    Args:
      minimum_depth (int): depth of the smallest branch of the FPN.
        (Unused here; kept for signature parity with `YoloPAN`.)
      inputs (dict): dictionary of the shape of input args as a dictionary of
        lists.

    Returns:
      The unscaled depths of the FPN branches, ordered from the highest
      level down to `_min_level` (matching `build`'s iteration order).
    """
    depths = []
    # Take each endpoint's channel count scaled down by the filter scale;
    # reverse so the list lines up with the top-down loop in `build`.
    for i in range(self._min_level, self._max_level + 1):
      depths.append(inputs[str(i)][-1] / self._filter_scale)
    return list(reversed(depths))

  def build(self, inputs):
    """Use config dictionary to generate all important attributes for head.

    Args:
      inputs: dictionary of the shape of input args as a dictionary of lists.
    """
    # Feature levels are derived from the input dict keys, e.g. '3'..'5'.
    keys = [int(key) for key in inputs.keys()]
    self._min_level = min(keys)
    self._max_level = max(keys)
    self._min_depth = inputs[str(self._min_level)][-1]
    self._depths = self.get_raw_depths(self._min_depth, inputs)

    # directly connect to an input path and process it
    self.preprocessors = dict()
    # resample an input and merge it with the output of another path
    # in order to aggregate backbone outputs
    self.resamples = dict()

    # Set of convolution layers and upsample layers that are used to
    # prepare the FPN processors for output. Iteration is top-down
    # (max_level first), matching the order of `self._depths`.
    for level, depth in zip(
        reversed(range(self._min_level, self._max_level + 1)), self._depths):

      if level == self._min_level:
        # Lowest level: merge the upsampled feature in, then pass through
        # unchanged (identity preprocessor — no route processing).
        self.resamples[str(level)] = nn_blocks.PathAggregationBlock(
            filters=depth // 2,
            inverted=True,
            upsample=True,
            drop_final=self._csp_stack == 0,
            upsample_size=2,
            **self._base_config)
        self.preprocessors[str(level)] = _IdentityRoute()
      elif level != self._max_level:
        # Intermediate levels: merge the upsampled feature, then route-process.
        self.resamples[str(level)] = nn_blocks.PathAggregationBlock(
            filters=depth // 2,
            inverted=True,
            upsample=True,
            drop_final=False,
            upsample_size=2,
            **self._base_config)
        self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess(
            filters=depth,
            repetitions=self._fpn_depth - int(level == self._min_level),
            block_invert=True,
            insert_spp=False,
            csp_stack=self._csp_stack,
            **self._base_config)
      else:
        # Top level: directly processes the deepest backbone endpoint; SPP is
        # inserted only here.
        self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess(
            filters=depth,
            repetitions=self._fpn_depth + 1 * int(self._csp_stack == 0),
            insert_spp=True,
            block_invert=False,
            csp_stack=self._csp_stack,
            **self._base_config)

  def call(self, inputs):
    """Runs the top-down FPN pass over backbone endpoints.

    Args:
      inputs: dict of feature tensors keyed by string level.

    Returns:
      Dict of processed feature tensors keyed by string level.
    """
    outputs = dict()
    layer_in = inputs[str(self._max_level)]
    for level in reversed(range(self._min_level, self._max_level + 1)):
      _, x = self.preprocessors[str(level)](layer_in)
      outputs[str(level)] = x
      if level > self._min_level:
        # Merge this level's output into the next-lower backbone endpoint to
        # form the input of the next iteration.
        x_next = inputs[str(level - 1)]
        _, layer_in = self.resamples[str(level - 1)]([x_next, x])
    return outputs
@tf.keras.utils.register_keras_serializable(package='yolo')
class YoloPAN(tf.keras.layers.Layer):
  """YOLO Path Aggregation Network.

  Aggregates features either bottom-up (when fed from an FPN, YOLO v4
  style) or top-down (when fed directly from a backbone, YOLO v3 style).
  Input and output are dicts of feature tensors keyed by string level.
  """

  def __init__(self,
               path_process_len=6,
               max_level_process_len=None,
               embed_spp=False,
               use_spatial_attention=False,
               csp_stack=False,
               activation='leaky',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               kernel_initializer='glorot_uniform',
               kernel_regularizer=None,
               bias_regularizer=None,
               fpn_input=True,
               fpn_filter_scale=1.0,
               **kwargs):
    """Yolo Path Aggregation Network initialization function (Yolo V3 and V4).

    Args:
      path_process_len: `int`, number of layers to use in each Decoder path.
      max_level_process_len: `int`, number of layers to use in the largest
        processing path, or the backbones largest output if it is different.
      embed_spp: `bool`, use the SPP found in the YoloV3 and V4 model.
      use_spatial_attention: `bool`, use the spatial attention module.
      csp_stack: `bool`, CSPize the FPN.
      activation: `str`, the activation function to use typically leaky or
        mish.
      use_sync_bn: if True, use synchronized batch normalization.
      norm_momentum: `float`, normalization momentum for the moving average.
      norm_epsilon: `float`, small float added to variance to avoid dividing
        by zero.
      kernel_initializer: kernel_initializer for convolutional layers.
      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
      fpn_input: `bool`, for whether the input into this function is an FPN or
        a backbone.
      fpn_filter_scale: `int`, scaling factor for the FPN filters.
      **kwargs: keyword arguments to be passed.
    """
    super().__init__(**kwargs)

    self._path_process_len = path_process_len
    self._embed_spp = embed_spp
    self._use_spatial_attention = use_spatial_attention

    self._activation = activation
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._fpn_input = fpn_input
    self._max_level_process_len = max_level_process_len
    self._csp_stack = csp_stack
    self._fpn_filter_scale = fpn_filter_scale

    # The largest path defaults to the same depth as the other paths.
    if max_level_process_len is None:
      self._max_level_process_len = path_process_len

    # Shared keyword arguments forwarded to every nn_blocks sub-layer.
    self._base_config = dict(
        activation=self._activation,
        use_sync_bn=self._use_sync_bn,
        kernel_regularizer=self._kernel_regularizer,
        kernel_initializer=self._kernel_initializer,
        bias_regularizer=self._bias_regularizer,
        norm_epsilon=self._norm_epsilon,
        norm_momentum=self._norm_momentum)

  def build(self, inputs):
    """Use config dictionary to generate all important attributes for head.

    Args:
      inputs: dictionary of the shape of input args as a dictionary of lists.
    """

    # define the key order
    keys = [int(key) for key in inputs.keys()]
    self._min_level = min(keys)
    self._max_level = max(keys)
    self._min_depth = inputs[str(self._min_level)][-1]
    self._depths = self.get_raw_depths(self._min_depth, inputs)

    # directly connect to an input path and process it
    self.preprocessors = dict()
    # resample an input and merge it with the output of another path
    # in order to aggregate backbone outputs
    self.resamples = dict()

    # FPN will reverse the key process order for the backbone, so we need
    # adjust the order that objects are created and processed to adjust for
    # this. not using an FPN will directly connect the decoder to the backbone
    # therefore the object creation order needs to be done from the largest
    # to smallest level.
    if self._fpn_input:
      # process order {... 3, 4, 5}
      self._iterator = range(self._min_level, self._max_level + 1)
      self._check = lambda x: x < self._max_level
      self._key_shift = lambda x: x + 1
      self._input = self._min_level
      downsample = True
      upsample = False
    else:
      # process order {5, 4, 3, ...}
      self._iterator = list(
          reversed(range(self._min_level, self._max_level + 1)))
      self._check = lambda x: x > self._min_level
      self._key_shift = lambda x: x - 1
      self._input = self._max_level
      downsample = False
      upsample = True

    # CSP stacks double the processing filters and keep the resample filters;
    # the non-CSP path halves the resample filters instead.
    if self._csp_stack == 0:
      proc_filters = lambda x: x
      resample_filters = lambda x: x // 2
    else:
      proc_filters = lambda x: x * 2
      resample_filters = lambda x: x
    for level, depth in zip(self._iterator, self._depths):
      if level == self._input:
        # Entry level: no resample needed; optionally embeds SPP.
        self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess(
            filters=proc_filters(depth),
            repetitions=self._max_level_process_len,
            insert_spp=self._embed_spp,
            block_invert=False,
            insert_sam=self._use_spatial_attention,
            csp_stack=self._csp_stack,
            **self._base_config)
      else:
        # Downstream levels: resample (up- or downsample depending on input
        # direction) and merge before route processing.
        self.resamples[str(level)] = nn_blocks.PathAggregationBlock(
            filters=resample_filters(depth),
            upsample=upsample,
            downsample=downsample,
            inverted=False,
            drop_final=self._csp_stack == 0,
            **self._base_config)
        self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess(
            filters=proc_filters(depth),
            repetitions=self._path_process_len,
            insert_spp=False,
            insert_sam=self._use_spatial_attention,
            csp_stack=self._csp_stack,
            **self._base_config)

  def get_raw_depths(self, minimum_depth, inputs):
    """Calculates the unscaled depths of the FPN branches.

    Args:
      minimum_depth: `int` depth of the smallest branch of the FPN.
      inputs: `dict[str, tf.InputSpec]` of the shape of input args as a
        dictionary of lists.

    Returns:
      The unscaled depths of the FPN branches, ordered to match the
      processing direction chosen in `build` (forward when `fpn_input`,
      reversed otherwise).
    """
    depths = []
    if len(inputs.keys()) > 3 or self._fpn_filter_scale > 1:
      # More than 3 levels (or a scaled FPN): derive depths from the actual
      # endpoint channel counts.
      for i in range(self._min_level, self._max_level + 1):
        depths.append(inputs[str(i)][-1] * 2)
    else:
      # Classic 3-level YOLO: depths double at each successive level starting
      # from the smallest branch depth.
      for _ in range(self._min_level, self._max_level + 1):
        depths.append(minimum_depth)
        minimum_depth *= 2
    if self._fpn_input:
      return depths
    return list(reversed(depths))

  def call(self, inputs):
    """Runs the aggregation pass in the direction fixed by `build`.

    Args:
      inputs: dict of feature tensors keyed by string level.

    Returns:
      Dict of processed feature tensors keyed by string level.
    """
    outputs = dict()
    layer_in = inputs[str(self._input)]

    for level in self._iterator:
      x_route, x = self.preprocessors[str(level)](layer_in)
      outputs[str(level)] = x
      if self._check(level):
        # Merge this level's route tensor with the adjacent level's input to
        # produce the next iteration's input.
        x_next = inputs[str(self._key_shift(level))]
        _, layer_in = self.resamples[str(
            self._key_shift(level))]([x_route, x_next])
    return outputs
@tf.keras.utils.register_keras_serializable(package='yolo')
class YoloDecoder(tf.keras.Model):
  """Darknet Backbone Decoder.

  Functional-API model that conditionally stacks a `YoloFPN` (when
  `use_fpn` is True) in front of a `YoloPAN` over a dict of backbone
  endpoint input specs.
  """

  def __init__(self,
               input_specs,
               use_fpn=False,
               use_spatial_attention=False,
               csp_stack=False,
               fpn_depth=4,
               fpn_filter_scale=1,
               path_process_len=6,
               max_level_process_len=None,
               embed_spp=False,
               activation='leaky',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               kernel_initializer='glorot_uniform',
               kernel_regularizer=None,
               bias_regularizer=None,
               **kwargs):
    """Yolo Decoder initialization function.

    A unified model that ties all decoder components into a conditionally
    build YOLO decoder.

    Args:
      input_specs: `dict[str, tf.InputSpec]`: input specs of each of the
        inputs to the heads.
      use_fpn: `bool`, use the FPN found in the YoloV4 model.
      use_spatial_attention: `bool`, use the spatial attention module.
      csp_stack: `bool`, CSPize the FPN.
      fpn_depth: `int`, number of layers to use in each FPN path
        if you choose to use an FPN.
      fpn_filter_scale: `int`, scaling factor for the FPN filters.
      path_process_len: `int`, number of layers to use in each Decoder path.
      max_level_process_len: `int`, number of layers to use in the largest
        processing path, or the backbones largest output if it is different.
      embed_spp: `bool`, use the SPP found in the YoloV3 and V4 model.
      activation: `str`, the activation function to use typically leaky or
        mish.
      use_sync_bn: if True, use synchronized batch normalization.
      norm_momentum: `float`, normalization momentum for the moving average.
      norm_epsilon: `float`, small float added to variance to avoid dividing
        by zero.
      kernel_initializer: kernel_initializer for convolutional layers.
      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
      **kwargs: keyword arguments to be passed.
    """
    # NOTE(review): `**kwargs` is accepted but never forwarded to the
    # `super().__init__` call at the end, so extra kwargs (including the
    # `fpn_input` key that `get_config` emits via `_decoder_config`) are
    # silently dropped on a `from_config` round-trip — confirm intended.
    # Plain-value attributes may be assigned before the functional
    # `super().__init__`; Keras only restricts pre-init assignment of
    # trackable objects (layers/variables).
    self._input_specs = input_specs
    self._use_fpn = use_fpn
    self._fpn_depth = fpn_depth
    self._path_process_len = path_process_len
    self._max_level_process_len = max_level_process_len
    self._embed_spp = embed_spp

    self._activation = activation
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer

    # Keyword arguments shared by both the FPN and PAN sub-networks.
    self._base_config = dict(
        use_spatial_attention=use_spatial_attention,
        csp_stack=csp_stack,
        activation=self._activation,
        use_sync_bn=self._use_sync_bn,
        fpn_filter_scale=fpn_filter_scale,
        norm_momentum=self._norm_momentum,
        norm_epsilon=self._norm_epsilon,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)

    # Full configuration of the PAN stage; `fpn_input` tells the PAN which
    # direction to aggregate in.
    self._decoder_config = dict(
        path_process_len=self._path_process_len,
        max_level_process_len=self._max_level_process_len,
        embed_spp=self._embed_spp,
        fpn_input=self._use_fpn,
        **self._base_config)

    # Build symbolic inputs from the per-level shape specs (drop batch dim).
    inputs = {
        key: tf.keras.layers.Input(shape=value[1:])
        for key, value in input_specs.items()
    }
    if self._use_fpn:
      # YOLO v4 style: FPN first, then bottom-up PAN.
      inter_outs = YoloFPN(
          fpn_depth=self._fpn_depth, **self._base_config)(
              inputs)
      outputs = YoloPAN(**self._decoder_config)(inter_outs)
    else:
      # YOLO v3 style: PAN connected directly to the backbone, top-down.
      inter_outs = None
      outputs = YoloPAN(**self._decoder_config)(inputs)

    self._output_specs = {key: value.shape for key, value in outputs.items()}
    super().__init__(inputs=inputs, outputs=outputs, name='YoloDecoder')

  @property
  def use_fpn(self):
    # Whether the decoder was built with the FPN stage.
    return self._use_fpn

  @property
  def output_specs(self):
    # Dict of output tensor shapes keyed by string level.
    return self._output_specs

  def get_config(self):
    """Returns the config used to reconstruct this decoder."""
    config = dict(
        input_specs=self._input_specs,
        use_fpn=self._use_fpn,
        fpn_depth=self._fpn_depth,
        **self._decoder_config)
    return config

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Builds a decoder from a `get_config` dict."""
    return cls(**config)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment