Unverified Commit 5ffcc5b6 authored by Anirudh Vegesana, committed by GitHub

Merge branch 'purdue-yolo' into detection_generator_pr

parents 0b81a843 76e0c014
@@ -15,7 +15,7 @@
 """Sampling module for top_k, top_p and greedy decoding."""
 import abc
-from typing import Any, Callable, Dict
+from typing import Any, Callable, Dict, Optional
 import numpy as np
 import tensorflow as tf
@@ -98,10 +98,10 @@ def sample_top_p(logits, top_p):
       ], -1)
   # Scatter sorted indices to original indexes.
-  indices_to_remove = scatter_values_on_batch_indices(
-      sorted_indices_to_remove, sorted_indices)
-  top_p_logits = set_tensor_by_indices_to_value(
-      logits, indices_to_remove, np.NINF)
+  indices_to_remove = scatter_values_on_batch_indices(sorted_indices_to_remove,
+                                                      sorted_indices)
+  top_p_logits = set_tensor_by_indices_to_value(logits, indices_to_remove,
+                                                np.NINF)
   return top_p_logits
@@ -121,13 +121,12 @@ def scatter_values_on_batch_indices(values, batch_indices):
   tensor_shape = decoding_module.shape_list(batch_indices)
   broad_casted_batch_dims = tf.reshape(
       tf.broadcast_to(
-          tf.expand_dims(tf.range(tensor_shape[0]), axis=-1),
-          tensor_shape), [1, -1])
+          tf.expand_dims(tf.range(tensor_shape[0]), axis=-1), tensor_shape),
+      [1, -1])
   pair_indices = tf.transpose(
       tf.concat([broad_casted_batch_dims,
                  tf.reshape(batch_indices, [1, -1])], 0))
-  return tf.scatter_nd(pair_indices,
-                       tf.reshape(values, [-1]), tensor_shape)
+  return tf.scatter_nd(pair_indices, tf.reshape(values, [-1]), tensor_shape)

 def set_tensor_by_indices_to_value(input_tensor, indices, value):
@@ -137,6 +136,7 @@ def set_tensor_by_indices_to_value(input_tensor, indices, value):
     input_tensor: float (batch_size, dim)
     indices: bool (batch_size, dim)
     value: float scalar
+
   Returns:
     output_tensor: same shape as input_tensor.
   """
@@ -150,11 +150,12 @@ class SamplingModule(decoding_module.DecodingModule, metaclass=abc.ABCMeta):
   def __init__(self,
                symbols_to_logits_fn,
-               length_normalization_fn: Callable[[int, tf.DType], float],
                vocab_size: int,
                max_decode_length: int,
                eos_id: int,
                padded_decode: bool,
+               length_normalization_fn: Optional[Callable[[int, tf.DType],
+                                                          float]] = None,
                top_k=0,
                top_p=1.0,
                sample_temperature=0.0,
@@ -170,8 +171,8 @@ class SamplingModule(decoding_module.DecodingModule, metaclass=abc.ABCMeta):
     self.max_decode_length = max_decode_length
     self.top_k = tf.convert_to_tensor(top_k, dtype=tf.int32)
     self.top_p = tf.convert_to_tensor(top_p, dtype=tf.float32)
-    self.sample_temperature = tf.convert_to_tensor(sample_temperature,
-                                                   dtype=tf.float32)
+    self.sample_temperature = tf.convert_to_tensor(
+        sample_temperature, dtype=tf.float32)
     self.enable_greedy = enable_greedy
     super(SamplingModule, self).__init__(
         length_normalization_fn=length_normalization_fn, dtype=dtype)
@@ -330,12 +331,9 @@ class SamplingModule(decoding_module.DecodingModule, metaclass=abc.ABCMeta):
     return state, state_shape_invariants

-  def _get_new_alive_state(
-      self,
-      new_seq: tf.Tensor,
-      new_log_probs: tf.Tensor,
-      new_finished_flags: tf.Tensor,
-      new_cache: Dict[str, tf.Tensor]) -> Dict[str, Any]:
+  def _get_new_alive_state(self, new_seq: tf.Tensor, new_log_probs: tf.Tensor,
+                           new_finished_flags: tf.Tensor,
+                           new_cache: Dict[str, tf.Tensor]) -> Dict[str, Any]:
     """Gather the sequences that are still alive.
     This function resets the sequences in the alive_state that are finished.
@@ -360,9 +358,7 @@ class SamplingModule(decoding_module.DecodingModule, metaclass=abc.ABCMeta):
         decoding_module.StateKeys.ALIVE_CACHE: new_cache
     }

-  def _get_new_finished_state(self,
-                              state: Dict[str, Any],
-                              new_seq: tf.Tensor,
+  def _get_new_finished_state(self, state: Dict[str, Any], new_seq: tf.Tensor,
                               new_log_probs: tf.Tensor,
                               new_finished_flags: tf.Tensor,
                               batch_size: int) -> Dict[str, tf.Tensor]:
@@ -421,10 +417,9 @@ class SamplingModule(decoding_module.DecodingModule, metaclass=abc.ABCMeta):
       length_norm = self.length_normalization_fn(self.max_decode_length + 1,
                                                  self.dtype)
       alive_log_probs = alive_log_probs / length_norm
-    seq_cond = decoding_module.expand_to_same_rank(
-        finished_cond, finished_seq)
-    score_cond = decoding_module.expand_to_same_rank(
-        finished_cond, finished_scores)
+    seq_cond = decoding_module.expand_to_same_rank(finished_cond, finished_seq)
+    score_cond = decoding_module.expand_to_same_rank(finished_cond,
+                                                     finished_scores)
     finished_seq = tf.where(seq_cond, finished_seq, alive_seq)
     finished_scores = tf.where(score_cond, finished_scores, alive_log_probs)
     return finished_seq, finished_scores
......
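The hunks above mostly reflow `sample_top_p` and its helpers without changing behavior. For readers less familiar with nucleus sampling, here is a small self-contained sketch of the same idea (mask everything outside the smallest set of tokens whose cumulative probability reaches `top_p`); it uses a value cutoff instead of the module's scatter-based masking, and `top_p_filter` is an illustrative name, not part of the library.

```python
import tensorflow as tf

def top_p_filter(logits, top_p=0.9):
  """Masks logits outside the top-p (nucleus) set with -inf."""
  sorted_logits = tf.sort(logits, direction='DESCENDING', axis=-1)
  cumulative = tf.cumsum(tf.nn.softmax(sorted_logits, axis=-1), axis=-1)
  # Index of the last token kept: the first position where the cumulative
  # probability reaches top_p.
  cutoff_index = tf.reduce_sum(tf.cast(cumulative < top_p, tf.int32), axis=-1)
  cutoff_index = tf.minimum(cutoff_index, tf.shape(logits)[-1] - 1)
  cutoff_logit = tf.gather(sorted_logits, cutoff_index, batch_dims=1)
  return tf.where(logits < cutoff_logit[:, None],
                  tf.fill(tf.shape(logits), float('-inf')), logits)

# With top_p=0.8 only the two largest logits survive for this row.
print(top_p_filter(tf.constant([[2.0, 1.0, 0.5, -1.0]]), top_p=0.8))
```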
@@ -66,4 +66,5 @@ def main(_):

 if __name__ == '__main__':
   tfm_flags.define_flags()
+  flags.mark_flags_as_required(['experiment', 'mode', 'model_dir'])
   app.run(main)
@@ -111,6 +111,7 @@ export TPU_NAME=my-dlrm-tpu
 export EXPERIMENT_NAME=my_experiment_name
 export BUCKET_NAME="gs://my_dlrm_bucket"
 export DATA_DIR="${BUCKET_NAME}/data"
+export EMBEDDING_DIM=32

 python3 models/official/recommendation/ranking/train.py --mode=train_and_eval \
 --model_dir=${BUCKET_NAME}/model_dirs/${EXPERIMENT_NAME} --params_override="
@@ -126,8 +127,8 @@ task:
     global_batch_size: 16384
   model:
     num_dense_features: 13
-    bottom_mlp: [512,256,128]
-    embedding_dim: 128
+    bottom_mlp: [512,256,${EMBEDDING_DIM}]
+    embedding_dim: ${EMBEDDING_DIM}
     top_mlp: [1024,1024,512,256,1]
     interaction: 'dot'
     vocab_sizes: [39884406, 39043, 17289, 7420, 20263, 3, 7120, 1543, 63,
@@ -135,8 +136,8 @@ task:
       39979771, 25641295, 39664984, 585935, 12972, 108, 36]
 trainer:
   use_orbit: true
-  validation_interval: 90000
-  checkpoint_interval: 100000
+  validation_interval: 85352
+  checkpoint_interval: 85352
   validation_steps: 5440
   train_steps: 256054
   steps_per_loop: 1000
@@ -154,7 +155,9 @@ Training on GPUs are similar to TPU training. Only distribution strategy needs
 to be updated and number of GPUs provided (for 4 GPUs):

 ```shell
-python3 official/recommendation/ranking/main.py --mode=train_and_eval \
+export EMBEDDING_DIM=8
+
+python3 official/recommendation/ranking/train.py --mode=train_and_eval \
 --model_dir=${BUCKET_NAME}/model_dirs/${EXPERIMENT_NAME} --params_override="
 runtime:
   distribution_strategy: 'mirrored'
......
@@ -12,6 +12,3 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Helper functions for running models in a distributed setting."""
-# pylint: disable=wildcard-import
-from official.common.distribute_utils import *
@@ -13,7 +13,7 @@
 # limitations under the License.

 """Ranking Model configuration definition."""
-from typing import Optional, List
+from typing import Optional, List, Union
 import dataclasses
 from official.core import exp_factory
@@ -59,7 +59,13 @@ class ModelConfig(hyperparams.Config):
     num_dense_features: Number of dense features.
     vocab_sizes: Vocab sizes for each of the sparse features. The order agrees
       with the order of the input data.
-    embedding_dim: Embedding dimension.
+    embedding_dim: An integer or a list of embedding table dimensions.
+      If it's an integer then all tables will have the same embedding dimension.
+      If it's a list then the length should match with `vocab_sizes`.
+    size_threshold: A threshold for table sizes below which a keras
+      embedding layer is used, and above which a TPU embedding layer is used.
+      If it's -1 then only the keras embedding layer is used for all tables;
+      if it's 0 then only the TPU embedding layer is used.
     bottom_mlp: The sizes of hidden layers for bottom MLP applied to dense
       features.
     top_mlp: The sizes of hidden layers for top MLP.
@@ -68,7 +74,8 @@ class ModelConfig(hyperparams.Config):
   """
   num_dense_features: int = 13
   vocab_sizes: List[int] = dataclasses.field(default_factory=list)
-  embedding_dim: int = 8
+  embedding_dim: Union[int, List[int]] = 8
+  size_threshold: int = 50_000
   bottom_mlp: List[int] = dataclasses.field(default_factory=list)
   top_mlp: List[int] = dataclasses.field(default_factory=list)
   interaction: str = 'dot'
@@ -188,7 +195,7 @@ def default_config() -> Config:
       runtime=cfg.RuntimeConfig(),
       task=Task(
           model=ModelConfig(
-              embedding_dim=4,
+              embedding_dim=8,
              vocab_sizes=vocab_sizes,
              bottom_mlp=[64, 32, 4],
              top_mlp=[64, 32, 1]),
......
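Putting the new `ModelConfig` fields together: a minimal sketch of an override that uses a per-table `embedding_dim` list, with values borrowed from the test overrides elsewhere in this change (the exact numbers are illustrative).

```python
from official.recommendation.ranking.configs import config

vocab_sizes = [40, 12, 11, 13, 2, 5]
model_cfg = config.ModelConfig(
    num_dense_features=5,
    vocab_sizes=vocab_sizes,
    # A list supplies one dimension per table and must match len(vocab_sizes);
    # a plain int (e.g. 8) applies the same dimension to every table.
    embedding_dim=[8] * len(vocab_sizes),
    # Tables with at most size_threshold rows are served by a Keras embedding;
    # larger tables go through the TPU embedding layer (-1 / 0 force one side).
    size_threshold=50_000,
    bottom_mlp=[64, 32, 8],
    top_mlp=[64, 32, 1])
```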
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
@@ -136,7 +136,7 @@ class CriteoTsvReader:
       num_replicas = ctx.num_replicas_in_sync if ctx else 1
       if params.is_training:
-        dataset_size = 10000 * batch_size * num_replicas
+        dataset_size = 1000 * batch_size * num_replicas
       else:
         dataset_size = 1000 * batch_size * num_replicas
       dense_tensor = tf.random.uniform(
@@ -169,6 +169,7 @@ class CriteoTsvReader:
               'sparse_features': sparse_tensor_elements}, label_tensor

       dataset = tf.data.Dataset.from_tensor_slices(input_elem)
+      dataset = dataset.cache()
       if params.is_training:
         dataset = dataset.repeat()
......
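The added `dataset.cache()` means the synthetic example tensors are generated once and reused on every pass. A toy stand-alone sketch of that tf.data pattern (shapes and sizes here are made up, not the Criteo ones):

```python
import tensorflow as tf

# One "epoch" of random data is materialized once, cached, then repeated.
features = {
    'dense_features': tf.random.uniform([1000, 13]),
    'sparse_features': tf.random.uniform([1000, 26], maxval=100, dtype=tf.int32),
}
labels = tf.random.uniform([1000], maxval=2, dtype=tf.int32)

dataset = tf.data.Dataset.from_tensor_slices((features, labels))
dataset = dataset.cache()    # avoids regenerating the random tensors each pass
dataset = dataset.repeat()   # training mode: loop over the cached data forever
dataset = dataset.batch(256, drop_remainder=True)
dataset = dataset.prefetch(tf.data.AUTOTUNE)
```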
@@ -17,8 +17,8 @@
 from absl.testing import parameterized
 import tensorflow as tf

-from official.recommendation.ranking import data_pipeline
 from official.recommendation.ranking.configs import config
+from official.recommendation.ranking.data import data_pipeline


 class DataPipelineTest(parameterized.TestCase, tf.test.TestCase):
......
@@ -15,7 +15,7 @@
 """Task for the Ranking model."""

 import math
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Union

 import tensorflow as tf
 import tensorflow_recommenders as tfrs
@@ -23,36 +23,49 @@ import tensorflow_recommenders as tfrs
 from official.core import base_task
 from official.core import config_definitions
 from official.recommendation.ranking import common
-from official.recommendation.ranking import data_pipeline
 from official.recommendation.ranking.configs import config
+from official.recommendation.ranking.data import data_pipeline


 RuntimeConfig = config_definitions.RuntimeConfig


 def _get_tpu_embedding_feature_config(
     vocab_sizes: List[int],
-    embedding_dim: int,
+    embedding_dim: Union[int, List[int]],
     table_name_prefix: str = 'embedding_table'
 ) -> Dict[str, tf.tpu.experimental.embedding.FeatureConfig]:
   """Returns TPU embedding feature config.

+  i'th table config will have vocab size of vocab_sizes[i] and embedding
+  dimension of embedding_dim if embedding_dim is an int, or embedding_dim[i] if
+  embedding_dim is a list.
   Args:
     vocab_sizes: List of sizes of categories/id's in the table.
-    embedding_dim: Embedding dimension.
+    embedding_dim: An integer or a list of embedding table dimensions.
     table_name_prefix: a prefix for embedding tables.
   Returns:
     A dictionary of feature_name, FeatureConfig pairs.
   """
+  if isinstance(embedding_dim, List):
+    if len(vocab_sizes) != len(embedding_dim):
+      raise ValueError(
+          f'length of vocab_sizes: {len(vocab_sizes)} is not equal to the '
+          f'length of embedding_dim: {len(embedding_dim)}')
+  elif isinstance(embedding_dim, int):
+    embedding_dim = [embedding_dim] * len(vocab_sizes)
+  else:
+    raise ValueError('embedding_dim is not either a list or an int, got '
+                     f'{type(embedding_dim)}')
+
   feature_config = {}

   for i, vocab_size in enumerate(vocab_sizes):
     table_config = tf.tpu.experimental.embedding.TableConfig(
         vocabulary_size=vocab_size,
-        dim=embedding_dim,
+        dim=embedding_dim[i],
         combiner='mean',
         initializer=tf.initializers.TruncatedNormal(
-            mean=0.0, stddev=1 / math.sqrt(embedding_dim)),
+            mean=0.0, stddev=1 / math.sqrt(embedding_dim[i])),
         name=table_name_prefix + '_%s' % i)
     feature_config[str(i)] = tf.tpu.experimental.embedding.FeatureConfig(
         table=table_config)
@@ -72,7 +85,7 @@ class RankingTask(base_task.Task):
     """Task initialization.

     Args:
-      params: the RannkingModel task configuration instance.
+      params: the RankingModel task configuration instance.
       optimizer_config: Optimizer configuration instance.
       logging_dir: a string pointing to where the model, summaries etc. will be
         saved.
@@ -125,15 +138,18 @@ class RankingTask(base_task.Task):
           self.optimizer_config.embedding_optimizer)
       embedding_optimizer.learning_rate = lr_callable

-    emb_feature_config = _get_tpu_embedding_feature_config(
-        vocab_sizes=self.task_config.model.vocab_sizes,
-        embedding_dim=self.task_config.model.embedding_dim)
-    tpu_embedding = tfrs.layers.embedding.TPUEmbedding(
-        emb_feature_config, embedding_optimizer)
+    feature_config = _get_tpu_embedding_feature_config(
+        embedding_dim=self.task_config.model.embedding_dim,
+        vocab_sizes=self.task_config.model.vocab_sizes)
+
+    embedding_layer = tfrs.experimental.layers.embedding.PartialTPUEmbedding(
+        feature_config=feature_config,
+        optimizer=embedding_optimizer,
+        size_threshold=self.task_config.model.size_threshold)

     if self.task_config.model.interaction == 'dot':
-      feature_interaction = tfrs.layers.feature_interaction.DotInteraction()
+      feature_interaction = tfrs.layers.feature_interaction.DotInteraction(
+          skip_gather=True)
     elif self.task_config.model.interaction == 'cross':
       feature_interaction = tf.keras.Sequential([
           tf.keras.layers.Concatenate(),
@@ -145,7 +161,7 @@ class RankingTask(base_task.Task):
                        f'is not supported it must be either \'dot\' or \'cross\'.')

     model = tfrs.experimental.models.Ranking(
-        embedding_layer=tpu_embedding,
+        embedding_layer=embedding_layer,
         bottom_stack=tfrs.layers.blocks.MLP(
             units=self.task_config.model.bottom_mlp, final_activation='relu'),
         feature_interaction=feature_interaction,
@@ -184,3 +200,5 @@ class RankingTask(base_task.Task):
   @property
   def optimizer_config(self) -> config.OptimizationConfig:
     return self._optimizer_config
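A condensed sketch of what the new embedding path builds, outside the task class: per-table `TableConfig`s keyed by feature name, handed to `PartialTPUEmbedding`, which routes small tables to plain Keras embeddings and large ones to the TPU embedding API. Table sizes, dimensions, and the Adagrad optimizer spec below are illustrative, not values taken from this change.

```python
import math
import tensorflow as tf
import tensorflow_recommenders as tfrs

vocab_sizes = [1_000_000, 40_000, 500]   # illustrative table sizes
embedding_dims = [64, 64, 64]            # equal dims keep 'dot' interaction valid

feature_config = {}
for i, (vocab_size, dim) in enumerate(zip(vocab_sizes, embedding_dims)):
  table = tf.tpu.experimental.embedding.TableConfig(
      vocabulary_size=vocab_size,
      dim=dim,
      combiner='mean',
      initializer=tf.initializers.TruncatedNormal(
          mean=0.0, stddev=1 / math.sqrt(dim)),
      name=f'embedding_table_{i}')
  feature_config[str(i)] = tf.tpu.experimental.embedding.FeatureConfig(
      table=table)

# With size_threshold=50_000, only the 500-row table stays in a Keras
# embedding; the two large tables are handled by the TPU embedding layer.
embedding_layer = tfrs.experimental.layers.embedding.PartialTPUEmbedding(
    feature_config=feature_config,
    optimizer=tf.tpu.experimental.embedding.Adagrad(learning_rate=0.01),
    size_threshold=50_000)
```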
@@ -18,8 +18,8 @@ from absl.testing import parameterized
 import tensorflow as tf

 from official.core import exp_factory
-from official.recommendation.ranking import data_pipeline
 from official.recommendation.ranking import task
+from official.recommendation.ranking.data import data_pipeline


 class TaskTest(parameterized.TestCase, tf.test.TestCase):
@@ -34,6 +34,8 @@ class TaskTest(parameterized.TestCase, tf.test.TestCase):
     params.task.train_data.global_batch_size = 16
     params.task.validation_data.global_batch_size = 16
     params.task.model.vocab_sizes = [40, 12, 11, 13, 2, 5]
+    params.task.model.embedding_dim = 8
+    params.task.model.bottom_mlp = [64, 32, 8]
     params.task.use_synthetic_data = True
     params.task.model.num_dense_features = 5
......
@@ -20,15 +20,14 @@ from absl import app
 from absl import flags
 from absl import logging

-import orbit
 import tensorflow as tf

+from official.common import distribute_utils
 from official.core import base_trainer
 from official.core import train_lib
 from official.core import train_utils
 from official.recommendation.ranking import common
 from official.recommendation.ranking.task import RankingTask
-from official.utils.misc import distribution_utils
 from official.utils.misc import keras_utils

 FLAGS = flags.FLAGS
@@ -86,7 +85,7 @@ def main(_) -> None:
   enable_tensorboard = params.trainer.callbacks.enable_tensorboard

-  strategy = distribution_utils.get_distribution_strategy(
+  strategy = distribute_utils.get_distribution_strategy(
       distribution_strategy=params.runtime.distribution_strategy,
       all_reduce_alg=params.runtime.all_reduce_alg,
       num_gpus=params.runtime.num_gpus,
@@ -95,6 +94,21 @@ def main(_) -> None:
   with strategy.scope():
     model = task.build_model()

+  def get_dataset_fn(params):
+    return lambda input_context: task.build_inputs(params, input_context)
+
+  train_dataset = None
+  if 'train' in mode:
+    train_dataset = strategy.distribute_datasets_from_function(
+        get_dataset_fn(params.task.train_data),
+        options=tf.distribute.InputOptions(experimental_fetch_to_device=False))
+
+  validation_dataset = None
+  if 'eval' in mode:
+    validation_dataset = strategy.distribute_datasets_from_function(
+        get_dataset_fn(params.task.validation_data),
+        options=tf.distribute.InputOptions(experimental_fetch_to_device=False))
+
   if params.trainer.use_orbit:
     with strategy.scope():
       checkpoint_exporter = train_utils.maybe_create_best_ckpt_exporter(
@@ -106,6 +120,8 @@ def main(_) -> None:
           optimizer=model.optimizer,
           train='train' in mode,
           evaluate='eval' in mode,
+          train_dataset=train_dataset,
+          validation_dataset=validation_dataset,
           checkpoint_exporter=checkpoint_exporter)

       train_lib.run_experiment(
@@ -117,16 +133,6 @@ def main(_) -> None:
           trainer=trainer)

   else:  # Compile/fit
-    train_dataset = None
-    if 'train' in mode:
-      train_dataset = orbit.utils.make_distributed_dataset(
-          strategy, task.build_inputs, params.task.train_data)
-
-    eval_dataset = None
-    if 'eval' in mode:
-      eval_dataset = orbit.utils.make_distributed_dataset(
-          strategy, task.build_inputs, params.task.validation_data)
-
     checkpoint = tf.train.Checkpoint(model=model, optimizer=model.optimizer)
     latest_checkpoint = tf.train.latest_checkpoint(model_dir)
@@ -169,7 +175,7 @@ def main(_) -> None:
         initial_epoch=initial_epoch,
         epochs=num_epochs,
         steps_per_epoch=params.trainer.validation_interval,
-        validation_data=eval_dataset,
+        validation_data=validation_dataset,
         validation_steps=eval_steps,
         callbacks=callbacks,
     )
@@ -177,7 +183,7 @@ def main(_) -> None:
     logging.info('Train history: %s', history.history)
   elif mode == 'eval':
     logging.info('Evaluation started')
-    validation_output = model.evaluate(eval_dataset, steps=eval_steps)
+    validation_output = model.evaluate(validation_dataset, steps=eval_steps)
     logging.info('Evaluation output: %s', validation_output)
   else:
     raise NotImplementedError('The mode is not implemented: %s' % mode)
......
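The training script now builds its datasets with `strategy.distribute_datasets_from_function` and `experimental_fetch_to_device=False`, which keeps batches on the host; that is typically what a TPU embedding layer needs in order to enqueue its sparse inputs from the host side. A generic sketch of the pattern, independent of the ranking task (the dataset and batch size are made up):

```python
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()

def make_dataset(input_context):
  # Each input pipeline reads its own shard and batches per replica.
  per_replica_batch = input_context.get_per_replica_batch_size(256)
  ds = tf.data.Dataset.range(10_000).map(lambda x: tf.cast(x, tf.float32))
  ds = ds.shard(input_context.num_input_pipelines,
                input_context.input_pipeline_id)
  return ds.batch(per_replica_batch).prefetch(tf.data.AUTOTUNE)

dist_dataset = strategy.distribute_datasets_from_function(
    make_dataset,
    options=tf.distribute.InputOptions(experimental_fetch_to_device=False))

for batch in dist_dataset:
  break  # one distributed batch; each replica gets its own slice
```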
@@ -40,6 +40,8 @@ def _get_params_override(vocab_sizes,
       'task': {
           'model': {
               'vocab_sizes': vocab_sizes,
+              'embedding_dim': [8] * len(vocab_sizes),
+              'bottom_mlp': [64, 32, 8],
               'interaction': interaction,
           },
           'train_data': {
......
 six
 google-api-python-client>=1.6.7
+google-cloud-bigquery>=0.31.0
 kaggle>=1.3.9
 numpy>=1.15.4
 oauth2client
......
This directory contains the new design of TF model garden vision framework.
Stay tuned.
@@ -80,6 +80,11 @@ class SpineNetMobile(hyperparams.Config):
   expand_ratio: int = 6
   min_level: int = 3
   max_level: int = 7
+  # If use_keras_upsampling_2d is True, the model uses the Keras UpSampling2D
+  # layer instead of the optimized custom TF op, which keeps the model in plain
+  # Keras layers. We set this flag to True when applying QAT from the model
+  # optimization toolkit, which requires the model to use Keras layers.
+  use_keras_upsampling_2d: bool = False


 @dataclasses.dataclass
......
@@ -78,6 +78,7 @@ class DataConfig(cfg.DataConfig):
   parser: Parser = Parser()
   shuffle_buffer_size: int = 10000
   file_type: str = 'tfrecord'
+  drop_remainder: bool = True


 @dataclasses.dataclass
@@ -215,7 +216,8 @@ class Losses(hyperparams.Config):
 class MaskRCNNTask(cfg.TaskConfig):
   model: MaskRCNN = MaskRCNN()
   train_data: DataConfig = DataConfig(is_training=True)
-  validation_data: DataConfig = DataConfig(is_training=False)
+  validation_data: DataConfig = DataConfig(is_training=False,
+                                           drop_remainder=False)
   losses: Losses = Losses()
   init_checkpoint: Optional[str] = None
   init_checkpoint_modules: str = 'all'  # all or backbone
@@ -260,7 +262,8 @@ def fasterrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
           validation_data=DataConfig(
               input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
               is_training=False,
-              global_batch_size=eval_batch_size)),
+              global_batch_size=eval_batch_size,
+              drop_remainder=False)),
       trainer=cfg.TrainerConfig(
           train_steps=22500,
           validation_steps=coco_val_samples // eval_batch_size,
@@ -324,7 +327,8 @@ def maskrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
           validation_data=DataConfig(
               input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
               is_training=False,
-              global_batch_size=eval_batch_size)),
+              global_batch_size=eval_batch_size,
+              drop_remainder=False)),
       trainer=cfg.TrainerConfig(
           train_steps=22500,
           validation_steps=coco_val_samples // eval_batch_size,
@@ -401,7 +405,8 @@ def maskrcnn_spinenet_coco() -> cfg.ExperimentConfig:
           validation_data=DataConfig(
               input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
               is_training=False,
-              global_batch_size=eval_batch_size)),
+              global_batch_size=eval_batch_size,
+              drop_remainder=False)),
       trainer=cfg.TrainerConfig(
           train_steps=steps_per_epoch * 350,
           validation_steps=coco_val_samples // eval_batch_size,
@@ -486,7 +491,8 @@ def cascadercnn_spinenet_coco() -> cfg.ExperimentConfig:
           validation_data=DataConfig(
               input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
               is_training=False,
-              global_batch_size=eval_batch_size)),
+              global_batch_size=eval_batch_size,
+              drop_remainder=False)),
       trainer=cfg.TrainerConfig(
           train_steps=steps_per_epoch * 500,
           validation_steps=coco_val_samples // eval_batch_size,
......
@@ -130,6 +130,13 @@ class RetinaNet(hyperparams.Config):
   norm_activation: common.NormActivation = common.NormActivation()


+@dataclasses.dataclass
+class ExportConfig(hyperparams.Config):
+  output_normalized_coordinates: bool = False
+  cast_num_detections_to_float: bool = False
+  cast_detection_classes_to_float: bool = False
+
+
 @dataclasses.dataclass
 class RetinaNetTask(cfg.TaskConfig):
   model: RetinaNet = RetinaNet()
@@ -140,6 +147,7 @@ class RetinaNetTask(cfg.TaskConfig):
   init_checkpoint_modules: str = 'all'  # all or backbone
   annotation_file: Optional[str] = None
   per_category_metrics: bool = False
+  export_config: ExportConfig = ExportConfig()


 @exp_factory.register_config_factory('retinanet')
@@ -338,7 +346,8 @@ def retinanet_spinenet_mobile_coco() -> cfg.ExperimentConfig:
                   model_id='49',
                   stochastic_depth_drop_rate=0.2,
                   min_level=3,
-                  max_level=7)),
+                  max_level=7,
+                  use_keras_upsampling_2d=False)),
           decoder=decoders.Decoder(
               type='identity', identity=decoders.Identity()),
           head=RetinaNetHead(num_filters=48, use_separable_conv=True),
......
@@ -3,17 +3,19 @@
 # Processes the COCO few-shot benchmark into TFRecord files. Requires `wget`.

 tmp_dir=$(mktemp -d -t coco-XXXXXXXXXX)
+base_image_dir="/tmp/coco_images"
 output_dir="/tmp/coco_few_shot"
-while getopts "o:" o; do
+while getopts ":i:o:" o; do
   case "${o}" in
     o) output_dir=${OPTARG} ;;
-    *) echo "Usage: ${0} [-o <output_dir>]" 1>&2; exit 1 ;;
+    i) base_image_dir=${OPTARG} ;;
+    *) echo "Usage: ${0} [-i <base_image_dir>] [-o <output_dir>]" 1>&2; exit 1 ;;
   esac
 done

 cocosplit_url="dl.yf.io/fs-det/datasets/cocosplit"
 wget --recursive --no-parent -q --show-progress --progress=bar:force:noscroll \
-    -P "${tmp_dir}" -A "5k.json,*10shot*.json,*30shot*.json" \
+    -P "${tmp_dir}" -A "trainvalno5k.json,5k.json,*10shot*.json,*30shot*.json" \
     "http://${cocosplit_url}/"
 mv "${tmp_dir}/${cocosplit_url}/"* "${tmp_dir}"
 rm -rf "${tmp_dir}/${cocosplit_url}/"
@@ -25,8 +27,8 @@ for seed in {0..9}; do
   for shots in 10 30; do
     python create_coco_tf_record.py \
       --logtostderr \
-      --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \
-      --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \
+      --image_dir="${base_image_dir}/train2014" \
+      --image_dir="${base_image_dir}/val2014" \
       --image_info_file="${tmp_dir}/${shots}shot_seed${seed}.json" \
       --object_annotations_file="${tmp_dir}/${shots}shot_seed${seed}.json" \
       --caption_annotations_file="" \
@@ -37,12 +39,32 @@ done
 python create_coco_tf_record.py \
   --logtostderr \
-  --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/train2014 \
-  --image_dir=/namespace/vale-project/datasets/mscoco_raw/images/val2014 \
+  --image_dir="${base_image_dir}/train2014" \
+  --image_dir="${base_image_dir}/val2014" \
   --image_info_file="${tmp_dir}/datasplit/5k.json" \
   --object_annotations_file="${tmp_dir}/datasplit/5k.json" \
   --caption_annotations_file="" \
   --output_file_prefix="${output_dir}/5k" \
   --num_shards=10

+python create_coco_tf_record.py \
+  --logtostderr \
+  --image_dir="${base_image_dir}/train2014" \
+  --image_dir="${base_image_dir}/val2014" \
+  --image_info_file="${tmp_dir}/datasplit/trainvalno5k_base.json" \
+  --object_annotations_file="${tmp_dir}/datasplit/trainvalno5k_base.json" \
+  --caption_annotations_file="" \
+  --output_file_prefix="${output_dir}/trainvalno5k_base" \
+  --num_shards=200
+
+python create_coco_tf_record.py \
+  --logtostderr \
+  --image_dir="${base_image_dir}/train2014" \
+  --image_dir="${base_image_dir}/val2014" \
+  --image_info_file="${tmp_dir}/datasplit/5k_base.json" \
+  --object_annotations_file="${tmp_dir}/datasplit/5k_base.json" \
+  --caption_annotations_file="" \
+  --output_file_prefix="${output_dir}/5k_base" \
+  --num_shards=10
+
 rm -rf "${tmp_dir}"
@@ -76,10 +76,30 @@ for _seed, _shots in itertools.product(SEEDS, SHOTS):
                                    _shots,
                                    _category))

+# Base class IDs, as defined in
+# https://github.com/ucbdrive/few-shot-object-detection/blob/master/fsdet/evaluation/coco_evaluation.py#L60-L65
+BASE_CLASS_IDS = [8, 10, 11, 13, 14, 15, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
+                  35, 36, 37, 38, 39, 40, 41, 42, 43, 46, 47, 48, 49, 50, 51,
+                  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 65, 70, 73, 74, 75,
+                  76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
+

 def main(unused_argv):
   workdir = FLAGS.workdir

+  # Filter novel class annotations from the training and validation sets.
+  for name in ('trainvalno5k', '5k'):
+    file_path = os.path.join(workdir, 'datasplit', '{}.json'.format(name))
+    with tf.io.gfile.GFile(file_path, 'r') as f:
+      json_dict = json.load(f)
+    json_dict['annotations'] = [a for a in json_dict['annotations']
+                                if a['category_id'] in BASE_CLASS_IDS]
+    output_path = os.path.join(
+        workdir, 'datasplit', '{}_base.json'.format(name))
+    with tf.io.gfile.GFile(output_path, 'w') as f:
+      json.dump(json_dict, f)
+
   for seed, shots in itertools.product(SEEDS, SHOTS):
     # Retrieve all examples for a given seed and shots setting.
     file_paths = [os.path.join(workdir, suffix)
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""TFDS factory functions."""
from official.vision.beta.dataloaders import decoder as base_decoder
from official.vision.beta.dataloaders import tfds_detection_decoders
from official.vision.beta.dataloaders import tfds_segmentation_decoders
from official.vision.beta.dataloaders import tfds_classification_decoders
def get_classification_decoder(tfds_name: str) -> base_decoder.Decoder:
  """Gets classification decoder.

  Args:
    tfds_name: `str`, name of the tfds classification decoder.

  Returns:
    `base_decoder.Decoder` instance.

  Raises:
    ValueError if the tfds_name doesn't exist in the available decoders.
  """
  if tfds_name in tfds_classification_decoders.TFDS_ID_TO_DECODER_MAP:
    decoder = tfds_classification_decoders.TFDS_ID_TO_DECODER_MAP[tfds_name]()
  else:
    raise ValueError(
        f'TFDS Classification {tfds_name} is not supported')
  return decoder


def get_detection_decoder(tfds_name: str) -> base_decoder.Decoder:
  """Gets detection decoder.

  Args:
    tfds_name: `str`, name of the tfds detection decoder.

  Returns:
    `base_decoder.Decoder` instance.

  Raises:
    ValueError if the tfds_name doesn't exist in the available decoders.
  """
  if tfds_name in tfds_detection_decoders.TFDS_ID_TO_DECODER_MAP:
    decoder = tfds_detection_decoders.TFDS_ID_TO_DECODER_MAP[tfds_name]()
  else:
    raise ValueError(f'TFDS Detection {tfds_name} is not supported')
  return decoder


def get_segmentation_decoder(tfds_name: str) -> base_decoder.Decoder:
  """Gets segmentation decoder.

  Args:
    tfds_name: `str`, name of the tfds segmentation decoder.

  Returns:
    `base_decoder.Decoder` instance.

  Raises:
    ValueError if the tfds_name doesn't exist in the available decoders.
  """
  if tfds_name in tfds_segmentation_decoders.TFDS_ID_TO_DECODER_MAP:
    decoder = tfds_segmentation_decoders.TFDS_ID_TO_DECODER_MAP[tfds_name]()
  else:
    raise ValueError(f'TFDS Segmentation {tfds_name} is not supported')
  return decoder
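A tiny usage sketch for the new factory functions. The import path is hypothetical (adjust it to wherever this module lives in the repo), and whether 'coco/2017' is actually registered depends on the decoder maps, hence the guard:

```python
# Hypothetical module path for the factory functions defined above.
from official.vision.beta.dataloaders import tfds_factory

try:
  decoder = tfds_factory.get_detection_decoder('coco/2017')
except ValueError as e:
  # Unsupported TFDS names fail fast here instead of deep in the input pipeline.
  print(e)
```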