Commit 47545935 authored by vishnubanna's avatar vishnubanna
Browse files

load tests into one file and inherit from the imagenet task and the...

load tests into one file and inherit from the imagenet task and the functioning tfds decoder for imagenet
parent 48b412c3
import tensorflow as tf
import tensorflow.keras as ks
@tf.keras.utils.register_keras_serializable(package='Text')
def mish(x):
  """Applies the Mish activation: ``x * tanh(softplus(x))``.

  Mish is a self-regularized, non-monotonic activation that is far
  smoother than ReLU. Original paper: https://arxiv.org/abs/1908.08681

  Args:
    x: float Tensor to transform.

  Returns:
    A Tensor of the same shape as `x` with the Mish activation applied.
  """
  softplus_x = ks.activations.softplus(x)
  return x * tf.math.tanh(softplus_x)
\ No newline at end of file
import tensorflow as tf
import tensorflow.keras as ks
class mish(ks.layers.Layer):
  """Keras layer form of the Mish activation.

  Computes ``x * tanh(softplus(x))`` elementwise; holds no weights.
  """

  def __init__(self, **kwargs):
    super().__init__(**kwargs)

  def call(self, x):
    # Same formula as the functional `mish`, wrapped as a layer.
    softplus_x = ks.activations.softplus(x)
    return x * tf.math.tanh(softplus_x)
from .nn_blocks import Identity, CSPTiny, CSPDownSample, CSPConnect, DarkTiny, DarkResidual, DarkConv
\ No newline at end of file
import tensorflow as tf
import tensorflow.keras as ks
import numpy as np
from absl.testing import parameterized
from official.vision.beta.projects.yolo.modeling import layers as nn_blocks
class CSPConnect(tf.test.TestCase, parameterized.TestCase):
  """Shape and gradient tests for CSPDownSample -> CSPConnect."""

  @parameterized.named_parameters(("same", 224, 224, 64, 1),
                                  ("downsample", 224, 224, 64, 2))
  def test_pass_through(self, width, height, filters, mod):
    """Verifies the merged output shape of the CSP pair."""
    x = ks.Input(shape=(width, height, filters))
    test_layer = nn_blocks.CSPDownSample(filters=filters, filter_reduce=mod)
    test_layer2 = nn_blocks.CSPConnect(filters=filters, filter_reduce=mod)
    outx, px = test_layer(x)
    outx = test_layer2([outx, px])
    print(outx)
    print(outx.shape.as_list())
    self.assertAllEqual(
        outx.shape.as_list(),
        [None, np.ceil(width // 2),
         np.ceil(height // 2), (filters)])

  @parameterized.named_parameters(("same", 224, 224, 64, 1),
                                  ("downsample", 224, 224, 128, 2))
  def test_gradient_pass_though(self, width, height, filters, mod):
    """Checks that gradients flow through all trainable variables.

    BUG FIX: the signature previously was (filters, width, height, mod),
    mismatching the named_parameters tuples above, which are ordered
    (width, height, filters, mod) — so `filters` silently received 224.
    """
    loss = ks.losses.MeanSquaredError()
    optimizer = ks.optimizers.SGD()
    test_layer = nn_blocks.CSPDownSample(filters, filter_reduce=mod)
    path_layer = nn_blocks.CSPConnect(filters, filter_reduce=mod)
    init = tf.random_normal_initializer()
    x = tf.Variable(
        initial_value=init(shape=(1, width, height, filters),
                           dtype=tf.float32))
    # CSPConnect restores the full `filters` channel count at half spatial
    # resolution, so the target matches that shape.
    y = tf.Variable(
        initial_value=init(shape=(1, int(np.ceil(width // 2)),
                                  int(np.ceil(height // 2)), filters),
                           dtype=tf.float32))
    with tf.GradientTape() as tape:
      x_hat, x_prev = test_layer(x)
      x_hat = path_layer([x_hat, x_prev])
      grad_loss = loss(x_hat, y)
    grad = tape.gradient(grad_loss, test_layer.trainable_variables)
    optimizer.apply_gradients(zip(grad, test_layer.trainable_variables))
    self.assertNotIn(None, grad)
# Standard test entry point: run this module's tests directly.
if __name__ == "__main__":
  tf.test.main()
import tensorflow as tf
import tensorflow.keras as ks
import numpy as np
from absl.testing import parameterized
from official.vision.beta.projects.yolo.modeling import layers as nn_blocks
class CSPDownSample(tf.test.TestCase, parameterized.TestCase):
  """Shape and gradient tests for the CSPDownSample layer."""

  @parameterized.named_parameters(("same", 224, 224, 64, 1),
                                  ("downsample", 224, 224, 64, 2))
  def test_pass_through(self, width, height, filters, mod):
    """Verifies the partial-path output shape (channels reduced by mod)."""
    x = ks.Input(shape=(width, height, filters))
    test_layer = nn_blocks.CSPDownSample(filters=filters, filter_reduce=mod)
    outx, px = test_layer(x)
    print(outx)
    print(outx.shape.as_list())
    self.assertAllEqual(
        outx.shape.as_list(),
        [None, np.ceil(width // 2),
         np.ceil(height // 2), (filters / mod)])

  @parameterized.named_parameters(("same", 224, 224, 64, 1),
                                  ("downsample", 224, 224, 128, 2))
  def test_gradient_pass_though(self, width, height, filters, mod):
    """Checks that gradients flow through all trainable variables.

    BUG FIX: the signature previously was (filters, width, height, mod),
    mismatching the named_parameters tuples above, which are ordered
    (width, height, filters, mod) — so `filters` silently received 224.
    """
    loss = ks.losses.MeanSquaredError()
    optimizer = ks.optimizers.SGD()
    test_layer = nn_blocks.CSPDownSample(filters, filter_reduce=mod)
    path_layer = nn_blocks.CSPConnect(filters, filter_reduce=mod)
    init = tf.random_normal_initializer()
    x = tf.Variable(
        initial_value=init(shape=(1, width, height, filters),
                           dtype=tf.float32))
    y = tf.Variable(
        initial_value=init(shape=(1, int(np.ceil(width // 2)),
                                  int(np.ceil(height // 2)), filters),
                           dtype=tf.float32))
    with tf.GradientTape() as tape:
      x_hat, x_prev = test_layer(x)
      x_hat = path_layer([x_hat, x_prev])
      grad_loss = loss(x_hat, y)
    grad = tape.gradient(grad_loss, test_layer.trainable_variables)
    optimizer.apply_gradients(zip(grad, test_layer.trainable_variables))
    self.assertNotIn(None, grad)
# Standard test entry point: run this module's tests directly.
if __name__ == "__main__":
  tf.test.main()
import tensorflow as tf
import tensorflow.keras as ks
import tensorflow_datasets as tfds
from absl.testing import parameterized
from official.vision.beta.projects.yolo.modeling import layers as nn_blocks
class DarkConvTest(tf.test.TestCase, parameterized.TestCase):
  """Shape and gradient tests for the DarkConv layer."""

  @parameterized.named_parameters(
      ("valid", (3, 3), "valid", (1, 1)), ("same", (3, 3), "same", (1, 1)),
      ("downsample", (3, 3), "same", (2, 2)), ("test", (1, 1), "valid", (1, 1)))
  def test_pass_through(self, kernel_size, padding, strides):
    """Verifies the output spatial shape for each padding/stride combo."""
    pad_const = 1 if padding == "same" else 0
    inputs = ks.Input(shape=(224, 224, 3))
    layer = nn_blocks.DarkConv(filters=64,
                               kernel_size=kernel_size,
                               padding=padding,
                               strides=strides,
                               trainable=False)
    outputs = layer(inputs)
    print(outputs.shape.as_list())
    # Standard convolution output-size formula per spatial dimension.
    expected = [None]
    for dim in range(2):
      expected.append(
          int((224 - kernel_size[dim] + (2 * pad_const)) / strides[dim] + 1))
    expected.append(64)
    print(expected)
    self.assertAllEqual(outputs.shape.as_list(), expected)

  @parameterized.named_parameters(("filters", 3))
  def test_gradient_pass_though(self, filters):
    """Checks that gradients flow through every trainable variable."""
    mse = ks.losses.MeanSquaredError()
    sgd = ks.optimizers.SGD()
    with tf.device("/CPU:0"):
      layer = nn_blocks.DarkConv(filters, kernel_size=(3, 3), padding="same")
      init = tf.random_normal_initializer()
      x = tf.Variable(
          initial_value=init(shape=(1, 224, 224, 3), dtype=tf.float32))
      y = tf.Variable(
          initial_value=init(shape=(1, 224, 224, filters), dtype=tf.float32))
      with tf.GradientTape() as tape:
        y_hat = layer(x)
        loss_value = mse(y_hat, y)
      grads = tape.gradient(loss_value, layer.trainable_variables)
      sgd.apply_gradients(zip(grads, layer.trainable_variables))
      self.assertNotIn(None, grads)
# Standard test entry point: run this module's tests directly.
if __name__ == "__main__":
  tf.test.main()
import tensorflow as tf
import tensorflow.keras as ks
import numpy as np
from absl.testing import parameterized
from official.vision.beta.projects.yolo.modeling import layers as nn_blocks
class DarkResidualTest(tf.test.TestCase, parameterized.TestCase):
  """Shape and gradient tests for the DarkResidual block."""

  @parameterized.named_parameters(("same", 224, 224, 64, False),
                                  ("downsample", 223, 223, 32, True),
                                  ("oddball", 223, 223, 32, False))
  def test_pass_through(self, width, height, filters, downsample):
    """Verifies output shape; spatial dims halve when downsampling."""
    scale = 2 if downsample else 1
    inputs = ks.Input(shape=(width, height, filters))
    layer = nn_blocks.DarkResidual(filters=filters, downsample=downsample)
    outputs = layer(inputs)
    print(outputs)
    print(outputs.shape.as_list())
    expected = [None, np.ceil(width / scale), np.ceil(height / scale), filters]
    self.assertAllEqual(outputs.shape.as_list(), expected)

  @parameterized.named_parameters(("same", 64, 224, 224, False),
                                  ("downsample", 32, 223, 223, True),
                                  ("oddball", 32, 223, 223, False))
  def test_gradient_pass_though(self, filters, width, height, downsample):
    """Checks that gradients flow through all trainable variables."""
    mse = ks.losses.MeanSquaredError()
    sgd = ks.optimizers.SGD()
    layer = nn_blocks.DarkResidual(filters, downsample=downsample)
    scale = 2 if downsample else 1
    init = tf.random_normal_initializer()
    x = tf.Variable(
        initial_value=init(shape=(1, width, height, filters),
                           dtype=tf.float32))
    y = tf.Variable(
        initial_value=init(shape=(1, int(np.ceil(width / scale)),
                                  int(np.ceil(height / scale)), filters),
                           dtype=tf.float32))
    with tf.GradientTape() as tape:
      y_hat = layer(x)
      loss_value = mse(y_hat, y)
    grads = tape.gradient(loss_value, layer.trainable_variables)
    sgd.apply_gradients(zip(grads, layer.trainable_variables))
    self.assertNotIn(None, grads)
# Standard test entry point: run this module's tests directly.
if __name__ == "__main__":
  tf.test.main()
import tensorflow as tf
import tensorflow.keras as ks
import numpy as np
from absl.testing import parameterized
from official.vision.beta.projects.yolo.modeling import layers as nn_blocks
class DarkTinyTest(tf.test.TestCase, parameterized.TestCase):
  """Shape and gradient tests for the DarkTiny block."""

  @parameterized.named_parameters(("middle", 224, 224, 64, 2),
                                  ("last", 224, 224, 1024, 1))
  def test_pass_through(self, width, height, filters, strides):
    """Verifies output shape; spatial dims divide evenly by strides."""
    inputs = ks.Input(shape=(width, height, filters))
    layer = nn_blocks.DarkTiny(filters=filters, strides=strides)
    outputs = layer(inputs)
    # Guard the test parameters themselves: dims must divide evenly.
    self.assertEqual(width % strides, 0, msg="width % strides != 0")
    self.assertEqual(height % strides, 0, msg="height % strides != 0")
    expected = [None, width // strides, height // strides, filters]
    self.assertAllEqual(outputs.shape.as_list(), expected)

  @parameterized.named_parameters(("middle", 224, 224, 64, 2),
                                  ("last", 224, 224, 1024, 1))
  def test_gradient_pass_though(self, width, height, filters, strides):
    """Checks that gradients flow through all trainable variables."""
    mse = ks.losses.MeanSquaredError()
    sgd = ks.optimizers.SGD()
    layer = nn_blocks.DarkTiny(filters=filters, strides=strides)
    init = tf.random_normal_initializer()
    x = tf.Variable(
        initial_value=init(shape=(1, width, height, filters),
                           dtype=tf.float32))
    y = tf.Variable(
        initial_value=init(shape=(1, width // strides, height // strides,
                                  filters),
                           dtype=tf.float32))
    with tf.GradientTape() as tape:
      y_hat = layer(x)
      loss_value = mse(y_hat, y)
    grads = tape.gradient(loss_value, layer.trainable_variables)
    sgd.apply_gradients(zip(grads, layer.trainable_variables))
    self.assertNotIn(None, grads)
# Standard test entry point: run this module's tests directly.
if __name__ == "__main__":
  tf.test.main()
import tensorflow as tf
import tensorflow.keras as ks
import numpy as np
from absl.testing import parameterized
from official.vision.beta.projects.yolo.modeling.layers import nn_blocks
class CSPConnect(tf.test.TestCase, parameterized.TestCase):
  """Shape and gradient tests for CSPDownSample -> CSPConnect."""

  @parameterized.named_parameters(("same", 224, 224, 64, 1),
                                  ("downsample", 224, 224, 64, 2))
  def test_pass_through(self, width, height, filters, mod):
    """Verifies the merged output shape of the CSP pair."""
    x = ks.Input(shape=(width, height, filters))
    test_layer = nn_blocks.CSPDownSample(filters=filters, filter_reduce=mod)
    test_layer2 = nn_blocks.CSPConnect(filters=filters, filter_reduce=mod)
    outx, px = test_layer(x)
    outx = test_layer2([outx, px])
    print(outx)
    print(outx.shape.as_list())
    self.assertAllEqual(
        outx.shape.as_list(),
        [None, np.ceil(width // 2),
         np.ceil(height // 2), (filters)])

  @parameterized.named_parameters(("same", 224, 224, 64, 1),
                                  ("downsample", 224, 224, 128, 2))
  def test_gradient_pass_though(self, width, height, filters, mod):
    """Checks that gradients flow through all trainable variables.

    BUG FIX: the signature previously was (filters, width, height, mod),
    mismatching the named_parameters tuples above, which are ordered
    (width, height, filters, mod) — so `filters` silently received 224.
    """
    loss = ks.losses.MeanSquaredError()
    optimizer = ks.optimizers.SGD()
    test_layer = nn_blocks.CSPDownSample(filters, filter_reduce=mod)
    path_layer = nn_blocks.CSPConnect(filters, filter_reduce=mod)
    init = tf.random_normal_initializer()
    x = tf.Variable(
        initial_value=init(shape=(1, width, height, filters),
                           dtype=tf.float32))
    y = tf.Variable(
        initial_value=init(shape=(1, int(np.ceil(width // 2)),
                                  int(np.ceil(height // 2)), filters),
                           dtype=tf.float32))
    with tf.GradientTape() as tape:
      x_hat, x_prev = test_layer(x)
      x_hat = path_layer([x_hat, x_prev])
      grad_loss = loss(x_hat, y)
    grad = tape.gradient(grad_loss, test_layer.trainable_variables)
    optimizer.apply_gradients(zip(grad, test_layer.trainable_variables))
    self.assertNotIn(None, grad)
class CSPDownSample(tf.test.TestCase, parameterized.TestCase):
  """Shape and gradient tests for the CSPDownSample layer."""

  @parameterized.named_parameters(("same", 224, 224, 64, 1),
                                  ("downsample", 224, 224, 64, 2))
  def test_pass_through(self, width, height, filters, mod):
    """Verifies the partial-path output shape (channels reduced by mod)."""
    x = ks.Input(shape=(width, height, filters))
    test_layer = nn_blocks.CSPDownSample(filters=filters, filter_reduce=mod)
    outx, px = test_layer(x)
    print(outx)
    print(outx.shape.as_list())
    self.assertAllEqual(
        outx.shape.as_list(),
        [None, np.ceil(width // 2),
         np.ceil(height // 2), (filters / mod)])

  @parameterized.named_parameters(("same", 224, 224, 64, 1),
                                  ("downsample", 224, 224, 128, 2))
  def test_gradient_pass_though(self, width, height, filters, mod):
    """Checks that gradients flow through all trainable variables.

    BUG FIX: the signature previously was (filters, width, height, mod),
    mismatching the named_parameters tuples above, which are ordered
    (width, height, filters, mod) — so `filters` silently received 224.
    """
    loss = ks.losses.MeanSquaredError()
    optimizer = ks.optimizers.SGD()
    test_layer = nn_blocks.CSPDownSample(filters, filter_reduce=mod)
    path_layer = nn_blocks.CSPConnect(filters, filter_reduce=mod)
    init = tf.random_normal_initializer()
    x = tf.Variable(
        initial_value=init(shape=(1, width, height, filters),
                           dtype=tf.float32))
    y = tf.Variable(
        initial_value=init(shape=(1, int(np.ceil(width // 2)),
                                  int(np.ceil(height // 2)), filters),
                           dtype=tf.float32))
    with tf.GradientTape() as tape:
      x_hat, x_prev = test_layer(x)
      x_hat = path_layer([x_hat, x_prev])
      grad_loss = loss(x_hat, y)
    grad = tape.gradient(grad_loss, test_layer.trainable_variables)
    optimizer.apply_gradients(zip(grad, test_layer.trainable_variables))
    self.assertNotIn(None, grad)
class DarkConvTest(tf.test.TestCase, parameterized.TestCase):
  """Shape and gradient tests for the DarkConv layer."""

  @parameterized.named_parameters(
      ("valid", (3, 3), "valid", (1, 1)), ("same", (3, 3), "same", (1, 1)),
      ("downsample", (3, 3), "same", (2, 2)), ("test", (1, 1), "valid", (1, 1)))
  def test_pass_through(self, kernel_size, padding, strides):
    """Verifies the output spatial shape for each padding/stride combo."""
    pad_const = 1 if padding == "same" else 0
    inputs = ks.Input(shape=(224, 224, 3))
    layer = nn_blocks.DarkConv(filters=64,
                               kernel_size=kernel_size,
                               padding=padding,
                               strides=strides,
                               trainable=False)
    outputs = layer(inputs)
    print(outputs.shape.as_list())
    # Standard convolution output-size formula per spatial dimension.
    expected = [None]
    for dim in range(2):
      expected.append(
          int((224 - kernel_size[dim] + (2 * pad_const)) / strides[dim] + 1))
    expected.append(64)
    print(expected)
    self.assertAllEqual(outputs.shape.as_list(), expected)

  @parameterized.named_parameters(("filters", 3))
  def test_gradient_pass_though(self, filters):
    """Checks that gradients flow through every trainable variable."""
    mse = ks.losses.MeanSquaredError()
    sgd = ks.optimizers.SGD()
    with tf.device("/CPU:0"):
      layer = nn_blocks.DarkConv(filters, kernel_size=(3, 3), padding="same")
      init = tf.random_normal_initializer()
      x = tf.Variable(
          initial_value=init(shape=(1, 224, 224, 3), dtype=tf.float32))
      y = tf.Variable(
          initial_value=init(shape=(1, 224, 224, filters), dtype=tf.float32))
      with tf.GradientTape() as tape:
        y_hat = layer(x)
        loss_value = mse(y_hat, y)
      grads = tape.gradient(loss_value, layer.trainable_variables)
      sgd.apply_gradients(zip(grads, layer.trainable_variables))
      self.assertNotIn(None, grads)
class DarkResidualTest(tf.test.TestCase, parameterized.TestCase):
  """Shape and gradient tests for the DarkResidual block."""

  @parameterized.named_parameters(("same", 224, 224, 64, False),
                                  ("downsample", 223, 223, 32, True),
                                  ("oddball", 223, 223, 32, False))
  def test_pass_through(self, width, height, filters, downsample):
    """Verifies output shape; spatial dims halve when downsampling."""
    scale = 2 if downsample else 1
    inputs = ks.Input(shape=(width, height, filters))
    layer = nn_blocks.DarkResidual(filters=filters, downsample=downsample)
    outputs = layer(inputs)
    print(outputs)
    print(outputs.shape.as_list())
    expected = [None, np.ceil(width / scale), np.ceil(height / scale), filters]
    self.assertAllEqual(outputs.shape.as_list(), expected)

  @parameterized.named_parameters(("same", 64, 224, 224, False),
                                  ("downsample", 32, 223, 223, True),
                                  ("oddball", 32, 223, 223, False))
  def test_gradient_pass_though(self, filters, width, height, downsample):
    """Checks that gradients flow through all trainable variables."""
    mse = ks.losses.MeanSquaredError()
    sgd = ks.optimizers.SGD()
    layer = nn_blocks.DarkResidual(filters, downsample=downsample)
    scale = 2 if downsample else 1
    init = tf.random_normal_initializer()
    x = tf.Variable(
        initial_value=init(shape=(1, width, height, filters),
                           dtype=tf.float32))
    y = tf.Variable(
        initial_value=init(shape=(1, int(np.ceil(width / scale)),
                                  int(np.ceil(height / scale)), filters),
                           dtype=tf.float32))
    with tf.GradientTape() as tape:
      y_hat = layer(x)
      loss_value = mse(y_hat, y)
    grads = tape.gradient(loss_value, layer.trainable_variables)
    sgd.apply_gradients(zip(grads, layer.trainable_variables))
    self.assertNotIn(None, grads)
class DarkTinyTest(tf.test.TestCase, parameterized.TestCase):
  """Shape and gradient tests for the DarkTiny block."""

  @parameterized.named_parameters(("middle", 224, 224, 64, 2),
                                  ("last", 224, 224, 1024, 1))
  def test_pass_through(self, width, height, filters, strides):
    """Verifies output shape; spatial dims divide evenly by strides."""
    inputs = ks.Input(shape=(width, height, filters))
    layer = nn_blocks.DarkTiny(filters=filters, strides=strides)
    outputs = layer(inputs)
    # Guard the test parameters themselves: dims must divide evenly.
    self.assertEqual(width % strides, 0, msg="width % strides != 0")
    self.assertEqual(height % strides, 0, msg="height % strides != 0")
    expected = [None, width // strides, height // strides, filters]
    self.assertAllEqual(outputs.shape.as_list(), expected)

  @parameterized.named_parameters(("middle", 224, 224, 64, 2),
                                  ("last", 224, 224, 1024, 1))
  def test_gradient_pass_though(self, width, height, filters, strides):
    """Checks that gradients flow through all trainable variables."""
    mse = ks.losses.MeanSquaredError()
    sgd = ks.optimizers.SGD()
    layer = nn_blocks.DarkTiny(filters=filters, strides=strides)
    init = tf.random_normal_initializer()
    x = tf.Variable(
        initial_value=init(shape=(1, width, height, filters),
                           dtype=tf.float32))
    y = tf.Variable(
        initial_value=init(shape=(1, width // strides, height // strides,
                                  filters),
                           dtype=tf.float32))
    with tf.GradientTape() as tape:
      y_hat = layer(x)
      loss_value = mse(y_hat, y)
    grads = tape.gradient(loss_value, layer.trainable_variables)
    sgd.apply_gradients(zip(grads, layer.trainable_variables))
    self.assertNotIn(None, grads)
# Standard test entry point: run this module's tests directly.
if __name__ == "__main__":
  tf.test.main()
......@@ -23,30 +23,12 @@ from official.vision.beta.projects.yolo.configs import darknet_classification as
from official.vision.beta.projects.yolo.dataloaders import classification_input as cli
from official.vision.beta.dataloaders import classification_input
from official.vision.beta.modeling import factory
from official.vision.beta.tasks import image_classification
@task_factory.register_task_cls(exp_cfg.ImageClassificationTask)
class ImageClassificationTask(base_task.Task):
class ImageClassificationTask(image_classification.ImageClassificationTask):
"""A task for image classification."""
def build_model(self):
  """Builds classification model.

  Returns:
    A keras classification model built from `self.task_config.model`.
  """
  input_specs = tf.keras.layers.InputSpec(
      shape=[None] + self.task_config.model.input_size)
  l2_weight_decay = self.task_config.losses.l2_weight_decay
  # Divide weight decay by 2.0 to match the implementation of tf.nn.l2_loss.
  # (https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/l2)
  # (https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss)
  l2_regularizer = (tf.keras.regularizers.l2(
      l2_weight_decay / 2.0) if l2_weight_decay else None)
  model = factory.build_classification_model(
      input_specs=input_specs,
      model_config=self.task_config.model,
      l2_regularizer=l2_regularizer)
  return model
def build_inputs(self, params, input_context=None):
"""Builds classification input."""
......@@ -70,142 +52,6 @@ class ImageClassificationTask(base_task.Task):
parser_fn=parser.parse_fn(params.is_training))
dataset = reader.read(input_context=input_context)
return dataset
def build_losses(self, labels, model_outputs, aux_losses=None):
  """Sparse categorical cross entropy loss.

  Args:
    labels: labels.
    model_outputs: Output logits of the classifier.
    aux_losses: auxiliary loss tensors, i.e. `losses` in keras.Model.

  Returns:
    The total loss tensor.
  """
  losses_config = self.task_config.losses
  if losses_config.one_hot:
    # One-hot labels: dense cross entropy with optional label smoothing.
    total_loss = tf.keras.losses.categorical_crossentropy(
        labels,
        model_outputs,
        from_logits=True,
        label_smoothing=losses_config.label_smoothing)
  else:
    # Integer labels: sparse variant (no label smoothing support).
    total_loss = tf.keras.losses.sparse_categorical_crossentropy(
        labels, model_outputs, from_logits=True)
  total_loss = tf_utils.safe_mean(total_loss)
  if aux_losses:
    # Fold in regularization/auxiliary losses collected by the model.
    total_loss += tf.add_n(aux_losses)
  return total_loss
def build_metrics(self, training=True):
  """Gets streaming metrics for training/validation.

  Returns top-1 and top-5 accuracy; the dense ("Categorical") variants are
  used when labels are one-hot, the "Sparse" variants otherwise.
  """
  if self.task_config.losses.one_hot:
    metrics = [
        tf.keras.metrics.CategoricalAccuracy(name='accuracy'),
        tf.keras.metrics.TopKCategoricalAccuracy(k=5, name='top_5_accuracy')]
  else:
    metrics = [
        tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'),
        tf.keras.metrics.SparseTopKCategoricalAccuracy(
            k=5, name='top_5_accuracy')]
  return metrics
def train_step(self, inputs, model, optimizer, metrics=None):
  """Does forward and backward.

  Args:
    inputs: a dictionary of input tensors.
    model: the model, forward pass definition.
    optimizer: the optimizer for this training step.
    metrics: a nested structure of metrics objects.

  Returns:
    A dictionary of logs.
  """
  features, labels = inputs
  if self.task_config.losses.one_hot:
    labels = tf.one_hot(labels, self.task_config.model.num_classes)

  num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
  with tf.GradientTape() as tape:
    outputs = model(features, training=True)
    # Casting output layer as float32 is necessary when mixed_precision is
    # mixed_float16 or mixed_bfloat16 to ensure output is casted as float32.
    outputs = tf.nest.map_structure(
        lambda x: tf.cast(x, tf.float32), outputs)
    # Computes per-replica loss.
    loss = self.build_losses(
        model_outputs=outputs, labels=labels, aux_losses=model.losses)
    # Scales loss as the default gradients allreduce performs sum inside the
    # optimizer.
    scaled_loss = loss / num_replicas
    # For mixed_precision policy, when LossScaleOptimizer is used, loss is
    # scaled for numerical stability.
    if isinstance(
        optimizer, tf.keras.mixed_precision.experimental.LossScaleOptimizer):
      scaled_loss = optimizer.get_scaled_loss(scaled_loss)
  # NOTE(review): debug-style per-batch progress print; consider replacing
  # with structured logging.
  tf.print("batch loss: ", loss, end = "\r")
  tvars = model.trainable_variables
  grads = tape.gradient(scaled_loss, tvars)
  # Scales back gradient before apply_gradients when LossScaleOptimizer is
  # used.
  if isinstance(
      optimizer, tf.keras.mixed_precision.experimental.LossScaleOptimizer):
    grads = optimizer.get_unscaled_gradients(grads)
  # Apply gradient clipping.
  if self.task_config.gradient_clip_norm > 0:
    grads, _ = tf.clip_by_global_norm(
        grads, self.task_config.gradient_clip_norm)
  optimizer.apply_gradients(list(zip(grads, tvars)))

  logs = {self.loss: loss}
  if metrics:
    self.process_metrics(metrics, labels, outputs)
    logs.update({m.name: m.result() for m in metrics})
  elif model.compiled_metrics:
    self.process_compiled_metrics(model.compiled_metrics, labels, outputs)
    logs.update({m.name: m.result() for m in model.metrics})
  return logs
def validation_step(self, inputs, model, metrics=None):
  """Validatation step.

  Args:
    inputs: a dictionary of input tensors.
    model: the keras.Model.
    metrics: a nested structure of metrics objects.

  Returns:
    A dictionary of logs.
  """
  features, labels = inputs
  if self.task_config.losses.one_hot:
    labels = tf.one_hot(labels, self.task_config.model.num_classes)

  outputs = self.inference_step(features, model)
  # Cast back to float32 so losses/metrics are computed in full precision
  # under mixed-precision policies.
  outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), outputs)
  loss = self.build_losses(model_outputs=outputs, labels=labels,
                           aux_losses=model.losses)

  logs = {self.loss: loss}
  if metrics:
    self.process_metrics(metrics, labels, outputs)
    logs.update({m.name: m.result() for m in metrics})
  elif model.compiled_metrics:
    self.process_compiled_metrics(model.compiled_metrics, labels, outputs)
    logs.update({m.name: m.result() for m in model.metrics})
  return logs
def inference_step(self, inputs, model):
  """Performs the forward step (no gradient, training=False)."""
  return model(inputs, training=False)
......@@ -33,15 +33,10 @@ from official.modeling import performance
FLAGS = flags.FLAGS
'''
python3 -m official.vision.beta.projects.yolo.train --mode=train_and_eval --experiment=darknet_classification --model_dir=training_dir --config_file=official/vision/beta/projects/yolo/configs/experiments/darknet53.yaml
python3 -m official.vision.beta.projects.yolo.train --mode=train_and_eval --experiment=darknet_classification --model_dir=training_dir --config_file=official/vision/beta/projects/yolo/configs/experiments/darknet53_tfds.yaml
'''
def import_overrides():
  """Prints the loaded backbone-config module to verify overrides are registered."""
  backbones_module = sys.modules["official.vision.beta.configs.backbones"]
  print(backbones_module)
def main(_):
import_overrides()
gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params)
print(FLAGS.experiment)
params = train_utils.parse_configuration(FLAGS)
......
......@@ -6,10 +6,10 @@ runtime:
distribution_strategy: mirrored
enable_xla: false
gpu_thread_mode: null
loss_scale: dynamic
mixed_precision_dtype: float16
loss_scale: null
mixed_precision_dtype: float32
num_cores_per_replica: 1
num_gpus: 2
num_gpus: 0
num_packs: 1
per_gpu_thread_count: 0
run_eagerly: false
......@@ -46,19 +46,19 @@ task:
drop_remainder: true
dtype: float16
enable_tf_data_service: false
global_batch_size: 16
input_path: ''
global_batch_size: 128
input_path: imagenet-2012-tfrecord/train*
is_training: true
sharding: true
shuffle_buffer_size: 100
shuffle_buffer_size: 10000
tf_data_service_address: null
tf_data_service_job_name: null
tfds_as_supervised: false
tfds_data_dir: ~/tensorflow_datasets/
tfds_download: true
tfds_name: imagenet2012
tfds_data_dir: ''
tfds_download: false
tfds_name: ''
tfds_skip_decoding_feature: ''
tfds_split: train
tfds_split: ''
validation_data:
block_length: 1
cache: false
......@@ -67,19 +67,19 @@ task:
drop_remainder: false
dtype: float16
enable_tf_data_service: false
global_batch_size: 16
input_path: ''
global_batch_size: 128
input_path: imagenet-2012-tfrecord/valid*
is_training: true
sharding: true
shuffle_buffer_size: 100
shuffle_buffer_size: 10000
tf_data_service_address: null
tf_data_service_job_name: null
tfds_as_supervised: false
tfds_data_dir: ~/tensorflow_datasets/
tfds_download: true
tfds_name: imagenet2012
tfds_data_dir: ''
tfds_download: false
tfds_name: ''
tfds_skip_decoding_feature: ''
tfds_split: validation
tfds_split: ''
trainer:
allow_tpu_summary: false
best_checkpoint_eval_metric: ''
......@@ -94,9 +94,9 @@ trainer:
learning_rate:
polynomial:
cycle: false
decay_steps: 6392000
end_learning_rate: 1.25e-05
initial_learning_rate: 0.0125
decay_steps: 799000
end_learning_rate: 0.0001
initial_learning_rate: 0.1
name: PolynomialDecay
power: 4.0
type: polynomial
......@@ -113,12 +113,12 @@ trainer:
linear:
name: linear
warmup_learning_rate: 0
warmup_steps: 8000
warmup_steps: 1000
type: linear
steps_per_loop: 10000
summary_interval: 10000
train_steps: 6400000
train_steps: 800000
train_tf_function: true
train_tf_while_loop: true
validation_interval: 10000
validation_steps: 3200
validation_steps: 400
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment