Commit 15a302ed authored by vishnubanna's avatar vishnubanna
Browse files

fixed activation functions

parent 3dc083ab
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
from official.modeling.activations.gelu import gelu from official.modeling.activations.gelu import gelu
from official.modeling.activations.relu import relu6 from official.modeling.activations.relu import relu6
from official.modeling.activations.sigmoid import hard_sigmoid from official.modeling.activations.sigmoid import hard_sigmoid
from official.modeling.activations.swish import hard_swish from official.modeling.activations.swish import hard_swish
from official.modeling.activations.swish import identity from official.modeling.activations.swish import identity
from official.modeling.activations.swish import simple_swish from official.modeling.activations.swish import simple_swish
...@@ -22,7 +22,6 @@ from official.vision import beta ...@@ -22,7 +22,6 @@ from official.vision import beta
from official.vision.beta.projects import yolo from official.vision.beta.projects import yolo
from official.vision.beta.projects.yolo.modeling.backbones import darknet from official.vision.beta.projects.yolo.modeling.backbones import darknet
from official.vision.beta.projects.yolo.configs import darknet_classification from official.vision.beta.projects.yolo.configs import darknet_classification
from official.vision.beta.projects.yolo.configs.darknet_classification import image_classification from official.vision.beta.projects.yolo.configs.darknet_classification import image_classification
from official.vision.beta.projects.yolo.configs.darknet_classification import ImageClassificationTask from official.vision.beta.projects.yolo.configs.darknet_classification import ImageClassificationTask
......
...@@ -9,6 +9,8 @@ task: ...@@ -9,6 +9,8 @@ task:
type: 'darknet' type: 'darknet'
darknet: darknet:
model_id: 'cspdarknet53' model_id: 'cspdarknet53'
norm_activation:
activation: 'mish'
losses: losses:
l2_weight_decay: 0.0005 l2_weight_decay: 0.0005
one_hot: True one_hot: True
......
...@@ -11,6 +11,8 @@ task: ...@@ -11,6 +11,8 @@ task:
type: 'darknet' type: 'darknet'
darknet: darknet:
model_id: 'cspdarknet53' model_id: 'cspdarknet53'
norm_activation:
activation: 'mish'
losses: losses:
l2_weight_decay: 0.0005 l2_weight_decay: 0.0005
one_hot: True one_hot: True
......
# Custom activation functions for YOLO and Darknet
\ No newline at end of file
import tensorflow as tf
import tensorflow.keras as ks
@tf.keras.utils.register_keras_serializable(package='Text')
def mish(x):
  """Compute the Mish activation: ``x * tanh(softplus(x))``.

  Mish is a smooth, self-regularized, non-monotonic activation function
  that behaves like a smoother alternative to ReLU.
  Original paper: https://arxiv.org/abs/1908.08681

  Args:
    x: float Tensor to perform activation.

  Returns:
    `x` with the MISH activation applied.
  """
  # softplus(x) = ln(1 + e^x); keeps the gate term positive and smooth.
  gate = tf.math.tanh(ks.activations.softplus(x))
  return tf.math.multiply(x, gate)
\ No newline at end of file
...@@ -3,7 +3,7 @@ from functools import partial ...@@ -3,7 +3,7 @@ from functools import partial
import tensorflow as tf import tensorflow as tf
import tensorflow.keras as ks import tensorflow.keras as ks
import tensorflow.keras.backend as K import tensorflow.keras.backend as K
from official.vision.beta.projects.yolo.modeling.activations.mish import mish from official.modeling import tf_utils
...@@ -107,10 +107,7 @@ class DarkConv(ks.layers.Layer): ...@@ -107,10 +107,7 @@ class DarkConv(ks.layers.Layer):
self._bn_axis = 1 self._bn_axis = 1
# activation params # activation params
if activation is None: self._activation = activation
self._activation = 'linear'
else:
self._activation = activation
self._leaky_alpha = leaky_alpha self._leaky_alpha = leaky_alpha
super(DarkConv, self).__init__(**kwargs) super(DarkConv, self).__init__(**kwargs)
...@@ -153,10 +150,11 @@ class DarkConv(ks.layers.Layer): ...@@ -153,10 +150,11 @@ class DarkConv(ks.layers.Layer):
if self._activation == 'leaky': if self._activation == 'leaky':
alpha = {"alpha": self._leaky_alpha} alpha = {"alpha": self._leaky_alpha}
self._activation_fn = partial(tf.nn.leaky_relu, **alpha) self._activation_fn = partial(tf.nn.leaky_relu, **alpha)
elif self._activation == 'mish': elif self._activation == "mish":
self._activation_fn = mish self._activation_fn = lambda x: x * tf.math.tanh(tf.math.softplus(x))
else: else:
self._activation_fn = ks.layers.Activation(activation=self._activation) self._activation_fn = tf_utils.get_activation(self._activation)
tf.print(self._activation_fn)
def call(self, x): def call(self, x):
if self._groups != 1: if self._groups != 1:
...@@ -320,8 +318,14 @@ class DarkResidual(ks.layers.Layer): ...@@ -320,8 +318,14 @@ class DarkResidual(ks.layers.Layer):
leaky_alpha=self._leaky_alpha) leaky_alpha=self._leaky_alpha)
self._shortcut = ks.layers.Add() self._shortcut = ks.layers.Add()
self._activation_fn = ks.layers.Activation(activation=self._sc_activation) # self._activation_fn = ks.layers.Activation(activation=self._sc_activation)
if self._sc_activation == 'leaky':
alpha = {"alpha": self._leaky_alpha}
self._activation_fn = partial(tf.nn.leaky_relu, **alpha)
elif self._sc_activation == "mish":
self._activation_fn = lambda x: x * tf.math.tanh(tf.math.softplus(x))
else:
self._activation_fn = tf_utils.get_activation(self._sc_activation)
super().build(input_shape) super().build(input_shape)
def call(self, inputs): def call(self, inputs):
......
...@@ -6,10 +6,10 @@ runtime: ...@@ -6,10 +6,10 @@ runtime:
distribution_strategy: mirrored distribution_strategy: mirrored
enable_xla: false enable_xla: false
gpu_thread_mode: null gpu_thread_mode: null
loss_scale: null loss_scale: dynamic
mixed_precision_dtype: float32 mixed_precision_dtype: float16
num_cores_per_replica: 1 num_cores_per_replica: 1
num_gpus: 0 num_gpus: 2
num_packs: 1 num_packs: 1
per_gpu_thread_count: 0 per_gpu_thread_count: 0
run_eagerly: false run_eagerly: false
...@@ -28,12 +28,12 @@ task: ...@@ -28,12 +28,12 @@ task:
add_head_batch_norm: false add_head_batch_norm: false
backbone: backbone:
darknet: darknet:
model_id: darknet53 model_id: cspdarknet53
type: darknet type: darknet
dropout_rate: 0.0 dropout_rate: 0.0
input_size: [256, 256, 3] input_size: [256, 256, 3]
norm_activation: norm_activation:
activation: relu activation: mish
norm_epsilon: 0.001 norm_epsilon: 0.001
norm_momentum: 0.99 norm_momentum: 0.99
use_sync_bn: false use_sync_bn: false
...@@ -46,19 +46,19 @@ task: ...@@ -46,19 +46,19 @@ task:
drop_remainder: true drop_remainder: true
dtype: float16 dtype: float16
enable_tf_data_service: false enable_tf_data_service: false
global_batch_size: 128 global_batch_size: 16
input_path: imagenet-2012-tfrecord/train* input_path: ''
is_training: true is_training: true
sharding: true sharding: true
shuffle_buffer_size: 10000 shuffle_buffer_size: 100
tf_data_service_address: null tf_data_service_address: null
tf_data_service_job_name: null tf_data_service_job_name: null
tfds_as_supervised: false tfds_as_supervised: false
tfds_data_dir: '' tfds_data_dir: ~/tensorflow_datasets/
tfds_download: false tfds_download: true
tfds_name: '' tfds_name: imagenet2012
tfds_skip_decoding_feature: '' tfds_skip_decoding_feature: ''
tfds_split: '' tfds_split: train
validation_data: validation_data:
block_length: 1 block_length: 1
cache: false cache: false
...@@ -67,19 +67,19 @@ task: ...@@ -67,19 +67,19 @@ task:
drop_remainder: false drop_remainder: false
dtype: float16 dtype: float16
enable_tf_data_service: false enable_tf_data_service: false
global_batch_size: 128 global_batch_size: 16
input_path: imagenet-2012-tfrecord/valid* input_path: ''
is_training: true is_training: true
sharding: true sharding: true
shuffle_buffer_size: 10000 shuffle_buffer_size: 100
tf_data_service_address: null tf_data_service_address: null
tf_data_service_job_name: null tf_data_service_job_name: null
tfds_as_supervised: false tfds_as_supervised: false
tfds_data_dir: '' tfds_data_dir: ~/tensorflow_datasets/
tfds_download: false tfds_download: true
tfds_name: '' tfds_name: imagenet2012
tfds_skip_decoding_feature: '' tfds_skip_decoding_feature: ''
tfds_split: '' tfds_split: validation
trainer: trainer:
allow_tpu_summary: false allow_tpu_summary: false
best_checkpoint_eval_metric: '' best_checkpoint_eval_metric: ''
...@@ -94,9 +94,9 @@ trainer: ...@@ -94,9 +94,9 @@ trainer:
learning_rate: learning_rate:
polynomial: polynomial:
cycle: false cycle: false
decay_steps: 799000 decay_steps: 9592000
end_learning_rate: 0.0001 end_learning_rate: 1.25e-05
initial_learning_rate: 0.1 initial_learning_rate: 0.0125
name: PolynomialDecay name: PolynomialDecay
power: 4.0 power: 4.0
type: polynomial type: polynomial
...@@ -113,12 +113,12 @@ trainer: ...@@ -113,12 +113,12 @@ trainer:
linear: linear:
name: linear name: linear
warmup_learning_rate: 0 warmup_learning_rate: 0
warmup_steps: 1000 warmup_steps: 8000
type: linear type: linear
steps_per_loop: 10000 steps_per_loop: 10000
summary_interval: 10000 summary_interval: 10000
train_steps: 800000 train_steps: 9600000
train_tf_function: true train_tf_function: true
train_tf_while_loop: true train_tf_while_loop: true
validation_interval: 10000 validation_interval: 10000
validation_steps: 400 validation_steps: 3200
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment