Commit 3dc083ab authored by anivegesana

Turn DarkTiny from building block into stack

parent 5e85759a
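
DarkTiny was a thin wrapper that ran MaxPool2D followed by DarkConv. This commit deletes the wrapper from nn_blocks and re-expresses it as a "tiny" stack type that the Darknet model builds inline. In effect (a minimal before/after sketch; the DarkTiny call reflects the deleted block below, the replacement mirrors the new _tiny_stack):

# Before: standalone building block, deleted in this commit
x = nn_blocks.DarkTiny(filters=32, strides=2)(x)

# After: the same maxpool-then-conv downsampling, built inline by
# Darknet._tiny_stack when a config row's stack type is "tiny"
x = tf.keras.layers.MaxPool2D(pool_size=2, strides=2, padding="same")(x)
x = nn_blocks.DarkConv(filters=32, kernel_size=(3, 3), strides=(1, 1),
                       padding="same")(x)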
@@ -77,7 +77,6 @@ class layer_factory(object):
   """
   def __init__(self):
     self._layer_dict = {
-        "DarkTiny": (nn_blocks.DarkTiny, self.darktiny_config_todict),
         "DarkConv": (nn_blocks.DarkConv, self.darkconv_config_todict),
         "MaxPool": (tf.keras.layers.MaxPool2D, self.maxpool_config_todict)
     }
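
With the registry entry gone, "tiny" rows are no longer routed through the layer factory at all; they are handled by the stack dispatch in Darknet.build further down. For context, a hedged sketch of how a registry dict like this is consumed (the real lookup code is outside this diff, so the helper below is hypothetical):

# Hypothetical helper; the actual call sites live elsewhere in layer_factory.
def build_layer(layer_dict, cfg):
  layer_cls, todict = layer_dict[cfg.layer]  # e.g. nn_blocks.DarkConv
  return layer_cls(**todict(cfg))            # parser turns the row into kwargs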
@@ -165,12 +164,12 @@ DARKNETTINY = {
     "splits": {"backbone_split": 14},
     "backbone": [
         ["DarkConv", None, 1, False, 16, None, 3, 1, "same", "leaky", -1, 0, False],
-        ["DarkTiny", None, 1, True, 32, None, 3, 2, "same", "leaky", -1, 1, False],
-        ["DarkTiny", None, 1, True, 64, None, 3, 2, "same", "leaky", -1, 2, False],
-        ["DarkTiny", None, 1, False, 128, None, 3, 2, "same", "leaky", -1, 3, False],
-        ["DarkTiny", None, 1, False, 256, None, 3, 2, "same", "leaky", -1, 4, True],
-        ["DarkTiny", None, 1, False, 512, None, 3, 2, "same", "leaky", -1, 5, False],
-        ["DarkTiny", None, 1, False, 1024, None, 3, 1, "same", "leaky", -1, 5, True],
+        ["DarkTiny", "tiny", 1, True, 32, None, 3, 2, "same", "leaky", -1, 1, False],
+        ["DarkTiny", "tiny", 1, True, 64, None, 3, 2, "same", "leaky", -1, 2, False],
+        ["DarkTiny", "tiny", 1, False, 128, None, 3, 2, "same", "leaky", -1, 3, False],
+        ["DarkTiny", "tiny", 1, False, 256, None, 3, 2, "same", "leaky", -1, 4, True],
+        ["DarkTiny", "tiny", 1, False, 512, None, 3, 2, "same", "leaky", -1, 5, False],
+        ["DarkTiny", "tiny", 1, False, 1024, None, 3, 1, "same", "leaky", -1, 5, True],
     ]
 }
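
Each backbone row is positional, and the second field is the stack type that Darknet.build dispatches on; for the DarkTiny rows it changes from None to "tiny". A hedged reconstruction of the row layout: layer, stack, filters, strides, activation, route, output_name, and is_output all appear as config.* attributes later in this diff, while the remaining field names are guesses:

from collections import namedtuple

# Fields marked "assumed" are not confirmed anywhere in this diff.
LayerConfig = namedtuple("LayerConfig", [
    "layer", "stack",
    "repetitions", "bottleneck",   # assumed
    "filters",
    "pool_size", "kernel_size",    # assumed
    "strides",
    "padding",                     # assumed
    "activation", "route", "output_name", "is_output"])

cfg = LayerConfig("DarkTiny", "tiny", 1, True, 32, None, 3, 2, "same",
                  "leaky", -1, 1, False)
assert cfg.stack == "tiny" and cfg.filters == 32 and cfg.strides == 2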
@@ -267,10 +266,15 @@ class Darknet(ks.Model):
                                        name=f"{config.layer}_{i}")
         stack_outputs.append(x)
       elif config.stack == "csp_tiny":
-        x_pass, x = self._tiny_stack(stack_outputs[config.route],
+        x_pass, x = self._csp_tiny_stack(stack_outputs[config.route],
                                      config,
                                      name=f"{config.layer}_{i}")
         stack_outputs.append(x_pass)
+      elif config.stack == "tiny":
+        x = self._tiny_stack(stack_outputs[config.route],
+                             config,
+                             name=f"{config.layer}_{i}")
+        stack_outputs.append(x)
       if (config.is_output and
           self._min_size == None):
         endpoints[str(config.output_name)] = x
@@ -314,15 +318,32 @@ class Darknet(ks.Model):
     self._default_dict["name"] = None
     return output

-  def _tiny_stack(self, inputs, config, name):
+  def _csp_tiny_stack(self, inputs, config, name):
     self._default_dict["activation"] = self._get_activation(config.activation)
-    self._default_dict["name"] = f"{name}_tiny"
+    self._default_dict["name"] = f"{name}_csp_tiny"
     x, x_route = nn_blocks.CSPTiny(filters=config.filters,
                                    **self._default_dict)(inputs)
     self._default_dict["activation"] = self._activation
     self._default_dict["name"] = None
     return x, x_route

+  def _tiny_stack(self, inputs, config, name):
+    x = tf.keras.layers.MaxPool2D(pool_size=2,
+                                  strides=config.strides,
+                                  padding="same",
+                                  data_format=None,
+                                  name=f"{name}_tiny/pool")(inputs)
+    self._default_dict["activation"] = self._get_activation(config.activation)
+    self._default_dict["name"] = f"{name}_tiny/conv"
+    x = nn_blocks.DarkConv(filters=config.filters,
+                           kernel_size=(3, 3),
+                           strides=(1, 1),
+                           padding='same',
+                           **self._default_dict)(x)
+    self._default_dict["activation"] = self._activation
+    self._default_dict["name"] = None
+    return x
+
   def _residual_stack(self, inputs, config, name):
     self._default_dict["activation"] = self._get_activation(config.activation)
     self._default_dict["name"] = f"{name}_residual_down"
...
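
A standalone shape check of what _tiny_stack now computes (a sketch only: plain Conv2D stands in for nn_blocks.DarkConv, which is not importable here). The max pool does all of the downsampling and the 3x3 stride-1 convolution only changes the channel count, so with strides=1, as in the last DARKNETTINY row, the spatial size is preserved:

import tensorflow as tf

x = tf.keras.Input(shape=(224, 224, 16))
y = tf.keras.layers.MaxPool2D(pool_size=2, strides=2, padding="same")(x)
y = tf.keras.layers.Conv2D(32, 3, strides=1, padding="same")(y)  # DarkConv stand-in
print(y.shape)  # (None, 112, 112, 32)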
@@ -199,113 +199,6 @@ class DarkConv(ks.layers.Layer):
     return repr(self.get_config())

-@ks.utils.register_keras_serializable(package='yolo')
-class DarkTiny(ks.layers.Layer):
-  """
-  Automatic Maxpool Downsampling Convolution layer, created to make routing easier.
-
-  Args:
-    filters: integer for output depth, or the number of features to learn
-    use_bias: boolean to indicate whether to use bias in convolution layer
-    kernel_initializer: string to indicate which function to use to initialize weights
-    bias_initializer: string to indicate which function to use to initialize bias
-    kernel_regularizer: string to indicate which function to use to regularize weights
-    bias_regularizer: string to indicate which function to use to regularize bias
-    use_bn: boolean for whether to use batch normalization
-    use_sync_bn: boolean for whether to sync batch normalization statistics
-      of all batch norm layers to the models' global statistics (across all input batches)
-    group_id: integer for which group of features to pass through the csp tiny stack.
-    groups: integer for how many splits there should be in the convolution feature stack output
-    norm_moment: float for moment to use for batch normalization
-    norm_epsilon: float for batch normalization epsilon
-    activation: string or None for activation function to use in layer,
-      if None activation is replaced by linear
-    **kwargs: Keyword Arguments
-  """
-
-  def __init__(self,
-               filters=1,
-               use_bias=True,
-               strides=2,
-               kernel_initializer='glorot_uniform',
-               bias_initializer='zeros',
-               bias_regularizer=None,
-               kernel_regularizer=None,  # default find where is it is stated
-               use_bn=True,
-               use_sync_bn=False,
-               norm_momentum=0.99,
-               norm_epsilon=0.001,
-               activation='leaky',
-               **kwargs):
-    # darkconv params
-    self._filters = filters
-    self._use_bias = use_bias
-    self._kernel_initializer = kernel_initializer
-    self._bias_initializer = bias_initializer
-    self._bias_regularizer = bias_regularizer
-    self._use_bn = use_bn
-    self._use_sync_bn = use_sync_bn
-    self._strides = strides
-    self._kernel_regularizer = kernel_regularizer
-
-    # normal params
-    self._norm_moment = norm_momentum
-    self._norm_epsilon = norm_epsilon
-
-    # activation params
-    self._conv_activation = activation
-
-    super().__init__(**kwargs)
-
-  def build(self, input_shape):
-    self._maxpool = tf.keras.layers.MaxPool2D(pool_size=2,
-                                              strides=self._strides,
-                                              padding="same",
-                                              data_format=None)
-    self._convlayer = DarkConv(filters=self._filters,
-                               kernel_size=(3, 3),
-                               strides=(1, 1),
-                               padding='same',
-                               use_bias=self._use_bias,
-                               kernel_initializer=self._kernel_initializer,
-                               bias_initializer=self._bias_initializer,
-                               bias_regularizer=self._bias_regularizer,
-                               kernel_regularizer=self._kernel_regularizer,
-                               use_bn=self._use_bn,
-                               use_sync_bn=self._use_sync_bn,
-                               norm_momentum=self._norm_moment,
-                               norm_epsilon=self._norm_epsilon,
-                               activation=self._conv_activation)
-    super().build(input_shape)
-
-  def call(self, inputs):
-    output = self._maxpool(inputs)
-    output = self._convlayer(output)
-    return output
-
-  def get_config(self):
-    # used to store/share parameters to reconstruct the model
-    layer_config = {
-        "filters": self._filters,
-        "use_bias": self._use_bias,
-        "strides": self._strides,
-        "kernel_initializer": self._kernel_initializer,
-        "bias_initializer": self._bias_initializer,
-        "l2_regularization": self._l2_regularization,
-        "use_bn": self._use_bn,
-        "use_sync_bn": self._use_sync_bn,
-        "norm_moment": self._norm_moment,
-        "norm_epsilon": self._norm_epsilon,
-        "activation": self._conv_activation,
-        "leaky_alpha": self._leaky_alpha,
-        "sc_activation": self._sc_activation,
-    }
-    layer_config.update(super().get_config())
-    return layer_config
-
 @ks.utils.register_keras_serializable(package='yolo')
 class DarkResidual(ks.layers.Layer):
   '''
...
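
A side note on the deletion: DarkTiny.get_config read self._l2_regularization, self._leaky_alpha, and self._sc_activation, none of which __init__ ever set, so serializing the layer would have raised AttributeError. Folding the block into Darknet._tiny_stack removes that latent bug along with the class.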
@@ -183,40 +183,5 @@ class DarkResidualTest(tf.test.TestCase, parameterized.TestCase):
     self.assertNotIn(None, grad)

-class DarkTinyTest(tf.test.TestCase, parameterized.TestCase):
-
-  @parameterized.named_parameters(("middle", 224, 224, 64, 2),
-                                  ("last", 224, 224, 1024, 1))
-  def test_pass_through(self, width, height, filters, strides):
-    x = ks.Input(shape=(width, height, filters))
-    test_layer = nn_blocks.DarkTiny(filters=filters, strides=strides)
-    outx = test_layer(x)
-    self.assertEqual(width % strides, 0, msg="width % strides != 0")
-    self.assertEqual(height % strides, 0, msg="height % strides != 0")
-    self.assertAllEqual(outx.shape.as_list(),
-                        [None, width // strides, height // strides, filters])
-
-  @parameterized.named_parameters(("middle", 224, 224, 64, 2),
-                                  ("last", 224, 224, 1024, 1))
-  def test_gradient_pass_through(self, width, height, filters, strides):
-    loss = ks.losses.MeanSquaredError()
-    optimizer = ks.optimizers.SGD()
-    test_layer = nn_blocks.DarkTiny(filters=filters, strides=strides)
-    init = tf.random_normal_initializer()
-    x = tf.Variable(
-        initial_value=init(shape=(1, width, height, filters), dtype=tf.float32))
-    y = tf.Variable(initial_value=init(shape=(1, width // strides,
-                                              height // strides, filters),
-                                       dtype=tf.float32))
-    with tf.GradientTape() as tape:
-      x_hat = test_layer(x)
-      grad_loss = loss(x_hat, y)
-    grad = tape.gradient(grad_loss, test_layer.trainable_variables)
-    optimizer.apply_gradients(zip(grad, test_layer.trainable_variables))
-    self.assertNotIn(None, grad)
-
 if __name__ == "__main__":
   tf.test.main()
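
The deleted tests get no direct replacement in this commit. A hedged sketch of an equivalent shape check against the new composition, reusing this file's existing imports (tf, ks, nn_blocks, parameterized) and assuming nn_blocks.DarkConv accepts the kwargs _tiny_stack passes it:

class TinyStackTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.named_parameters(("middle", 224, 224, 64, 2),
                                  ("last", 224, 224, 1024, 1))
  def test_pass_through(self, width, height, filters, strides):
    x = ks.Input(shape=(width, height, filters))
    # Mirror Darknet._tiny_stack: maxpool downsample, then 3x3 DarkConv.
    out = tf.keras.layers.MaxPool2D(pool_size=2, strides=strides,
                                    padding="same")(x)
    out = nn_blocks.DarkConv(filters=filters, kernel_size=(3, 3),
                             strides=(1, 1), padding="same")(out)
    self.assertAllEqual(out.shape.as_list(),
                        [None, width // strides, height // strides, filters])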