"git@developer.sourcefind.cn:OpenDAS/torchaudio.git" did not exist on "5211b843f63053053eb4d1d1e00dc2ad578a5dc1"
Commit 1ba8ea62 authored by vishnubanna's avatar vishnubanna
Browse files

Darknet review additions

parents eb275559 fba9d3d9
...@@ -15,7 +15,7 @@ This repository is the unofficial implementation of the following papers. Howeve ...@@ -15,7 +15,7 @@ This repository is the unofficial implementation of the following papers. Howeve
Yolo v1, the original implementation, was released in 2015, providing a groundbreaking algorithm that could quickly process images and locate objects in a single pass through the detector. The original implementation used a backbone derived from state-of-the-art object classifiers of the time, like [GoogLeNet](https://arxiv.org/abs/1409.4842) and [VGG](https://arxiv.org/abs/1409.1556). More attention was given to the novel Yolo detection head, which allowed for object detection with a single pass over an image. Though limited, the network could predict up to 90 bounding boxes per image, and was tested for about 80 classes per box. Also, the model could only make predictions at one scale. These attributes made Yolo v1 more limited and less versatile, so as the years passed, the developers continued to update and develop this model. Yolo v1, the original implementation, was released in 2015, providing a groundbreaking algorithm that could quickly process images and locate objects in a single pass through the detector. The original implementation used a backbone derived from state-of-the-art object classifiers of the time, like [GoogLeNet](https://arxiv.org/abs/1409.4842) and [VGG](https://arxiv.org/abs/1409.1556). More attention was given to the novel Yolo detection head, which allowed for object detection with a single pass over an image. Though limited, the network could predict up to 90 bounding boxes per image, and was tested for about 80 classes per box. Also, the model could only make predictions at one scale. These attributes made Yolo v1 more limited and less versatile, so as the years passed, the developers continued to update and develop this model.
Yolo v3 and v4 serve as the most up-to-date and capable versions of the Yolo network group. These models use a custom backbone called Darknet53 that uses knowledge gained from the ResNet paper to improve its predictions. The new backbone also allows for objects to be detected at multiple scales. As for the new detection head, the model now predicts the bounding boxes using a set of anchor box priors (Anchor Boxes) as suggestions. The multiscale predictions in combination with the anchor boxes allow the network to make up to 1000 object predictions on a single image. Finally, the new loss function forces the network to make better predictions by using Intersection Over Union (IOU) to inform the models confidence rather than relying on the mean squared error for the entire output. Yolo v3 and v4 serve as the most up-to-date and capable versions of the Yolo network group. These models use a custom backbone called Darknet53 that uses knowledge gained from the ResNet paper to improve its predictions. The new backbone also allows for objects to be detected at multiple scales. As for the new detection head, the model now predicts the bounding boxes using a set of anchor box priors (Anchor Boxes) as suggestions. The multiscale predictions in combination with the anchor boxes allow the network to make up to 1000 object predictions on a single image. Finally, the new loss function forces the network to make better predictions by using Intersection Over Union (IOU) to inform the model's confidence rather than relying on the mean squared error for the entire output.
## Authors ## Authors
...@@ -33,7 +33,8 @@ Yolo v3 and v4 serve as the most up to date and capable versions of the Yolo net ...@@ -33,7 +33,8 @@ Yolo v3 and v4 serve as the most up to date and capable versions of the Yolo net
## Our Goal ## Our Goal
Our goal with this model conversion is to provide highly versatile implementations of the Backbone and Yolo Head. We have tried to build the model in such a way that the Yolo head could easily be connected to a new, more powerful backbone if a person chose to.
Our goal with this model conversion is to provide implementations of the Backbone and Yolo Head. We have built the model in such a way that the Yolo head could be connected to a new, more powerful backbone if a person chose to.
## Models in the library ## Models in the library
......
"""Backbones configurations.""" """Backbones configurations."""
# Import libraries # Import libraries
import dataclasses import dataclasses
from typing import Optional
from official.modeling import hyperparams
# from official.vision.beta.configs import backbones
from official.modeling import hyperparams
@dataclasses.dataclass @dataclasses.dataclass
class DarkNet(hyperparams.Config): class DarkNet(hyperparams.Config):
"""DarkNet config.""" """DarkNet config."""
model_id: str = "darknet53" model_id: str = "darknet53"
# we could not get this to work
@dataclasses.dataclass @dataclasses.dataclass
class Backbone(backbones.Backbone): class Backbone(backbones.Backbone):
darknet: DarkNet = DarkNet() darknet: DarkNet = DarkNet()
...@@ -18,7 +18,7 @@ task: ...@@ -18,7 +18,7 @@ task:
tfds_split: 'test' tfds_split: 'test'
tfds_download: True tfds_download: True
is_training: True is_training: True
global_batch_size: 2 global_batch_size: 128
dtype: 'float16' dtype: 'float16'
validation_data: validation_data:
tfds_name: 'imagenet_a' tfds_name: 'imagenet_a'
...@@ -29,12 +29,12 @@ task: ...@@ -29,12 +29,12 @@ task:
dtype: 'float16' dtype: 'float16'
drop_remainder: False drop_remainder: False
trainer: trainer:
train_steps: 51200000 # in the paper train_steps: 800000 # in the paper
validation_steps: 25600 # size of validation data validation_steps: 400 # size of validation data
validation_interval: 150 validation_interval: 10000
steps_per_loop: 150 steps_per_loop: 10000
summary_interval: 150 summary_interval: 10000
checkpoint_interval: 150 checkpoint_interval: 10000
optimizer_config: optimizer_config:
optimizer: optimizer:
type: 'sgd' type: 'sgd'
...@@ -46,8 +46,8 @@ trainer: ...@@ -46,8 +46,8 @@ trainer:
initial_learning_rate: 0.1 initial_learning_rate: 0.1
end_learning_rate: 0.0001 end_learning_rate: 0.0001
power: 4.0 power: 4.0
decay_steps: 51136000 decay_steps: 799000
warmup: warmup:
type: 'linear' type: 'linear'
linear: linear:
warmup_steps: 64000 #lr rise from 0 to 0.1 over 1000 steps warmup_steps: 1000 #learning rate rises from 0 to 0.1 over 1000 steps
...@@ -252,7 +252,7 @@ class Darknet(ks.Model): ...@@ -252,7 +252,7 @@ class Darknet(ks.Model):
name=f"{config.layer}_{i}") name=f"{config.layer}_{i}")
stack_outputs.append(x_pass) stack_outputs.append(x_pass)
if (config.is_output and if (config.is_output and
self._min_size == None): # or isinstance(config.output_name, str): self._min_size == None):
endpoints[str(config.output_name)] = x endpoints[str(config.output_name)] = x
elif self._min_size != None and config.output_name >= self._min_size and config.output_name <= self._max_size: elif self._min_size != None and config.output_name >= self._min_size and config.output_name <= self._max_size:
endpoints[str(config.output_name)] = x endpoints[str(config.output_name)] = x
......
...@@ -14,7 +14,7 @@ class CSPConnect(ks.layers.Layer): ...@@ -14,7 +14,7 @@ class CSPConnect(ks.layers.Layer):
kernel_initializer='glorot_uniform', kernel_initializer='glorot_uniform',
bias_initializer='zeros', bias_initializer='zeros',
bias_regularizer=None, bias_regularizer=None,
weight_decay=None, # default find where is it is stated weight_decay=None,
use_bn=True, use_bn=True,
use_sync_bn=False, use_sync_bn=False,
norm_momentum=0.99, norm_momentum=0.99,
......
...@@ -14,7 +14,7 @@ class CSPDownSample(ks.layers.Layer): ...@@ -14,7 +14,7 @@ class CSPDownSample(ks.layers.Layer):
kernel_initializer='glorot_uniform', kernel_initializer='glorot_uniform',
bias_initializer='zeros', bias_initializer='zeros',
bias_regularizer=None, bias_regularizer=None,
weight_decay=None, # default find where is it is stated weight_decay=None,
use_bn=True, use_bn=True,
use_sync_bn=False, use_sync_bn=False,
norm_momentum=0.99, norm_momentum=0.99,
......
...@@ -14,7 +14,7 @@ class CSPTiny(ks.layers.Layer): ...@@ -14,7 +14,7 @@ class CSPTiny(ks.layers.Layer):
kernel_initializer='glorot_uniform', kernel_initializer='glorot_uniform',
bias_initializer='zeros', bias_initializer='zeros',
bias_regularizer=None, bias_regularizer=None,
weight_decay=None, # default find where is it is stated weight_decay=None,
use_bn=True, use_bn=True,
use_sync_bn=False, use_sync_bn=False,
group_id=1, group_id=1,
......
...@@ -23,7 +23,7 @@ class DarkConv(ks.layers.Layer): ...@@ -23,7 +23,7 @@ class DarkConv(ks.layers.Layer):
kernel_initializer='glorot_uniform', kernel_initializer='glorot_uniform',
bias_initializer='zeros', bias_initializer='zeros',
bias_regularizer=None, bias_regularizer=None,
weight_decay=None, # default find where is it is stated weight_decay=None, # Specify the weight decay as the default will not work.
use_bn=True, use_bn=True,
use_sync_bn=False, use_sync_bn=False,
norm_momentum=0.99, norm_momentum=0.99,
...@@ -99,7 +99,7 @@ class DarkConv(ks.layers.Layer): ...@@ -99,7 +99,7 @@ class DarkConv(ks.layers.Layer):
self._kernel_size) == int else self._kernel_size[0] self._kernel_size) == int else self._kernel_size[0]
if self._padding == "same" and kernel_size != 1: if self._padding == "same" and kernel_size != 1:
self._zeropad = ks.layers.ZeroPadding2D( self._zeropad = ks.layers.ZeroPadding2D(
((1, 1), (1, 1))) # symetric padding ((1, 1), (1, 1))) # symmetric padding
else: else:
self._zeropad = Identity() self._zeropad = Identity()
...@@ -107,7 +107,7 @@ class DarkConv(ks.layers.Layer): ...@@ -107,7 +107,7 @@ class DarkConv(ks.layers.Layer):
filters=self._filters, filters=self._filters,
kernel_size=self._kernel_size, kernel_size=self._kernel_size,
strides=self._strides, strides=self._strides,
padding="valid", #self._padding, padding="valid",
dilation_rate=self._dilation_rate, dilation_rate=self._dilation_rate,
use_bias=self._use_bias, use_bias=self._use_bias,
kernel_initializer=self._kernel_initializer, kernel_initializer=self._kernel_initializer,
...@@ -148,7 +148,7 @@ class DarkConv(ks.layers.Layer): ...@@ -148,7 +148,7 @@ class DarkConv(ks.layers.Layer):
return x return x
def get_config(self): def get_config(self):
# used to store/share parameters to reconsturct the model # used to store/share parameters to reconstruct the model
layer_config = { layer_config = {
"filters": self._filters, "filters": self._filters,
"kernel_size": self._kernel_size, "kernel_size": self._kernel_size,
......
...@@ -138,7 +138,7 @@ class DarkResidual(ks.layers.Layer): ...@@ -138,7 +138,7 @@ class DarkResidual(ks.layers.Layer):
return self._activation_fn(x) return self._activation_fn(x)
def get_config(self): def get_config(self):
# used to store/share parameters to reconsturct the model # used to store/share parameters to reconstruct the model
layer_config = { layer_config = {
"filters": self._filters, "filters": self._filters,
"use_bias": self._use_bias, "use_bias": self._use_bias,
......
...@@ -54,19 +54,5 @@ class DarkConvTest(tf.test.TestCase, parameterized.TestCase): ...@@ -54,19 +54,5 @@ class DarkConvTest(tf.test.TestCase, parameterized.TestCase):
self.assertNotIn(None, grad) self.assertNotIn(None, grad)
return return
# @parameterized.named_parameters(("filters", 3), ("filters", 20), ("filters", 512))
# def test_time(self, filters):
# # finish the test for time
# dataset = tfds.load("mnist")
# model = ks.Sequential([
# DarkConv(7, kernel_size=(3,3), strides = (2,2), activation='relu'),
# DarkConv(10, kernel_size=(3,3), strides = (2,2), activation='relu'),
# DarkConv(filters, kernel_size=(3,3), strides = (1,1), activation='relu'),
# DarkConv(9, kernel_size=(3,3), strides = (2,2), activation='relu'),
# ks.layers.GlobalAveragePooling2D(),
# ks.layers.Dense(10, activation='softmax')], name='test')
# return
if __name__ == "__main__": if __name__ == "__main__":
tf.test.main() tf.test.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment