Commit 71ef4530 authored by vishnubanna

Abdhulla Review Majority addressed

parent 1ba8ea62
...@@ -64,8 +64,6 @@ class RevNet(hyperparams.Config): ...@@ -64,8 +64,6 @@ class RevNet(hyperparams.Config):
# Specifies the depth of RevNet. # Specifies the depth of RevNet.
model_id: int = 56 model_id: int = 56
from official.vision.beta.projects.yolo.configs.backbones import DarkNet
@dataclasses.dataclass @dataclasses.dataclass
class Backbone(hyperparams.OneOfConfig): class Backbone(hyperparams.OneOfConfig):
"""Configuration for backbones. """Configuration for backbones.
...@@ -86,4 +84,3 @@ class Backbone(hyperparams.OneOfConfig): ...@@ -86,4 +84,3 @@ class Backbone(hyperparams.OneOfConfig):
efficientnet: EfficientNet = EfficientNet() efficientnet: EfficientNet = EfficientNet()
spinenet: SpineNet = SpineNet() spinenet: SpineNet = SpineNet()
mobilenet: MobileNet = MobileNet() mobilenet: MobileNet = MobileNet()
darknet: DarkNet = DarkNet()
...@@ -21,3 +21,12 @@ from official.vision import beta ...@@ -21,3 +21,12 @@ from official.vision import beta
from official.vision.beta.projects import yolo from official.vision.beta.projects import yolo
from official.vision.beta.projects.yolo.modeling.backbones import Darknet from official.vision.beta.projects.yolo.modeling.backbones import Darknet
from official.vision.beta.projects.yolo.configs import darknet_classification
from official.vision.beta.projects.yolo.configs.darknet_classification import image_classification
from official.vision.beta.projects.yolo.configs.darknet_classification import ImageClassificationTask
from official.vision.beta.projects.yolo.tasks.image_classification import ImageClassificationTask
# task_factory.register_task_cls(ImageClassificationTask)(ImageClassificationTask)
# print(task_factory._REGISTERED_TASK_CLS)
\ No newline at end of file
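The two commented lines above hint at manual task registration. For reference, a minimal sketch of the uncommented form, assuming the task_factory API from official.core; the config class and the task class both happen to be named ImageClassificationTask here, so module-qualified names are used to disambiguate:

from official.core import task_factory
from official.vision.beta.projects.yolo.configs import darknet_classification
from official.vision.beta.projects.yolo.tasks import image_classification

# Map the config dataclass to the task implementation so train_utils can
# construct the task from the 'darknet_classification' experiment.
task_factory.register_task_cls(darknet_classification.ImageClassificationTask)(
    image_classification.ImageClassificationTask)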
...@@ -4,6 +4,8 @@ import dataclasses ...@@ -4,6 +4,8 @@ import dataclasses
from official.modeling import hyperparams from official.modeling import hyperparams
from official.vision.beta.configs import backbones
@dataclasses.dataclass @dataclasses.dataclass
class DarkNet(hyperparams.Config): class DarkNet(hyperparams.Config):
"""DarkNet config.""" """DarkNet config."""
......
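For context, a hedged sketch of how this config is selected through the OneOf Backbone config; the model_id value mirrors the identifiers used by the YAML files in this commit:

from official.vision.beta.projects.yolo.configs import backbones

# Pick the darknet branch of the OneOf config and set its model variant.
backbone = backbones.Backbone(
    type='darknet', darknet=backbones.DarkNet(model_id='darknet53'))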
import os
from typing import List
import dataclasses
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.vision.beta.projects.yolo.configs import backbones
from official.vision.beta.configs import common
from official.vision.beta.configs import image_classification as imc
@dataclasses.dataclass
class ImageClassificationModel(hyperparams.Config):
num_classes: int = 0
input_size: List[int] = dataclasses.field(default_factory=list)
backbone: backbones.Backbone = backbones.Backbone(
type='darknet', darknet=backbones.DarkNet())
dropout_rate: float = 0.0
norm_activation: common.NormActivation = common.NormActivation()
# Adds a BatchNormalization layer pre-GlobalAveragePooling in classification
add_head_batch_norm: bool = False
@dataclasses.dataclass
class Losses(hyperparams.Config):
one_hot: bool = True
label_smoothing: float = 0.0
l2_weight_decay: float = 0.0
@dataclasses.dataclass
class ImageClassificationTask(cfg.TaskConfig):
"""The model config."""
model: ImageClassificationModel = ImageClassificationModel()
train_data: imc.DataConfig = imc.DataConfig(is_training=True)
validation_data: imc.DataConfig = imc.DataConfig(is_training=False)
losses: Losses = Losses()
gradient_clip_norm: float = 0.0
logging_dir: str = None
@exp_factory.register_config_factory('darknet_classification')
def image_classification() -> cfg.ExperimentConfig:
"""Image classification general."""
return cfg.ExperimentConfig(
task=ImageClassificationTask(),
trainer=cfg.TrainerConfig(),
restrictions=[
'task.train_data.is_training != None',
'task.validation_data.is_training != None'
])
\ No newline at end of file
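Once registered, the experiment can be retrieved by name; a minimal usage sketch:

from official.core import exp_factory

# Look up the experiment registered by the factory decorator above.
config = exp_factory.get_exp_config('darknet_classification')
assert config.task.model.backbone.type == 'darknet'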
runtime: runtime:
distribution_strategy: 'mirrored' distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float32' mixed_precision_dtype: 'float32'
loss_scale: 'dynamic'
task: task:
model: model:
num_classes: 1001 num_classes: 1001
...@@ -9,25 +8,21 @@ task: ...@@ -9,25 +8,21 @@ task:
backbone: backbone:
type: 'darknet' type: 'darknet'
darknet: darknet:
model_id: 'darknet53' model_id: 'cspdarknettiny'
losses: losses:
l2_weight_decay: 0.0005 l2_weight_decay: 0.0005
one_hot: True one_hot: True
train_data: train_data:
tfds_name: 'imagenet_a' input_path: 'imagenet-2012-tfrecord/train*'
tfds_split: 'test' is_training: true
tfds_download: True
is_training: True
global_batch_size: 128 global_batch_size: 128
dtype: 'float16' dtype: 'float16'
validation_data: validation_data:
tfds_name: 'imagenet_a' input_path: 'imagenet-2012-tfrecord/valid*'
tfds_split: 'test' is_training: true
tfds_download: True global_batch_size: 128
is_training: False
global_batch_size: 2
dtype: 'float16' dtype: 'float16'
drop_remainder: False drop_remainder: false
trainer: trainer:
train_steps: 800000 # in the paper train_steps: 800000 # in the paper
validation_steps: 400 # size of validation data validation_steps: 400 # size of validation data
......
"""Contains definitions of Darknet Backbone Networks.
The models are inspired by ResNet, and CSPNet
Residual networks (ResNets) were proposed in:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Deep Residual Learning for Image Recognition. arXiv:1512.03385
Cross Stage Partial networks (CSPNets) were proposed in:
[1] Chien-Yao Wang, Hong-Yuan Mark Liao, I-Hau Yeh, Yueh-Hua Wu, Ping-Yang Chen, Jun-Wei Hsieh
CSPNet: A New Backbone that can Enhance Learning Capability of CNN. arXiv:1911.11929
DarkNets Are used mainly for Object detection in:
[1] Joseph Redmon, Ali Farhadi
YOLOv3: An Incremental Improvement. arXiv:1804.02767
[2] Alexey Bochkovskiy, Chien-Yao Wang, Hong-Yuan Mark Liao
YOLOv4: Optimal Speed and Accuracy of Object Detection. arXiv:2004.10934
"""
import tensorflow as tf import tensorflow as tf
import tensorflow.keras as ks import tensorflow.keras as ks
import collections import collections
...@@ -8,20 +28,26 @@ from official.vision.beta.projects.yolo.modeling import building_blocks as nn_bl ...@@ -8,20 +28,26 @@ from official.vision.beta.projects.yolo.modeling import building_blocks as nn_bl
# builder required classes # builder required classes
class BlockConfig(object): class BlockConfig(object):
'''
Get layer config to make the code more readable.
Args:
layer: string layer name
stack: the type of layer ordering to use for this specific level
repetitions: integer for the number of times to repeat the block
bottleneck: boolean for whether this stack has a bottleneck layer
filters: integer for the output depth of the level
pool_size: integer for the pool size of max pool layers
kernel_size: optional integer for the convolution kernel size
strides: integer or tuple to indicate convolution strides
padding: the padding to apply to layers in this stack
activation: string for the activation to use for this stack
route: integer for which level to route from to get the next input
output_name: the name to use for this output
is_output: whether this layer is an output in the default model
'''
def __init__(self, layer, stack, reps, bottleneck, filters, pool_size, kernel_size, def __init__(self, layer, stack, reps, bottleneck, filters, pool_size, kernel_size,
strides, padding, activation, route, output_name, is_output): strides, padding, activation, route, output_name, is_output):
'''
get layer config to make code more readable
Args:
layer: string layer name
reps: integer for the number of times to repeat block
filters: integer for the filter for this layer, or the output depth
kernel_size: integer or none; if none, the building block handles this automatically (not a layer input)
downsample: boolean, to down sample the input width and height
output: boolean, true if the layer is required as an output
'''
self.layer = layer self.layer = layer
self.stack = stack self.stack = stack
self.repetitions = reps self.repetitions = reps
...@@ -43,10 +69,6 @@ def build_block_specs(config): ...@@ -43,10 +69,6 @@ def build_block_specs(config):
specs.append(BlockConfig(*layer)) specs.append(BlockConfig(*layer))
return specs return specs
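A short sketch of what build_block_specs does with one entry from the backbone tables below; the 13 positional fields map one-to-one onto BlockConfig's constructor arguments:

# First stem layer of CSPDARKNET53, unpacked into a BlockConfig.
layer = ["DarkConv", None, 1, False, 32, None, 3, 1, "same", "mish", -1, 0, False]
spec = BlockConfig(*layer)  # layer, stack, reps, bottleneck, filters, ...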
class layer_factory(object): class layer_factory(object):
""" """
class for quick look up of default layers used by darknet to class for quick look up of default layers used by darknet to
...@@ -59,9 +81,8 @@ class layer_factory(object): ...@@ -59,9 +81,8 @@ class layer_factory(object):
"DarkConv": (nn_blocks.DarkConv, self.darkconv_config_todict), "DarkConv": (nn_blocks.DarkConv, self.darkconv_config_todict),
"MaxPool": (tf.keras.layers.MaxPool2D, self.maxpool_config_todict) "MaxPool": (tf.keras.layers.MaxPool2D, self.maxpool_config_todict)
} }
return
def darkconv_config_todict(config, kwargs): def darkconv_config_todict(self, config, kwargs):
dictvals = { dictvals = {
"filters": config.filters, "filters": config.filters,
"kernel_size": config.kernel_size, "kernel_size": config.kernel_size,
...@@ -72,13 +93,13 @@ class layer_factory(object): ...@@ -72,13 +93,13 @@ class layer_factory(object):
return dictvals return dictvals
def darktiny_config_todict(config, kwargs): def darktiny_config_todict(self, config, kwargs):
dictvals = {"filters": config.filters, "strides": config.strides} dictvals = {"filters": config.filters, "strides": config.strides}
dictvals.update(kwargs) dictvals.update(kwargs)
return dictvals return dictvals
def maxpool_config_todict(config, kwargs): def maxpool_config_todict(self, config, kwargs):
return { return {
"pool_size": config.pool_size, "pool_size": config.pool_size,
"strides": config.strides, "strides": config.strides,
...@@ -87,7 +108,7 @@ class layer_factory(object): ...@@ -87,7 +108,7 @@ class layer_factory(object):
} }
def __call__(self, config, kwargs): def __call__(self, config, kwargs):
layer, get_param_dict = self._layer_dict[key] layer, get_param_dict = self._layer_dict[config.layer]
param_dict = get_param_dict(config, kwargs) param_dict = get_param_dict(config, kwargs)
return layer(**param_dict) return layer(**param_dict)
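With the lookup fixed to key on config.layer rather than an undefined variable, the factory can be used as follows (a hedged sketch reusing the spec built above):

# The registry maps spec.layer ("DarkConv") to the layer class and the
# function that converts a BlockConfig into that layer's keyword arguments.
registry = layer_factory()
conv = registry(spec, {"name": "stem"})  # returns an nn_blocks.DarkConv instance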
...@@ -104,12 +125,12 @@ CSPDARKNET53 = { ...@@ -104,12 +125,12 @@ CSPDARKNET53 = {
"splits": {"backbone_split": 106, "splits": {"backbone_split": 106,
"neck_split": 138}, "neck_split": 138},
"backbone": [ "backbone": [
["DarkConv", None, 1, False, 32, None, 3, 1, "same", "mish", -1, 0, False], # 1 ["DarkConv", None, 1, False, 32, None, 3, 1, "same", "mish", -1, 0, False],
["DarkRes", "csp", 1, True, 64, None, None, None, None, "mish", -1, 1, False], # 3 ["DarkRes", "csp", 1, True, 64, None, None, None, None, "mish", -1, 1, False],
["DarkRes", "csp", 2, False, 128, None, None, None, None, "mish", -1, 2, False], # 2 ["DarkRes", "csp", 2, False, 128, None, None, None, None, "mish", -1, 2, False],
["DarkRes", "csp", 8, False, 256, None, None, None, None, "mish", -1, 3, True], ["DarkRes", "csp", 8, False, 256, None, None, None, None, "mish", -1, 3, True],
["DarkRes", "csp", 8, False, 512, None, None, None, None, "mish", -1, 4, True], # 3 ["DarkRes", "csp", 8, False, 512, None, None, None, None, "mish", -1, 4, True],
["DarkRes", "csp", 4, False, 1024, None, None, None, None, "mish", -1, 5, True], # 6 #route ["DarkRes", "csp", 4, False, 1024, None, None, None, None, "mish", -1, 5, True],
] ]
} }
...@@ -117,12 +138,12 @@ DARKNET53 = { ...@@ -117,12 +138,12 @@ DARKNET53 = {
"list_names": LISTNAMES, "list_names": LISTNAMES,
"splits": {"backbone_split": 76}, "splits": {"backbone_split": 76},
"backbone": [ "backbone": [
["DarkConv", None, 1, False, 32, None, 3, 1, "same", "leaky", -1, 0, False], # 1 ["DarkConv", None, 1, False, 32, None, 3, 1, "same", "leaky", -1, 0, False],
["DarkRes", "residual", 1, True, 64, None, None, None, None, "leaky", -1, 1, False], # 3 ["DarkRes", "residual", 1, True, 64, None, None, None, None, "leaky", -1, 1, False],
["DarkRes", "residual", 2, False, 128, None, None, None, None, "leaky", -1, 2, False], # 2 ["DarkRes", "residual", 2, False, 128, None, None, None, None, "leaky", -1, 2, False],
["DarkRes", "residual", 8, False, 256, None, None, None, None, "leaky", -1, 3, True], ["DarkRes", "residual", 8, False, 256, None, None, None, None, "leaky", -1, 3, True],
["DarkRes", "residual", 8, False, 512, None, None, None, None, "leaky", -1, 4, True], # 3 ["DarkRes", "residual", 8, False, 512, None, None, None, None, "leaky", -1, 4, True],
["DarkRes", "residual", 4, False, 1024, None, None, None, None, "leaky", -1, 5, True], # 6 ["DarkRes", "residual", 4, False, 1024, None, None, None, None, "leaky", -1, 5, True],
] ]
} }
...@@ -130,12 +151,12 @@ CSPDARKNETTINY = { ...@@ -130,12 +151,12 @@ CSPDARKNETTINY = {
"list_names": LISTNAMES, "list_names": LISTNAMES,
"splits": {"backbone_split": 28}, "splits": {"backbone_split": 28},
"backbone": [ "backbone": [
["DarkConv", None, 1, False, 32, None, 3, 2, "same", "leaky", -1, 0, False], # 1 ["DarkConv", None, 1, False, 32, None, 3, 2, "same", "leaky", -1, 0, False],
["DarkConv", None, 1, False, 64, None, 3, 2, "same", "leaky", -1, 1, False], # 1 ["DarkConv", None, 1, False, 64, None, 3, 2, "same", "leaky", -1, 1, False],
["CSPTiny", "csp_tiny", 1, False, 64, None, 3, 2, "same", "leaky", -1, 2, False], # 3 ["CSPTiny", "csp_tiny", 1, False, 64, None, 3, 2, "same", "leaky", -1, 2, False],
["CSPTiny", "csp_tiny", 1, False, 128, None, 3, 2, "same", "leaky", -1, 3, False], # 3 ["CSPTiny", "csp_tiny", 1, False, 128, None, 3, 2, "same", "leaky", -1, 3, False],
["CSPTiny", "csp_tiny", 1, False, 256, None, 3, 2, "same", "leaky", -1, 4, True], # 3 ["CSPTiny", "csp_tiny", 1, False, 256, None, 3, 2, "same", "leaky", -1, 4, True],
["DarkConv", None, 1, False, 512, None, 3, 1, "same", "leaky", -1, 5, True], # 1 ["DarkConv", None, 1, False, 512, None, 3, 1, "same", "leaky", -1, 5, True],
] ]
} }
...@@ -143,13 +164,13 @@ DARKNETTINY = { ...@@ -143,13 +164,13 @@ DARKNETTINY = {
"list_names": LISTNAMES, "list_names": LISTNAMES,
"splits": {"backbone_split": 14}, "splits": {"backbone_split": 14},
"backbone": [ "backbone": [
["DarkConv", None, 1, False, 16, None, 3, 1, "same", "leaky", -1, 0, False], # 1 ["DarkConv", None, 1, False, 16, None, 3, 1, "same", "leaky", -1, 0, False],
["DarkTiny", None, 1, True, 32, None, 3, 2, "same", "leaky", -1, 1, False], # 3 ["DarkTiny", None, 1, True, 32, None, 3, 2, "same", "leaky", -1, 1, False],
["DarkTiny", None, 1, True, 64, None, 3, 2, "same", "leaky", -1, 2, False], # 3 ["DarkTiny", None, 1, True, 64, None, 3, 2, "same", "leaky", -1, 2, False],
["DarkTiny", None, 1, False, 128, None, 3, 2, "same", "leaky", -1, 3, False], # 2 ["DarkTiny", None, 1, False, 128, None, 3, 2, "same", "leaky", -1, 3, False],
["DarkTiny", None, 1, False, 256, None, 3, 2, "same", "leaky", -1, 4, True], ["DarkTiny", None, 1, False, 256, None, 3, 2, "same", "leaky", -1, 4, True],
["DarkTiny", None, 1, False, 512, None, 3, 2, "same", "leaky", -1, 5, False], # 3 ["DarkTiny", None, 1, False, 512, None, 3, 2, "same", "leaky", -1, 5, False],
["DarkTiny", None, 1, False, 1024, None, 3, 1, "same", "leaky", -1, 5, True], # 6 #route ["DarkTiny", None, 1, False, 1024, None, 3, 1, "same", "leaky", -1, 5, True],
] ]
} }
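The model_id strings used throughout the configs resolve to these tables; a small sketch using the class method shown further down:

# Returns the parsed block specs and the split points for this variant.
specs, splits = Darknet.get_model_config("cspdarknettiny")
print(splits)  # {"backbone_split": 28}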
...@@ -168,8 +189,8 @@ class Darknet(ks.Model): ...@@ -168,8 +189,8 @@ class Darknet(ks.Model):
self, self,
model_id="darknet53", model_id="darknet53",
input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]), input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
min_size=None, min_level=None,
max_size=5, max_level=5,
activation=None, activation=None,
use_sync_bn=False, use_sync_bn=False,
norm_momentum=0.99, norm_momentum=0.99,
...@@ -177,7 +198,6 @@ class Darknet(ks.Model): ...@@ -177,7 +198,6 @@ class Darknet(ks.Model):
kernel_initializer='glorot_uniform', kernel_initializer='glorot_uniform',
kernel_regularizer=None, kernel_regularizer=None,
bias_regularizer=None, bias_regularizer=None,
config=None,
**kwargs): **kwargs):
layer_specs, splits = Darknet.get_model_config(model_id) layer_specs, splits = Darknet.get_model_config(model_id)
...@@ -188,8 +208,8 @@ class Darknet(ks.Model): ...@@ -188,8 +208,8 @@ class Darknet(ks.Model):
self._registry = layer_factory() self._registry = layer_factory()
# default layer look up # default layer look up
self._min_size = min_size self._min_size = min_level
self._max_size = max_size self._max_size = max_level
self._output_specs = None self._output_specs = None
self._kernel_initializer = kernel_initializer self._kernel_initializer = kernel_initializer
...@@ -202,7 +222,7 @@ class Darknet(ks.Model): ...@@ -202,7 +222,7 @@ class Darknet(ks.Model):
self._default_dict = { self._default_dict = {
"kernel_initializer": self._kernel_initializer, "kernel_initializer": self._kernel_initializer,
"weight_decay": self._kernel_regularizer, "kernel_regularizer": self._kernel_regularizer,
"bias_regularizer": self._bias_regularizer, "bias_regularizer": self._bias_regularizer,
"norm_momentum": self._norm_momentum, "norm_momentum": self._norm_momentum,
"norm_epsilon": self._norm_epislon, "norm_epsilon": self._norm_epislon,
...@@ -336,7 +356,30 @@ class Darknet(ks.Model): ...@@ -336,7 +356,30 @@ class Darknet(ks.Model):
backbone = BACKBONES[name]["backbone"] backbone = BACKBONES[name]["backbone"]
splits = BACKBONES[name]["splits"] splits = BACKBONES[name]["splits"]
return build_block_specs(backbone), splits return build_block_specs(backbone), splits
@property
def model_id(self):
return self._model_name
@classmethod
def from_config(cls, config, custom_objects=None):
return cls(**config)
def get_config(self):
layer_config = {
"model_id": self._model_name,
"min_level": self._min_size,
"max_level": self._max_size,
"kernel_initializer": self._kernel_initializer,
"kernel_regularizer": self._kernel_regularizer,
"bias_regularizer": self._bias_regularizer,
"norm_momentum": self._norm_momentum,
"norm_epsilon": self._norm_epislon,
"use_sync_bn": self._use_sync_bn,
"activation": self._activation
}
#layer_config.update(super().get_config())
return layer_config
@factory.register_backbone_builder('darknet') @factory.register_backbone_builder('darknet')
def build_darknet( def build_darknet(
......
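The get_config/from_config additions enable a Keras-style serialization round trip; a hedged sketch:

# Rebuild an identical backbone from its own config dictionary.
net = Darknet(model_id="darknet53", min_level=3, max_level=5)
clone = Darknet.from_config(net.get_config())
assert clone.model_id == net.model_id == "darknet53"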
...@@ -14,7 +14,7 @@ class CSPConnect(ks.layers.Layer): ...@@ -14,7 +14,7 @@ class CSPConnect(ks.layers.Layer):
kernel_initializer='glorot_uniform', kernel_initializer='glorot_uniform',
bias_initializer='zeros', bias_initializer='zeros',
bias_regularizer=None, bias_regularizer=None,
weight_decay=None, kernel_regularizer=None,
use_bn=True, use_bn=True,
use_sync_bn=False, use_sync_bn=False,
norm_momentum=0.99, norm_momentum=0.99,
...@@ -30,7 +30,7 @@ class CSPConnect(ks.layers.Layer): ...@@ -30,7 +30,7 @@ class CSPConnect(ks.layers.Layer):
#convolution params #convolution params
self._kernel_initializer = kernel_initializer self._kernel_initializer = kernel_initializer
self._bias_initializer = bias_initializer self._bias_initializer = bias_initializer
self._weight_decay = weight_decay self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer self._bias_regularizer = bias_regularizer
self._use_bn = use_bn self._use_bn = use_bn
self._use_sync_bn = use_sync_bn self._use_sync_bn = use_sync_bn
...@@ -45,7 +45,7 @@ class CSPConnect(ks.layers.Layer): ...@@ -45,7 +45,7 @@ class CSPConnect(ks.layers.Layer):
kernel_initializer=self._kernel_initializer, kernel_initializer=self._kernel_initializer,
bias_initializer=self._bias_initializer, bias_initializer=self._bias_initializer,
bias_regularizer=self._bias_regularizer, bias_regularizer=self._bias_regularizer,
weight_decay=self._weight_decay, kernel_regularizer=self._kernel_regularizer,
use_bn=self._use_bn, use_bn=self._use_bn,
use_sync_bn=self._use_sync_bn, use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_moment, norm_momentum=self._norm_moment,
...@@ -58,7 +58,7 @@ class CSPConnect(ks.layers.Layer): ...@@ -58,7 +58,7 @@ class CSPConnect(ks.layers.Layer):
kernel_initializer=self._kernel_initializer, kernel_initializer=self._kernel_initializer,
bias_initializer=self._bias_initializer, bias_initializer=self._bias_initializer,
bias_regularizer=self._bias_regularizer, bias_regularizer=self._bias_regularizer,
weight_decay=self._weight_decay, kernel_regularizer=self._kernel_regularizer,
use_bn=self._use_bn, use_bn=self._use_bn,
use_sync_bn=self._use_sync_bn, use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_moment, norm_momentum=self._norm_moment,
......
...@@ -14,7 +14,7 @@ class CSPDownSample(ks.layers.Layer): ...@@ -14,7 +14,7 @@ class CSPDownSample(ks.layers.Layer):
kernel_initializer='glorot_uniform', kernel_initializer='glorot_uniform',
bias_initializer='zeros', bias_initializer='zeros',
bias_regularizer=None, bias_regularizer=None,
weight_decay=None, kernel_regularizer=None,
use_bn=True, use_bn=True,
use_sync_bn=False, use_sync_bn=False,
norm_momentum=0.99, norm_momentum=0.99,
...@@ -30,7 +30,7 @@ class CSPDownSample(ks.layers.Layer): ...@@ -30,7 +30,7 @@ class CSPDownSample(ks.layers.Layer):
#convolution params #convolution params
self._kernel_initializer = kernel_initializer self._kernel_initializer = kernel_initializer
self._bias_initializer = bias_initializer self._bias_initializer = bias_initializer
self._weight_decay = weight_decay self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer self._bias_regularizer = bias_regularizer
self._use_bn = use_bn self._use_bn = use_bn
self._use_sync_bn = use_sync_bn self._use_sync_bn = use_sync_bn
...@@ -45,7 +45,7 @@ class CSPDownSample(ks.layers.Layer): ...@@ -45,7 +45,7 @@ class CSPDownSample(ks.layers.Layer):
kernel_initializer=self._kernel_initializer, kernel_initializer=self._kernel_initializer,
bias_initializer=self._bias_initializer, bias_initializer=self._bias_initializer,
bias_regularizer=self._bias_regularizer, bias_regularizer=self._bias_regularizer,
weight_decay=self._weight_decay, kernel_regularizer=self._kernel_regularizer,
use_bn=self._use_bn, use_bn=self._use_bn,
use_sync_bn=self._use_sync_bn, use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_moment, norm_momentum=self._norm_moment,
...@@ -57,7 +57,7 @@ class CSPDownSample(ks.layers.Layer): ...@@ -57,7 +57,7 @@ class CSPDownSample(ks.layers.Layer):
kernel_initializer=self._kernel_initializer, kernel_initializer=self._kernel_initializer,
bias_initializer=self._bias_initializer, bias_initializer=self._bias_initializer,
bias_regularizer=self._bias_regularizer, bias_regularizer=self._bias_regularizer,
weight_decay=self._weight_decay, kernel_regularizer=self._kernel_regularizer,
use_bn=self._use_bn, use_bn=self._use_bn,
use_sync_bn=self._use_sync_bn, use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_moment, norm_momentum=self._norm_moment,
...@@ -70,7 +70,7 @@ class CSPDownSample(ks.layers.Layer): ...@@ -70,7 +70,7 @@ class CSPDownSample(ks.layers.Layer):
kernel_initializer=self._kernel_initializer, kernel_initializer=self._kernel_initializer,
bias_initializer=self._bias_initializer, bias_initializer=self._bias_initializer,
bias_regularizer=self._bias_regularizer, bias_regularizer=self._bias_regularizer,
weight_decay=self._weight_decay, kernel_regularizer=self._kernel_regularizer,
use_bn=self._use_bn, use_bn=self._use_bn,
use_sync_bn=self._use_sync_bn, use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_moment, norm_momentum=self._norm_moment,
......
...@@ -14,7 +14,7 @@ class CSPTiny(ks.layers.Layer): ...@@ -14,7 +14,7 @@ class CSPTiny(ks.layers.Layer):
kernel_initializer='glorot_uniform', kernel_initializer='glorot_uniform',
bias_initializer='zeros', bias_initializer='zeros',
bias_regularizer=None, bias_regularizer=None,
weight_decay=None, kernel_regularizer=None,
use_bn=True, use_bn=True,
use_sync_bn=False, use_sync_bn=False,
group_id=1, group_id=1,
...@@ -34,7 +34,7 @@ class CSPTiny(ks.layers.Layer): ...@@ -34,7 +34,7 @@ class CSPTiny(ks.layers.Layer):
self._bias_regularizer = bias_regularizer self._bias_regularizer = bias_regularizer
self._use_bn = use_bn self._use_bn = use_bn
self._use_sync_bn = use_sync_bn self._use_sync_bn = use_sync_bn
self._weight_decay = weight_decay self._kernel_regularizer = kernel_regularizer
self._groups = groups self._groups = groups
self._group_id = group_id self._group_id = group_id
self._downsample = downsample self._downsample = downsample
...@@ -59,7 +59,7 @@ class CSPTiny(ks.layers.Layer): ...@@ -59,7 +59,7 @@ class CSPTiny(ks.layers.Layer):
kernel_initializer=self._kernel_initializer, kernel_initializer=self._kernel_initializer,
bias_initializer=self._bias_initializer, bias_initializer=self._bias_initializer,
bias_regularizer=self._bias_regularizer, bias_regularizer=self._bias_regularizer,
weight_decay=self._weight_decay, kernel_regularizer=self._kernel_regularizer,
use_bn=self._use_bn, use_bn=self._use_bn,
use_sync_bn=self._use_sync_bn, use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_moment, norm_momentum=self._norm_moment,
...@@ -75,7 +75,7 @@ class CSPTiny(ks.layers.Layer): ...@@ -75,7 +75,7 @@ class CSPTiny(ks.layers.Layer):
kernel_initializer=self._kernel_initializer, kernel_initializer=self._kernel_initializer,
bias_initializer=self._bias_initializer, bias_initializer=self._bias_initializer,
bias_regularizer=self._bias_regularizer, bias_regularizer=self._bias_regularizer,
weight_decay=self._weight_decay, kernel_regularizer=self._kernel_regularizer,
use_bn=self._use_bn, use_bn=self._use_bn,
use_sync_bn=self._use_sync_bn, use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_moment, norm_momentum=self._norm_moment,
...@@ -91,7 +91,7 @@ class CSPTiny(ks.layers.Layer): ...@@ -91,7 +91,7 @@ class CSPTiny(ks.layers.Layer):
kernel_initializer=self._kernel_initializer, kernel_initializer=self._kernel_initializer,
bias_initializer=self._bias_initializer, bias_initializer=self._bias_initializer,
bias_regularizer=self._bias_regularizer, bias_regularizer=self._bias_regularizer,
weight_decay=self._weight_decay, kernel_regularizer=self._kernel_regularizer,
use_bn=self._use_bn, use_bn=self._use_bn,
use_sync_bn=self._use_sync_bn, use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_moment, norm_momentum=self._norm_moment,
...@@ -107,7 +107,7 @@ class CSPTiny(ks.layers.Layer): ...@@ -107,7 +107,7 @@ class CSPTiny(ks.layers.Layer):
kernel_initializer=self._kernel_initializer, kernel_initializer=self._kernel_initializer,
bias_initializer=self._bias_initializer, bias_initializer=self._bias_initializer,
bias_regularizer=self._bias_regularizer, bias_regularizer=self._bias_regularizer,
weight_decay=self._weight_decay, kernel_regularizer=self._kernel_regularizer,
use_bn=self._use_bn, use_bn=self._use_bn,
use_sync_bn=self._use_sync_bn, use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_moment, norm_momentum=self._norm_moment,
...@@ -143,7 +143,7 @@ class CSPTiny(ks.layers.Layer): ...@@ -143,7 +143,7 @@ class CSPTiny(ks.layers.Layer):
"strides": self._strides, "strides": self._strides,
"kernel_initializer": self._kernel_initializer, "kernel_initializer": self._kernel_initializer,
"bias_initializer": self._bias_initializer, "bias_initializer": self._bias_initializer,
"weight_decay": self._weight_decay, "kernel_regularizer": self._kernel_regularizer,
"use_bn": self._use_bn, "use_bn": self._use_bn,
"use_sync_bn": self._use_sync_bn, "use_sync_bn": self._use_sync_bn,
"norm_moment": self._norm_moment, "norm_moment": self._norm_moment,
......
...@@ -23,7 +23,7 @@ class DarkConv(ks.layers.Layer): ...@@ -23,7 +23,7 @@ class DarkConv(ks.layers.Layer):
kernel_initializer='glorot_uniform', kernel_initializer='glorot_uniform',
bias_initializer='zeros', bias_initializer='zeros',
bias_regularizer=None, bias_regularizer=None,
weight_decay=None, # Specify the weight decay, as the default will not work. kernel_regularizer=None, # Specify the regularizer explicitly, as the default will not work.
use_bn=True, use_bn=True,
use_sync_bn=False, use_sync_bn=False,
norm_momentum=0.99, norm_momentum=0.99,
...@@ -66,7 +66,7 @@ class DarkConv(ks.layers.Layer): ...@@ -66,7 +66,7 @@ class DarkConv(ks.layers.Layer):
self._use_bias = use_bias self._use_bias = use_bias
self._kernel_initializer = kernel_initializer self._kernel_initializer = kernel_initializer
self._bias_initializer = bias_initializer self._bias_initializer = bias_initializer
self._weight_decay = weight_decay self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer self._bias_regularizer = bias_regularizer
# batchnorm params # batchnorm params
...@@ -112,7 +112,7 @@ class DarkConv(ks.layers.Layer): ...@@ -112,7 +112,7 @@ class DarkConv(ks.layers.Layer):
use_bias=self._use_bias, use_bias=self._use_bias,
kernel_initializer=self._kernel_initializer, kernel_initializer=self._kernel_initializer,
bias_initializer=self._bias_initializer, bias_initializer=self._bias_initializer,
kernel_regularizer=self._weight_decay, kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer) bias_regularizer=self._bias_regularizer)
#self.conv =tf.nn.convolution(filters=self._filters, strides=self._strides, padding=self._padding #self.conv =tf.nn.convolution(filters=self._filters, strides=self._strides, padding=self._padding
...@@ -136,8 +136,6 @@ class DarkConv(ks.layers.Layer): ...@@ -136,8 +136,6 @@ class DarkConv(ks.layers.Layer):
self._activation_fn = mish() self._activation_fn = mish()
else: else:
self._activation_fn = ks.layers.Activation(activation=self._activation) self._activation_fn = ks.layers.Activation(activation=self._activation)
super(DarkConv, self).build(input_shape)
return return
def call(self, inputs): def call(self, inputs):
...@@ -159,7 +157,7 @@ class DarkConv(ks.layers.Layer): ...@@ -159,7 +157,7 @@ class DarkConv(ks.layers.Layer):
"kernel_initializer": self._kernel_initializer, "kernel_initializer": self._kernel_initializer,
"bias_initializer": self._bias_initializer, "bias_initializer": self._bias_initializer,
"bias_regularizer": self._bias_regularizer, "bias_regularizer": self._bias_regularizer,
"l2_regularization": self._l2_regularization, "kernel_regularizer": self._kernel_regularizer,
"use_bn": self._use_bn, "use_bn": self._use_bn,
"use_sync_bn": self._use_sync_bn, "use_sync_bn": self._use_sync_bn,
"norm_moment": self._norm_moment, "norm_moment": self._norm_moment,
......
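With the weight_decay argument renamed to kernel_regularizer across the building blocks, a regularizer object now flows straight through to the underlying tf.keras Conv2D; a usage sketch:

import tensorflow as tf
from official.vision.beta.projects.yolo.modeling import building_blocks as nn_blocks

# An L2 kernel regularizer is passed through DarkConv unchanged.
conv = nn_blocks.DarkConv(filters=32, kernel_size=(3, 3),
                          kernel_regularizer=tf.keras.regularizers.l2(5e-4))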
...@@ -14,7 +14,7 @@ class DarkResidual(ks.layers.Layer): ...@@ -14,7 +14,7 @@ class DarkResidual(ks.layers.Layer):
use_bias=True, use_bias=True,
kernel_initializer='glorot_uniform', kernel_initializer='glorot_uniform',
bias_initializer='zeros', bias_initializer='zeros',
weight_decay=None, kernel_regularizer=None,
bias_regularizer=None, bias_regularizer=None,
use_bn=True, use_bn=True,
use_sync_bn=False, use_sync_bn=False,
...@@ -59,7 +59,7 @@ class DarkResidual(ks.layers.Layer): ...@@ -59,7 +59,7 @@ class DarkResidual(ks.layers.Layer):
self._bias_regularizer = bias_regularizer self._bias_regularizer = bias_regularizer
self._use_bn = use_bn self._use_bn = use_bn
self._use_sync_bn = use_sync_bn self._use_sync_bn = use_sync_bn
self._weight_decay = weight_decay self._kernel_regularizer = kernel_regularizer
# normal params # normal params
self._norm_moment = norm_momentum self._norm_moment = norm_momentum
...@@ -88,7 +88,7 @@ class DarkResidual(ks.layers.Layer): ...@@ -88,7 +88,7 @@ class DarkResidual(ks.layers.Layer):
norm_momentum=self._norm_moment, norm_momentum=self._norm_moment,
norm_epsilon=self._norm_epsilon, norm_epsilon=self._norm_epsilon,
activation=self._conv_activation, activation=self._conv_activation,
weight_decay=self._weight_decay, kernel_regularizer=self._kernel_regularizer,
leaky_alpha=self._leaky_alpha) leaky_alpha=self._leaky_alpha)
else: else:
self._dconv = Identity() self._dconv = Identity()
...@@ -106,7 +106,7 @@ class DarkResidual(ks.layers.Layer): ...@@ -106,7 +106,7 @@ class DarkResidual(ks.layers.Layer):
norm_momentum=self._norm_moment, norm_momentum=self._norm_moment,
norm_epsilon=self._norm_epsilon, norm_epsilon=self._norm_epsilon,
activation=self._conv_activation, activation=self._conv_activation,
weight_decay=self._weight_decay, kernel_regularizer=self._kernel_regularizer,
leaky_alpha=self._leaky_alpha) leaky_alpha=self._leaky_alpha)
self._conv2 = DarkConv(filters=self._filters, self._conv2 = DarkConv(filters=self._filters,
kernel_size=(3, 3), kernel_size=(3, 3),
...@@ -121,7 +121,7 @@ class DarkResidual(ks.layers.Layer): ...@@ -121,7 +121,7 @@ class DarkResidual(ks.layers.Layer):
norm_momentum=self._norm_moment, norm_momentum=self._norm_moment,
norm_epsilon=self._norm_epsilon, norm_epsilon=self._norm_epsilon,
activation=self._conv_activation, activation=self._conv_activation,
weight_decay=self._weight_decay, kernel_regularizer=self._kernel_regularizer,
leaky_alpha=self._leaky_alpha) leaky_alpha=self._leaky_alpha)
self._shortcut = ks.layers.Add() self._shortcut = ks.layers.Add()
...@@ -144,7 +144,7 @@ class DarkResidual(ks.layers.Layer): ...@@ -144,7 +144,7 @@ class DarkResidual(ks.layers.Layer):
"use_bias": self._use_bias, "use_bias": self._use_bias,
"kernel_initializer": self._kernel_initializer, "kernel_initializer": self._kernel_initializer,
"bias_initializer": self._bias_initializer, "bias_initializer": self._bias_initializer,
"weight_decay": self._weight_decay, "kernel_regularizer": self._kernel_regularizer,
"use_bn": self._use_bn, "use_bn": self._use_bn,
"use_sync_bn": self._use_sync_bn, "use_sync_bn": self._use_sync_bn,
"norm_moment": self._norm_moment, "norm_moment": self._norm_moment,
......
...@@ -15,7 +15,7 @@ class DarkTiny(ks.layers.Layer): ...@@ -15,7 +15,7 @@ class DarkTiny(ks.layers.Layer):
kernel_initializer='glorot_uniform', kernel_initializer='glorot_uniform',
bias_initializer='zeros', bias_initializer='zeros',
bias_regularizer=None, bias_regularizer=None,
weight_decay=None, # TODO: find where the default is stated kernel_regularizer=None, # TODO: find where the default is stated
use_bn=True, use_bn=True,
use_sync_bn=False, use_sync_bn=False,
norm_momentum=0.99, norm_momentum=0.99,
...@@ -34,7 +34,7 @@ class DarkTiny(ks.layers.Layer): ...@@ -34,7 +34,7 @@ class DarkTiny(ks.layers.Layer):
self._use_bn = use_bn self._use_bn = use_bn
self._use_sync_bn = use_sync_bn self._use_sync_bn = use_sync_bn
self._strides = strides self._strides = strides
self._weight_decay = weight_decay self._kernel_regularizer = kernel_regularizer
# normal params # normal params
self._norm_moment = norm_momentum self._norm_moment = norm_momentum
...@@ -68,7 +68,7 @@ class DarkTiny(ks.layers.Layer): ...@@ -68,7 +68,7 @@ class DarkTiny(ks.layers.Layer):
kernel_initializer=self._kernel_initializer, kernel_initializer=self._kernel_initializer,
bias_initializer=self._bias_initializer, bias_initializer=self._bias_initializer,
bias_regularizer=self._bias_regularizer, bias_regularizer=self._bias_regularizer,
weight_decay=self._weight_decay, kernel_regularizer=self._kernel_regularizer,
use_bn=self._use_bn, use_bn=self._use_bn,
use_sync_bn=self._use_sync_bn, use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_moment, norm_momentum=self._norm_moment,
......
...@@ -25,25 +25,26 @@ from tensorflow.python.distribute import strategy_combinations ...@@ -25,25 +25,26 @@ from tensorflow.python.distribute import strategy_combinations
from official.vision.beta.projects.yolo.modeling.backbones import Darknet from official.vision.beta.projects.yolo.modeling.backbones import Darknet
class ResNetTest(parameterized.TestCase, tf.test.TestCase): class DarkNetTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters( @parameterized.parameters(
(224, "darknet53", 2), (224, "darknet53", 2, 1),
(224, "darknettiny", 2), (224, "darknettiny", 1, 2),
(224, "cspdarknettiny", 1), (224, "cspdarknettiny", 1, 1),
(224, "cspdarknet53", 2), (224, "cspdarknet53", 2, 1),
) )
def test_network_creation(self, input_size, model_id, def test_network_creation(self, input_size, model_id,
endpoint_filter_scale): endpoint_filter_scale, scale_final):
"""Test creation of ResNet family models.""" """Test creation of ResNet family models."""
tf.keras.backend.set_image_data_format('channels_last') tf.keras.backend.set_image_data_format('channels_last')
network = Darknet.Darknet(model_id=model_id, min_size=3, max_size=5) network = Darknet.Darknet(model_id=model_id, min_level=3, max_level=5)
self.assertEqual(network.count_params(), resnet_params[model_id]) print(network.model_id)
self.assertEqual(network.model_id, model_id)
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1) inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
endpoints = network(inputs) endpoints = network(inputs)
self.assertAllEqual( self.assertAllEqual(
[1, input_size / 2**3, input_size / 2**3, 128 * endpoint_filter_scale], [1, input_size / 2**3, input_size / 2**3, 128 * endpoint_filter_scale],
endpoints['3'].shape.as_list()) endpoints['3'].shape.as_list())
...@@ -51,7 +52,7 @@ class ResNetTest(parameterized.TestCase, tf.test.TestCase): ...@@ -51,7 +52,7 @@ class ResNetTest(parameterized.TestCase, tf.test.TestCase):
[1, input_size / 2**4, input_size / 2**4, 256 * endpoint_filter_scale], [1, input_size / 2**4, input_size / 2**4, 256 * endpoint_filter_scale],
endpoints['4'].shape.as_list()) endpoints['4'].shape.as_list())
self.assertAllEqual( self.assertAllEqual(
[1, input_size / 2**5, input_size / 2**5, 512 * endpoint_filter_scale], [1, input_size / 2**5, input_size / 2**5, 512 * endpoint_filter_scale * scale_final],
endpoints['5'].shape.as_list()) endpoints['5'].shape.as_list())
@combinations.generate( @combinations.generate(
...@@ -64,7 +65,7 @@ class ResNetTest(parameterized.TestCase, tf.test.TestCase): ...@@ -64,7 +65,7 @@ class ResNetTest(parameterized.TestCase, tf.test.TestCase):
)) ))
def test_sync_bn_multiple_devices(self, strategy, use_sync_bn): def test_sync_bn_multiple_devices(self, strategy, use_sync_bn):
"""Test for sync bn on TPU and GPU devices.""" """Test for sync bn on TPU and GPU devices."""
inputs = np.random.rand(64, 224, 224, 3) inputs = np.random.rand(1, 224, 224, 3)
tf.keras.backend.set_image_data_format('channels_last') tf.keras.backend.set_image_data_format('channels_last')
...@@ -78,7 +79,7 @@ class ResNetTest(parameterized.TestCase, tf.test.TestCase): ...@@ -78,7 +79,7 @@ class ResNetTest(parameterized.TestCase, tf.test.TestCase):
tf.keras.backend.set_image_data_format('channels_last') tf.keras.backend.set_image_data_format('channels_last')
input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim]) input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim])
network = Darknet.Darknet(model_id="darknet53", min_size=3, max_size=5, input_shape=input_specs) network = Darknet.Darknet(model_id="darknet53", min_level=3, max_level=5, input_specs=input_specs)
inputs = tf.keras.Input(shape=(224, 224, input_dim), batch_size=1) inputs = tf.keras.Input(shape=(224, 224, input_dim), batch_size=1)
_ = network(inputs) _ = network(inputs)
...@@ -87,6 +88,8 @@ class ResNetTest(parameterized.TestCase, tf.test.TestCase): ...@@ -87,6 +88,8 @@ class ResNetTest(parameterized.TestCase, tf.test.TestCase):
# Create a network object that sets all of its config options. # Create a network object that sets all of its config options.
kwargs = dict( kwargs = dict(
model_id="darknet53", model_id="darknet53",
min_level=3,
max_level=5,
use_sync_bn=False, use_sync_bn=False,
activation='relu', activation='relu',
norm_momentum=0.99, norm_momentum=0.99,
......
...@@ -19,7 +19,7 @@ from official.core import base_task ...@@ -19,7 +19,7 @@ from official.core import base_task
from official.core import input_reader from official.core import input_reader
from official.core import task_factory from official.core import task_factory
from official.modeling import tf_utils from official.modeling import tf_utils
from official.vision.beta.configs import image_classification as exp_cfg from official.vision.beta.projects.yolo.configs import darknet_classification as exp_cfg
from official.vision.beta.dataloaders import classification_input from official.vision.beta.dataloaders import classification_input
from official.vision.beta.modeling import factory from official.vision.beta.modeling import factory
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
from absl import app from absl import app
from absl import flags from absl import flags
import gin import gin
import sys
from official.core import train_utils from official.core import train_utils
# pylint: disable=unused-import # pylint: disable=unused-import
...@@ -31,9 +32,21 @@ from official.modeling import performance ...@@ -31,9 +32,21 @@ from official.modeling import performance
FLAGS = flags.FLAGS FLAGS = flags.FLAGS
'''
python3 -m official.vision.beta.projects.yolo.train --mode=train_and_eval --experiment=darknet_classification --model_dir=training_dir --config_file=official/vision/beta/projects/yolo/configs/experiments/darknet53.yaml
'''
def import_overrides():
print(sys.modules["official.vision.beta.configs.backbones"])
return
def main(_): def main(_):
import_overrides()
gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params) gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params)
print(FLAGS.experiment)
params = train_utils.parse_configuration(FLAGS) params = train_utils.parse_configuration(FLAGS)
model_dir = FLAGS.model_dir model_dir = FLAGS.model_dir
if 'train' in FLAGS.mode: if 'train' in FLAGS.mode:
# Pure eval modes do not output yaml files. Otherwise continuous eval job # Pure eval modes do not output yaml files. Otherwise continuous eval job
......
runtime:
all_reduce_alg: null
batchnorm_spatial_persistent: false
dataset_num_private_threads: null
default_shard_dim: -1
distribution_strategy: mirrored
enable_xla: false
gpu_thread_mode: null
loss_scale: null
mixed_precision_dtype: float32
num_cores_per_replica: 1
num_gpus: 0
num_packs: 1
per_gpu_thread_count: 0
run_eagerly: false
task_index: -1
tpu: null
worker_hosts: null
task:
gradient_clip_norm: 0.0
init_checkpoint: ''
logging_dir: null
losses:
l2_weight_decay: 0.0005
label_smoothing: 0.0
one_hot: true
model:
add_head_batch_norm: false
backbone:
darknet:
model_id: cspdarknettiny
type: darknet
dropout_rate: 0.0
input_size: [224, 224, 3]
norm_activation:
activation: relu
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: false
num_classes: 1001
train_data:
block_length: 1
cache: false
cycle_length: 10
deterministic: null
drop_remainder: true
dtype: float16
enable_tf_data_service: false
global_batch_size: 128
input_path: imagenet-2012-tfrecord/train*
is_training: true
sharding: true
shuffle_buffer_size: 10000
tf_data_service_address: null
tf_data_service_job_name: null
tfds_as_supervised: false
tfds_data_dir: ''
tfds_download: false
tfds_name: ''
tfds_skip_decoding_feature: ''
tfds_split: ''
validation_data:
block_length: 1
cache: false
cycle_length: 10
deterministic: null
drop_remainder: false
dtype: float16
enable_tf_data_service: false
global_batch_size: 128
input_path: imagenet-2012-tfrecord/valid*
is_training: true
sharding: true
shuffle_buffer_size: 10000
tf_data_service_address: null
tf_data_service_job_name: null
tfds_as_supervised: false
tfds_data_dir: ''
tfds_download: false
tfds_name: ''
tfds_skip_decoding_feature: ''
tfds_split: ''
trainer:
allow_tpu_summary: false
best_checkpoint_eval_metric: ''
best_checkpoint_export_subdir: ''
best_checkpoint_metric_comp: higher
checkpoint_interval: 10000
continuous_eval_timeout: 3600
eval_tf_function: true
max_to_keep: 5
optimizer_config:
ema: null
learning_rate:
polynomial:
cycle: false
decay_steps: 799000
end_learning_rate: 0.0001
initial_learning_rate: 0.1
name: PolynomialDecay
power: 4.0
type: polynomial
optimizer:
sgd:
clipnorm: null
clipvalue: null
decay: 0.0
momentum: 0.9
name: SGD
nesterov: false
type: sgd
warmup:
linear:
name: linear
warmup_learning_rate: 0
warmup_steps: 1000
type: linear
steps_per_loop: 10000
summary_interval: 10000
train_steps: 800000
train_tf_function: true
train_tf_while_loop: true
validation_interval: 10000
validation_steps: 400
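For reference, a hedged sketch of loading and overriding this experiment programmatically, mirroring what train.py's parse_configuration does with --config_file; the YAML path is the one shown in the train.py docstring above:

from official.core import exp_factory
from official.modeling import hyperparams

params = exp_factory.get_exp_config('darknet_classification')
params = hyperparams.override_params_dict(
    params,
    'official/vision/beta/projects/yolo/configs/experiments/darknet53.yaml',
    is_strict=True)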