Commit 671268c4 authored by Vishnu Banna

addressing comments

parent c3fd5408
@@ -3,6 +3,8 @@ runtime:
   mixed_precision_dtype: 'float16'
   num_gpus: 1
 task:
+  init_checkpoint: /home/vbanna/Research/checkpoints/yolo/v4-csp
+  init_checkpoint_modules: ['backbone', 'decoder']
   smart_bias_lr: 0.1
   model:
     darknet_based_model: False
@@ -47,6 +49,11 @@ task:
         '3': 2.80
       objectness_smooth:
         'all': 1.0
+    norm_activation:
+      activation: leaky
+      norm_epsilon: 0.0001
+      norm_momentum: 0.97
+      use_sync_bn: false
     num_classes: 80
     anchor_boxes:
       anchors_per_scale: 3
@@ -56,7 +63,7 @@ task:
   train_data:
     global_batch_size: 1
     input_path: '/media/vbanna/DATA_SHARE/CV/datasets/COCO_raw/records/train*'
-    shuffle_buffer_size: 10000
+    shuffle_buffer_size: 1
     parser:
       mosaic:
         mosaic_frequency: 1.0
@@ -73,5 +80,7 @@ task:
     global_batch_size: 1
     input_path: '/media/vbanna/DATA_SHARE/CV/datasets/COCO_raw/records/val*'
 trainer:
+  steps_per_loop: 1
+  summary_interval: 1
   optimizer_config:
     ema: null
\ No newline at end of file
+#--experiment_type=scaled_yolo mAP 47.5
 runtime:
   distribution_strategy: 'tpu'
   mixed_precision_dtype: 'float32'
...
+#--experiment_type=yolo_darknet mAP 43.0
 runtime:
   distribution_strategy: 'tpu'
   mixed_precision_dtype: 'bfloat16'
...
@@ -14,7 +14,7 @@
 # limitations under the License.
 # ==============================================================================
 """YOLO configuration definition."""
-from typing import List, Optional, Union
+from typing import Any, List, Optional, Union
 from official.core import exp_factory
 from official.modeling import hyperparams
@@ -41,7 +41,10 @@ def _build_path_scales(min_level, max_level):
 @dataclasses.dataclass
 class FPNConfig(hyperparams.Config):
+  all: Optional[Any] = None
+
   def get(self):
+    """Allows a key for each level or a single key for all the levels."""
     values = self.as_dict()
     if "all" in values and values["all"] is not None:
       for key in values:
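
Outside the diff, a minimal sketch (a hypothetical helper, not the FPNConfig implementation) of the behavior the new `all` field and docstring describe: when `all` is set it overrides every per-level entry, otherwise the per-level values pass through unchanged.

# Hypothetical, standalone illustration of the "all" override in FPNConfig.get().
def resolve_per_level(values):
  """Expands an optional "all" entry into every per-level key."""
  if values.get("all") is not None:
    return {k: values["all"] for k in values if k != "all"}
  return {k: v for k, v in values.items() if k != "all"}

# 'objectness_smooth: {all: 1.0}' style config: one value applied to all levels.
print(resolve_per_level({"all": 1.0, "3": 0.5, "4": 0.5, "5": 0.5}))
# 'iou_normalizer' style config with per-level values and no "all" override.
print(resolve_per_level({"all": None, "3": 2.80, "4": 1.0, "5": 0.4}))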
@@ -165,6 +168,17 @@ class AnchorBoxes(hyperparams.Config):
   anchors_per_scale: int = 3

   def get(self, min_level, max_level):
+    """Distributes the anchor boxes to each level, in order.
+
+    Args:
+      min_level: `int` the lowest output level.
+      max_level: `int` the highest output level.
+
+    Returns:
+      anchors_per_level: A `Dict[List[int]]` of the anchor boxes for each level.
+      self.level_limits: A `List[int]` of the box size limits to link to each
+        level under anchor free conditions.
+    """
     if self.level_limits is None:
       boxes = [box.box for box in self.boxes]
     else:
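
A hedged sketch of the distribution the new docstring describes (illustrative helper only, not AnchorBoxes.get itself): a flat anchor list is split into consecutive groups of anchors_per_scale boxes, one group per level from min_level to max_level.

# Illustrative only: assumes len(boxes) == anchors_per_scale * number of levels.
def distribute_anchors(boxes, anchors_per_scale, min_level, max_level):
  anchors_per_level = {}
  for i, level in enumerate(range(min_level, max_level + 1)):
    start = i * anchors_per_scale
    anchors_per_level[str(level)] = boxes[start:start + anchors_per_scale]
  return anchors_per_level

# 9 anchors over levels 3..5 with anchors_per_scale = 3 (values are examples).
boxes = [[12, 16], [19, 36], [40, 28],
         [36, 75], [76, 55], [72, 146],
         [142, 110], [192, 243], [459, 401]]
print(distribute_anchors(boxes, 3, 3, 5))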
@@ -310,7 +324,6 @@ def yolo_darknet() -> cfg.ExperimentConfig:
               'nesterov': True,
               'warmup_steps': 1000,
               'weight_decay': 0.0005,
-              'sim_torch': True,
             }
           },
           'learning_rate': {
@@ -424,7 +437,6 @@ def scaled_yolo() -> cfg.ExperimentConfig:
               'nesterov': True,
               'warmup_steps': steps_per_epoch * warmup_epochs,
               'weight_decay': 0.0005 * train_batch_size/64.0,
-              'sim_torch': True,
             }
           },
           'learning_rate': {
...
@@ -14,6 +14,7 @@
 """Yolo models."""
+from typing import Mapping, Union
 import tensorflow as tf
 from official.vision.beta.projects.yolo.modeling.layers import nn_blocks
@@ -52,14 +53,14 @@ class Yolo(tf.keras.Model):
     return

   def call(self, inputs, training=False):
-    maps = self._backbone(inputs)
-    decoded_maps = self._decoder(maps)
-    raw_predictions = self._head(decoded_maps)
+    maps = self.backbone(inputs)
+    decoded_maps = self.decoder(maps)
+    raw_predictions = self.head(decoded_maps)
     if training:
       return {"raw_output": raw_predictions}
     else:
       # Post-processing.
-      predictions = self._detection_generator(raw_predictions)
+      predictions = self.detection_generator(raw_predictions)
       predictions.update({"raw_output": raw_predictions})
       return predictions
@@ -86,6 +87,15 @@ class Yolo(tf.keras.Model):
   def from_config(cls, config):
     return cls(**config)

+  @property
+  def checkpoint_items(
+      self) -> Mapping[str, Union[tf.keras.Model, tf.keras.layers.Layer]]:
+    """Returns a dictionary of items to be additionally checkpointed."""
+    items = dict(backbone=self.backbone, head=self.head)
+    if self.decoder is not None:
+      items.update(decoder=self.decoder)
+    return items
+
   def fuse(self):
     """Fuses all Convolution and Batchnorm layers to get better latency."""
     print("Fusing Conv Batch Norm Layers.")
...
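
A short usage sketch of the new checkpoint_items property, mirroring how the task change further down builds a partial checkpoint; `model` is assumed to be a built Yolo instance and `ckpt_dir_or_file` a compatible checkpoint path.

import tensorflow as tf

def restore_from_checkpoint_items(model, ckpt_dir_or_file):
  # Restores only the sub-modules the model exposes (backbone, head, and
  # decoder when present), tolerating extra objects in the checkpoint.
  ckpt = tf.train.Checkpoint(**model.checkpoint_items)
  status = ckpt.read(ckpt_dir_or_file)
  status.expect_partial().assert_existing_objects_matched()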
@@ -56,7 +56,6 @@ class SGDTorchConfig(BaseOptimizerConfig):
   momentum: float = 0.9
   warmup_steps: int = 0
   weight_decay: float = 0.0
-  sim_torch: bool = False
   weight_keys: Optional[List[str]] = dataclasses.field(
       default_factory=lambda:["kernel", "weight"])
   bias_keys: Optional[List[str]] = dataclasses.field(
...
@@ -67,7 +67,6 @@ class SGDTorch(tf.keras.optimizers.Optimizer):
                momentum_start=0.0,
                warmup_steps=1000,
                nesterov=False,
-               sim_torch=False,
                name="SGD",
                weight_keys=["kernel", "weight"],
                bias_keys=["bias", "beta"],
@@ -99,9 +98,6 @@ class SGDTorch(tf.keras.optimizers.Optimizer):
     # Enable Nesterov Momentum
     self.nesterov = nesterov

-    # Simulate Pytorch Optimizer
-    self.sim_torch = sim_torch
-
     # weights, biases, other
     self._weight_keys = weight_keys
     self._bias_keys = bias_keys
@@ -110,7 +106,6 @@ class SGDTorch(tf.keras.optimizers.Optimizer):
     self._bset = set()
     self._oset = set()

-    if self.sim_torch:
     logging.info(f"Pytorch SGD simulation: ")
     logging.info(f"Weight Decay: {weight_decay}")
@@ -140,14 +135,14 @@ class SGDTorch(tf.keras.optimizers.Optimizer):
     others = []

     for var in variables:
-      # search for weights
       if self._search(var, self._weight_keys):
+        # search for weights
         weights.append(var)
-        continue
+      elif self._search(var, self._bias_keys):
         # search for biases
-      if self._search(var, self._bias_keys):
         biases.append(var)
-        continue
+      else:
         # if all searches fail, add to other group
         others.append(var)
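
For reference, a standalone sketch of the grouping the restructured block performs: each variable falls into exactly one of the weight, bias, or other groups based on substring matches against its name (the key lists mirror the config defaults above).

# Illustrative only: partitions variable names the way SGDTorch groups variables.
def group_by_name(names, weight_keys=("kernel", "weight"),
                  bias_keys=("bias", "beta")):
  weights, biases, others = [], [], []
  for name in names:
    if any(k in name for k in weight_keys):
      weights.append(name)
    elif any(k in name for k in bias_keys):
      biases.append(name)
    else:
      others.append(name)
  return weights, biases, others

print(group_by_name(["conv/kernel:0", "conv/bias:0", "bn/gamma:0", "bn/beta:0"]))
# (['conv/kernel:0'], ['conv/bias:0', 'bn/beta:0'], ['bn/gamma:0'])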
@@ -238,30 +233,6 @@ class SGDTorch(tf.keras.optimizers.Optimizer):
     return apply_state[(var_device, var_dtype)]

-  def _apply_tf(self, grad, var, weight_decay, momentum, lr):
-    """Uses Tensorflow Optimizer with Weight decay SGDW."""
-
-    def decay_op(var, learning_rate, wd):
-      if self._weight_decay and wd > 0:
-        return var.assign_sub(
-            learning_rate * var * wd, use_locking=self._use_locking)
-      return tf.no_op()
-
-    decay = decay_op(var, lr, weight_decay)
-    with tf.control_dependencies([decay]):
-      if self._momentum:
-        momentum_var = self.get_slot(var, "momentum")
-        return gen_training_ops.ResourceApplyKerasMomentum(
-            var=var.handle,
-            accum=momentum_var.handle,
-            lr=lr,
-            grad=grad,
-            momentum=momentum,
-            use_locking=self._use_locking,
-            use_nesterov=self.nesterov)
-      else:
-        return gen_training_ops.ResourceApplyGradientDescent(
-            var=var.handle, alpha=lr, delta=grad, use_locking=self._use_locking)
-
   def _apply(self, grad, var, weight_decay, momentum, lr):
     """Uses Pytorch Optimizer with Weight decay SGDW."""
     dparams = grad
@@ -308,10 +279,7 @@ class SGDTorch(tf.keras.optimizers.Optimizer):
       lr = coefficients["other_lr_t"]
     momentum = coefficients["momentum"]

-    if self.sim_torch:
     return self._apply(grad, var, weight_decay, momentum, lr)
-    else:
-      return self._apply_tf(grad, var, weight_decay, momentum, lr)

   def _resource_apply_dense(self, grad, var, apply_state=None):
     return self._run_sgd(grad, var, apply_state=apply_state)
...
@@ -340,34 +340,23 @@
     # Restoring checkpoint.
     if self.task_config.init_checkpoint_modules == 'all':
-      ckpt = tf.train.Checkpoint(model=model)
-      status = ckpt.restore(ckpt_dir_or_file)
+      ckpt = tf.train.Checkpoint(**model.checkpoint_items)
+      status = ckpt.read(ckpt_dir_or_file)
       status.expect_partial().assert_existing_objects_matched()
-    elif self.task_config.init_checkpoint_modules == 'backbone':
-      ckpt = tf.train.Checkpoint(backbone=model.backbone)
-      status = ckpt.restore(ckpt_dir_or_file)
-      status.expect_partial().assert_existing_objects_matched()
-    elif self.task_config.init_checkpoint_modules == 'decoder':
-      ckpt = tf.train.Checkpoint(backbone=model.backbone, decoder=model.decoder)
-      status = ckpt.restore(ckpt_dir_or_file)
-      status.expect_partial()
     else:
-      assert "Only 'all' or 'backbone' can be used to initialize the model."
+      ckpt_items = {}
+      if 'backbone' in self.task_config.init_checkpoint_modules:
+        ckpt_items.update(backbone=model.backbone)
+      if 'decoder' in self.task_config.init_checkpoint_modules:
+        ckpt_items.update(decoder=model.decoder)
+      ckpt = tf.train.Checkpoint(**ckpt_items)
+      status = ckpt.read(ckpt_dir_or_file)
+      status.expect_partial().assert_existing_objects_matched()

     logging.info('Finished loading pretrained checkpoint from %s',
                  ckpt_dir_or_file)

-  def _wrap_optimizer(self, optimizer, runtime_config):
-    """Wraps the optimizer object with the loss scale optimizer."""
-    if runtime_config and runtime_config.loss_scale:
-      use_float16 = runtime_config.mixed_precision_dtype == "float16"
-      optimizer = performance.configure_optimizer(
-          optimizer,
-          use_graph_rewrite=False,
-          use_float16=use_float16,
-          loss_scale=runtime_config.loss_scale)
-    return optimizer
-
   def create_optimizer(self,
                        optimizer_config: OptimizationConfig,
                        runtime_config: Optional[RuntimeConfig] = None):
@@ -397,7 +386,15 @@
     if ema:
       logging.info("EMA is enabled.")
       optimizer = opt_factory.add_ema(optimizer)
-    optimizer = self._wrap_optimizer(optimizer, runtime_config)
+
+    if runtime_config and runtime_config.loss_scale:
+      use_float16 = runtime_config.mixed_precision_dtype == "float16"
+      optimizer = performance.configure_optimizer(
+          optimizer,
+          use_graph_rewrite=False,
+          use_float16=use_float16,
+          loss_scale=runtime_config.loss_scale)
     return optimizer
...
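
Roughly what the inlined wrapping above does, sketched with stock Keras mixed precision; the task actually uses the Model Garden helper performance.configure_optimizer, so this is an approximation rather than the same code path.

import tensorflow as tf

def wrap_for_loss_scaling(optimizer, mixed_precision_dtype, loss_scale="dynamic"):
  # Under float16 training, gradients are scaled up before backprop and
  # unscaled before the weight update to avoid underflow.
  if mixed_precision_dtype == "float16":
    if loss_scale == "dynamic":
      return tf.keras.mixed_precision.LossScaleOptimizer(optimizer)
    return tf.keras.mixed_precision.LossScaleOptimizer(
        optimizer, dynamic=False, initial_scale=loss_scale)
  return optimizer

opt = wrap_for_loss_scaling(tf.keras.optimizers.SGD(0.01), "float16")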