Unverified Commit 65fec735 authored by Miao Zheng, committed by GitHub

[Refactoring] Revise function of layers and override keys in init_cfg (#893)

* [Refactoring] Approve init_cfg

* revise unit test

* revise according to comments

* add comment for wholemodule
parent a9803da1
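For context, a minimal usage sketch (not part of the diff below, and assuming initialize is imported from mmcv.cnn as in the unit tests): after this change the 'layer' key must explicitly list the layer types an initializer applies to, otherwise the config only takes effect through 'override'.

import torch.nn as nn
from mmcv.cnn import initialize

model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Linear(1, 2))

# 'layer' names the layer classes that receive this initializer.
init_cfg = dict(type='Constant', layer=['Conv2d', 'Linear'], val=1, bias=2)
initialize(model, init_cfg)

# An init_cfg without 'layer' (and without 'override') now only emits a
# warning and initializes nothing.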
 # Copyright (c) Open-MMLab. All rights reserved.
+import warnings
 import numpy as np
 import torch.nn as nn
@@ -78,6 +80,7 @@ def bias_init_with_prob(prior_prob):
 class BaseInit(object):
     def __init__(self, *, bias=0, bias_prob=None, layer=None):
+        self.wholemodule = False
         if not isinstance(bias, (int, float)):
             raise TypeError(f'bias must be a numbel, but got a {type(bias)}')
@@ -90,7 +93,11 @@ class BaseInit(object):
             if not isinstance(layer, (str, list)):
                 raise TypeError(f'layer must be a str or a list of str, \
                     but got a {type(layer)}')
+        else:
+            layer = []
+            warnings.warn(
+                'init_cfg without layer key, if you do not define override'
+                ' key either, this init_cfg will do nothing')
         if bias_prob is not None:
             self.bias = bias_init_with_prob(bias_prob)
         else:
@@ -119,12 +126,11 @@ class ConstantInit(BaseInit):
     def __call__(self, module):
         def init(m):
-            if self.layer is None:
+            if self.wholemodule:
                 constant_init(m, self.val, self.bias)
             else:
                 layername = m.__class__.__name__
-                for layer_ in self.layer:
-                    if layername == layer_:
-                        constant_init(m, self.val, self.bias)
+                if layername in self.layer:
+                    constant_init(m, self.val, self.bias)
         module.apply(init)
@@ -157,12 +163,11 @@ class XavierInit(BaseInit):
     def __call__(self, module):
         def init(m):
-            if self.layer is None:
+            if self.wholemodule:
                 xavier_init(m, self.gain, self.bias, self.distribution)
             else:
                 layername = m.__class__.__name__
-                for layer_ in self.layer:
-                    if layername == layer_:
-                        xavier_init(m, self.gain, self.bias, self.distribution)
+                if layername in self.layer:
+                    xavier_init(m, self.gain, self.bias, self.distribution)
         module.apply(init)
@@ -194,7 +199,7 @@ class NormalInit(BaseInit):
     def __call__(self, module):
         def init(m):
-            if self.layer is None:
+            if self.wholemodule:
                 normal_init(m, self.mean, self.std, self.bias)
             else:
                 layername = m.__class__.__name__
@@ -231,12 +236,11 @@ class UniformInit(BaseInit):
     def __call__(self, module):
         def init(m):
-            if self.layer is None:
+            if self.wholemodule:
                 uniform_init(m, self.a, self.b, self.bias)
             else:
                 layername = m.__class__.__name__
-                for layer_ in self.layer:
-                    if layername == layer_:
-                        uniform_init(m, self.a, self.b, self.bias)
+                if layername in self.layer:
+                    uniform_init(m, self.a, self.b, self.bias)
         module.apply(init)
@@ -285,13 +289,12 @@ class KaimingInit(BaseInit):
     def __call__(self, module):
         def init(m):
-            if self.layer is None:
+            if self.wholemodule:
                 kaiming_init(m, self.a, self.mode, self.nonlinearity,
                              self.bias, self.distribution)
             else:
                 layername = m.__class__.__name__
-                for layer_ in self.layer:
-                    if layername == layer_:
-                        kaiming_init(m, self.a, self.mode, self.nonlinearity,
-                                     self.bias, self.distribution)
+                if layername in self.layer:
+                    kaiming_init(m, self.a, self.mode, self.nonlinearity,
+                                 self.bias, self.distribution)
@@ -355,12 +358,16 @@ class PretrainedInit(object):
         load_state_dict(module, state_dict, strict=False, logger=logger)

-def _initialize(module, cfg):
+def _initialize(module, cfg, wholemodule=False):
     func = build_from_cfg(cfg, INITIALIZERS)
+    # wholemodule flag is for override mode, there is no layer key in override
+    # and initializer will give init values for the whole module with the name
+    # in override.
+    func.wholemodule = wholemodule
     func(module)

-def _initialize_override(module, override):
+def _initialize_override(module, override, cfg):
     if not isinstance(override, (dict, list)):
         raise TypeError(f'override must be a dict or a list of dict, \
             but got {type(override)}')
@@ -368,9 +375,11 @@ def _initialize_override(module, override):
     override = [override] if isinstance(override, dict) else override
     for override_ in override:
+        if 'type' not in override_.keys():
+            override_.update(cfg)
         name = override_.pop('name', None)
         if hasattr(module, name):
-            _initialize(getattr(module, name), override_)
+            _initialize(getattr(module, name), override_, wholemodule=True)
         else:
             raise RuntimeError(f'module did not have attribute {name}')
@@ -440,7 +449,8 @@ def initialize(module, init_cfg):
        _initialize(module, cfg)
        if override is not None:
-            _initialize_override(module, override)
+            cfg.pop('layer', None)
+            _initialize_override(module, override, cfg)
        else:
            # All attributes in module have same initialization.
            pass
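To make the override path above concrete, a hedged sketch (the ToyHead module and its attribute names are made up for illustration, not taken from this diff): when an entry in override omits 'type', it inherits the outer config with its 'layer' key popped, and the named submodule is then initialized as a whole because wholemodule is set to True.

import torch.nn as nn
from mmcv.cnn import initialize

class ToyHead(nn.Module):  # hypothetical module, for illustration only
    def __init__(self):
        super().__init__()
        self.feat = nn.Conv2d(3, 8, 3)
        self.reg = nn.Linear(8, 4)

head = ToyHead()
# 'feat' is matched by layer='Conv2d' and gets val=1, bias=2; the 'reg'
# submodule is handled by the override entry as a whole module and gets
# val=30, bias=40, since an explicit override takes priority.
initialize(
    head,
    dict(
        type='Constant', layer='Conv2d', val=1, bias=2,
        override=dict(name='reg', type='Constant', val=30, bias=40)))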
@@ -103,13 +103,6 @@ def test_constaninit():
     assert torch.equal(model[0].bias, torch.full(model[0].bias.shape, 2.))
     assert torch.equal(model[2].bias, torch.full(model[2].bias.shape, res))

-    func = ConstantInit(val=4, bias=5)
-    func(model)
-    assert torch.equal(model[0].weight, torch.full(model[0].weight.shape, 4.))
-    assert torch.equal(model[2].weight, torch.full(model[2].weight.shape, 4.))
-    assert torch.equal(model[0].bias, torch.full(model[0].bias.shape, 5.))
-    assert torch.equal(model[2].bias, torch.full(model[2].bias.shape, 5.))
     # test bias input type
     with pytest.raises(TypeError):
         func = ConstantInit(val=1, bias='1')
@@ -129,8 +122,8 @@ def test_xavierinit():
     assert model[0].bias.allclose(torch.full_like(model[2].bias, 0.1))
     assert not model[2].bias.allclose(torch.full_like(model[0].bias, 0.1))

-    constant_func = ConstantInit(val=0, bias=0)
-    func = XavierInit(gain=100, bias_prob=0.01)
+    constant_func = ConstantInit(val=0, bias=0, layer=['Conv2d', 'Linear'])
+    func = XavierInit(gain=100, bias_prob=0.01, layer=['Conv2d', 'Linear'])
     model.apply(constant_func)
     assert torch.equal(model[0].weight, torch.full(model[0].weight.shape, 0.))
     assert torch.equal(model[2].weight, torch.full(model[2].weight.shape, 0.))
@@ -158,7 +151,7 @@ def test_normalinit():
     """test Normalinit class."""
     model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Linear(1, 2))
-    func = NormalInit(mean=100, std=1e-5, bias=200)
+    func = NormalInit(mean=100, std=1e-5, bias=200, layer=['Conv2d', 'Linear'])
     func(model)
     assert model[0].weight.allclose(torch.tensor(100.))
     assert model[2].weight.allclose(torch.tensor(100.))
@@ -178,7 +171,7 @@ def test_normalinit():
 def test_uniforminit():
     """"test UniformInit class."""
     model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Linear(1, 2))
-    func = UniformInit(a=1, b=1, bias=2)
+    func = UniformInit(a=1, b=1, bias=2, layer=['Conv2d', 'Linear'])
     func(model)
     assert torch.equal(model[0].weight, torch.full(model[0].weight.shape, 1.))
     assert torch.equal(model[2].weight, torch.full(model[2].weight.shape, 1.))
@@ -203,8 +196,8 @@ def test_kaiminginit():
     assert torch.equal(model[0].bias, torch.full(model[0].bias.shape, 0.1))
     assert not torch.equal(model[2].bias, torch.full(model[2].bias.shape, 0.1))

-    func = KaimingInit(a=100, bias=10)
-    constant_func = ConstantInit(val=0, bias=0)
+    func = KaimingInit(a=100, bias=10, layer=['Conv2d', 'Linear'])
+    constant_func = ConstantInit(val=0, bias=0, layer=['Conv2d', 'Linear'])
     model.apply(constant_func)
     assert torch.equal(model[0].weight, torch.full(model[0].weight.shape, 0.))
     assert torch.equal(model[2].weight, torch.full(model[2].weight.shape, 0.))
@@ -242,7 +235,7 @@ def test_pretrainedinit():
     """test PretrainedInit class."""
     modelA = FooModule()
-    constant_func = ConstantInit(val=1, bias=2)
+    constant_func = ConstantInit(val=1, bias=2, layer=['Conv2d', 'Linear'])
     modelA.apply(constant_func)
     modelB = FooModule()
     funcB = PretrainedInit(checkpoint='modelA.pth')
@@ -273,7 +266,7 @@ def test_initialize():
     model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Linear(1, 2))
     foonet = FooModule()
-    init_cfg = dict(type='Constant', val=1, bias=2)
+    init_cfg = dict(type='Constant', layer=['Conv2d', 'Linear'], val=1, bias=2)
     initialize(model, init_cfg)
     assert torch.equal(model[0].weight, torch.full(model[0].weight.shape, 1.))
     assert torch.equal(model[2].weight, torch.full(model[2].weight.shape, 1.))
@@ -281,7 +274,7 @@ def test_initialize():
     assert torch.equal(model[2].bias, torch.full(model[2].bias.shape, 2.))

     init_cfg = [
-        dict(type='Constant', layer='Conv1d', val=1, bias=2),
+        dict(type='Constant', layer='Conv2d', val=1, bias=2),
         dict(type='Constant', layer='Linear', val=3, bias=4)
     ]
     initialize(model, init_cfg)
@@ -315,7 +308,7 @@ def test_initialize():
         checkpoint='modelA.pth',
         override=dict(type='Constant', name='conv2d_2', val=3, bias=4))
     modelA = FooModule()
-    constant_func = ConstantInit(val=1, bias=2)
+    constant_func = ConstantInit(val=1, bias=2, layer=['Conv2d', 'Linear'])
     modelA.apply(constant_func)
     with TemporaryDirectory():
         torch.save(modelA.state_dict(), 'modelA.pth')
...
@@ -187,9 +187,11 @@ def test_nest_components_weight_init():
             dict(type='Constant', val=5, bias=6, layer='Conv2d'),
         ],
         component1=dict(
-            type='FooConv1d', init_cfg=dict(type='Constant', val=7, bias=8)),
+            type='FooConv1d',
+            init_cfg=dict(type='Constant', layer='Conv1d', val=7, bias=8)),
         component2=dict(
-            type='FooConv2d', init_cfg=dict(type='Constant', val=9, bias=10)),
+            type='FooConv2d',
+            init_cfg=dict(type='Constant', layer='Conv2d', val=9, bias=10)),
         component3=dict(type='FooLinear'),
         component4=dict(
             type='FooLinearConv1d',
@@ -228,12 +230,99 @@ def test_nest_components_weight_init():
     assert torch.equal(model.reg.bias, torch.full(model.reg.bias.shape, 14.0))

+def test_without_layer_weight_init():
+    model_cfg = dict(
+        type='FooModel',
+        init_cfg=[
+            dict(type='Constant', val=1, bias=2, layer='Linear'),
+            dict(type='Constant', val=3, bias=4, layer='Conv1d'),
+            dict(type='Constant', val=5, bias=6, layer='Conv2d')
+        ],
+        component1=dict(
+            type='FooConv1d', init_cfg=dict(type='Constant', val=7, bias=8)),
+        component2=dict(type='FooConv2d'),
+        component3=dict(type='FooLinear'))
+    model = build_from_cfg(model_cfg, FOOMODELS)
+    model.init_weight()
+
+    # init_cfg in component1 does not have layer key, so it does nothing
+    assert torch.equal(model.component1.conv1d.weight,
+                       torch.full(model.component1.conv1d.weight.shape, 3.0))
+    assert torch.equal(model.component1.conv1d.bias,
+                       torch.full(model.component1.conv1d.bias.shape, 4.0))
+    assert torch.equal(model.component2.conv2d.weight,
+                       torch.full(model.component2.conv2d.weight.shape, 5.0))
+    assert torch.equal(model.component2.conv2d.bias,
+                       torch.full(model.component2.conv2d.bias.shape, 6.0))
+    assert torch.equal(model.component3.linear.weight,
+                       torch.full(model.component3.linear.weight.shape, 1.0))
+    assert torch.equal(model.component3.linear.bias,
+                       torch.full(model.component3.linear.bias.shape, 2.0))
+    assert torch.equal(model.reg.weight,
+                       torch.full(model.reg.weight.shape, 1.0))
+    assert torch.equal(model.reg.bias, torch.full(model.reg.bias.shape, 2.0))
+
+
+def test_override_weight_init():
+    # only initialize 'override'
+    model_cfg = dict(
+        type='FooModel',
+        init_cfg=[
+            dict(type='Constant', val=10, bias=20, override=dict(name='reg'))
+        ],
+        component1=dict(type='FooConv1d'),
+        component3=dict(type='FooLinear'))
+    model = build_from_cfg(model_cfg, FOOMODELS)
+    model.init_weight()
+
+    assert torch.equal(model.reg.weight,
+                       torch.full(model.reg.weight.shape, 10.0))
+    assert torch.equal(model.reg.bias, torch.full(model.reg.bias.shape, 20.0))
+    # do not initialize others
+    assert not torch.equal(
+        model.component1.conv1d.weight,
+        torch.full(model.component1.conv1d.weight.shape, 10.0))
+    assert not torch.equal(
+        model.component1.conv1d.bias,
+        torch.full(model.component1.conv1d.bias.shape, 20.0))
+    assert not torch.equal(
+        model.component3.linear.weight,
+        torch.full(model.component3.linear.weight.shape, 10.0))
+    assert not torch.equal(
+        model.component3.linear.bias,
+        torch.full(model.component3.linear.bias.shape, 20.0))
+
+    # 'override' has higher priority
+    model_cfg = dict(
+        type='FooModel',
+        init_cfg=[
+            dict(
+                type='Constant',
+                val=1,
+                bias=2,
+                override=dict(name='reg', type='Constant', val=30, bias=40))
+        ],
+        component1=dict(type='FooConv1d'),
+        component2=dict(type='FooConv2d'),
+        component3=dict(type='FooLinear'))
+    model = build_from_cfg(model_cfg, FOOMODELS)
+    model.init_weight()
+
+    assert torch.equal(model.reg.weight,
+                       torch.full(model.reg.weight.shape, 30.0))
+    assert torch.equal(model.reg.bias, torch.full(model.reg.bias.shape, 40.0))
+
+
 def test_sequential_model_weight_init():
     seq_model_cfg = [
         dict(
-            type='FooConv1d', init_cfg=dict(type='Constant', val=0., bias=1.)),
+            type='FooConv1d',
+            init_cfg=dict(type='Constant', layer='Conv1d', val=0., bias=1.)),
         dict(
-            type='FooConv2d', init_cfg=dict(type='Constant', val=2., bias=3.)),
+            type='FooConv2d',
+            init_cfg=dict(type='Constant', layer='Conv2d', val=2., bias=3.)),
     ]
     layers = [build_from_cfg(cfg, COMPONENTS) for cfg in seq_model_cfg]
     seq_model = Sequential(*layers)
@@ -248,7 +337,9 @@ def test_sequential_model_weight_init():
                        torch.full(seq_model[1].conv2d.bias.shape, 3.))
     # inner init_cfg has highter priority
     seq_model = Sequential(
-        *layers, init_cfg=dict(type='Constant', val=4., bias=5.))
+        *layers,
+        init_cfg=dict(
+            type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.))
     assert torch.equal(seq_model[0].conv1d.weight,
                        torch.full(seq_model[0].conv1d.weight.shape, 0.))
     assert torch.equal(seq_model[0].conv1d.bias,
@@ -262,9 +353,11 @@ def test_sequential_model_weight_init():
 def test_modulelist_weight_init():
     models_cfg = [
         dict(
-            type='FooConv1d', init_cfg=dict(type='Constant', val=0., bias=1.)),
+            type='FooConv1d',
+            init_cfg=dict(type='Constant', layer='Conv1d', val=0., bias=1.)),
         dict(
-            type='FooConv2d', init_cfg=dict(type='Constant', val=2., bias=3.)),
+            type='FooConv2d',
+            init_cfg=dict(type='Constant', layer='Conv2d', val=2., bias=3.)),
     ]
     layers = [build_from_cfg(cfg, COMPONENTS) for cfg in models_cfg]
     modellist = ModuleList(layers)
...
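As the last two tests suggest, configs passed to mmcv's Sequential and ModuleList wrappers now also need an explicit 'layer' key. A hedged sketch, assuming Sequential is importable from mmcv.runner as in these tests and that the wrapper exposes init_weight() at this point in the codebase:

import torch.nn as nn
from mmcv.runner import Sequential

# Plain torch layers wrapped in mmcv's Sequential; the init_cfg below only
# takes effect because 'layer' lists Conv1d and Conv2d explicitly.
seq = Sequential(
    nn.Conv1d(3, 1, 3),
    nn.Conv2d(3, 1, 3),
    init_cfg=dict(
        type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.))
seq.init_weight()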