[Refactoring] Revise function of layers and override keys in init_cfg (#893)

* [Refactoring]Approve init_cfg * revise unit test * revise according to comments * add comment for wholemodule

[Refactoring] Revise function of layers and override keys in init_cfg (#893)
* [Refactoring]Approve init_cfg * revise unit test * revise according to comments * add comment for wholemodule
65fec735 · Miao Zheng · GitHub · a9803da1 · 65fec735 · 65fec735
Unverified Commit 65fec735 authored Mar 26, 2021 by Miao Zheng Committed by GitHub Mar 26, 2021
3 changed files
--- a/mmcv/cnn/utils/weight_init.py
+++ b/mmcv/cnn/utils/weight_init.py
 # Copyright (c) Open-MMLab. All rights reserved.
+import warnings
+
 import numpy as np
 import torch.nn as nn

@@ -78,6 +80,7 @@ def bias_init_with_prob(prior_prob):
 class BaseInit(object):

    def __init__(self, *, bias=0, bias_prob=None, layer=None):
+        self.wholemodule = False
        if not isinstance(bias, (int, float)):
            raise TypeError(f'bias must be a numbel, but got a {type(bias)}')

@@ -90,7 +93,11 @@ class BaseInit(object):
            if not isinstance(layer, (str, list)):
                raise TypeError(f'layer must be a str or a list of str, \
                    but got a {type(layer)}')
-
+        else:
+            layer = []
+            warnings.warn(
+                'init_cfg without layer key, if you do not define override'
+                ' key either, this init_cfg will do nothing')
        if bias_prob is not None:
            self.bias = bias_init_with_prob(bias_prob)
        else:
@@ -119,12 +126,11 @@ class ConstantInit(BaseInit):
    def __call__(self, module):

        def init(m):
-            if self.layer is None:
+            if self.wholemodule:
                constant_init(m, self.val, self.bias)
            else:
                layername = m.__class__.__name__
-                for layer_ in self.layer:
-                    if layername == layer_:
+                if layername in self.layer:
                    constant_init(m, self.val, self.bias)

        module.apply(init)
@@ -157,12 +163,11 @@ class XavierInit(BaseInit):
    def __call__(self, module):

        def init(m):
-            if self.layer is None:
+            if self.wholemodule:
                xavier_init(m, self.gain, self.bias, self.distribution)
            else:
                layername = m.__class__.__name__
-                for layer_ in self.layer:
-                    if layername == layer_:
+                if layername in self.layer:
                    xavier_init(m, self.gain, self.bias, self.distribution)

        module.apply(init)
@@ -194,7 +199,7 @@ class NormalInit(BaseInit):
    def __call__(self, module):

        def init(m):
-            if self.layer is None:
+            if self.wholemodule:
                normal_init(m, self.mean, self.std, self.bias)
            else:
                layername = m.__class__.__name__
@@ -231,12 +236,11 @@ class UniformInit(BaseInit):
    def __call__(self, module):

        def init(m):
-            if self.layer is None:
+            if self.wholemodule:
                uniform_init(m, self.a, self.b, self.bias)
            else:
                layername = m.__class__.__name__
-                for layer_ in self.layer:
-                    if layername == layer_:
+                if layername in self.layer:
                    uniform_init(m, self.a, self.b, self.bias)

        module.apply(init)
@@ -285,13 +289,12 @@ class KaimingInit(BaseInit):
    def __call__(self, module):

        def init(m):
-            if self.layer is None:
+            if self.wholemodule:
                kaiming_init(m, self.a, self.mode, self.nonlinearity,
                             self.bias, self.distribution)
            else:
                layername = m.__class__.__name__
-                for layer_ in self.layer:
-                    if layername == layer_:
+                if layername in self.layer:
                    kaiming_init(m, self.a, self.mode, self.nonlinearity,
                                 self.bias, self.distribution)

@@ -355,12 +358,16 @@ class PretrainedInit(object):
            load_state_dict(module, state_dict, strict=False, logger=logger)


-def _initialize(module, cfg):
+def _initialize(module, cfg, wholemodule=False):
    func = build_from_cfg(cfg, INITIALIZERS)
+    # The wholemodule flag is used in override mode: an override has no
+    # layer key, so the initializer is applied to every submodule of the
+    # module named in the override.
+    func.wholemodule = wholemodule
    func(module)


-def _initialize_override(module, override):
+def _initialize_override(module, override, cfg):
    if not isinstance(override, (dict, list)):
        raise TypeError(f'override must be a dict or a list of dict, \
                but got {type(override)}')
@@ -368,9 +375,11 @@ def _initialize_override(module, override):
    override = [override] if isinstance(override, dict) else override

    for override_ in override:
+        if 'type' not in override_.keys():
+            override_.update(cfg)
        name = override_.pop('name', None)
        if hasattr(module, name):
-            _initialize(getattr(module, name), override_)
+            _initialize(getattr(module, name), override_, wholemodule=True)
        else:
            raise RuntimeError(f'module did not have attribute {name}')

@@ -440,7 +449,8 @@ def initialize(module, init_cfg):
        _initialize(module, cfg)

        if override is not None:
-            _initialize_override(module, override)
+            cfg.pop('layer', None)
+            _initialize_override(module, override, cfg)
        else:
            # All attributes in module have same initialization.
            pass
--- a/tests/test_cnn/test_weight_init.py
+++ b/tests/test_cnn/test_weight_init.py
@@ -103,13 +103,6 @@ def test_constaninit():
    assert torch.equal(model[0].bias, torch.full(model[0].bias.shape, 2.))
    assert torch.equal(model[2].bias, torch.full(model[2].bias.shape, res))

-    func = ConstantInit(val=4, bias=5)
-    func(model)
-    assert torch.equal(model[0].weight, torch.full(model[0].weight.shape, 4.))
-    assert torch.equal(model[2].weight, torch.full(model[2].weight.shape, 4.))
-    assert torch.equal(model[0].bias, torch.full(model[0].bias.shape, 5.))
-    assert torch.equal(model[2].bias, torch.full(model[2].bias.shape, 5.))
-
    # test bias input type
    with pytest.raises(TypeError):
        func = ConstantInit(val=1, bias='1')
@@ -129,8 +122,8 @@ def test_xavierinit():
    assert model[0].bias.allclose(torch.full_like(model[2].bias, 0.1))
    assert not model[2].bias.allclose(torch.full_like(model[0].bias, 0.1))

-    constant_func = ConstantInit(val=0, bias=0)
-    func = XavierInit(gain=100, bias_prob=0.01)
+    constant_func = ConstantInit(val=0, bias=0, layer=['Conv2d', 'Linear'])
+    func = XavierInit(gain=100, bias_prob=0.01, layer=['Conv2d', 'Linear'])
    model.apply(constant_func)
    assert torch.equal(model[0].weight, torch.full(model[0].weight.shape, 0.))
    assert torch.equal(model[2].weight, torch.full(model[2].weight.shape, 0.))
@@ -158,7 +151,7 @@ def test_normalinit():
    """test Normalinit class."""
    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Linear(1, 2))

-    func = NormalInit(mean=100, std=1e-5, bias=200)
+    func = NormalInit(mean=100, std=1e-5, bias=200, layer=['Conv2d', 'Linear'])
    func(model)
    assert model[0].weight.allclose(torch.tensor(100.))
    assert model[2].weight.allclose(torch.tensor(100.))
@@ -178,7 +171,7 @@ def test_normalinit():
 def test_uniforminit():
    """test UniformInit class."""
    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Linear(1, 2))
-    func = UniformInit(a=1, b=1, bias=2)
+    func = UniformInit(a=1, b=1, bias=2, layer=['Conv2d', 'Linear'])
    func(model)
    assert torch.equal(model[0].weight, torch.full(model[0].weight.shape, 1.))
    assert torch.equal(model[2].weight, torch.full(model[2].weight.shape, 1.))
@@ -203,8 +196,8 @@ def test_kaiminginit():
    assert torch.equal(model[0].bias, torch.full(model[0].bias.shape, 0.1))
    assert not torch.equal(model[2].bias, torch.full(model[2].bias.shape, 0.1))

-    func = KaimingInit(a=100, bias=10)
-    constant_func = ConstantInit(val=0, bias=0)
+    func = KaimingInit(a=100, bias=10, layer=['Conv2d', 'Linear'])
+    constant_func = ConstantInit(val=0, bias=0, layer=['Conv2d', 'Linear'])
    model.apply(constant_func)
    assert torch.equal(model[0].weight, torch.full(model[0].weight.shape, 0.))
    assert torch.equal(model[2].weight, torch.full(model[2].weight.shape, 0.))
@@ -242,7 +235,7 @@ def test_pretrainedinit():
    """test PretrainedInit class."""

    modelA = FooModule()
-    constant_func = ConstantInit(val=1, bias=2)
+    constant_func = ConstantInit(val=1, bias=2, layer=['Conv2d', 'Linear'])
    modelA.apply(constant_func)
    modelB = FooModule()
    funcB = PretrainedInit(checkpoint='modelA.pth')
@@ -273,7 +266,7 @@ def test_initialize():
    model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU(), nn.Linear(1, 2))
    foonet = FooModule()

-    init_cfg = dict(type='Constant', val=1, bias=2)
+    init_cfg = dict(type='Constant', layer=['Conv2d', 'Linear'], val=1, bias=2)
    initialize(model, init_cfg)
    assert torch.equal(model[0].weight, torch.full(model[0].weight.shape, 1.))
    assert torch.equal(model[2].weight, torch.full(model[2].weight.shape, 1.))
@@ -281,7 +274,7 @@ def test_initialize():
    assert torch.equal(model[2].bias, torch.full(model[2].bias.shape, 2.))

    init_cfg = [
-        dict(type='Constant', layer='Conv1d', val=1, bias=2),
+        dict(type='Constant', layer='Conv2d', val=1, bias=2),
        dict(type='Constant', layer='Linear', val=3, bias=4)
    ]
    initialize(model, init_cfg)
@@ -315,7 +308,7 @@ def test_initialize():
        checkpoint='modelA.pth',
        override=dict(type='Constant', name='conv2d_2', val=3, bias=4))
    modelA = FooModule()
-    constant_func = ConstantInit(val=1, bias=2)
+    constant_func = ConstantInit(val=1, bias=2, layer=['Conv2d', 'Linear'])
    modelA.apply(constant_func)
    with TemporaryDirectory():
        torch.save(modelA.state_dict(), 'modelA.pth')

--- a/tests/test_runner/test_basemodule.py
+++ b/tests/test_runner/test_basemodule.py
@@ -187,9 +187,11 @@ def test_nest_components_weight_init():
            dict(type='Constant', val=5, bias=6, layer='Conv2d'),
        ],
        component1=dict(
-            type='FooConv1d', init_cfg=dict(type='Constant', val=7, bias=8)),
+            type='FooConv1d',
+            init_cfg=dict(type='Constant', layer='Conv1d', val=7, bias=8)),
        component2=dict(
-            type='FooConv2d', init_cfg=dict(type='Constant', val=9, bias=10)),
+            type='FooConv2d',
+            init_cfg=dict(type='Constant', layer='Conv2d', val=9, bias=10)),
        component3=dict(type='FooLinear'),
        component4=dict(
            type='FooLinearConv1d',
@@ -228,12 +230,99 @@ def test_nest_components_weight_init():
    assert torch.equal(model.reg.bias, torch.full(model.reg.bias.shape, 14.0))


+def test_without_layer_weight_init():
+    model_cfg = dict(
+        type='FooModel',
+        init_cfg=[
+            dict(type='Constant', val=1, bias=2, layer='Linear'),
+            dict(type='Constant', val=3, bias=4, layer='Conv1d'),
+            dict(type='Constant', val=5, bias=6, layer='Conv2d')
+        ],
+        component1=dict(
+            type='FooConv1d', init_cfg=dict(type='Constant', val=7, bias=8)),
+        component2=dict(type='FooConv2d'),
+        component3=dict(type='FooLinear'))
+    model = build_from_cfg(model_cfg, FOOMODELS)
+    model.init_weight()
+
+    assert torch.equal(model.component1.conv1d.weight,
+                       torch.full(model.component1.conv1d.weight.shape, 3.0))
+    assert torch.equal(model.component1.conv1d.bias,
+                       torch.full(model.component1.conv1d.bias.shape, 4.0))
+
+    # init_cfg in component1 has no layer key, so it does nothing: conv1d
+    # keeps the values (3, 4) set by the model-level Conv1d init_cfg
+    assert torch.equal(model.component2.conv2d.weight,
+                       torch.full(model.component2.conv2d.weight.shape, 5.0))
+    assert torch.equal(model.component2.conv2d.bias,
+                       torch.full(model.component2.conv2d.bias.shape, 6.0))
+    assert torch.equal(model.component3.linear.weight,
+                       torch.full(model.component3.linear.weight.shape, 1.0))
+    assert torch.equal(model.component3.linear.bias,
+                       torch.full(model.component3.linear.bias.shape, 2.0))
+
+    assert torch.equal(model.reg.weight, torch.full(model.reg.weight.shape,
+                                                    1.0))
+    assert torch.equal(model.reg.bias, torch.full(model.reg.bias.shape, 2.0))
+
+
+def test_override_weight_init():
+
+    # only initialize 'override'
+    model_cfg = dict(
+        type='FooModel',
+        init_cfg=[
+            dict(type='Constant', val=10, bias=20, override=dict(name='reg'))
+        ],
+        component1=dict(type='FooConv1d'),
+        component3=dict(type='FooLinear'))
+    model = build_from_cfg(model_cfg, FOOMODELS)
+    model.init_weight()
+    assert torch.equal(model.reg.weight,
+                       torch.full(model.reg.weight.shape, 10.0))
+    assert torch.equal(model.reg.bias, torch.full(model.reg.bias.shape, 20.0))
+    # do not initialize others
+    assert not torch.equal(
+        model.component1.conv1d.weight,
+        torch.full(model.component1.conv1d.weight.shape, 10.0))
+    assert not torch.equal(
+        model.component1.conv1d.bias,
+        torch.full(model.component1.conv1d.bias.shape, 20.0))
+    assert not torch.equal(
+        model.component3.linear.weight,
+        torch.full(model.component3.linear.weight.shape, 10.0))
+    assert not torch.equal(
+        model.component3.linear.bias,
+        torch.full(model.component3.linear.bias.shape, 20.0))
+
+    # 'override' has higher priority
+    model_cfg = dict(
+        type='FooModel',
+        init_cfg=[
+            dict(
+                type='Constant',
+                val=1,
+                bias=2,
+                override=dict(name='reg', type='Constant', val=30, bias=40))
+        ],
+        component1=dict(type='FooConv1d'),
+        component2=dict(type='FooConv2d'),
+        component3=dict(type='FooLinear'))
+    model = build_from_cfg(model_cfg, FOOMODELS)
+    model.init_weight()
+
+    assert torch.equal(model.reg.weight,
+                       torch.full(model.reg.weight.shape, 30.0))
+    assert torch.equal(model.reg.bias, torch.full(model.reg.bias.shape, 40.0))
+
+
 def test_sequential_model_weight_init():
    seq_model_cfg = [
        dict(
-            type='FooConv1d', init_cfg=dict(type='Constant', val=0., bias=1.)),
+            type='FooConv1d',
+            init_cfg=dict(type='Constant', layer='Conv1d', val=0., bias=1.)),
        dict(
-            type='FooConv2d', init_cfg=dict(type='Constant', val=2., bias=3.)),
+            type='FooConv2d',
+            init_cfg=dict(type='Constant', layer='Conv2d', val=2., bias=3.)),
    ]
    layers = [build_from_cfg(cfg, COMPONENTS) for cfg in seq_model_cfg]
    seq_model = Sequential(*layers)
@@ -248,7 +337,9 @@ def test_sequential_model_weight_init():
                       torch.full(seq_model[1].conv2d.bias.shape, 3.))
    # inner init_cfg has higher priority
    seq_model = Sequential(
-        *layers, init_cfg=dict(type='Constant', val=4., bias=5.))
+        *layers,
+        init_cfg=dict(
+            type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.))
    assert torch.equal(seq_model[0].conv1d.weight,
                       torch.full(seq_model[0].conv1d.weight.shape, 0.))
    assert torch.equal(seq_model[0].conv1d.bias,
@@ -262,9 +353,11 @@ def test_sequential_model_weight_init():
 def test_modulelist_weight_init():
    models_cfg = [
        dict(
-            type='FooConv1d', init_cfg=dict(type='Constant', val=0., bias=1.)),
+            type='FooConv1d',
+            init_cfg=dict(type='Constant', layer='Conv1d', val=0., bias=1.)),
        dict(
-            type='FooConv2d', init_cfg=dict(type='Constant', val=2., bias=3.)),
+            type='FooConv2d',
+            init_cfg=dict(type='Constant', layer='Conv2d', val=2., bias=3.)),
    ]
    layers = [build_from_cfg(cfg, COMPONENTS) for cfg in models_cfg]
    modellist = ModuleList(layers)