test_flops_profiler.py 4.17 KB
Newer Older
1
import torch
aiss's avatar
aiss committed
2
import pytest
3
4
5
import deepspeed
import deepspeed.runtime.utils as ds_utils
from deepspeed.profiling.flops_profiler import FlopsProfiler, get_model_profile
aiss's avatar
aiss committed
6
7
from .simple_model import SimpleModel, SimpleOptimizer, random_dataloader, args_from_dict
from .common import distributed_test
8

aiss's avatar
aiss committed
9
10
11
12
13
# Leading "major" and "minor" components of the installed torch version string.
TORCH_MAJOR = int(torch.__version__.split('.')[0])
TORCH_MINOR = int(torch.__version__.split('.')[1])

# Module-wide pytest marker: skip every test here when torch is older than 1.3.
_TORCH_TOO_OLD = (TORCH_MAJOR, TORCH_MINOR) < (1, 3)
pytestmark = pytest.mark.skipif(_TORCH_TOO_OLD,
                                reason='requires Pytorch version 1.3 or above')
14

aiss's avatar
aiss committed
15
16
17
18
19
20
21
22
23

def within_range(val, target, tolerance):
    """Return True when *val* is within a relative *tolerance* of *target*.

    The deviation is measured as ``abs(val - target) / abs(target)`` and must
    be strictly smaller than *tolerance*.

    Args:
        val: Measured value.
        target: Expected value; may be negative or zero.
        tolerance: Maximum allowed relative deviation (exclusive), e.g. 0.05
            for 5%.

    Returns:
        bool: Whether the relative deviation is below *tolerance*.
    """
    if target == 0:
        # A relative error is undefined for a zero target: treat an exact
        # match as zero deviation and anything else as infinitely far off,
        # instead of raising ZeroDivisionError.
        relative_error = 0.0 if val == 0 else float("inf")
    else:
        # Normalise by the magnitude of the target. Dividing by a negative
        # target would yield a negative ratio that passes any tolerance.
        relative_error = abs(val - target) / abs(target)
    return relative_error < tolerance


# Relative tolerance (5%) applied to the profiler measurements checked below.
TOLERANCE = 0.05


def test_flops_profiler_in_ds_training(tmpdir):
    """Check the flops profiler's counters after a short DeepSpeed training run.

    Builds a DeepSpeed config with fp16 and the flops profiler enabled, runs
    four training steps of ``SimpleModel`` inside a single-process distributed
    test, then asserts the flops and parameter counts exposed on
    ``model.flops_profiler``.

    Args:
        tmpdir: Directory handed to ``args_from_dict`` together with the
            config (presumably pytest's ``tmpdir`` fixture, used to write the
            config file; confirm against ``simple_model``).
    """
    config_dict = {
        "train_batch_size": 1,
        "steps_per_print": 1,
        "optimizer": {
            "type": "Adam",
            "params": {
                "lr": 0.001,
            }
        },
        "zero_optimization": {
            "stage": 0
        },
        "fp16": {
            "enabled": True,
        },
        "flops_profiler": {
            "enabled": True,
            "step": 1,
            "module_depth": -1,
            "top_modules": 3,
        },
    }
    args = args_from_dict(tmpdir, config_dict)
    hidden_dim = 10
    model = SimpleModel(hidden_dim, empty_grad=False)

    @distributed_test(world_size=[1])
    def _test_flops_profiler_in_ds_training(args, model, hidden_dim):
        # Wrap the plain module in a DeepSpeed engine; only the engine is
        # needed, the other three return values are discarded.
        model, _, _, _ = deepspeed.initialize(args=args,
                                              model=model,
                                              model_parameters=model.parameters())

        # Half-precision batches to match the fp16-enabled config above.
        data_loader = random_dataloader(model=model,
                                        total_samples=50,
                                        hidden_dim=hidden_dim,
                                        device=model.device,
                                        dtype=torch.half)
        for n, batch in enumerate(data_loader):
            loss = model(batch[0], batch[1])
            model.backward(loss)
            model.step()
            # Stop after the fourth step (n counts from 0); the remaining
            # samples in the loader are not consumed.
            if n == 3:
                break

        assert within_range(model.flops_profiler.flops, 200, tolerance=TOLERANCE)
        assert model.flops_profiler.params == 110

    _test_flops_profiler_in_ds_training(args, model, hidden_dim)
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115


class LeNet5(torch.nn.Module):
    """LeNet-5 style convolutional classifier used as a profiling target.

    The network consists of a convolutional ``feature_extractor`` (three 5x5
    convolutions with Tanh activations, the first two followed by 2x2 average
    pooling) and a two-layer fully connected ``classifier``.
    """
    def __init__(self, n_classes):
        """Build the layers.

        Args:
            n_classes: Number of output features of the final linear layer.
        """
        super().__init__()
        nn = torch.nn

        def conv5x5(channels_in, channels_out):
            # Every convolution in this network uses a 5x5 kernel with stride 1.
            return nn.Conv2d(in_channels=channels_in,
                             out_channels=channels_out,
                             kernel_size=5,
                             stride=1)

        # Layers are instantiated in network order so that parameter
        # initialisation happens in the same sequence as the layer indices.
        conv_layers = [
            conv5x5(1, 6),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            conv5x5(6, 16),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            conv5x5(16, 120),
            nn.Tanh(),
        ]
        self.feature_extractor = nn.Sequential(*conv_layers)

        dense_layers = [
            nn.Linear(in_features=120, out_features=84),
            nn.Tanh(),
            nn.Linear(in_features=84, out_features=n_classes),
        ]
        self.classifier = nn.Sequential(*dense_layers)

    def forward(self, x):
        """Run the network on a batch.

        Args:
            x: Input batch fed to the convolutional stack.

        Returns:
            tuple: ``(logits, probs)`` where ``probs`` is the softmax of
            ``logits`` over dimension 1.
        """
        features = torch.flatten(self.feature_extractor(x), 1)
        logits = self.classifier(features)
        return logits, torch.nn.functional.softmax(logits, dim=1)


def test_flops_profiler_in_inference():
    """Profile a LeNet5 forward pass and check the reported counters.

    Calls ``get_model_profile`` on a 10-class ``LeNet5`` with an input shape
    of ``(1024, 1, 32, 32)`` and asserts that the returned flops and MACs are
    within ``TOLERANCE`` of the expected values and that the parameter count
    matches exactly.
    """
    mod = LeNet5(10)
    batch_size = 1024
    # Only the shape is handed to the profiler, so no input tensor is built
    # here (this also avoids shadowing the builtin ``input``).
    input_shape = (batch_size, 1, 32, 32)
    flops, macs, params = get_model_profile(
        mod,
        input_shape,
        print_profile=True,
        detailed=True,
        module_depth=-1,
        top_modules=3,
        warm_up=1,
        # Request raw numbers rather than formatted strings so they can be
        # compared numerically below.
        as_string=False,
        ignore_modules=None,
    )
    print(flops, macs, params)
    assert within_range(flops, 866076672, TOLERANCE)
    assert within_range(macs, 426516480, TOLERANCE)
    assert params == 61706