# Copyright (c) 2019, The Microsoft DeepSpeed Team. All rights reserved.
#
# Note: please copy webtext data to "Megatron-LM" folder, before running this script.

import unittest
import re
#from test_common import BaseTestCase
from .test_common import BaseTestCase


class GPT2PerfTestCase(BaseTestCase):
    """Performance test cases that run GPT2 configurations and report latency.

    Each ``test_perf_*`` method builds a configuration dict and hands it to
    ``run_test``, which runs the model and prints the average time per
    iteration parsed from the output file.

    NOTE(review): ``gen_output_name`` and ``run_gpt2_test`` are not defined in
    this class; presumably they are inherited from ``BaseTestCase`` -- confirm.
    """

    def __init__(self, methodName="DeepSpeed performance test on GPT2 model"):
        super(GPT2PerfTestCase, self).__init__(methodName)

    def test_perf_1_5B(self):
        """Run the perf test with the ``1_5B`` configuration below."""
        test_config = {
            "mp": 1,
            "gpus": 16,
            "nodes": 4,
            "bs": 32,
            "steps": 100,
            "layers": 48,
            "hidden_size": 1600,
            "seq_length": 1024,
            "heads": 16,
            "deepspeed": True,
            "json": "ds_config_perf_bs32.json",
        }

        self.run_test(test_config)

    def test_perf_4B(self):
        """Run the perf test with the ``4B`` configuration below."""
        test_config = {
            "mp": 1,
            "gpus": 16,
            "nodes": 4,
            "bs": 8,
            "steps": 100,
            "layers": 64,
            "hidden_size": 2304,
            "seq_length": 1024,
            "heads": 16,
            "deepspeed": True,
            "json": "ds_config_perf_bs8.json",
        }

        self.run_test(test_config)

    def test_perf_8B(self):
        """Run the perf test with the ``8B`` configuration below."""
        test_config = {
            "mp": 2,
            "gpus": 16,
            "nodes": 4,
            "bs": 16,
            "steps": 100,
            "layers": 72,
            "hidden_size": 3072,
            "seq_length": 1024,
            "heads": 24,
            "deepspeed": True,
            "json": "ds_config_perf_bs16.json",
        }

        self.run_test(test_config)

    def test_perf_20B(self):
        """Run the perf test with the ``20B`` configuration below."""
        test_config = {
            "mp": 4,
            "gpus": 16,
            "nodes": 4,
            "bs": 8,
            "steps": 50,
            "layers": 111,
            "hidden_size": 3808,
            "seq_length": 1024,
            "heads": 32,
            "ckpt_num_layers": 1,
            "deepspeed": True,
            "json": "ds_config_perf_bs8.json",
        }

        self.run_test(test_config)

    def run_test(self, test_config):
        """Run one GPT2 configuration and print its average iteration latency.

        Args:
            test_config: dict of run settings, passed unchanged to
                ``gen_output_name`` and ``run_gpt2_test``.

        The result is only printed; a missing latency does not fail the test.
        """
        print("\n")
        print("{0}: starting......".format(self.id()))
        prefix = "gpt2_perf"

        test_file = self.gen_output_name(test_config, prefix)
        self.run_gpt2_test(test_config, test_file)
        exec_time = self.grep_latency_from_file(test_file)

        # grep_latency_from_file returns 0.0 when no latency line was parsed.
        if exec_time == 0.0:
            print("{0}: no latency found in file {1}".format(self.id(), test_file))
        else:
            print("{0}: execution time per iteration is {1}ms.".format(
                self.id(),
                exec_time))

    def grep_latency_from_file(self, file_name):
        """Return the mean "elapsed time per iteration (ms)" found in a file.

        Args:
            file_name: path of the text file to scan.

        Returns:
            The arithmetic mean of the first latency value on every line that
            reports one, or ``0.0`` when no line carries a parsable value.

        Raises:
            OSError: if ``file_name`` cannot be opened for reading.
        """
        latency_pattern = re.compile(
            r'elapsed time per iteration \(ms\): ([-+]?[0-9]+\.?[0-9]*(?:[Ee][-+]?[0-9]+)?)'
        )

        total_ms = 0.0
        count = 0

        with open(file_name, 'r') as f:
            # Stream the file; there is no need to hold every line in memory.
            for line in f:
                found = latency_pattern.search(line)
                if found is None:
                    # Lines without a parsable number (including ones that
                    # mention the phrase but report e.g. "nan") are skipped
                    # instead of raising IndexError.
                    continue
                total_ms += float(found.group(1))
                count += 1

        return total_ms / count if count else 0.0


def suite():
    """Build a TestSuite holding the four GPT2 perf tests in a fixed order.

    Returns:
        A ``unittest.TestSuite`` with the 1_5B, 4B, 8B and 20B cases, added
        in that order.
    """
    case_names = (
        'test_perf_1_5B',
        'test_perf_4B',
        'test_perf_8B',
        'test_perf_20B',
    )

    perf_suite = unittest.TestSuite()
    for case_name in case_names:
        perf_suite.addTest(GPT2PerfTestCase(case_name))
    return perf_suite


if __name__ == '__main__':
    # failfast=True makes the runner stop at the first failure or error.
    unittest.TextTestRunner(failfast=True).run(suite())