# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

import unittest
import subprocess
import os
import time


class BaseTestCase(unittest.TestCase):
    """Common helpers for test cases that launch ``ds_gpt2_test.sh`` runs.

    Provides log-file naming, output-directory creation, environment
    cleanup, and the launcher for a single GPT-2 run whose output is
    captured to a log file.
    """

    def __init__(self, methodName="DeepSpeed performance test"):
        """Initialise the test case and its output locations.

        Args:
            methodName: Forwarded unchanged to ``unittest.TestCase``.
                NOTE(review): the default is a description rather than the
                name of a test method; ``unittest`` rejects unknown method
                names, so callers presumably always pass a real one -
                confirm against the subclasses.
        """
        super(BaseTestCase, self).__init__(methodName)
        self.test_dir = "./test"
        self.baseline_dir = "./baseline"
        # Captured once per instance so that every log name generated by
        # this test case carries the same timestamp suffix.
        self.timestr = time.strftime("%Y%m%d-%H%M%S")

    def gen_output_name(self, test_config, prefix, baseline_config=False):
        """Build the log-file path for a run described by ``test_config``.

        Args:
            test_config: Mapping with the keys ``mp``, ``gpus``, ``nodes``,
                ``bs``, ``steps``, ``layers``, ``hidden_size``,
                ``seq_length``, ``heads`` and ``deepspeed``; ``other_args``
                and ``zero`` are optional.
            prefix: String prepended to the generated file name.
            baseline_config: When true, always produce the baseline-style
                name, even if ``test_config["deepspeed"]`` is set.

        Returns:
            A path under ``self.test_dir`` (DeepSpeed run, name includes the
            ``_ds`` marker, the optional ``_zero`` marker and the instance
            timestamp) or under ``self.baseline_dir`` (otherwise).

        Raises:
            KeyError: If a required key is missing from ``test_config``.
        """
        # Reduce the extra CLI arguments to a compact token: trim leading and
        # trailing spaces, dashes and backslashes, then drop inner spaces and
        # double quotes.
        extra = test_config.get("other_args", "")
        extra = extra.strip(' -\\').replace(" ", "").replace("\"", "")
        if extra:
            extra = "_" + extra

        zero_suffix = "_zero" if test_config.get("zero") else ""

        # Both naming schemes share this stem; only the tail differs.
        stem = "_mp{0}_gpu{1}_node{2}_bs{3}_step{4}_layer{5}_hidden{6}_seq{7}_head{8}{9}".format(
            test_config["mp"], test_config["gpus"], test_config["nodes"], test_config["bs"], test_config["steps"],
            test_config["layers"], test_config["hidden_size"], test_config["seq_length"], test_config["heads"],
            extra)

        if test_config["deepspeed"] and not baseline_config:
            file_name = stem + "_ds{0}-{1}.log".format(zero_suffix, self.timestr)
            save_dir = self.test_dir
        else:
            file_name = stem + ".log"
            save_dir = self.baseline_dir

        return os.path.join(save_dir, prefix + file_name)

    def ensure_directory_exists(self, filename):
        """Create the parent directory of ``filename`` if it is missing.

        Args:
            filename: Path of a file that is about to be written.
        """
        dirname = os.path.dirname(filename)
        # A bare file name has no directory component; os.makedirs("") would
        # raise, and the current directory already exists anyway.
        if dirname:
            # exist_ok avoids failing when the directory appears between a
            # separate existence check and the creation call.
            os.makedirs(dirname, exist_ok=True)

    def clean_test_env(self):
        """Run the ``dlts_ssh pkill`` cleanup command, then wait 20 seconds.

        The command is executed through ``/bin/bash`` with ``check=False``,
        so a non-zero exit status does not raise.
        """
        cmd = "dlts_ssh pkill -9 -f /usr/bin/python"
        print(cmd)
        subprocess.run(cmd, shell=True, check=False, executable='/bin/bash')
        # NOTE(review): presumably gives the killed processes time to exit
        # before the next run starts - confirm the required delay.
        time.sleep(20)

    def run_gpt2_test(self, test_config, output):
        """Launch ``./ds_gpt2_test.sh`` and capture its output in ``output``.

        Args:
            test_config: Mapping with the keys ``mp``, ``gpus``, ``nodes``,
                ``bs``, ``steps``, ``layers``, ``hidden_size``,
                ``seq_length``, ``heads`` and ``deepspeed``; ``json`` is
                required when ``deepspeed`` is true; ``ckpt_num_layers``
                (default 1) and ``other_args`` are optional.
            output: Path of the log file; its parent directory is created if
                needed and the file is overwritten.

        The script runs through ``/bin/bash`` with ``check=False``, so a
        non-zero exit status does not raise. Both stdout and stderr are
        written to ``output``.
        """
        ds_flag = "-d " + test_config["json"] if test_config["deepspeed"] else ""
        ckpt_num = test_config.get("ckpt_num_layers", 1)
        other_args = "-o " + test_config["other_args"] if "other_args" in test_config else ""

        cmd = "./ds_gpt2_test.sh -m {0} -g {1} -n {2} -b {3} -s {4} -l {5} -h {6} -q {7} -e {8} -c {9} {10} {11}".format(
            test_config["mp"], test_config["gpus"], test_config["nodes"], test_config["bs"], test_config["steps"],
            test_config["layers"], test_config["hidden_size"], test_config["seq_length"], test_config["heads"],
            ckpt_num, other_args, ds_flag)

        self.ensure_directory_exists(output)
        with open(output, "w") as f:
            print(cmd)
            subprocess.run(cmd, shell=True, check=False, executable='/bin/bash', stdout=f, stderr=f)