# get_run_config.py
import random
from time import sleep

import torch

from lmdeploy.model import MODELS


def get_cuda_allocate_prefix(config, model):
    """Build a CUDA_VISIBLE_DEVICES prefix from randomly sampled idle GPUs."""
    cuda_prefix = ''
    tp_num = get_tp_num(config, model)
    if tp_num is None:
        return cuda_prefix
    available_cuda = _get_available_cuda()
    if len(available_cuda) < tp_num:
        raise torch.cuda.OutOfMemoryError(
            f'only {len(available_cuda)} idle GPUs available, '
            f'but tp_num={tp_num} are required')

    cuda_prefix = 'CUDA_VISIBLE_DEVICES=' + ','.join(
        random.sample(available_cuda, tp_num))

    torch.cuda.empty_cache()
    return cuda_prefix


def get_tp_config(config, model, need_tp):
    # Note: the None check must happen before str() conversion, otherwise
    # str(None) == 'None' would silently pass the check.
    tp_num = get_tp_num(config, model)
    tp_info = ''
    if need_tp and tp_num is not None:
        tp_info = '--tp ' + str(tp_num)
    return tp_info


def get_tp_num(config, model):
    """Look up the tensor-parallel degree for `model`; default to 1, or
    return None when the config has no tp_config section."""
    tp_config = config.get('tp_config')
    tp_num = 1
    if tp_config is None:
        return None
    model_name = _simple_model_name(model)
    if model_name in tp_config:
        tp_num = tp_config.get(model_name)
    return tp_num
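
# Example (assumed) shape of the config consumed by get_tp_num; the real
# config is loaded elsewhere in the test suite, so this is only a sketch:
#
#   config = {'tp_config': {'internlm2-chat-20b': 2, 'llama-2-13b-chat': 2}}
#
#   get_tp_num(config, 'internlm/internlm2-chat-20b')  # -> 2
#   get_tp_num(config, 'internlm/internlm2-chat-7b')   # -> 1 (default)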


def get_command_with_extra(cmd,
                           config,
                           model,
                           need_tp: bool = False,
                           cuda_prefix: str = None,
                           need_sleep: bool = True):
    if need_sleep:
        # Stagger concurrent test workers so they do not pick the same GPUs.
        sleep(random.uniform(0, 5))
    if cuda_prefix is None:
        cuda_prefix = get_cuda_allocate_prefix(config, model)
    tp_info = get_tp_config(config, model, need_tp)

    if cuda_prefix is not None and len(cuda_prefix) > 0:
        cmd = ' '.join([cuda_prefix, cmd])
    if tp_info is not None and len(tp_info) > 0:
        cmd = ' '.join([cmd, tp_info])

    torch.cuda.empty_cache()
    return cmd
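
# Usage sketch (hypothetical command string and model id; real values come
# from the test configuration):
#
#   cmd = get_command_with_extra('lmdeploy chat /path/to/model', config,
#                                'internlm/internlm2-chat-20b', need_tp=True)
#   # e.g. -> 'CUDA_VISIBLE_DEVICES=0,1 lmdeploy chat /path/to/model --tp 2'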


def get_model_name(model):
    """Normalize a model path or repo id to a name known to lmdeploy."""
    model_names = [
        'llama', 'llama2', 'internlm', 'internlm2', 'baichuan2', 'chatglm2',
        'falcon', 'yi', 'qwen1.5'
    ]
    model_names += list(MODELS.module_dict.keys())
    model_names.sort()
    model_name = _simple_model_name(model)
    model_name = model_name.lower()

    if model_name in model_names:
        return model_name
    model_name = model_name.replace('-chat', '')
    model_name = model_name.replace('-v0.1', '')
    if model_name in model_names:
        return model_name
    if model_name == 'qwen-vl':
        return 'qwen-7b'
    if 'llama-2' in model_name:
        return 'llama-2'
    return model_name.split('-')[0]
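
# Mapping sketch derived from the rules above (assuming the reduced names
# are not themselves registered in MODELS):
#
#   'Qwen/Qwen-VL-Chat'          -> 'qwen-7b'
#   'meta-llama/Llama-2-7b-chat' -> 'llama-2'
#   'internlm/internlm2-chat-7b' -> 'internlm2'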


def _get_available_cuda():
    devices = torch.cuda.device_count()

    available_cuda = []
    for i in range(devices):
        # Skip GPUs that are busy (>5% utilization) ...
        if torch.cuda.utilization(i) > 5:
            continue
        # ... or that already have processes running on them.
        if 'no processes are running' not in torch.cuda.list_gpu_processes(i):
            continue

        available_cuda.append(str(i))

    return available_cuda


def _simple_model_name(model):
    """Strip the org prefix and quantization suffixes from a model id."""
    model_name = model.split('/')[-1]
    model_name = model_name.replace('-inner-w4a16', '')
    model_name = model_name.replace('-inner-w8a8', '')
    model_name = model_name.replace('-inner-kvint8', '')
    model_name = model_name.replace('-w4a16', '')
    return model_name


def _split_model_name(model):
    # Take the last path segment so ids without a '/' are also handled.
    return model.split('/')[-1]


if __name__ == '__main__':
    print(_simple_model_name('baichuan-inc/Baichuan2-7B-Chat-inner-w4a16'))
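    # Additional smoke check (illustrative input): quantization suffixes are
    # stripped before any name matching.
    print(_simple_model_name('internlm2-chat-20b-inner-kvint8'))
    # -> 'internlm2-chat-20b'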