import random
from time import sleep
from typing import Optional

import torch
from lmdeploy.model import MODELS


def get_cuda_allocate_prefix(config, model):
    """Build a CUDA_VISIBLE_DEVICES prefix from randomly sampled idle GPUs."""
    cuda_prefix = ''
    tp_num = get_tp_num(config, model)
    if tp_num is None:
        return cuda_prefix
    available_cuda = _get_available_cuda()
    if len(available_cuda) < tp_num:
        raise torch.cuda.OutOfMemoryError(
            f'need {tp_num} idle GPUs, only {len(available_cuda)} available')
    cuda_prefix = 'CUDA_VISIBLE_DEVICES=' + ','.join(
        random.sample(available_cuda, tp_num))
    torch.cuda.empty_cache()
    return cuda_prefix


def get_tp_config(config, model, need_tp):
    """Return a '--tp N' argument when tensor parallelism is requested."""
    tp_num = get_tp_num(config, model)
    tp_info = ''
    if need_tp and tp_num is not None:
        tp_info = '--tp ' + str(tp_num)
    return tp_info


def get_tp_num(config, model):
    """Look up the tensor-parallel degree for a model, defaulting to 1."""
    tp_config = config.get('tp_config')
    if tp_config is None:
        return None
    tp_num = 1
    model_name = _simple_model_name(model)
    if model_name in tp_config:
        tp_num = tp_config.get(model_name)
    return tp_num


def get_command_with_extra(cmd,
                           config,
                           model,
                           need_tp: bool = False,
                           cuda_prefix: Optional[str] = None,
                           need_sleep: bool = True):
    """Prepend a CUDA device prefix and append a tp flag to a shell command."""
    if need_sleep:
        # Stagger concurrent callers so they do not grab the same idle GPUs.
        sleep(random.uniform(0, 5))
    if cuda_prefix is None:
        cuda_prefix = get_cuda_allocate_prefix(config, model)
    tp_config = get_tp_config(config, model, need_tp)

    if cuda_prefix is not None and len(cuda_prefix) > 0:
        cmd = ' '.join([cuda_prefix, cmd])
    if tp_config is not None and len(tp_config) > 0:
        cmd = ' '.join([cmd, tp_config])

    torch.cuda.empty_cache()
    return cmd


def get_model_name(model):
    """Normalize a model path or repo id to a known chat-template name."""
    model_names = [
        'llama', 'llama2', 'internlm', 'internlm2', 'baichuan2', 'chatglm2',
        'falcon', 'yi', 'qwen1.5'
    ]
    model_names += list(MODELS.module_dict.keys())
    model_names.sort()
    model_name = _simple_model_name(model).lower()
    if model_name in model_names:
        return model_name
    model_name = model_name.replace('-chat', '')
    model_name = model_name.replace('-v0.1', '')
    if model_name in model_names:
        return model_name
    if model_name == 'qwen-vl':
        return 'qwen-7b'
    if 'llama-2' in model_name:
        return 'llama-2'
    return model_name.split('-')[0]


def _get_available_cuda():
    """List indices of idle GPUs: low utilization and no running processes."""
    devices = torch.cuda.device_count()
    available_cuda = []
    for i in range(devices):
        if torch.cuda.utilization(i) > 5:
            continue
        if 'no processes are running' not in torch.cuda.list_gpu_processes(i):
            continue
        available_cuda.append(str(i))
    return available_cuda


def _simple_model_name(model):
    """Strip the org prefix and quantization suffixes from a model id."""
    model_name = model.split('/')[-1] if '/' in model else model
    model_name = model_name.replace('-inner-w4a16', '')
    model_name = model_name.replace('-inner-w8a8', '')
    model_name = model_name.replace('-inner-kvint8', '')
    model_name = model_name.replace('-w4a16', '')
    return model_name


def _split_model_name(model):
    """Return only the final path segment of a model id."""
    return model.split('/')[-1]


if __name__ == '__main__':
    print(_simple_model_name('baichuan-inc/Baichuan2-7B-Chat-inner-w4a16'))
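    # Minimal usage sketch: the 'tp_config' mapping and the model id below are
    # illustrative assumptions, not values shipped with this module. The lookup
    # is pure (no GPU access), so this runs on any machine with the imports.
    demo_config = {'tp_config': {'internlm2-chat-20b': 2}}
    print(get_tp_config(demo_config, 'internlm/internlm2-chat-20b', True))
    # expected output: --tp 2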