test_pipeline_chat_turbomind.py 2.03 KB
Newer Older
zhouxiang's avatar
zhouxiang committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
from multiprocessing import Process

import pytest
from utils.config_utils import get_all_model_list, get_cuda_id_by_workerid
from utils.pipeline_chat import (assert_pipeline_chat_log,
                                 run_pipeline_chat_test)


@pytest.mark.order(6)
@pytest.mark.usefixtures('common_case_config')
@pytest.mark.pipeline_chat
@pytest.mark.gpu_num_1
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('model', get_all_model_list(tp_num=1))
def test_pipeline_chat_tp1(config, common_case_config, model, worker_id):
    if 'gw' in worker_id:
        os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
    p = Process(target=run_pipeline_chat_test,
                args=(config, common_case_config, model, 'turbomind'))
    p.start()
    p.join()
    assert_pipeline_chat_log(config, common_case_config, model)


@pytest.mark.order(6)
@pytest.mark.usefixtures('common_case_config')
@pytest.mark.pipeline_chat
@pytest.mark.gpu_num_2
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('model', get_all_model_list(tp_num=2))
def test_pipeline_chat_tp2(config, common_case_config, model, worker_id):
    if 'gw' in worker_id:
        os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id,
                                                                     tp_num=2)
    p = Process(target=run_pipeline_chat_test,
                args=(config, common_case_config, model, 'turbomind'))
    p.start()
    p.join()
    assert_pipeline_chat_log(config, common_case_config, model)


@pytest.mark.order(6)
@pytest.mark.usefixtures('common_case_config')
@pytest.mark.pipeline_chat
@pytest.mark.flaky(reruns=0)
@pytest.mark.pr_test
@pytest.mark.parametrize(
    'model',
    ['internlm/internlm2-chat-20b', 'internlm/internlm2-chat-20b-inner-w4a16'])
def test_pipeline_chat_pr(config, common_case_config, model):
    p = Process(target=run_pipeline_chat_test,
                args=(config, common_case_config, model, 'turbomind'))
    p.start()
    p.join()
    assert_pipeline_chat_log(config, common_case_config, model)