#!/usr/bin/env python3
import argparse
import os
from multiprocessing import Process, Value
from loguru import logger
from llm_service import Worker, llm_inference


def check_envs(args):
    """Export CUDA_VISIBLE_DEVICES from the parsed ``--DCU_ID`` list.

    Args:
        args: parsed argparse namespace; only ``args.DCU_ID`` is read.

    Raises:
        ValueError: if any entry of ``args.DCU_ID`` is not an ``int``.
    """
    # Guard clause: reject non-integer device ids up front.
    if not all(isinstance(device, int) for device in args.DCU_ID):
        logger.error(f"The --DCU_ID argument must be a list of integers, but got {args.DCU_ID}")
        raise ValueError("The --DCU_ID argument must be a list of integers")
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(map(str, args.DCU_ID))
    logger.info(f"Set environment variable CUDA_VISIBLE_DEVICES to {args.DCU_ID}")

def _str2bool(value):
    """Argparse ``type=`` callable: parse common true/false spellings to bool.

    ``type=bool`` is a classic argparse pitfall: any non-empty string is
    truthy, so ``--flag False`` silently produced ``True``. This helper makes
    ``True``/``False`` (and friends) on the command line behave as intended.

    Raises:
        argparse.ArgumentTypeError: for unrecognized spellings.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ('true', '1', 'yes', 'y', 'on'):
        return True
    if lowered in ('false', '0', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


def parse_args():
    """Parse args."""
    parser = argparse.ArgumentParser(description='Executor.')
    parser.add_argument(
        '--DCU_ID',
        nargs='+',                 # was missing: CLI values arrived as one raw string
        type=int,                  # check_envs() requires real ints
        default=[1, 2, 6, 7],
        help='设置DCU')
    parser.add_argument(
        '--config_path',
        default='/path/to/your/ai/config.ini',
        type=str,
        help='config.ini路径')
    parser.add_argument(
        '--standalone',
        default=False,
        type=_str2bool,            # was untyped: '--standalone True' yielded the string 'True'
        help='部署LLM推理服务.')
    parser.add_argument(
        '--accelerate',
        default=False,
        type=_str2bool,            # was type=bool: bool('False') is True
        help='LLM推理是否启用加速'
    )
    args = parser.parse_args()
    return args


def build_reply_text(reply: str, references: list) -> str:
    """Return *reply* with each reference appended on its own line.

    With no references the reply is returned unchanged.
    """
    if not references:
        return reply
    # One '\n' + reference per entry, concatenated after the reply.
    return reply + ''.join('\n' + ref for ref in references)


def reply_workflow(assistant):
    """Feed a canned demo query through *assistant* and log each response.

    Args:
        assistant: object exposing ``produce_response(query, history, judgment)``
            returning a ``(code, reply, references)`` triple.
    """
    queries = ['你好,我们公司想要购买几台测试机,请问需要联系贵公司哪位?']
    for q in queries:
        code, reply, references = assistant.produce_response(
            query=q, history=[], judgment=False)
        logger.info(f'{code}, {q}, {reply}, {references}')


def run():
    """Entry point: optionally spawn a local LLM server, then run the demo workflow.

    With ``--standalone``, launches ``llm_inference`` in a daemon subprocess and
    polls a shared flag until the server reports ready (1) or failure (any
    other non-zero value).

    Raises:
        RuntimeError: if the local LLM server reports startup failure.
    """
    args = parse_args()
    # Truthiness instead of `is True`: robust even if the flag arrives as a
    # non-bool (the original identity check silently failed for strings).
    if args.standalone:
        import time

        check_envs(args)
        # Shared C int: 0 = starting, 1 = ready, anything else = failure.
        server_ready = Value('i', 0)
        server_process = Process(target=llm_inference,
                                 args=(args.config_path,
                                       len(args.DCU_ID),
                                       args.accelerate,
                                       server_ready))
        # Daemonize so the server dies with this process.
        server_process.daemon = True
        server_process.start()

        while server_ready.value == 0:
            logger.info('waiting for server to be ready..')
            time.sleep(15)
        if server_ready.value != 1:
            logger.error('start local LLM server failed, quit.')
            # RuntimeError (still an Exception subclass) with an actionable
            # message replaces the cryptic `Exception('local LLM path')`.
            raise RuntimeError('failed to start local LLM inference server')
        logger.info('LLM Server start.')

    assistant = Worker(args=args)
    reply_workflow(assistant)


# Script entry point.
if __name__ == '__main__':
    run()