#!/usr/bin/env python3
"""Executor entry point: optionally launches a local LLM inference server
in a child process, then runs a small question/answer workflow against it."""
import argparse
import os
import time
from multiprocessing import Process, Value

from loguru import logger

from llm_service import Worker, llm_inference


def set_envs(dcu_ids: str) -> None:
    """Restrict visible accelerator devices by setting CUDA_VISIBLE_DEVICES.

    Args:
        dcu_ids: Comma-separated device ids, e.g. "0,1,2".

    Raises:
        ValueError: If the assignment fails (e.g. a non-string was passed).
    """
    try:
        os.environ["CUDA_VISIBLE_DEVICES"] = dcu_ids
        logger.info(f"Set environment variable CUDA_VISIBLE_DEVICES to {dcu_ids}")
    except Exception as e:
        logger.error(f"{e}, but got {dcu_ids}")
        raise ValueError(f"{e}")


def _str2bool(value) -> bool:
    """Parse a command-line boolean flag value.

    argparse's `type=bool` is a known trap: bool("False") is True. This
    helper accepts the usual spellings and rejects anything else so that
    `--use_vllm False` actually disables the feature.
    """
    if isinstance(value, bool):
        return value
    lowered = str(value).strip().lower()
    if lowered in ('true', '1', 'yes', 'y'):
        return True
    if lowered in ('false', '0', 'no', 'n'):
        return False
    raise argparse.ArgumentTypeError(f'invalid boolean value: {value!r}')


def parse_args():
    """Parse args."""
    parser = argparse.ArgumentParser(description='Executor.')
    parser.add_argument(
        '--DCU_ID',
        type=str,
        default='0',
        help='设置DCU卡号,卡号之间用英文逗号隔开,输入样例:"0,1,2"')
    parser.add_argument(
        '--config_path',
        default='/path/of/config.ini',
        type=str,
        help='config.ini路径')
    parser.add_argument(
        '--standalone',
        # type=_str2bool (not type=bool / bare default) so a CLI-supplied
        # value becomes a real boolean instead of a truthy string.
        type=_str2bool,
        default=False,
        help='部署LLM推理服务')
    parser.add_argument(
        '--use_vllm',
        type=_str2bool,
        default=False,
        help='是否启用LLM推理加速'
    )
    args = parser.parse_args()
    return args


def build_reply_text(reply: str, references: list) -> str:
    """Append each reference on its own line after the reply text.

    Args:
        reply: The base answer text.
        references: Source snippets to append; may be empty.

    Returns:
        The reply, followed by one newline-separated reference per line.
    """
    if not references:
        return reply
    return '\n'.join([reply, *references])


def reply_workflow(assistant) -> None:
    """Run the demo queries through the assistant and log each response."""
    queries = ['我们公司想要购买几台测试机,请问需要联系哪位?']
    for query in queries:
        code, reply, references = assistant.produce_response(query=query,
                                                            history=[],
                                                            judgment=False)
        logger.info(f'{code}, {query}, {reply}, {references}')


def run():
    """Main entry: optionally start a local LLM server, then run the workflow.

    In standalone mode the server runs in a daemon child process and is
    polled via a shared flag: 0 = starting, 1 = ready, anything else = failed.
    """
    args = parse_args()
    if args.standalone:
        # Pass the device-id string, not the whole Namespace: set_envs
        # writes its argument straight into CUDA_VISIBLE_DEVICES.
        set_envs(args.DCU_ID)
        # Device count = number of comma-separated ids, not the string
        # length ("0,1,2" has 3 devices, not 5 characters).
        num_devices = len(args.DCU_ID.split(','))
        server_ready = Value('i', 0)
        server_process = Process(target=llm_inference,
                                 args=(args.config_path,
                                       num_devices,
                                       args.use_vllm,
                                       server_ready))
        server_process.daemon = True  # do not outlive the parent
        server_process.start()
        while True:
            if server_ready.value == 0:
                logger.info('waiting for server to be ready.')
                time.sleep(15)
            elif server_ready.value == 1:
                break
            else:
                logger.error('start local LLM server failed, quit.')
                raise Exception('local LLM path')
        logger.info('LLM Server start.')
    assistant = Worker(args=args)
    reply_workflow(assistant)


if __name__ == '__main__':
    run()