Commit 2397728d authored by chenych's avatar chenych
Browse files

Update infos and codes

parent 405b3897
......@@ -279,12 +279,10 @@ def vllm_inference_stream(bind_port, model, tokenizer, sampling_params):
logger.info("****************** in stream chat ******************")
response = web.StreamResponse()
await response.prepare(request)
text_outputs = None
async for request_output in results_generator:
prompt = request_output.prompt
text_outputs = [output.text for output in request_output.outputs]
ret = {"text": text_outputs}
await response.write((json.dumps(ret)+"\0").encode("utf-8"))
await response.write((json.dumps({"text": text_outputs})+"\0").encode("utf-8"))
response.write_eof()
return response
......@@ -293,28 +291,6 @@ def vllm_inference_stream(bind_port, model, tokenizer, sampling_params):
web.run_app(app, host='0.0.0.0', port=bind_port)
def infer_test(args):
    """Run a one-shot local inference test against the configured LLM.

    Reads the model settings from the INI file at ``args.config_path``,
    initializes the model/tokenizer pair, sends ``args.query`` through
    ``LLMInference.chat`` and logs the answer together with the latency.

    Args:
        args: parsed CLI namespace; must provide ``config_path`` (path to the
            INI config with an ``[llm]`` section) and ``query`` (the prompt).

    Returns:
        None. Results are emitted via the module-level ``logger``.
    """
    config = configparser.ConfigParser()
    config.read(args.config_path)

    # [llm] section: model location plus vLLM/stream toggles.
    model_path = config['llm']['local_llm_path']
    use_vllm = config.getboolean('llm', 'use_vllm')
    tensor_parallel_size = config.getint('llm', 'tensor_parallel_size')
    stream_chat = config.getboolean('llm', 'stream_chat')
    logger.info(f"Get params: model_path {model_path}, use_vllm {use_vllm}, tensor_parallel_size {tensor_parallel_size}, stream_chat {stream_chat}")

    # init_model / LLMInference are defined elsewhere in this module.
    model, tokenizer = init_model(model_path, use_vllm, tensor_parallel_size)
    llm_infer = LLMInference(model,
                             tokenizer,
                             use_vllm=use_vllm)

    # Time a single chat round-trip to report end-to-end latency.
    time_first = time.time()
    output_text = llm_infer.chat(args.query)
    time_second = time.time()
    logger.debug('问题:{} 回答:{} \ntimecost {} '.format(
        args.query, output_text, time_second - time_first))
def set_envs(dcu_ids):
try:
os.environ["CUDA_VISIBLE_DEVICES"] = dcu_ids
......@@ -366,7 +342,6 @@ def main():
vllm_inference(bind_port, model, tokenizer, sampling_params)
else:
hf_inference(bind_port, model, tokenizer, stream_chat)
# infer_test(args)
if __name__ == '__main__':
......
......@@ -20,6 +20,6 @@ textract==1.6.5
tiktoken==0.7.0
tenacity==8.3.0
tokenizers==0.15.2
transformers==4.38.0
transformers
unstructured==0.11.2
PyMuPDF==1.24.3
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment