Commit 28408cdf authored by xuxz's avatar xuxz
Browse files

Update offline_streaming_inference_chat_demo.py

parent 8465317a
'''
python offline_streaming_inference_chat_demo.py --model /models/llama2/Llama-2-7b-chat-hf --template template_llama_chat.jinja --dtype float16 --enforce-eager -tp 1
python offline_streaming_inference_chat_demo.py --model /models/llama2/Llama-2-7b-chat-hf --dtype float16 --enforce-eager -tp 1
'''
from vllm.sampling_params import SamplingParams
from vllm.engine.async_llm_engine import AsyncEngineArgs, AsyncLLMEngine
......@@ -36,7 +36,6 @@ class FlexibleArgumentParser(argparse.ArgumentParser):
return super().parse_args(processed_args, namespace)
# Build the command-line interface for the demo.
parser = FlexibleArgumentParser()
# Script-specific option: path to a chat-template file to use for the tokenizer.
parser.add_argument('--template', type=str, help="Path to template")
# Let AsyncEngineArgs register its own options; its return value is used as
# the parser from here on.
parser = AsyncEngineArgs.add_cli_args(parser)
# Parse the process arguments once; `args` is read by the code below.
args = parser.parse_args()
......@@ -47,13 +46,13 @@ args = parser.parse_args()
# ]
# Load the tokenizer that belongs to the model given on the command line.
tokenizer = AutoTokenizer.from_pretrained(args.model)

# Optionally override the tokenizer's chat template with the file passed via
# --template.  The option may be omitted (or not registered at all), in which
# case the tokenizer's own template is kept.
template_path = getattr(args, "template", None)
if template_path:
    try:
        # `with` closes the file on every path.  The previous
        # try/finally called f.close() even when open() itself failed,
        # which raised NameError because `f` was never bound.
        with open(template_path, 'r') as f:
            tokenizer.chat_template = f.read()
    except (OSError, UnicodeDecodeError) as e:
        # Best effort: report the problem and continue with the
        # tokenizer's default chat template.
        print('except:', e)
# try:
# f = open(args.template,'r')
# tokenizer.chat_template = f.read()
# except Exception as e:
# print('except:',e)
# finally:
# f.close()
......@@ -110,4 +109,3 @@ while True:
history.append({"role": "assistant", "content": response})
print()
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment