Commit 8465317a authored by xuxzh1's avatar xuxzh1 🎱
Browse files

fix

parent f6ce3afa
'''
python offline_streaming_inference_chat_demo.py --model /models/llama2/Llama-2-7b-chat-hf --template template_llama_chat.jinja --dtype float16 --enforce-eager -tp 1
'''
from vllm.sampling_params import SamplingParams
from vllm.engine.async_llm_engine import AsyncEngineArgs, AsyncLLMEngine
import asyncio
from vllm.utils import FlexibleArgumentParser
from transformers import AutoTokenizer
import logging
import argparse
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment