Commit 11bf5753 authored by zhuwenwen
Browse files

Merge branch 'v0.6.2-dev' of ssh://10.6.10.68:10022/dcutoolkit/deeplearing/vllm into v0.6.2-dev

parents 8516ba86 e06809f9
'''
python offline_streaming_inference_chat_demo.py --model /models/llama2/Llama-2-7b-chat-hf --template template_llama_chat.jinja --dtype float16 --enforce-eager -tp 1
'''
from vllm.sampling_params import SamplingParams
from vllm.engine.async_llm_engine import AsyncEngineArgs, AsyncLLMEngine
import asyncio
from vllm.utils import FlexibleArgumentParser
from transformers import AutoTokenizer
import logging
import argparse
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment