Commit 11bf5753 authored by zhuwenwen
Browse files

Merge branch 'v0.6.2-dev' of ssh://10.6.10.68:10022/dcutoolkit/deeplearing/vllm into v0.6.2-dev

parents 8516ba86 e06809f9
'''
python offline_streaming_inference_chat_demo.py --model /models/llama2/Llama-2-7b-chat-hf --template template_llama_chat.jinja --dtype float16 --enforce-eager -tp 1
'''
from vllm.sampling_params import SamplingParams from vllm.sampling_params import SamplingParams
from vllm.engine.async_llm_engine import AsyncEngineArgs, AsyncLLMEngine from vllm.engine.async_llm_engine import AsyncEngineArgs, AsyncLLMEngine
import asyncio import asyncio
from vllm.utils import FlexibleArgumentParser
from transformers import AutoTokenizer from transformers import AutoTokenizer
import logging import logging
import argparse import argparse
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment