Commit e06809f9 authored by zhuwenwen
Browse files

Merge branch 'v0.6.2-dev' into 'v0.6.2-dev'

fix

See merge request dcutoolkit/deeplearing/vllm!47
parents f6ce3afa 8465317a
'''
python offline_streaming_inference_chat_demo.py --model /models/llama2/Llama-2-7b-chat-hf --template template_llama_chat.jinja --dtype float16 --enforce-eager -tp 1
'''
from vllm.sampling_params import SamplingParams
from vllm.engine.async_llm_engine import AsyncEngineArgs, AsyncLLMEngine
import asyncio
from vllm.utils import FlexibleArgumentParser
from transformers import AutoTokenizer
import logging
import argparse
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment