"...text-generation-inference.git" did not exist on "8ad60b752fdf678153e7685d9f0e30e88a60cd42"
Unverified Commit 82ad323d authored by Zhuohan Li's avatar Zhuohan Li Committed by GitHub
Browse files

[Fix] Add chat completion Example and simplify dependencies (#576)

parent df5dd3c6
"""Example client: run a chat completion against a vLLM OpenAI-compatible server."""
import openai

# Modify OpenAI's API key and API base to use vLLM's API server.
# The key is unused by vLLM but the client library requires a non-empty value.
openai.api_key = "EMPTY"
openai.api_base = "http://localhost:8000/v1"

# List models API — ask the server which model it is serving and use that id.
models = openai.Model.list()
print("Models:", models)
model = models["data"][0]["id"]

# Chat completion API — a short multi-turn conversation ending in a user question.
conversation = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Who won the world series in 2020?"},
    {
        "role": "assistant",
        "content": "The Los Angeles Dodgers won the World Series in 2020."
    },
    {"role": "user", "content": "Where was it played?"},
]
chat_completion = openai.ChatCompletion.create(model=model,
                                               messages=conversation)
print("Chat completion results:")
print(chat_completion)
...@@ -3,26 +3,26 @@ import openai ...@@ -3,26 +3,26 @@ import openai
# Modify OpenAI's API key and API base to use vLLM's API server.
openai.api_key = "EMPTY"
openai.api_base = "http://localhost:8000/v1"
model = "facebook/opt-125m"
# Test list models API # List models API
models = openai.Model.list()
print("Models:", models)
# Test completion API model = models["data"][0]["id"]
stream = True
# Completion API
stream = False
completion = openai.Completion.create(
model=model,
prompt="A robot may not injure a human being",
echo=False,
n=2,
best_of=3,
stream=stream,
logprobs=3)
# print the completion print("Completion results:")
if stream:
for c in completion:
print(c)
else:
print("Completion result:", completion) print(completion)
...@@ -9,4 +9,3 @@ xformers >= 0.0.19 ...@@ -9,4 +9,3 @@ xformers >= 0.0.19
fastapi
uvicorn
pydantic < 2 # Required for OpenAI server.
fschat # Required for OpenAI ChatCompletion Endpoint.
...@@ -13,9 +13,6 @@ from fastapi import BackgroundTasks, Request ...@@ -13,9 +13,6 @@ from fastapi import BackgroundTasks, Request
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, StreamingResponse
from fastchat.conversation import Conversation, SeparatorStyle
from fastchat.model.model_adapter import get_conversation_template
import uvicorn
from vllm.engine.arg_utils import AsyncEngineArgs
...@@ -33,6 +30,13 @@ from vllm.sampling_params import SamplingParams ...@@ -33,6 +30,13 @@ from vllm.sampling_params import SamplingParams
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.utils import random_uuid
try:
from fastchat.conversation import Conversation, SeparatorStyle
from fastchat.model.model_adapter import get_conversation_template
_fastchat_available = True
except ImportError:
_fastchat_available = False
TIMEOUT_KEEP_ALIVE = 5  # seconds
logger = init_logger(__name__)
...@@ -63,6 +67,11 @@ async def check_model(request) -> Optional[JSONResponse]: ...@@ -63,6 +67,11 @@ async def check_model(request) -> Optional[JSONResponse]:
async def get_gen_prompt(request) -> str: async def get_gen_prompt(request) -> str:
if not _fastchat_available:
raise ModuleNotFoundError(
"fastchat is not installed. Please install fastchat to use "
"the chat completion and conversation APIs: `$ pip install fschat`"
)
conv = get_conversation_template(request.model)
conv = Conversation(
name=conv.name,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment