Commit 3c9817d2 authored by zhuwenwen
Browse files

add VLLM_OPTEST_MODELS_PATH/OPTEST_MODELS_PATH to load models from local path...

Add VLLM_OPTEST_MODELS_PATH/OPTEST_MODELS_PATH to load models from a local path instead of the Hugging Face Hub.
parent 49204f68
import weakref import weakref
from typing import List from typing import List
import os
import pytest import pytest
...@@ -7,8 +8,9 @@ from vllm import LLM, RequestOutput, SamplingParams ...@@ -7,8 +8,9 @@ from vllm import LLM, RequestOutput, SamplingParams
from ...conftest import cleanup from ...conftest import cleanup
from ..openai.test_vision import TEST_IMAGE_URLS from ..openai.test_vision import TEST_IMAGE_URLS
from ...utils import models_path_prefix
MODEL_NAME = "facebook/opt-125m" MODEL_NAME = os.path.join(models_path_prefix, "facebook/opt-125m")
PROMPTS = [ PROMPTS = [
"Hello, my name is", "Hello, my name is",
...@@ -145,7 +147,7 @@ def test_multiple_sampling_params(llm: LLM): ...@@ -145,7 +147,7 @@ def test_multiple_sampling_params(llm: LLM):
def test_chat(): def test_chat():
llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct") llm = LLM(model=os.path.join(models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct"))
prompt1 = "Explain the concept of entropy." prompt1 = "Explain the concept of entropy."
messages = [ messages = [
...@@ -164,7 +166,7 @@ def test_chat(): ...@@ -164,7 +166,7 @@ def test_chat():
def test_multi_chat(): def test_multi_chat():
llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct") llm = LLM(model=os.path.join(models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct"))
prompt1 = "Explain the concept of entropy." prompt1 = "Explain the concept of entropy."
prompt2 = "Explain what among us is." prompt2 = "Explain what among us is."
...@@ -201,7 +203,7 @@ def test_multi_chat(): ...@@ -201,7 +203,7 @@ def test_multi_chat():
[[TEST_IMAGE_URLS[0], TEST_IMAGE_URLS[1]]]) [[TEST_IMAGE_URLS[0], TEST_IMAGE_URLS[1]]])
def test_chat_multi_image(image_urls: List[str]): def test_chat_multi_image(image_urls: List[str]):
llm = LLM( llm = LLM(
model="microsoft/Phi-3.5-vision-instruct", model=os.path.join(models_path_prefix, "microsoft/Phi-3.5-vision-instruct"),
dtype="bfloat16", dtype="bfloat16",
max_model_len=4096, max_model_len=4096,
max_num_seqs=5, max_num_seqs=5,
......
import weakref import weakref
import pytest import pytest
import os
# downloading lora to test lora requests # downloading lora to test lora requests
from huggingface_hub import snapshot_download from huggingface_hub import snapshot_download
...@@ -8,8 +9,9 @@ from vllm import LLM ...@@ -8,8 +9,9 @@ from vllm import LLM
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from ...conftest import cleanup from ...conftest import cleanup
from ...utils import models_path_prefix
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" MODEL_NAME = os.path.join(models_path_prefix, "HuggingFaceH4/zephyr-7b-beta")
PROMPTS = [ PROMPTS = [
"Hello, my name is", "Hello, my name is",
...@@ -18,7 +20,7 @@ PROMPTS = [ ...@@ -18,7 +20,7 @@ PROMPTS = [
"The future of AI is", "The future of AI is",
] ]
LORA_NAME = "typeof/zephyr-7b-beta-lora" LORA_NAME = os.path.join(models_path_prefix, "typeof/zephyr-7b-beta-lora")
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
......
...@@ -4,14 +4,16 @@ import weakref ...@@ -4,14 +4,16 @@ import weakref
import jsonschema import jsonschema
import pytest import pytest
import os
from vllm.entrypoints.llm import LLM from vllm.entrypoints.llm import LLM
from vllm.outputs import RequestOutput from vllm.outputs import RequestOutput
from vllm.sampling_params import SamplingParams from vllm.sampling_params import SamplingParams
from ...conftest import cleanup from ...conftest import cleanup
from ...utils import models_path_prefix
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" MODEL_NAME = os.path.join(models_path_prefix, "HuggingFaceH4/zephyr-7b-beta")
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
......
import sys import sys
import os
from vllm import LLM, SamplingParams from vllm import LLM, SamplingParams
from ...utils import models_path_prefix
def test_lazy_outlines(sample_regex): def test_lazy_outlines(sample_regex):
...@@ -14,7 +16,7 @@ def test_lazy_outlines(sample_regex): ...@@ -14,7 +16,7 @@ def test_lazy_outlines(sample_regex):
] ]
sampling_params = SamplingParams(temperature=0.8, top_p=0.95) sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
llm = LLM(model="facebook/opt-125m", llm = LLM(model=os.path.join(models_path_prefix, "facebook/opt-125m"),
enforce_eager=True, enforce_eager=True,
gpu_memory_utilization=0.3) gpu_memory_utilization=0.3)
outputs = llm.generate(prompts, sampling_params) outputs = llm.generate(prompts, sampling_params)
...@@ -26,7 +28,7 @@ def test_lazy_outlines(sample_regex): ...@@ -26,7 +28,7 @@ def test_lazy_outlines(sample_regex):
# make sure outlines is not imported # make sure outlines is not imported
assert 'outlines' not in sys.modules assert 'outlines' not in sys.modules
llm = LLM(model="facebook/opt-125m", llm = LLM(model=os.path.join(models_path_prefix, "facebook/opt-125m"),
enforce_eager=True, enforce_eager=True,
guided_decoding_backend="lm-format-enforcer", guided_decoding_backend="lm-format-enforcer",
gpu_memory_utilization=0.3) gpu_memory_utilization=0.3)
......
import pytest import pytest
import os
from vllm import LLM from vllm import LLM
from ...utils import models_path_prefix
def test_empty_prompt(): def test_empty_prompt():
llm = LLM(model="gpt2") llm = LLM(model=os.path.join(models_path_prefix, "gpt2"))
with pytest.raises(ValueError, match='Prompt cannot be empty'): with pytest.raises(ValueError, match='Prompt cannot be empty'):
llm.generate([""]) llm.generate([""])
...@@ -2,14 +2,16 @@ ...@@ -2,14 +2,16 @@
import importlib import importlib
import sys import sys
import weakref import weakref
import os
import pytest import pytest
from vllm import LLM from vllm import LLM
from ...conftest import cleanup from ...conftest import cleanup
from ...utils import models_path_prefix
MODEL_NAME = "facebook/opt-125m" MODEL_NAME = os.path.join(models_path_prefix, "facebook/opt-125m")
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
......
...@@ -9,10 +9,12 @@ AsyncLLMEngine are working correctly. ...@@ -9,10 +9,12 @@ AsyncLLMEngine are working correctly.
import lm_eval import lm_eval
import pytest import pytest
import os
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer
from ...utils import models_path_prefix
MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct" MODEL_NAME = os.path.join(models_path_prefix, "Qwen/Qwen2-1.5B-Instruct")
NUM_CONCURRENT = 500 NUM_CONCURRENT = 500
TASK = "gsm8k" TASK = "gsm8k"
FILTER = "exact_match,strict-match" FILTER = "exact_match,strict-match"
......
...@@ -2,14 +2,15 @@ from typing import Dict, List ...@@ -2,14 +2,15 @@ from typing import Dict, List
import openai import openai
import pytest import pytest
import os
import pytest_asyncio import pytest_asyncio
from vllm.assets.audio import AudioAsset from vllm.assets.audio import AudioAsset
from vllm.multimodal.utils import encode_audio_base64, fetch_audio from vllm.multimodal.utils import encode_audio_base64, fetch_audio
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer, models_path_prefix
MODEL_NAME = "fixie-ai/ultravox-v0_3" MODEL_NAME = os.path.join(models_path_prefix, "fixie-ai/ultravox-v0_3")
TEST_AUDIO_URLS = [ TEST_AUDIO_URLS = [
AudioAsset("winning_call").url, AudioAsset("winning_call").url,
] ]
......
...@@ -7,9 +7,9 @@ import requests ...@@ -7,9 +7,9 @@ import requests
from vllm.version import __version__ as VLLM_VERSION from vllm.version import __version__ as VLLM_VERSION
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer, models_path_prefix
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" MODEL_NAME = os.path.join(models_path_prefix, "HuggingFaceH4/zephyr-7b-beta")
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
......
...@@ -6,19 +6,20 @@ from typing import Dict, List, Optional ...@@ -6,19 +6,20 @@ from typing import Dict, List, Optional
import jsonschema import jsonschema
import openai # use the official client for correctness check import openai # use the official client for correctness check
import pytest import pytest
import os
import pytest_asyncio import pytest_asyncio
import torch import torch
from openai import BadRequestError from openai import BadRequestError
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer, models_path_prefix
from .test_completion import zephyr_lora_added_tokens_files # noqa: F401 from .test_completion import zephyr_lora_added_tokens_files # noqa: F401
from .test_completion import zephyr_lora_files # noqa: F401 from .test_completion import zephyr_lora_files # noqa: F401
# any model with a chat template should work here # any model with a chat template should work here
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" MODEL_NAME = os.path.join(models_path_prefix, "HuggingFaceH4/zephyr-7b-beta")
# technically this needs Mistral-7B-v0.1 as base, but we're not testing # technically this needs Mistral-7B-v0.1 as base, but we're not testing
# generation quality here # generation quality here
LORA_NAME = "typeof/zephyr-7b-beta-lora" LORA_NAME = os.path.join(models_path_prefix, "typeof/zephyr-7b-beta-lora")
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
......
import pytest import pytest
import os
from vllm.entrypoints.chat_utils import (apply_hf_chat_template, from vllm.entrypoints.chat_utils import (apply_hf_chat_template,
load_chat_template) load_chat_template)
from vllm.entrypoints.openai.protocol import ChatCompletionRequest from vllm.entrypoints.openai.protocol import ChatCompletionRequest
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.transformers_utils.tokenizer import get_tokenizer
from ...utils import VLLM_PATH from ...utils import VLLM_PATH, models_path_prefix
chatml_jinja_path = VLLM_PATH / "examples/template_chatml.jinja" chatml_jinja_path = VLLM_PATH / "examples/template_chatml.jinja"
assert chatml_jinja_path.exists() assert chatml_jinja_path.exists()
# Define models, templates, and their corresponding expected outputs # Define models, templates, and their corresponding expected outputs
MODEL_TEMPLATE_GENERATON_OUTPUT = [ MODEL_TEMPLATE_GENERATON_OUTPUT = [
("facebook/opt-125m", chatml_jinja_path, True, """<|im_start|>user (os.path.join(models_path_prefix, "facebook/opt-125m"), chatml_jinja_path, True, """<|im_start|>user
Hello<|im_end|> Hello<|im_end|>
<|im_start|>assistant <|im_start|>assistant
Hi there!<|im_end|> Hi there!<|im_end|>
...@@ -20,7 +21,7 @@ Hi there!<|im_end|> ...@@ -20,7 +21,7 @@ Hi there!<|im_end|>
What is the capital of<|im_end|> What is the capital of<|im_end|>
<|im_start|>assistant <|im_start|>assistant
"""), """),
("facebook/opt-125m", chatml_jinja_path, False, """<|im_start|>user (os.path.join(models_path_prefix, "facebook/opt-125m"), chatml_jinja_path, False, """<|im_start|>user
Hello<|im_end|> Hello<|im_end|>
<|im_start|>assistant <|im_start|>assistant
Hi there!<|im_end|> Hi there!<|im_end|>
......
...@@ -8,22 +8,23 @@ from typing import Dict, List, Optional ...@@ -8,22 +8,23 @@ from typing import Dict, List, Optional
import jsonschema import jsonschema
import openai # use the official client for correctness check import openai # use the official client for correctness check
import pytest import pytest
import os
import pytest_asyncio import pytest_asyncio
# downloading lora to test lora requests # downloading lora to test lora requests
from huggingface_hub import snapshot_download # from huggingface_hub import snapshot_download
from openai import BadRequestError from openai import BadRequestError
from transformers import AutoTokenizer from transformers import AutoTokenizer
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.transformers_utils.tokenizer import get_tokenizer
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer, models_path_prefix
# any model with a chat template should work here # any model with a chat template should work here
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" MODEL_NAME = os.path.join(models_path_prefix, "HuggingFaceH4/zephyr-7b-beta")
# technically these adapters use a different base model, # technically these adapters use a different base model,
# but we're not testing generation quality here # but we're not testing generation quality here
LORA_NAME = "typeof/zephyr-7b-beta-lora" LORA_NAME = os.path.join(models_path_prefix, "typeof/zephyr-7b-beta-lora")
PA_NAME = "swapnilbp/llama_tweet_ptune" PA_NAME = os.path.join(models_path_prefix, "swapnilbp/llama_tweet_ptune")
# if PA_NAME changes, PA_NUM_VIRTUAL_TOKENS might also # if PA_NAME changes, PA_NUM_VIRTUAL_TOKENS might also
# need to change to match the prompt adapter # need to change to match the prompt adapter
PA_NUM_VIRTUAL_TOKENS = 8 PA_NUM_VIRTUAL_TOKENS = 8
...@@ -31,7 +32,8 @@ PA_NUM_VIRTUAL_TOKENS = 8 ...@@ -31,7 +32,8 @@ PA_NUM_VIRTUAL_TOKENS = 8
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def zephyr_lora_files(): def zephyr_lora_files():
return snapshot_download(repo_id=LORA_NAME) # return snapshot_download(repo_id=LORA_NAME)
return LORA_NAME
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
...@@ -52,7 +54,8 @@ def zephyr_lora_added_tokens_files(zephyr_lora_files): ...@@ -52,7 +54,8 @@ def zephyr_lora_added_tokens_files(zephyr_lora_files):
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def zephyr_pa_files(): def zephyr_pa_files():
return snapshot_download(repo_id=PA_NAME) # return snapshot_download(repo_id=PA_NAME)
return PA_NAME
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
......
...@@ -3,11 +3,12 @@ import base64 ...@@ -3,11 +3,12 @@ import base64
import numpy as np import numpy as np
import openai import openai
import pytest import pytest
import os
import pytest_asyncio import pytest_asyncio
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer, models_path_prefix
EMBEDDING_MODEL_NAME = "intfloat/e5-mistral-7b-instruct" EMBEDDING_MODEL_NAME = os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct")
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
......
import openai import openai
import pytest import pytest
import os
import pytest_asyncio import pytest_asyncio
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer, models_path_prefix
MODEL_NAME = "facebook/bart-base" MODEL_NAME = os.path.join(models_path_prefix, "facebook/bart-base")
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
......
# This unit test should be moved to a new # This unit test should be moved to a new
# tests/test_guided_decoding directory. # tests/test_guided_decoding directory.
import pytest import pytest
import os
import torch import torch
from transformers import AutoTokenizer from transformers import AutoTokenizer
...@@ -9,11 +10,12 @@ from vllm.model_executor.guided_decoding import ( ...@@ -9,11 +10,12 @@ from vllm.model_executor.guided_decoding import (
get_guided_decoding_logits_processor) get_guided_decoding_logits_processor)
from vllm.model_executor.guided_decoding.outlines_logits_processors import ( from vllm.model_executor.guided_decoding.outlines_logits_processors import (
JSONLogitsProcessor, RegexLogitsProcessor) JSONLogitsProcessor, RegexLogitsProcessor)
from ...utils import models_path_prefix
def test_guided_logits_processors(sample_regex, sample_json_schema): def test_guided_logits_processors(sample_regex, sample_json_schema):
"""Basic unit test for RegexLogitsProcessor and JSONLogitsProcessor.""" """Basic unit test for RegexLogitsProcessor and JSONLogitsProcessor."""
tokenizer = AutoTokenizer.from_pretrained('HuggingFaceH4/zephyr-7b-beta') tokenizer = AutoTokenizer.from_pretrained(os.path.join(models_path_prefix, 'HuggingFaceH4/zephyr-7b-beta'))
regex_LP = RegexLogitsProcessor(sample_regex, tokenizer) regex_LP = RegexLogitsProcessor(sample_regex, tokenizer)
json_LP = JSONLogitsProcessor(sample_json_schema, json_LP = JSONLogitsProcessor(sample_json_schema,
tokenizer, tokenizer,
...@@ -41,7 +43,7 @@ def test_guided_logits_processors(sample_regex, sample_json_schema): ...@@ -41,7 +43,7 @@ def test_guided_logits_processors(sample_regex, sample_json_schema):
@pytest.mark.parametrize("backend", ["outlines", "lm-format-enforcer"]) @pytest.mark.parametrize("backend", ["outlines", "lm-format-enforcer"])
async def test_guided_logits_processor_black_box(backend: str, sample_regex, async def test_guided_logits_processor_black_box(backend: str, sample_regex,
sample_json_schema): sample_json_schema):
tokenizer = AutoTokenizer.from_pretrained('HuggingFaceH4/zephyr-7b-beta') tokenizer = AutoTokenizer.from_pretrained(os.path.join(models_path_prefix, 'HuggingFaceH4/zephyr-7b-beta'))
token_ids = tokenizer.encode( token_ids = tokenizer.encode(
f"Give an example IPv4 address with this regex: {sample_regex}") f"Give an example IPv4 address with this regex: {sample_regex}")
regex_request = CompletionRequest(model='test', regex_request = CompletionRequest(model='test',
......
...@@ -2,17 +2,18 @@ import json ...@@ -2,17 +2,18 @@ import json
import openai # use the official client for correctness check import openai # use the official client for correctness check
import pytest import pytest
import os
import pytest_asyncio import pytest_asyncio
# downloading lora to test lora requests # downloading lora to test lora requests
from huggingface_hub import snapshot_download from huggingface_hub import snapshot_download
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer, models_path_prefix
# any model with a chat template should work here # any model with a chat template should work here
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" MODEL_NAME = os.path.join(models_path_prefix, "HuggingFaceH4/zephyr-7b-beta")
# technically this needs Mistral-7B-v0.1 as base, but we're not testing # technically this needs Mistral-7B-v0.1 as base, but we're not testing
# generation quality here # generation quality here
LORA_NAME = "typeof/zephyr-7b-beta-lora" LORA_NAME = os.path.join(models_path_prefix, "typeof/zephyr-7b-beta-lora")
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
......
...@@ -6,14 +6,15 @@ from http import HTTPStatus ...@@ -6,14 +6,15 @@ from http import HTTPStatus
import openai import openai
import pytest import pytest
import os
import pytest_asyncio import pytest_asyncio
import requests import requests
from prometheus_client.parser import text_string_to_metric_families from prometheus_client.parser import text_string_to_metric_families
from transformers import AutoTokenizer from transformers import AutoTokenizer
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer, models_path_prefix
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" MODEL_NAME = os.path.join(models_path_prefix, "TinyLlama/TinyLlama-1.1B-Chat-v1.0")
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
...@@ -211,7 +212,7 @@ def test_metrics_exist_run_batch(): ...@@ -211,7 +212,7 @@ def test_metrics_exist_run_batch():
"-o", "-o",
output_file.name, output_file.name,
"--model", "--model",
"intfloat/e5-mistral-7b-instruct", os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct"),
"--enable-metrics", "--enable-metrics",
"--url", "--url",
base_url, base_url,
......
import openai # use the official client for correctness check import openai # use the official client for correctness check
import pytest import pytest
import os
import pytest_asyncio import pytest_asyncio
# downloading lora to test lora requests # downloading lora to test lora requests
from huggingface_hub import snapshot_download from huggingface_hub import snapshot_download
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer, models_path_prefix
# any model with a chat template should work here # any model with a chat template should work here
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" MODEL_NAME = os.path.join(models_path_prefix, "HuggingFaceH4/zephyr-7b-beta")
# technically this needs Mistral-7B-v0.1 as base, but we're not testing # technically this needs Mistral-7B-v0.1 as base, but we're not testing
# generation quality here # generation quality here
LORA_NAME = "typeof/zephyr-7b-beta-lora" LORA_NAME = os.path.join(models_path_prefix, "typeof/zephyr-7b-beta-lora")
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def zephyr_lora_files(): def zephyr_lora_files():
return snapshot_download(repo_id=LORA_NAME) # return snapshot_download(repo_id=LORA_NAME)
return LORA_NAME
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
......
from ...utils import VLLM_PATH, RemoteOpenAIServer import os
import vllm.envs as envs from ...utils import VLLM_PATH, RemoteOpenAIServer, models_path_prefix, envs
chatml_jinja_path = VLLM_PATH / "examples/template_chatml.jinja" chatml_jinja_path = VLLM_PATH / "examples/template_chatml.jinja"
assert chatml_jinja_path.exists() assert chatml_jinja_path.exists()
...@@ -54,5 +54,5 @@ def run_and_test_dummy_opt_api_server(model, tp=1): ...@@ -54,5 +54,5 @@ def run_and_test_dummy_opt_api_server(model, tp=1):
def test_oot_registration_for_api_server(dummy_opt_path: str): def test_oot_registration_for_api_server(dummy_opt_path: str):
dummy_opt_path="facebook/opt-125m" dummy_opt_path = os.path.join(models_path_prefix, "facebook/opt-125m")
run_and_test_dummy_opt_api_server(dummy_opt_path) run_and_test_dummy_opt_api_server(dummy_opt_path)
...@@ -4,12 +4,12 @@ import re ...@@ -4,12 +4,12 @@ import re
import openai import openai
import pytest import pytest
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer, models_path_prefix
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_empty_prompt(): async def test_empty_prompt():
model_name = "gpt2" model_name = os.path.join(models_path_prefix, "gpt2")
server_args = ["--enforce-eager"] server_args = ["--enforce-eager"]
with RemoteOpenAIServer(model_name, server_args) as remote_server: with RemoteOpenAIServer(model_name, server_args) as remote_server:
client = remote_server.get_async_client() client = remote_server.get_async_client()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment