"vscode:/vscode.git/clone" did not exist on "06311e295666916d3456a357cdd91dd2a03c34e2"
Unverified Commit 350f9e10 authored by Cyrus Leung's avatar Cyrus Leung Committed by GitHub
Browse files

[CI/Build] Move `test_utils.py` to `tests/utils.py` (#4425)

Since #4335 was merged, I've noticed that the definition of ServerRunner in its tests is identical to the one in the tests for the OpenAI API server. I have moved the class into the test utilities to avoid code duplication. (Although it has only been duplicated twice so far, I will add another similar test suite in #4200, which would duplicate the code a third time.)

Also, I have moved the test utilities file (test_utils.py) into the test directory (tests/utils.py), since none of its code is actually used in the main package. Note that I have added __init__.py to each test subpackage and updated the ray.init() call in the test utilities file so that tests/utils.py can be imported with relative imports.
parent 702bee46
...@@ -13,9 +13,10 @@ import os ...@@ -13,9 +13,10 @@ import os
import pytest import pytest
import torch import torch
from tests.models.utils import check_logprobs_close
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
from .utils import check_logprobs_close
os.environ["TOKENIZERS_PARALLELISM"] = "true" os.environ["TOKENIZERS_PARALLELISM"] = "true"
MAX_MODEL_LEN = 1024 MAX_MODEL_LEN = 1024
......
...@@ -15,9 +15,10 @@ from dataclasses import dataclass ...@@ -15,9 +15,10 @@ from dataclasses import dataclass
import pytest import pytest
import torch import torch
from tests.models.utils import check_logprobs_close
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
from .utils import check_logprobs_close
capability = torch.cuda.get_device_capability() capability = torch.cuda.get_device_capability()
capability = capability[0] * 10 + capability[1] capability = capability[0] * 10 + capability[1]
marlin_not_supported = (capability < marlin_not_supported = (capability <
......
...@@ -4,7 +4,7 @@ Run `pytest tests/models/test_mistral.py`. ...@@ -4,7 +4,7 @@ Run `pytest tests/models/test_mistral.py`.
""" """
import pytest import pytest
from tests.models.utils import check_logprobs_close from .utils import check_logprobs_close
MODELS = [ MODELS = [
"mistralai/Mistral-7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.1",
......
import pytest import pytest
import torch import torch
from tests.conftest import VllmRunner
from vllm import SamplingParams from vllm import SamplingParams
from ..conftest import VllmRunner
MODELS = ["facebook/opt-125m"] MODELS = ["facebook/opt-125m"]
......
...@@ -9,7 +9,6 @@ import torch ...@@ -9,7 +9,6 @@ import torch
from pynvml import (nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo, from pynvml import (nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo,
nvmlInit) nvmlInit)
from tests.conftest import cleanup
from vllm import LLM from vllm import LLM
from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.engine.async_llm_engine import AsyncLLMEngine
...@@ -21,6 +20,8 @@ from vllm.sequence import Logprob, MultiModalData ...@@ -21,6 +20,8 @@ from vllm.sequence import Logprob, MultiModalData
from vllm.usage.usage_lib import UsageContext from vllm.usage.usage_lib import UsageContext
from vllm.utils import Counter, random_uuid from vllm.utils import Counter, random_uuid
from ...conftest import cleanup
class AsyncLLM: class AsyncLLM:
"""AsyncLLM """AsyncLLM
......
...@@ -9,12 +9,13 @@ import pytest ...@@ -9,12 +9,13 @@ import pytest
import ray import ray
import torch import torch
from tests.entrypoints.test_openai_server import ServerRunner
from vllm import SamplingParams from vllm import SamplingParams
from vllm.model_executor.model_loader.tensorizer import ( from vllm.model_executor.model_loader.tensorizer import (
EncryptionParams, TensorizerConfig, TensorSerializer, EncryptionParams, TensorizerConfig, TensorSerializer,
is_vllm_serialized_tensorizer, load_with_tensorizer, open_stream) is_vllm_serialized_tensorizer, load_with_tensorizer, open_stream)
from ..utils import ServerRunner
prompts = [ prompts = [
"Hello, my name is", "Hello, my name is",
"The president of the United States is", "The president of the United States is",
......
import pytest import pytest
from tests.core.utils import create_dummy_prompt
from vllm.sequence import (CompletionSequenceGroupOutput, SamplerOutput, from vllm.sequence import (CompletionSequenceGroupOutput, SamplerOutput,
SequenceData, SequenceOutput) SequenceData, SequenceOutput)
from .core.utils import create_dummy_prompt
@pytest.fixture @pytest.fixture
def sample_outputs(): def sample_outputs():
......
import os
import subprocess
import sys
import time
import ray import ray
import requests
from vllm.distributed import (ensure_model_parallel_initialized, from vllm.distributed import (ensure_model_parallel_initialized,
init_distributed_environment) init_distributed_environment)
from vllm.utils import get_open_port from vllm.utils import get_open_port
# Path to root of repository so that utilities can be imported by ray workers
VLLM_PATH = os.path.abspath(os.path.join(__file__, os.pardir, os.pardir))
@ray.remote(num_gpus=1)
class ServerRunner:
    """Ray actor that runs the vLLM OpenAI-compatible API server in a subprocess.

    Constructing the actor launches
    ``python3 -m vllm.entrypoints.openai.api_server`` with the given
    command-line arguments and blocks until the server's ``/health``
    endpoint on ``localhost:8000`` answers with HTTP 200.

    Args:
        args: Extra command-line arguments (list of strings) appended to
            the server launch command.

    Raises:
        RuntimeError: If the server process exits before becoming healthy,
            or does not become healthy within ``MAX_SERVER_START_WAIT_S``.
    """

    # Maximum time, in seconds, to wait for the server to become healthy.
    MAX_SERVER_START_WAIT_S = 600

    def __init__(self, args):
        env = os.environ.copy()
        # Unbuffered output so server logs show up immediately.
        env["PYTHONUNBUFFERED"] = "1"
        self.proc = subprocess.Popen(
            ["python3", "-m", "vllm.entrypoints.openai.api_server"] + args,
            env=env,
            stdout=sys.stdout,
            stderr=sys.stderr,
        )
        self._wait_for_server()

    def ready(self):
        """Return True; the constructor has already waited for the server."""
        return True

    def _wait_for_server(self):
        """Poll the health endpoint until the server is up.

        The liveness check, the back-off sleep and the deadline check run
        after every unsuccessful attempt, whether the request raised or
        merely returned a non-200 status, so the loop can neither spin
        without sleeping nor outlive the deadline.
        """
        start = time.time()
        while True:
            # `err` is unbound once the except block ends, so keep a copy
            # for exception chaining below.
            last_err = None
            try:
                if requests.get(
                        "http://localhost:8000/health").status_code == 200:
                    return
            except Exception as err:
                last_err = err

            if self.proc.poll() is not None:
                raise RuntimeError(
                    "Server exited unexpectedly.") from last_err

            time.sleep(0.5)
            if time.time() - start > self.MAX_SERVER_START_WAIT_S:
                raise RuntimeError(
                    "Server failed to start in time.") from last_err

    def __del__(self):
        # `proc` is missing if Popen itself failed inside __init__.
        if hasattr(self, "proc"):
            self.proc.terminate()
def init_test_distributed_environment( def init_test_distributed_environment(
tp_size: int, tp_size: int,
...@@ -28,7 +77,7 @@ def multi_process_tensor_parallel( ...@@ -28,7 +77,7 @@ def multi_process_tensor_parallel(
) -> None: ) -> None:
# Using ray helps debugging the error when it failed # Using ray helps debugging the error when it failed
# as compared to multiprocessing. # as compared to multiprocessing.
ray.init() ray.init(runtime_env={"working_dir": VLLM_PATH})
distributed_init_port = get_open_port() distributed_init_port = get_open_port()
refs = [] refs = []
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment