test_completion_mocker_engine.py 3.51 KB
Newer Older
1
2
3
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

4
5
6
7
8
9
# Parallelization: Hermetic test (xdist-safe via dynamic ports).
# Tested on: Linux (Ubuntu 24.04 container), Intel(R) Core(TM) i9-14900K, 32 vCPU.
# post_merge wall time:
# - Serialized: 97.29s.
# - Parallel (-n auto): 30.29s (67.00s saved, 3.21x).
# GPU Requirement: gpu_0 (CPU-only, mocker does not use GPU)
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

from __future__ import annotations

import logging
import time
from typing import Any, Dict

import pytest
import requests

from tests.utils.constants import QWEN

# Logger named after this module.
logger = logging.getLogger(__name__)

# Model name placed in every request payload and in the `model` marker below.
TEST_MODEL = QWEN

# Markers applied to every test collected from this module.
pytestmark = [
    pytest.mark.e2e,
    pytest.mark.gpu_0,  # Mocker is CPU-only (no GPU required)
    pytest.mark.post_merge,
    pytest.mark.parallel,
    pytest.mark.model(TEST_MODEL),
]

34
35
36

def _send_completion_request(
    payload: Dict[str, Any],
    frontend_port: int,
    timeout: int = 180,
) -> requests.Response:
    """POST a text completion request to the frontend on localhost.

    Args:
        payload: Request body; serialized to JSON and sent unchanged.
        frontend_port: Port on ``localhost`` where the frontend listens.
        timeout: Timeout, in seconds, passed through to ``requests.post``.

    Returns:
        The raw HTTP response. No status check is done here so callers can
        assert on whichever status code they expect.
    """
    # Report through the module logger instead of print() so the message
    # follows the test run's logging configuration.
    logger.info("Sending request: %s", time.time())

    return requests.post(
        f"http://localhost:{frontend_port}/v1/completions",
        headers={"Content-Type": "application/json"},
        json=payload,
        timeout=timeout,
    )


54
55
def test_completion_string_prompt(start_services_with_mocker) -> None:
    """A plain string prompt must be answered with HTTP 200."""
    # The fixture value is used directly as the frontend port.
    port = start_services_with_mocker

    request_body: Dict[str, Any] = dict(
        model=TEST_MODEL,
        prompt="Tell me about Mars",
        max_tokens=2000,
    )

    reply = _send_completion_request(request_body, port)

    assert (
        reply.status_code == 200
    ), f"Completion request failed with status {reply.status_code}: {reply.text}"


70
71
def test_completion_empty_array_prompt(start_services_with_mocker) -> None:
    """An empty prompt array must be rejected with HTTP 400."""
    # The fixture value is used directly as the frontend port.
    frontend_port = start_services_with_mocker

    payload: Dict[str, Any] = {
        "model": TEST_MODEL,
        "prompt": [],
        "max_tokens": 2000,
    }

    response = _send_completion_request(payload, frontend_port)

    # The trailing space after "got" matters: the two literals are joined
    # with no separator, so without it the status code runs into the text.
    assert response.status_code == 400, (
        f"Completion request should have failed with status 400 but got "
        f"{response.status_code}: {response.text}"
    )


86
87
def test_completion_single_element_array_prompt(start_services_with_mocker) -> None:
    """A one-element prompt array must be answered with HTTP 200."""
    # The fixture value is used directly as the frontend port.
    port = start_services_with_mocker

    request_body: Dict[str, Any] = dict(
        model=TEST_MODEL,
        prompt=["Tell me about Mars"],
        max_tokens=2000,
    )

    reply = _send_completion_request(request_body, port)

    assert (
        reply.status_code == 200
    ), f"Completion request failed with status {reply.status_code}: {reply.text}"


102
103
def test_completion_multi_element_array_prompt(start_services_with_mocker) -> None:
    """A multi-element prompt array must yield one choice per prompt."""
    # The fixture value is used directly as the frontend port.
    frontend_port = start_services_with_mocker

    # Kept in a local list so the expected choice count comes straight from
    # it, with no untyped lookup back into the payload dict.
    prompts = [
        "Tell me about Mars",
        "Tell me about Ceres",
        "Tell me about Jupiter",
    ]
    payload: Dict[str, Any] = {
        "model": TEST_MODEL,
        "prompt": prompts,
        "max_tokens": 300,
    }

    response = _send_completion_request(payload, frontend_port)

    # Check the status before decoding the body: an error response may not
    # be valid JSON, and a decode error would hide this message.
    assert response.status_code == 200, (
        f"Completion request failed with status "
        f"{response.status_code}: {response.text}"
    )

    response_data = response.json()

    expected_choices = len(prompts)
    choices = len(response_data.get("choices", []))

    assert (
        expected_choices == choices
    ), f"Expected {expected_choices} choices, got {choices}"