payload_builder.py 5.18 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from typing import Any, Dict, List, Optional, Union

from tests.utils.client import send_request
from tests.utils.payloads import ChatPayload, CompletionPayload, MetricsPayload

# Common default text prompt used across tests
TEXT_PROMPT = "Tell me a short joke about AI."


def chat_payload_default(
    repeat_count: int = 3,
    expected_response: Optional[List[str]] = None,
    expected_log: Optional[List[str]] = None,
    max_tokens: int = 150,
    temperature: float = 0.1,
    stream: bool = False,
) -> ChatPayload:
    return ChatPayload(
        body={
            "messages": [
                {
                    "role": "user",
                    "content": TEXT_PROMPT,
                }
            ],
            "max_tokens": max_tokens,
            "temperature": temperature,
            "stream": stream,
        },
        repeat_count=repeat_count,
        expected_log=expected_log or [],
        expected_response=expected_response or ["AI"],
    )


def completion_payload_default(
    repeat_count: int = 3,
    expected_response: Optional[List[str]] = None,
    expected_log: Optional[List[str]] = None,
    max_tokens: int = 150,
    temperature: float = 0.1,
    stream: bool = False,
) -> CompletionPayload:
    return CompletionPayload(
        body={
            "prompt": TEXT_PROMPT,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "stream": stream,
        },
        repeat_count=repeat_count,
        expected_log=expected_log or [],
        expected_response=expected_response or ["AI"],
    )


def metric_payload_default(
    min_num_requests: int,
    repeat_count: int = 1,
    expected_log: Optional[List[str]] = None,
) -> MetricsPayload:
    return MetricsPayload(
        body={},
        repeat_count=repeat_count,
        expected_log=expected_log or [],
        expected_response=[],
        min_num_requests=min_num_requests,
    )


def chat_payload(
    content: Union[str, List[Dict[str, Any]]],
    repeat_count: int = 1,
    expected_response: Optional[List[str]] = None,
    expected_log: Optional[List[str]] = None,
    max_tokens: int = 300,
    temperature: Optional[float] = None,
    stream: bool = False,
) -> ChatPayload:
    body: Dict[str, Any] = {
        "messages": [
            {
                "role": "user",
                "content": content,
            }
        ],
        "max_tokens": max_tokens,
        "stream": stream,
    }
    if temperature is not None:
        body["temperature"] = temperature

    return ChatPayload(
        body=body,
        repeat_count=repeat_count,
        expected_log=expected_log or [],
        expected_response=expected_response or [],
    )


def completion_payload(
    prompt: str,
    repeat_count: int = 3,
    expected_response: Optional[List[str]] = None,
    expected_log: Optional[List[str]] = None,
    max_tokens: int = 150,
    temperature: float = 0.1,
    stream: bool = False,
) -> CompletionPayload:
    return CompletionPayload(
        body={
            "prompt": prompt,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "stream": stream,
        },
        repeat_count=repeat_count,
        expected_log=expected_log or [],
        expected_response=expected_response or [],
    )


# Build small request-based health checks for chat and completions
# these should only be used as a last resort. Generally want to use an actual health check


def make_chat_health_check(port: int, model: str):
    def _check_chat_endpoint(remaining_timeout: float = 30.0) -> bool:
        payload = chat_payload_default(
            repeat_count=1,
            expected_response=[],
            max_tokens=8,
            temperature=0.0,
            stream=False,
        ).with_model(model)
        payload.port = port
        try:
            resp = send_request(
                payload.url(),
                payload.body,
                timeout=min(max(1.0, remaining_timeout), 5.0),
                method=payload.method,
                log_level=10,
            )
            # Validate structure only; expected_response is empty
            _ = payload.response_handler(resp)
            return True
        except Exception:
            return False

    return _check_chat_endpoint


def make_completions_health_check(port: int, model: str):
    def _check_completions_endpoint(remaining_timeout: float = 30.0) -> bool:
        payload = completion_payload_default(
            repeat_count=1,
            expected_response=[],
            max_tokens=8,
            temperature=0.0,
            stream=False,
        ).with_model(model)
        payload.port = port
        try:
            resp = send_request(
                payload.url(),
                payload.body,
                timeout=min(max(1.0, remaining_timeout), 5.0),
                method=payload.method,
                log_level=10,
            )
            out = payload.response_handler(resp)
            if not out:
                raise ValueError("")
            return True
        except Exception:
            return False

    return _check_completions_endpoint