test_sagemaker_stateful_sessions.py 4.75 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project


import openai  # use the official client for correctness check
import pytest
import requests

from ...utils import RemoteOpenAIServer
from .conftest import (
    HEADER_SAGEMAKER_CLOSED_SESSION_ID,
    HEADER_SAGEMAKER_NEW_SESSION_ID,
    HEADER_SAGEMAKER_SESSION_ID,
    MODEL_NAME_SMOLLM,
)

# Parametrized cases for test_close_session_badrequest. Each entry is
# (test_name, session_id_change, request_body_change, expected_error):
#   - session_id_change: an optional "session_id" entry that replaces the valid
#     session id sent in the session header,
#   - request_body_change: extra fields merged into the CLOSE request body,
#   - expected_error: substring expected in the error message, or None when
#     only the status code is checked.
CLOSE_BADREQUEST_CASES = [
    # CLOSE addressed to a session id that the test did not create.
    (
        "nonexistent_session_id",
        {"session_id": "nonexistent-session-id"},
        {},
        "session not found",
    ),
    # CLOSE for the valid session, but with an extra field in the body.
    (
        "malformed_close_request",
        {},
        {"extra-field": "extra-field-data"},
        None,
    ),
]


@pytest.mark.asyncio
async def test_create_session_badrequest(basic_server_with_lora: RemoteOpenAIServer):
    """Assert that a NEW_SESSION request with an extra body field gets HTTP 400."""
    invocations_url = basic_server_with_lora.url_for("invocations")
    malformed_payload = {
        "requestType": "NEW_SESSION",
        "extra-field": "extra-field-data",
    }

    response = requests.post(invocations_url, json=malformed_payload)

    assert response.status_code == 400


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "test_name,session_id_change,request_body_change,expected_error",
    CLOSE_BADREQUEST_CASES,
)
async def test_close_session_badrequest(
    basic_server_with_lora: RemoteOpenAIServer,
    test_name: str,
    session_id_change: dict[str, str],
    request_body_change: dict[str, str],
    expected_error: str | None,
):
    """Assert that an invalid CLOSE request is answered with HTTP 400.

    A session is created first, the bad CLOSE request is sent, the created
    session is closed again, and only then is the bad response checked.

    Args:
        basic_server_with_lora: Server fixture providing the ``invocations`` URL.
        test_name: Label of the parametrized case; not used in the body.
        session_id_change: May hold a ``"session_id"`` entry that replaces the
            valid session id in the session header of the bad request.
        request_body_change: Extra fields merged into the CLOSE request body.
        expected_error: Substring that must appear in the response's error
            message, or ``None`` to check only the status code.
    """
    # first attempt to create a session
    url = basic_server_with_lora.url_for("invocations")
    create_response = requests.post(url, json={"requestType": "NEW_SESSION"})
    create_response.raise_for_status()
    # The session id is the part before the first ";". partition() never
    # raises, so a missing or separator-less header fails on the assertion
    # below instead of with an unpacking ValueError.
    valid_session_id, _, _ = create_response.headers.get(
        HEADER_SAGEMAKER_NEW_SESSION_ID, ""
    ).partition(";")
    assert valid_session_id

    close_request_json = {"requestType": "CLOSE"}
    if request_body_change:
        close_request_json.update(request_body_change)
    bad_session_id = session_id_change.get("session_id")
    bad_close_response = requests.post(
        url,
        headers={HEADER_SAGEMAKER_SESSION_ID: bad_session_id or valid_session_id},
        json=close_request_json,
    )

    # clean up created session, should succeed
    clean_up_response = requests.post(
        url,
        headers={HEADER_SAGEMAKER_SESSION_ID: valid_session_id},
        json={"requestType": "CLOSE"},
    )
    clean_up_response.raise_for_status()

    # Checked after the clean-up so a failing assertion does not skip it.
    assert bad_close_response.status_code == 400
    if expected_error:
        assert expected_error in bad_close_response.json()["error"]["message"]


@pytest.mark.asyncio
async def test_close_session_invalidrequest(
    basic_server_with_lora: RemoteOpenAIServer, async_client: openai.AsyncOpenAI
):
    """Assert that a CLOSE request without a session header gets HTTP 424.

    A session is created first, the header-less CLOSE request is sent, the
    created session is closed again, and only then is the response checked
    for status 424 and an "invalid session_id" error message.

    Args:
        basic_server_with_lora: Server fixture providing the ``invocations`` URL.
        async_client: Requested fixture; not used in the body.
    """
    # first attempt to create a session
    url = basic_server_with_lora.url_for("invocations")
    create_response = requests.post(url, json={"requestType": "NEW_SESSION"})
    create_response.raise_for_status()
    # The session id is the part before the first ";". partition() never
    # raises, so a missing or separator-less header fails on the assertion
    # below instead of with an unpacking ValueError.
    valid_session_id, _, _ = create_response.headers.get(
        HEADER_SAGEMAKER_NEW_SESSION_ID, ""
    ).partition(";")
    assert valid_session_id

    close_request_json = {"requestType": "CLOSE"}
    invalid_close_response = requests.post(
        url,
        # no headers to specify session_id
        json=close_request_json,
    )

    # clean up created session, should succeed
    clean_up_response = requests.post(
        url,
        headers={HEADER_SAGEMAKER_SESSION_ID: valid_session_id},
        json={"requestType": "CLOSE"},
    )
    clean_up_response.raise_for_status()

    # Checked after the clean-up so a failing assertion does not skip it.
    assert invalid_close_response.status_code == 424
    assert "invalid session_id" in invalid_close_response.json()["error"]["message"]


@pytest.mark.asyncio
async def test_session(basic_server_with_lora: RemoteOpenAIServer):
    """Create a session, run one invocation with it, then close it.

    Both the invocation and the CLOSE request must succeed, and the
    closed-session header of the CLOSE response must equal the id returned
    when the session was created.

    Args:
        basic_server_with_lora: Server fixture providing the ``invocations`` URL.
    """
    # first attempt to create a session
    url = basic_server_with_lora.url_for("invocations")
    create_response = requests.post(url, json={"requestType": "NEW_SESSION"})
    create_response.raise_for_status()
    # The session id is the part before the first ";". partition() never
    # raises, so a missing or separator-less header fails on the assertion
    # below instead of with an unpacking ValueError.
    valid_session_id, _, _ = create_response.headers.get(
        HEADER_SAGEMAKER_NEW_SESSION_ID, ""
    ).partition(";")
    assert valid_session_id

    session_headers = {HEADER_SAGEMAKER_SESSION_ID: valid_session_id}

    # test invocation with session id
    request_args = {
        "model": MODEL_NAME_SMOLLM,
        "prompt": "what is 1+1?",
        "max_completion_tokens": 5,
        "temperature": 0.0,
        "logprobs": False,
    }
    invocation_response = requests.post(
        url,
        headers=session_headers,
        json=request_args,
    )

    # Send the CLOSE request before checking the invocation status, so an
    # error response from the invocation does not leave the session open.
    close_response = requests.post(
        url,
        headers=session_headers,
        json={"requestType": "CLOSE"},
    )

    invocation_response.raise_for_status()
    # close created session, should succeed
    close_response.raise_for_status()

    assert (
        close_response.headers.get(HEADER_SAGEMAKER_CLOSED_SESSION_ID)
        == valid_session_id
    )