# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import pytest

from vllm.renderers.inputs.preprocess import (
    parse_dec_only_prompt,
    parse_enc_dec_prompt,
    prompt_to_seq,
)
10
11
12
13
14
15
16
17
18
19
20
21
22
23


def test_empty_input():
    """Empty lists and lists of empty lists are expected back unchanged."""
    cases = (
        ([], []),
        ([[]], [[]]),
        ([[], []], [[], []]),
    )
    for given, expected in cases:
        assert prompt_to_seq(given) == expected


def test_text_input():
    """A bare string is expected wrapped in a list; string lists stay as-is."""
    cases = (
        ("foo", ["foo"]),
        (["foo"], ["foo"]),
        (["foo", "bar"], ["foo", "bar"]),
    )
    for given, expected in cases:
        assert prompt_to_seq(given) == expected


24
def test_tokens_input():
    """Check how prompt_to_seq normalizes integer-list inputs.

    A flat list of ints is expected to be wrapped as a single-item list,
    while lists of int lists are expected back unchanged.
    """
    # Flat list of ints -> treated as one prompt and wrapped.
    assert prompt_to_seq([1, 2]) == [[1, 2]]
    # Already nested -> returned as given.
    assert prompt_to_seq([[1, 2]]) == [[1, 2]]
    assert prompt_to_seq([[1, 2], [3, 4]]) == [[1, 2], [3, 4]]


30
def test_text_tokens_input():
    """Check that lists mixing int lists and strings come back unchanged.

    Both orderings are exercised so that neither element kind is only
    accepted in the first position.
    """
    assert prompt_to_seq([[1, 2], "foo"]) == [[1, 2], "foo"]
    assert prompt_to_seq(["foo", [1, 2]]) == ["foo", [1, 2]]


def test_bytes_input():
    """A bare bytes value is expected wrapped in a list; bytes lists stay as-is."""
    cases = (
        (b"foo", [b"foo"]),
        ([b"foo"], [b"foo"]),
        ([b"foo", b"bar"], [b"foo", b"bar"]),
    )
    for given, expected in cases:
        assert prompt_to_seq(given) == expected


def test_dict_input():
    """A single dict is expected wrapped in a list; dict lists stay as-is."""
    # Lone dict gets wrapped.
    single = prompt_to_seq({"prompt": "foo"})
    assert single == [{"prompt": "foo"}]

    # One-element list is returned as given.
    one_item = prompt_to_seq([{"prompt": "foo"}])
    assert one_item == [{"prompt": "foo"}]

    # Heterogeneous dict list keeps both entries in order.
    mixed = prompt_to_seq([{"prompt": "foo"}, {"prompt_token_ids": [1, 2]}])
    expected_mixed = [
        {"prompt": "foo"},
        {"prompt_token_ids": [1, 2]},
    ]
    assert mixed == expected_mixed
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67


def test_parse_dec_only_prompt_rejects_non_string_prompt_field():
    """A list of ints under "prompt" must raise TypeError, cache salt or not."""
    bad_prompt = {"prompt": [1, 2, 3], "cache_salt": "abc"}
    expected_message = "Prompt text should be a string"
    with pytest.raises(TypeError, match=expected_message):
        parse_dec_only_prompt(bad_prompt)


def test_parse_dec_only_prompt_rejects_non_string_prompt_list():
    """A mixed int/str list under "prompt" must raise TypeError."""
    bad_prompt = {"prompt": [1, "x"]}
    expected_message = "Prompt text should be a string"
    with pytest.raises(TypeError, match=expected_message):
        parse_dec_only_prompt(bad_prompt)


def test_parse_enc_dec_prompt_rejects_nested_non_string_prompt_field():
    """Int lists under the nested "prompt" fields must raise TypeError."""
    encoder_part = {"prompt": [1, 2, 3]}
    decoder_part = {"prompt": [4, 5]}
    bad_prompt = {
        "encoder_prompt": encoder_part,
        "decoder_prompt": decoder_part,
    }
    expected_message = "Prompt text should be a string"
    with pytest.raises(TypeError, match=expected_message):
        parse_enc_dec_prompt(bad_prompt)