"""
Tests for `bind` and `cache` on an sglang function (test_bind_cache.py).

The tests connect to a server at http://localhost:30000, so launch it first.

Usage:
python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000
python3 test_bind_cache.py
"""

import unittest

import sglang as sgl
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint


class TestBind(unittest.TestCase):
    backend = None

    def setUp(self):
        cls = type(self)

        if cls.backend is None:
            cls.backend = RuntimeEndpoint(base_url="http://localhost:30000")

    def test_bind(self):
        @sgl.function
        def few_shot_qa(s, prompt, question):
            s += prompt
            s += "Q: What is the capital of France?\n"
            s += "A: Paris\n"
            s += "Q: " + question + "\n"
            s += "A:" + sgl.gen("answer", stop="\n")

        few_shot_qa_2 = few_shot_qa.bind(
            prompt="The following are questions with answers.\n\n"
        )

        tracer = few_shot_qa_2.trace()
        print(tracer.last_node.print_graph_dfs() + "\n")

38
    def test_cache(self):
Lianmin Zheng's avatar
Lianmin Zheng committed
39
40
41
42
43
44
45
46
47
48
49
        @sgl.function
        def few_shot_qa(s, prompt, question):
            s += prompt
            s += "Q: What is the capital of France?\n"
            s += "A: Paris\n"
            s += "Q: " + question + "\n"
            s += "A:" + sgl.gen("answer", stop="\n")

        few_shot_qa_2 = few_shot_qa.bind(
            prompt="Answer the following questions as if you were a 5-year-old kid.\n\n"
        )
50
        few_shot_qa_2.cache(self.backend)
Lianmin Zheng's avatar
Lianmin Zheng committed
51
52
53
54
55
56
57


if __name__ == "__main__":
    # Run every test in this module with warnings suppressed.
    unittest.main(warnings="ignore")

    # Alternative for running a single test by hand (kept for debugging):
    # t = TestBind()
    # t.setUp()
    # t.test_cache()