# test_bind_cache.py
import unittest

import sglang as sgl
Ying Sheng's avatar
Ying Sheng committed
4
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
Lianmin Zheng's avatar
Lianmin Zheng committed
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31


class TestBind(unittest.TestCase):
    """Smoke tests for ``bind``, ``trace`` and ``cache`` on an ``sgl.function``.

    Neither test asserts on the produced output; a test passes when the
    calls it makes complete without raising.
    """

    # Shared RuntimeEndpoint, created by the first setUp() call and reused by
    # every later test so the endpoint object is only built once.
    backend = None

    def setUp(self):
        """Create the shared backend the first time any test runs."""
        cls = type(self)

        if cls.backend is None:
            # NOTE(review): presumably requires an sglang server listening on
            # this address -- confirm before running the suite.
            cls.backend = RuntimeEndpoint(base_url="http://localhost:30000")

    @staticmethod
    def _make_few_shot_qa():
        """Return the few-shot QA program used by both tests.

        The program appends the caller-supplied ``prompt``, one worked
        question/answer pair, the caller-supplied ``question``, and then a
        generation named ``"answer"`` that stops at the first newline.
        """

        @sgl.function
        def few_shot_qa(s, prompt, question):
            s += prompt
            s += "Q: What is the capital of France?\n"
            s += "A: Paris\n"
            s += "Q: " + question + "\n"
            s += "A:" + sgl.gen("answer", stop="\n")

        return few_shot_qa

    def test_bind(self):
        """Bind ``prompt``, trace the bound program and print its graph."""
        few_shot_qa_2 = self._make_few_shot_qa().bind(
            prompt="The following are questions with answers.\n\n"
        )

        tracer = few_shot_qa_2.trace()
        print(tracer.last_node.print_graph_dfs() + "\n")

    def test_cache(self):
        """Bind ``prompt`` and call ``cache`` with the shared backend."""
        few_shot_qa_2 = self._make_few_shot_qa().bind(
            prompt="Answer the following questions as if you were a 5-year-old kid.\n\n"
        )
        # NOTE(review): presumably pre-computes the bound prefix on the
        # backend -- confirm against the sglang documentation.
        few_shot_qa_2.cache(self.backend)
Lianmin Zheng's avatar
Lianmin Zheng committed
45
46
47
48
49
50
51


if __name__ == "__main__":
    # Run every test in this module with warnings suppressed. By default
    # unittest.main() exits the process when the run finishes.
    unittest.main(warnings="ignore")

    # Manual alternative for running a single test without the unittest
    # runner (kept as a recipe, intentionally not executed):
    # t = TestBind()
    # t.setUp()
    # t.test_cache()