# bench_other.py: JSON-decode benchmark using regex-constrained generation.
import argparse
import json
import time
from concurrent.futures import ThreadPoolExecutor
from functools import partial

from tqdm import tqdm

from sglang.lang.ir import REGEX_FLOAT, REGEX_INT, REGEX_STRING
from sglang.test.test_utils import add_common_other_args_and_parse, get_call_generate
from sglang.utils import dump_state_text, read_jsonl

# Pattern for a bracketed list of one or more REGEX_STRING matches separated
# by ", " (i.e. a JSON-style array of strings, assuming REGEX_STRING matches a
# single quoted string -- confirm against sglang.lang.ir).
REGEX_LIST = r"\[(" + REGEX_STRING + ", )*" + REGEX_STRING + r"\]"


# fmt: off
def json_decode(document, generate):
    """Fill in a JSON object about a city, one regex-constrained field at a time.

    The prompt is grown incrementally: for every field the key is appended,
    ``generate`` is called with everything written so far, and its output
    (plus a newline) is appended before moving on to the next field.

    Args:
        document: Text of the page to extract from; it is concatenated into
            the prompt, so it must be a ``str``.
        generate: Callable invoked as ``generate(prompt, max_tokens=..., regex=...)``
            that returns the generated text for one field as a ``str``.

    Returns:
        The full text: instructions, document, and the generated JSON body.
    """
    # (JSON key, token budget, regex constraining the generated value)
    field_specs = (
        ("name", 8, REGEX_STRING + ","),
        ("country", 8, REGEX_STRING + ","),
        ("latitude", 8, REGEX_FLOAT + ","),
        ("population", 8, REGEX_INT + ","),
        ("top 3 landmarks", 24, REGEX_LIST),
    )

    text = (
        "Please extract the information of a city from the following wikipedia page.\n"
        + "Page begin.\n" + document + "Page end.\n"
        + "Here is the name, country, and symbol of the city in JSON format.\n"
        + "{\n"
    )
    for key, budget, pattern in field_specs:
        text += '  "' + key + '": '
        # The model sees the prompt including the key it is about to fill in.
        completion = generate(text, max_tokens=budget, regex=pattern)
        text += completion + "\n"
    text += "}\n"

    return text
# fmt: on


def main(args):
    """Run the JSON-decode benchmark on the selected backend and record latency.

    Reads up to ``args.num_questions`` documents from ``args.data_path``, runs
    ``json_decode`` on each of them (sequentially when ``args.parallel == 1``,
    otherwise on a thread pool with ``args.parallel`` workers), prints the
    elapsed time, dumps the generated texts to ``tmp_output_<backend>.txt`` and
    appends a one-line JSON summary to ``args.result_file``.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``data_path``, ``num_questions``, ``parallel``, ``backend`` and
            ``result_file``; the namespace is also handed to
            ``get_call_generate`` to build the backend call.
    """
    lines = read_jsonl(args.data_path)
    arguments = [
        {"document": line["document"]} for line in lines[: args.num_questions]
    ]
    states = [None] * len(arguments)

    # Select backend
    call_generate = partial(get_call_generate(args), temperature=0)

    # Run requests
    def get_one_answer(i):
        states[i] = json_decode(generate=call_generate, **arguments[i])

    # perf_counter is monotonic, so the measured interval cannot be distorted
    # by wall-clock adjustments while the benchmark is running.
    tic = time.perf_counter()
    if args.parallel == 1:
        for i in tqdm(range(len(arguments))):
            get_one_answer(i)
    else:
        with ThreadPoolExecutor(args.parallel) as executor:
            # Draining the map iterator waits for every request and re-raises
            # the first exception raised by a worker, if any.
            list(
                tqdm(
                    executor.map(get_one_answer, range(len(arguments))),
                    total=len(arguments),
                )
            )

    latency = time.perf_counter() - tic

    # Report latency
    print(f"Latency: {latency:.3f}")

    # Write results
    dump_state_text(f"tmp_output_{args.backend}.txt", states)

    with open(args.result_file, "a") as fout:
        value = {
            "task": "json_decode_regex",
            "backend": args.backend,
            "num_gpus": 1,
            "latency": round(latency, 3),
            # Count what was actually run: the data file may hold fewer
            # documents than --num-questions asked for.
            "num_requests": len(arguments),
            "other": {
                "parallel": args.parallel,
            },
        }
        fout.write(json.dumps(value) + "\n")


if __name__ == "__main__":
    # Flags specific to this benchmark. The remaining options that main() reads
    # (backend, parallel, result_file) are presumably registered by
    # add_common_other_args_and_parse before it parses the command line.
    cli = argparse.ArgumentParser()
    cli.add_argument("--data-path", type=str, default="questions.jsonl")
    cli.add_argument("--num-questions", type=int, default=20)
    main(add_common_other_args_and_parse(cli))