# bench_other.py: latency benchmark for the "Generative Agents" task.
import argparse
import json
import time

from agent_functions import (
    action_location_object_prompt,
    action_location_sector_prompt,
    generate_event_triple_prompt,
    generate_pronunciatio_prompt,
    poignancy_event_prompt,
)
from tqdm import tqdm

from sglang.test.test_utils import add_common_other_args_and_parse, get_call_generate
from sglang.utils import dump_state_text, read_jsonl


def main(args):
    """Run the Generative Agents latency benchmark and append the result.

    Loads up to ``args.num_events`` records from ``args.data_path``, turns
    every entry of every record into prompt arguments, and issues them one at
    a time through the callable returned by ``get_call_generate(args)``. The
    collected answers are dumped to ``tmp_output_{args.backend}.txt`` and one
    JSON line of summary statistics is appended to ``args.result_file``.

    Args:
        args: Parsed command-line namespace. This function reads
            ``data_path``, ``num_events``, ``backend``, ``result_file`` and
            ``parallel`` from it.

    Raises:
        KeyError: If a record contains a key with no prompt builder in
            ``mapping``.
    """
    lines = read_jsonl(args.data_path)[: args.num_events]
    mapping = {
        "poignancy_event": poignancy_event_prompt,
        "generate_event_triple": generate_event_triple_prompt,
        "generate_pronunciatio": generate_pronunciatio_prompt,
        "action_location_sector": action_location_sector_prompt,
        "action_location_object": action_location_object_prompt,
    }

    # Each record maps a prompt-builder name to that builder's keyword
    # arguments; the builder's return value is later splatted into
    # call_generate, so it must be a mapping of keyword arguments.
    arguments = [
        mapping[name](**kwargs) for line in lines for name, kwargs in line.items()
    ]
    states = []

    # Select backend
    call_generate = get_call_generate(args)

    def get_one_answer(arg):
        answer = call_generate(**arg, temperature=0)
        states.append(answer)

    async def get_one_answer_async(arg):
        answer = await call_generate(**arg, temperature=0)
        states.append(answer)

    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # system clock adjustments the way a time.time() difference can.
    tic = time.perf_counter()
    # we always sequentially execute agent calls to maintain its dependency
    if args.backend != "lmql":
        for arg in tqdm(arguments):
            get_one_answer(arg)
    else:
        import asyncio

        # Reuse the current event loop when one is already set (the backend
        # may have bound objects to it). asyncio.get_event_loop() no longer
        # creates a loop implicitly on newer Python versions and raises
        # RuntimeError instead, so create and register one in that case.
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
        for arg in tqdm(arguments):
            loop.run_until_complete(get_one_answer_async(arg))
    latency = time.perf_counter() - tic

    print(f"Latency: {latency:.3f}")

    # Write results
    dump_state_text(f"tmp_output_{args.backend}.txt", states)

    with open(args.result_file, "a") as fout:
        value = {
            "task": "Generative Agents",
            "backend": args.backend,
            "num_gpus": 1,
            "latency": round(latency, 3),
            # to pack weighted functions as a single agent
            "num_requests": len(arguments) / len(mapping),
            "other": {
                "parallel": args.parallel,
            },
        }
        fout.write(json.dumps(value) + "\n")


if __name__ == "__main__":
    # Only the flags specific to this benchmark are declared here. The other
    # options main() reads (backend, result_file, parallel) are presumably
    # registered by add_common_other_args_and_parse -- confirm in
    # sglang.test.test_utils.
    cli = argparse.ArgumentParser()
    cli.add_argument("--data-path", type=str, default="agent_calls.jsonl")
    cli.add_argument("--num-events", type=int, default=10)
    main(add_common_other_args_and_parse(cli))