"sgl-router/vscode:/vscode.git/clone" did not exist on "41d33e4736707cea54aa731055cf88f367befefc"
main.py 1.77 KB
Newer Older
Jason Phang's avatar
Jason Phang committed
1
2
import argparse
import json
Jason Phang's avatar
seed  
Jason Phang committed
3
4
import numpy as np
import random
Leo Gao's avatar
Leo Gao committed
5
import logging
Leo Gao's avatar
Leo Gao committed
6

Leo Gao's avatar
Leo Gao committed
7
from lm_eval import models, tasks, evaluator, base
Jason Phang's avatar
lib  
Jason Phang committed
8

Leo Gao's avatar
Leo Gao committed
9
# Raise the "openai" logger's threshold to WARNING so that its lower-severity
# (DEBUG/INFO) records are not emitted while the script runs.
logging.getLogger("openai").setLevel(logging.WARNING)
Leo Gao's avatar
Leo Gao committed
10

Jason Phang's avatar
Jason Phang committed
11
12
13
14
15
16
def parse_args(argv=None):
    """Parse the command-line options for an evaluation run.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            the behavior of calling this function with no arguments.

    Returns:
        An ``argparse.Namespace`` with the attributes ``model``,
        ``model_args``, ``tasks``, ``provide_description``, ``num_fewshot``,
        ``batch_size``, ``device``, ``output_path``, ``limit`` and
        ``no_cache``.

    Raises:
        SystemExit: raised by argparse when ``--model`` is missing or an
            option value cannot be converted to its declared type.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', required=True)
    parser.add_argument('--model_args', default="")
    # The default "all_tasks" is a sentinel string; otherwise the value is a
    # comma-separated list that the caller splits.
    parser.add_argument('--tasks', default="all_tasks")
    parser.add_argument('--provide_description', action="store_true")
    parser.add_argument('--num_fewshot', type=int, default=0)
    parser.add_argument('--batch_size', type=int, default=None)
    parser.add_argument('--device', type=str, default=None)
    parser.add_argument('--output_path', default=None)
    parser.add_argument('--limit', type=int, default=None)
    parser.add_argument('--no_cache', action="store_true")

    return parser.parse_args(argv)

def main():
    """Run an evaluation from the command line and report the results.

    Parses the CLI options, resolves the task list, calls
    ``evaluator.simple_evaluate``, prints the results as indented JSON,
    optionally writes that JSON to ``--output_path``, and finally prints a
    one-line run summary followed by ``evaluator.make_table(results)``.

    Raises:
        NotImplementedError: if ``--provide_description`` was passed.
    """
    args = parse_args()

    # Explicit raise instead of `assert`: assertions are stripped under
    # `python -O`, which would silently let the unsupported flag through.
    if args.provide_description:
        raise NotImplementedError("--provide_description is not implemented")

    # Compare against None so that an explicit `--limit 0` also triggers the
    # warning; a plain truthiness check would skip it.
    if args.limit is not None:
        print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")

    if args.tasks == "all_tasks":
        task_names = tasks.ALL_TASKS
    else:
        task_names = args.tasks.split(",")

    # Arguments are passed positionally, in the same order as before.
    results = evaluator.simple_evaluate(
        args.model,
        args.model_args,
        task_names,
        args.num_fewshot,
        args.batch_size,
        args.device,
        args.no_cache,
        args.limit,
    )

    dumped = json.dumps(results, indent=2)

    print(dumped)

    if args.output_path:
        with open(args.output_path, "w") as f:
            f.write(dumped)

    print(f"{args.model} ({args.model_args}), limit: {args.limit}, provide_description: {args.provide_description}, num_fewshot: {args.num_fewshot}, batch_size: {args.batch_size}")
    print(evaluator.make_table(results))
Jason Phang's avatar
lib  
Jason Phang committed
51

Jason Phang's avatar
Jason Phang committed
52
# Run the evaluation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()