# eval_llm_compression.py
from mmengine.config import read_base

with read_base():
    # LLM compression datasets
    from .datasets.llm_compression.llm_compression import llm_compression_datasets

    # Model configs
    from .models.qwen.hf_qwen1_5_7b import models as qwen1_5_7b
    from .models.qwen.hf_qwen1_5_14b import models as qwen1_5_14b
    from .models.hf_llama.hf_llama2_7b import models as llama2_7b
    from .models.hf_llama.hf_llama2_13b import models as llama2_13b


from opencompass.partitioners import NaivePartitioner
from opencompass.runners import LocalRunner
from opencompass.tasks import OpenICLInferTask, OpenICLEvalTask
from opencompass.summarizers import LLMCompressionSummarizer


# -------------Inference Stage ----------------------------------------
# Datasets to evaluate: every entry of the imported LLM-compression suite.
datasets = [*llm_compression_datasets]

# Output directory for this run.
workdir = 'outputs/llm_compression'
# OpenCompass looks the output directory up under the `work_dir` config key;
# a bare `workdir` is not consulted and results would land in the default
# location instead. Publish the path under the recognised name too, keeping
# `workdir` so anything that refers to the old name continues to work.
work_dir = workdir

# Models to evaluate, flattened from the imported per-model config lists.
models = [
    *qwen1_5_7b,
    *qwen1_5_14b,
    *llama2_7b,
    *llama2_13b,
]

# Set custom batch_size and num_gpus for faster loss calculation.
# A smaller batch_size should give more precise results, at the cost of
# slower evaluation.
model_cfg = dict(
    batch_size=8,
    run_cfg=dict(num_gpus=4, num_procs=1),
)

# Apply the overrides to every model entry; keys already present in an entry
# (e.g. its own batch_size / run_cfg) are replaced by the values above.
for mdl in models:
    mdl.update(model_cfg)

# Inference-stage settings: how the work is partitioned into tasks and how
# those tasks are run.
infer = dict(
    # The OpenCompass implementation of BPC currently only supports
    # NaivePartitioner, as the sliding window approach requires the dataset
    # to be loaded sequentially. Using other partitioner types may produce
    # incorrect results.
    partitioner=dict(type=NaivePartitioner),
    runner=dict(
        type=LocalRunner,
        task=dict(type=OpenICLInferTask),
        max_num_workers=256,  # Maximum number of concurrently running tasks
    ),
)


# -------------Evaluation Stage ----------------------------------------
# Evaluation-stage settings, mirroring the inference stage: the same
# partitioner and runner types, with the task type swapped to OpenICLEvalTask.
eval = {
    'partitioner': {'type': NaivePartitioner},
    'runner': {
        'type': LocalRunner,
        'task': {'type': OpenICLEvalTask},
        'max_num_workers': 256,  # Same worker cap as the inference stage
    },
}


# -------------Summarization Stage ----------------------------------------
# Summarizer class used to produce the final report for this config.
summarizer = {'type': LLMCompressionSummarizer}