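# read_base() lets this config compose model, dataset, and summarizer definitions from
# other OpenCompass config files via relative imports inside the `with` block below.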
from mmengine.config import read_base

with read_base():
    from .models.hf_internlm.lmdeploy_internlm2_chat_7b import models as internlm2_chat_7b_200k
    from .models.hf_internlm.hf_internlm2_chat_7b import models as internlm2_chat_7b

    # Evaluate needlebench_4k; adjust the configuration to use 8k, 32k, 128k, 200k, or 1000k if necessary.
    # from .datasets.needlebench.needlebench_4k.needlebench_4k import needlebench_datasets
    # from .summarizers.needlebench import needlebench_4k_summarizer as summarizer

    # Only evaluate the original "needle in a haystack" test in needlebench_4k.
    from .datasets.needlebench.needlebench_4k.needlebench_single_4k import needlebench_zh_datasets, needlebench_en_datasets
    from .summarizers.needlebench import needlebench_4k_summarizer as summarizer

    # Evaluate the Ancestral Tracing Challenge (ATC).
    # from .datasets.needlebench.atc.atc_choice_50 import needlebench_datasets
    # from .summarizers.needlebench import atc_summarizer_50 as summarizer

# Collect every dataset list imported above (any local variable whose name contains 'datasets').
datasets = sum([v for k, v in locals().items() if ('datasets' in k)], [])

for m in internlm2_chat_7b:
    # Ensure the InternLM2-7B model can receive the full length of the long texts; adjust
    # this for other models based on their supported maximum sequence length.
    m['max_seq_len'] = 32768
    # Ensure complete responses from the model in multi-needle retrieval tasks.
    m['max_out_len'] = 2000

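# To run the LMDeploy-accelerated configuration imported above instead, point `models`
# at internlm2_chat_7b_200k.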
models = internlm2_chat_7b

work_dir = './outputs/needlebench'
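
# A minimal launch sketch, assuming this file sits under OpenCompass's configs/ directory
# and the repository's run.py entry point is used (adjust the path for your setup):
#   python run.py configs/eval_needlebench.py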