evalscope_test.sh 694 Bytes
Newer Older
sunzhq2's avatar
init  
sunzhq2 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#!/bin/bash

## Includes inference (queries the model through the API endpoint below)
# evalscope eval \
#     --model 'qwen3-8B' \
#     --api-url 'http://0.0.0.0:8000/v1/chat/completions' \
#     --api-key 'EMPTY' \
#     --datasets 'math_500' \
#     --dataset-args '{"math_500": {"local_path": "/data1/sunzhq/llm-benchmark/MATH-500"}}' \
#     --eval-batch-size 32 \
#     --generation-config '{"batch_size": 32, "temperature": 0.0}' \
#     --timeout 1800 \



# Run `evalscope eval` on the math_500 dataset against an existing cache
# directory.
# NOTE(review): judging by the flags (--use-cache / --rerun-review), this
# presumably re-scores previously cached predictions instead of querying the
# model again -- confirm against the evalscope CLI documentation.
#
# Optional environment overrides (defaults equal the original hard-coded values):
#   EVALSCOPE_TEST_CACHE_DIR  directory passed to --use-cache
#   EVALSCOPE_TEST_MODEL_ID   identifier passed to --model-id
cache_dir="${EVALSCOPE_TEST_CACHE_DIR:-/data1/sunzhq/llm-benchmark/tools/evalscope-data}"
model_id="${EVALSCOPE_TEST_MODEL_ID:-qwen3-8B}"

# Build the argument list as an array so each value stays a single word even
# if an override contains spaces.
evalscope_args=(
  eval
  --use-cache "${cache_dir}"
  --datasets math_500
  --model-id "${model_id}"
  --no-timestamp
  --rerun-review
)

evalscope "${evalscope_args[@]}"

# --dataset-args '{"math_500": {"local_path": "/data1/sunzhq/llm-benchmark/MATH-500", "subset_list": ["Level 2"]}}' \