#!/bin/bash

## Variant that includes inference (currently disabled, kept for reference)
# evalscope eval \
#     --model 'qwen3-8B' \
#     --api-url 'http://0.0.0.0:8000/v1/chat/completions' \
#     --api-key 'EMPTY' \
#     --datasets 'math_500' \
#     --dataset-args '{"math_500": {"local_path": "../MATH-500", "subset_list": ["Level 1"]}}' \
#     --eval-batch-size 32 \
#     --generation-config '{"batch_size": 32, "temperature": 0.0}' \
#     --timeout 1800



# Run `evalscope eval` on the math_500 dataset for model id qwen3-8B, with
# --use-cache pointing at ./evalscope-data-001 and --rerun-review set.
# NOTE(review): presumably this re-scores predictions already stored in the
# cache directory rather than querying the model again — confirm against the
# evalscope CLI documentation.
#
# The flags live in an array instead of a backslash-continued command line:
# a stray line between continuations silently splits the command in two,
# whereas a stray token inside an array is at least passed to evalscope.
eval_args=(
  --use-cache ./evalscope-data-001
  --datasets math_500
  --model-id qwen3-8B
  --no-timestamp
  --rerun-review
)
evalscope eval "${eval_args[@]}"

# --dataset-args '{"math_500": {"local_path": "../MATH-500", "subset_list": ["Level 2"]}}' \