#!/bin/bash
#
# Run an EvalScope evaluation of qwen3-8B on the math_500 dataset.
#
# The active command passes --use-cache and --rerun-review rather than a
# model API endpoint. Presumably this re-scores previously generated answers
# stored in the cache directory instead of running inference again -- confirm
# against the EvalScope CLI documentation.
#
# Optional environment overrides (defaults equal the original hard-coded
# values, so running the script with no environment set behaves as before):
#   EVALSCOPE_CACHE_DIR  directory passed to --use-cache
#   EVALSCOPE_DATASET    dataset name passed to --datasets
#   EVALSCOPE_MODEL_ID   identifier passed to --model-id

set -euo pipefail

## Variant that includes inference (disabled, kept for reference):
# evalscope eval \
#   --model 'qwen3-8B' \
#   --api-url 'http://0.0.0.0:8000/v1/chat/completions' \
#   --api-key 'EMPTY' \
#   --datasets 'math_500' \
#   --dataset-args '{"math_500": {"local_path": "/data1/sunzhq/llm-benchmark/MATH-500"}}' \
#   --eval-batch-size 32 \
#   --generation-config '{"batch_size": 32, "temperature": 0.0}' \
#   --timeout 1800

readonly CACHE_DIR="${EVALSCOPE_CACHE_DIR:-/data1/sunzhq/llm-benchmark/tools/evalscope-data}"
readonly DATASET="${EVALSCOPE_DATASET:-math_500}"
readonly MODEL_ID="${EVALSCOPE_MODEL_ID:-qwen3-8B}"

# Fail early with a clear message instead of a bare "command not found".
if ! command -v evalscope >/dev/null 2>&1; then
  printf 'error: evalscope not found in PATH\n' >&2
  exit 127
fi

# Arguments are collected in an array so each value stays a single word and
# optional flags can be toggled by (un)commenting a line.
eval_args=(
  --use-cache "${CACHE_DIR}"
  --datasets "${DATASET}"
  --model-id "${MODEL_ID}"
  --no-timestamp
  --rerun-review
  # Optional: use the local MATH-500 copy and restrict to the "Level 2" subset.
  # --dataset-args '{"math_500": {"local_path": "/data1/sunzhq/llm-benchmark/MATH-500", "subset_list": ["Level 2"]}}'
)

evalscope eval "${eval_args[@]}"