eval_instruct.sh 439 Bytes
Newer Older
luopl's avatar
luopl committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
#!/bin/bash

# RealWorldQA Evaluation Script (Instruct Model)
# This script evaluates the inference results using rule-based and optionally model-based extraction

# Arguments for the `eval` subcommand of run_realworldqa.py, kept in an array
# (one option per line) so the invocation needs no backslash continuations.
# NOTE(review): the per-flag notes below are inferred from the flag names and
# the header comment of this script; confirm against run_realworldqa.py.
eval_args=(
  # Placeholder dataset location as shipped; presumably must be edited.
  --data-dir /path/to/data
  # Presumably the inference results to be scored.
  --input-file results/RealWorldQA_results.jsonl
  # Presumably where the evaluation table is written.
  --output-file results/RealWorldQA_evaluation.csv
  --dataset RealWorldQA
  # Presumably the model and API backend used for model-based extraction.
  --eval-model gpt-3.5-turbo-0125
  --api-type dash
  # Presumably the number of parallel workers; verify.
  --nproc 4
)

# Last command of the script, so its exit status becomes the script's status.
python run_realworldqa.py eval "${eval_args[@]}"