Commit a69ad956 authored by Yen-Ting Lin
Browse files

Update dataset paths and output path

parent e15672a4
...@@ -57,7 +57,7 @@ lm_eval \ ...@@ -57,7 +57,7 @@ lm_eval \
--tasks $tasks \ --tasks $tasks \
--num_fewshot 0 \ --num_fewshot 0 \
--batch_size 8 \ --batch_size 8 \
--output_path evals \ --output_path eval_results \
--write_out \ --write_out \
--log_samples \ --log_samples \
--verbosity DEBUG \ --verbosity DEBUG \
......
dataset_path: yentinglin/legal_benchmark dataset_path: yentinglin/PegaEval
test_split: train test_split: train
output_type: multiple_choice output_type: multiple_choice
process_docs: !function utils.process_docs process_docs: !function utils.process_docs
...@@ -9,8 +9,5 @@ metric_list: ...@@ -9,8 +9,5 @@ metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
metadata: metadata:
version: 0.1 version: 0.1
...@@ -12,8 +12,5 @@ metric_list: ...@@ -12,8 +12,5 @@ metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
metadata: metadata:
version: 0.1 version: 0.1
...@@ -12,8 +12,5 @@ metric_list: ...@@ -12,8 +12,5 @@ metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
metadata: metadata:
version: 0.1 version: 0.1
dataset_path: yentinglin/PegaEval dataset_path: lianghsun/tw-legal-benchmark-v1
test_split: train test_split: train
output_type: multiple_choice output_type: multiple_choice
process_docs: !function utils.process_docs process_docs: !function utils.process_docs
...@@ -9,8 +9,5 @@ metric_list: ...@@ -9,8 +9,5 @@ metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
metadata: metadata:
version: 0.1 version: 0.1
...@@ -9,8 +9,5 @@ metric_list: ...@@ -9,8 +9,5 @@ metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
metadata: metadata:
version: 0.1 version: 0.1
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment