Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
9822b06e
Unverified
Commit
9822b06e
authored
Mar 01, 2024
by
Lintang Sutawika
Committed by
GitHub
Mar 01, 2024
Browse files
Merge branch 'main' into weight_by_size
parents
51f27158
b177c82c
Changes
656
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
63 additions
and
7 deletions
+63
-7
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_telecommunications_and_wireless_technology.yaml
...rect_hard_telecommunications_and_wireless_technology.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/_hard_kmmlu_yaml
lm_eval/tasks/kmmlu/hard/_hard_kmmlu_yaml
+6
-7
lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml
...al/tasks/kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_aviation_engineering_and_maintenance.yaml
...hard/kmmlu_hard_aviation_engineering_and_maintenance.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemical_engineering.yaml
...val/tasks/kmmlu/hard/kmmlu_hard_chemical_engineering.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_civil_engineering.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_civil_engineering.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_electrical_engineering.yaml
...l/tasks/kmmlu/hard/kmmlu_hard_electrical_engineering.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_electronics_engineering.yaml
.../tasks/kmmlu/hard/kmmlu_hard_electronics_engineering.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_energy_management.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_energy_management.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_environmental_science.yaml
...al/tasks/kmmlu/hard/kmmlu_hard_environmental_science.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml
+3
-0
No files found.
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_telecommunications_and_wireless_technology.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
telecommunications_and_wireless_technology
include
:
_direct_hard_kmmlu_yaml
task
:
kmmlu_hard_direct_telecommunications_and_wireless_technology
lm_eval/tasks/kmmlu/
_default
_kmmlu_yaml
→
lm_eval/tasks/kmmlu/
hard/_hard
_kmmlu_yaml
View file @
9822b06e
group: kmmlu
dataset_path: HAERAE-HUB/K-MMLU-Preview
group:
- kmmlu
- kmmlu_hard
dataset_path: HAERAE-HUB/KMMLU-HARD
output_type: multiple_choice
training_split: train
validation_split: dev
test_split: test
fewshot_split: dev
output_type: multiple_choice
doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:"
doc_to_choice: ["A", "B", "C", "D"]
doc_to_target: "{{
['A', 'B', 'C', 'D'][
answer-1
]
}}"
doc_to_target: "{{answer-1}}"
metric_list:
- metric: acc
aggregation: mean
...
...
@@ -17,4 +16,4 @@ metric_list:
aggregation: mean
higher_is_better: true
metadata:
version:
1.1
version:
2.0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
accounting
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_accounting
lm_eval/tasks/kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
agricultural_sciences
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_agricultural_sciences
lm_eval/tasks/kmmlu/hard/kmmlu_hard_aviation_engineering_and_maintenance.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
aviation_engineering_and_maintenance
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_aviation_engineering_and_maintenance
lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
biology
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_biology
lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemical_engineering.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
chemical_engineering
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_chemical_engineering
lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
chemistry
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_chemistry
lm_eval/tasks/kmmlu/hard/kmmlu_hard_civil_engineering.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
civil_engineering
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_civil_engineering
lm_eval/tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
computer_science
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_computer_science
lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
construction
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_construction
lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
criminal_law
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_criminal_law
lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
ecology
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_ecology
lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
economics
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_economics
lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
education
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_education
lm_eval/tasks/kmmlu/hard/kmmlu_hard_electrical_engineering.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
electrical_engineering
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_electrical_engineering
lm_eval/tasks/kmmlu/hard/kmmlu_hard_electronics_engineering.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
electronics_engineering
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_electronics_engineering
lm_eval/tasks/kmmlu/hard/kmmlu_hard_energy_management.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
energy_management
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_energy_management
lm_eval/tasks/kmmlu/hard/kmmlu_hard_environmental_science.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
environmental_science
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_environmental_science
lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
fashion
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_fashion
Prev
1
…
14
15
16
17
18
19
20
21
22
…
33
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment