Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
9822b06e
Unverified
Commit
9822b06e
authored
Mar 01, 2024
by
Lintang Sutawika
Committed by
GitHub
Mar 01, 2024
Browse files
Merge branch 'main' into weight_by_size
parents
51f27158
b177c82c
Changes
656
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
84 additions
and
0 deletions
+84
-0
lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml
+3
-0
lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml
...asks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml
+3
-0
lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml
+3
-0
lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml
...sks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml
+3
-0
lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml
...sks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml
+3
-0
lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml
+3
-0
lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml
.../direct/kmmlu_direct_political_science_and_sociology.yaml
+3
-0
lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml
+3
-0
lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml
+3
-0
lm_eval/tasks/kmmlu/direct/kmmlu_direct_railway_and_automotive_engineering.yaml
...rect/kmmlu_direct_railway_and_automotive_engineering.yaml
+3
-0
lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml
+3
-0
lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml
...ks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml
+3
-0
lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml
+3
-0
lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml
lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml
+3
-0
lm_eval/tasks/kmmlu/direct/kmmlu_direct_telecommunications_and_wireless_technology.yaml
...lu_direct_telecommunications_and_wireless_technology.yaml
+3
-0
lm_eval/tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml
lm_eval/tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml
+27
-0
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_accounting.yaml
...tasks/kmmlu/direct_hard/kmmlu_direct_hard_accounting.yaml
+3
-0
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_agricultural_sciences.yaml
.../direct_hard/kmmlu_direct_hard_agricultural_sciences.yaml
+3
-0
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml
...mlu_direct_hard_aviation_engineering_and_maintenance.yaml
+3
-0
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml
...al/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml
+3
-0
No files found.
lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml
0 → 100644
View file @
9822b06e
# Per-subject task file: selects the Marketing dataset subset and
# includes the shared _direct_kmmlu_yaml config.
dataset_name: Marketing
include: _direct_kmmlu_yaml
task: kmmlu_direct_marketing
lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml
0 → 100644
View file @
9822b06e
# Per-subject task file: selects the Materials-Engineering dataset subset and
# includes the shared _direct_kmmlu_yaml config.
dataset_name: Materials-Engineering
include: _direct_kmmlu_yaml
task: kmmlu_direct_materials_engineering
lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml
0 → 100644
View file @
9822b06e
# Per-subject task file: selects the Math dataset subset and
# includes the shared _direct_kmmlu_yaml config.
dataset_name: Math
include: _direct_kmmlu_yaml
task: kmmlu_direct_math
lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml
0 → 100644
View file @
9822b06e
# Per-subject task file: selects the Mechanical-Engineering dataset subset and
# includes the shared _direct_kmmlu_yaml config.
dataset_name: Mechanical-Engineering
include: _direct_kmmlu_yaml
task: kmmlu_direct_mechanical_engineering
lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml
0 → 100644
View file @
9822b06e
# Per-subject task file: selects the Nondestructive-Testing dataset subset and
# includes the shared _direct_kmmlu_yaml config.
dataset_name: Nondestructive-Testing
include: _direct_kmmlu_yaml
task: kmmlu_direct_nondestructive_testing
lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml
0 → 100644
View file @
9822b06e
# Per-subject task file: selects the Patent dataset subset and
# includes the shared _direct_kmmlu_yaml config.
dataset_name: Patent
include: _direct_kmmlu_yaml
task: kmmlu_direct_patent
lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml
0 → 100644
View file @
9822b06e
# Per-subject task file: selects the Political-Science-and-Sociology dataset
# subset and includes the shared _direct_kmmlu_yaml config.
dataset_name: Political-Science-and-Sociology
include: _direct_kmmlu_yaml
task: kmmlu_direct_political_science_and_sociology
lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml
0 → 100644
View file @
9822b06e
# Per-subject task file: selects the Psychology dataset subset and
# includes the shared _direct_kmmlu_yaml config.
dataset_name: Psychology
include: _direct_kmmlu_yaml
task: kmmlu_direct_psychology
lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml
0 → 100644
View file @
9822b06e
# Per-subject task file: selects the Public-Safety dataset subset and
# includes the shared _direct_kmmlu_yaml config.
dataset_name: Public-Safety
include: _direct_kmmlu_yaml
task: kmmlu_direct_public_safety
lm_eval/tasks/kmmlu/direct/kmmlu_direct_railway_and_automotive_engineering.yaml
0 → 100644
View file @
9822b06e
# Per-subject task file: selects the Railway-and-Automotive-Engineering dataset
# subset and includes the shared _direct_kmmlu_yaml config.
dataset_name: Railway-and-Automotive-Engineering
include: _direct_kmmlu_yaml
task: kmmlu_direct_railway_and_automotive_engineering
lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml
0 → 100644
View file @
9822b06e
# Per-subject task file: selects the Real-Estate dataset subset and
# includes the shared _direct_kmmlu_yaml config.
dataset_name: Real-Estate
include: _direct_kmmlu_yaml
task: kmmlu_direct_real_estate
lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml
0 → 100644
View file @
9822b06e
# Per-subject task file: selects the Refrigerating-Machinery dataset subset and
# includes the shared _direct_kmmlu_yaml config.
dataset_name: Refrigerating-Machinery
include: _direct_kmmlu_yaml
task: kmmlu_direct_refrigerating_machinery
lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml
0 → 100644
View file @
9822b06e
# Per-subject task file: selects the Social-Welfare dataset subset and
# includes the shared _direct_kmmlu_yaml config.
dataset_name: Social-Welfare
include: _direct_kmmlu_yaml
task: kmmlu_direct_social_welfare
lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml
0 → 100644
View file @
9822b06e
# Per-subject task file: selects the Taxation dataset subset and
# includes the shared _direct_kmmlu_yaml config.
dataset_name: Taxation
include: _direct_kmmlu_yaml
task: kmmlu_direct_taxation
lm_eval/tasks/kmmlu/direct/kmmlu_direct_telecommunications_and_wireless_technology.yaml
0 → 100644
View file @
9822b06e
# Per-subject task file: selects the Telecommunications-and-Wireless-Technology
# dataset subset and includes the shared _direct_kmmlu_yaml config.
dataset_name: Telecommunications-and-Wireless-Technology
include: _direct_kmmlu_yaml
task: kmmlu_direct_telecommunications_and_wireless_technology
lm_eval/tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml
0 → 100644
View file @
9822b06e
# Shared base config for the KMMLU-HARD "direct" tasks. The per-subject files
# in this directory reference it through their `include` key and add only
# `dataset_name` and `task`.
group:
  - kmmlu
  - kmmlu_hard_direct
dataset_path: HAERAE-HUB/KMMLU-HARD
output_type: generate_until
test_split: test
fewshot_split: dev
# Prompt layout: the stripped question, the four lettered options A-D, then
# the literal cue "정답:" for the model to complete.
doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:"
# `answer-1` turns the answer field into a zero-based index into the letter
# list (assumes `answer` is an integer in 1..4 -- TODO confirm against dataset).
doc_to_target: "{{['A', 'B', 'C', 'D'][answer-1]}}"
metric_list:
  # All options below belong to this single exact_match metric entry.
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
    regexes_to_ignore:
      - " "
generation_kwargs:
  # Stop sequences; generation ends at the first one produced.
  until:
    - "Q:"
    - "\n\n"
    - "</s>"
    - "."
  do_sample: false
  temperature: 0.0
metadata:
  version: 2.0
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_accounting.yaml
0 → 100644
View file @
9822b06e
# Per-subject task file: selects the accounting dataset subset and
# includes the shared _direct_hard_kmmlu_yaml config.
dataset_name: accounting
include: _direct_hard_kmmlu_yaml
task: kmmlu_hard_direct_accounting
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_agricultural_sciences.yaml
0 → 100644
View file @
9822b06e
# Per-subject task file: selects the agricultural_sciences dataset subset and
# includes the shared _direct_hard_kmmlu_yaml config.
dataset_name: agricultural_sciences
include: _direct_hard_kmmlu_yaml
task: kmmlu_hard_direct_agricultural_sciences
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml
0 → 100644
View file @
9822b06e
# Per-subject task file: selects the aviation_engineering_and_maintenance
# dataset subset and includes the shared _direct_hard_kmmlu_yaml config.
dataset_name: aviation_engineering_and_maintenance
include: _direct_hard_kmmlu_yaml
task: kmmlu_hard_direct_aviation_engineering_and_maintenance
lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml
0 → 100644
View file @
9822b06e
# Per-subject task file: selects the biology dataset subset and
# includes the shared _direct_hard_kmmlu_yaml config.
dataset_name: biology
include: _direct_hard_kmmlu_yaml
task: kmmlu_hard_direct_biology
Prev
1
…
11
12
13
14
15
16
17
18
19
…
33
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment