Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
cda25fef
Unverified
Commit
cda25fef
authored
Jan 02, 2024
by
Lintang Sutawika
Committed by
GitHub
Jan 02, 2024
Browse files
Merge branch 'main' into standardize_metrics
parents
dfb41835
4d10ad56
Changes
249
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
18 additions
and
24 deletions
+18
-24
lm_eval/tasks/kmmlu/kmmlu_environmental_science.yaml
lm_eval/tasks/kmmlu/kmmlu_environmental_science.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_food_processing.yaml
lm_eval/tasks/kmmlu/kmmlu_food_processing.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_gas_technology_and_engineering.yaml
...val/tasks/kmmlu/kmmlu_gas_technology_and_engineering.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_general_physics.yaml
lm_eval/tasks/kmmlu/kmmlu_general_physics.yaml
+0
-3
lm_eval/tasks/kmmlu/kmmlu_industrial_engineer.yaml
lm_eval/tasks/kmmlu/kmmlu_industrial_engineer.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_information_technology.yaml
lm_eval/tasks/kmmlu/kmmlu_information_technology.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_interior_architecture_and_design.yaml
...l/tasks/kmmlu/kmmlu_interior_architecture_and_design.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_korean_language.yaml
lm_eval/tasks/kmmlu/kmmlu_korean_language.yaml
+0
-3
lm_eval/tasks/kmmlu/kmmlu_machine_design_and_manufacturing.yaml
...l/tasks/kmmlu/kmmlu_machine_design_and_manufacturing.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_maritime_engineering.yaml
lm_eval/tasks/kmmlu/kmmlu_maritime_engineering.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_materials_engineering.yaml
lm_eval/tasks/kmmlu/kmmlu_materials_engineering.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_mechanical_engineering.yaml
lm_eval/tasks/kmmlu/kmmlu_mechanical_engineering.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_nondestructive_testing.yaml
lm_eval/tasks/kmmlu/kmmlu_nondestructive_testing.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_political_science_and_sociology.yaml
...al/tasks/kmmlu/kmmlu_political_science_and_sociology.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_public_safety.yaml
lm_eval/tasks/kmmlu/kmmlu_public_safety.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_railway_and_automotive_engineering.yaml
...tasks/kmmlu/kmmlu_railway_and_automotive_engineering.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_real_estate.yaml
lm_eval/tasks/kmmlu/kmmlu_real_estate.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_refrigerating_machinery.yaml
lm_eval/tasks/kmmlu/kmmlu_refrigerating_machinery.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_social_welfare.yaml
lm_eval/tasks/kmmlu/kmmlu_social_welfare.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_telecommunications_and_wireless_technology.yaml
...mlu/kmmlu_telecommunications_and_wireless_technology.yaml
+1
-1
No files found.
lm_eval/tasks/kmmlu/kmmlu_environmental_science.yaml
View file @
cda25fef
"
dataset_name"
:
"
Environmental
Science"
"
dataset_name"
:
"
Environmental
-
Science"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_environmental_science"
"
task"
:
"
kmmlu_environmental_science"
lm_eval/tasks/kmmlu/kmmlu_food_processing.yaml
View file @
cda25fef
"
dataset_name"
:
"
Food
Processing"
"
dataset_name"
:
"
Food
-
Processing"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_food_processing"
"
task"
:
"
kmmlu_food_processing"
lm_eval/tasks/kmmlu/kmmlu_gas_technology_and_engineering.yaml
View file @
cda25fef
"
dataset_name"
:
"
Gas
Technology
and
Engineering"
"
dataset_name"
:
"
Gas
-
Technology
-
and
-
Engineering"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_gas_technology_and_engineering"
"
task"
:
"
kmmlu_gas_technology_and_engineering"
lm_eval/tasks/kmmlu/kmmlu_general_physics.yaml
deleted
100644 → 0
View file @
dfb41835
"
dataset_name"
:
"
General
Physics"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_general_physics"
lm_eval/tasks/kmmlu/kmmlu_industrial_engineer.yaml
View file @
cda25fef
"
dataset_name"
:
"
Industrial
Engineer"
"
dataset_name"
:
"
Industrial
-
Engineer"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_industrial_engineer"
"
task"
:
"
kmmlu_industrial_engineer"
lm_eval/tasks/kmmlu/kmmlu_information_technology.yaml
View file @
cda25fef
"
dataset_name"
:
"
Information
Technology"
"
dataset_name"
:
"
Information
-
Technology"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_information_technology"
"
task"
:
"
kmmlu_information_technology"
lm_eval/tasks/kmmlu/kmmlu_interior_architecture_and_design.yaml
View file @
cda25fef
"
dataset_name"
:
"
Interior
Architecture
and
Design"
"
dataset_name"
:
"
Interior
-
Architecture
-
and
-
Design"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_interior_architecture_and_design"
"
task"
:
"
kmmlu_interior_architecture_and_design"
lm_eval/tasks/kmmlu/kmmlu_korean_language.yaml
deleted
100644 → 0
View file @
dfb41835
"
dataset_name"
:
"
Korean
Language"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_korean_language"
lm_eval/tasks/kmmlu/kmmlu_machine_design_and_manufacturing.yaml
View file @
cda25fef
"
dataset_name"
:
"
Machine
Design
and
Manufacturing"
"
dataset_name"
:
"
Machine
-
Design
-
and
-
Manufacturing"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_machine_design_and_manufacturing"
"
task"
:
"
kmmlu_machine_design_and_manufacturing"
lm_eval/tasks/kmmlu/kmmlu_maritime_engineering.yaml
View file @
cda25fef
"
dataset_name"
:
"
Maritime
Engineering"
"
dataset_name"
:
"
Maritime
-
Engineering"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_maritime_engineering"
"
task"
:
"
kmmlu_maritime_engineering"
lm_eval/tasks/kmmlu/kmmlu_materials_engineering.yaml
View file @
cda25fef
"
dataset_name"
:
"
Materials
Engineering"
"
dataset_name"
:
"
Materials
-
Engineering"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_materials_engineering"
"
task"
:
"
kmmlu_materials_engineering"
lm_eval/tasks/kmmlu/kmmlu_mechanical_engineering.yaml
View file @
cda25fef
"
dataset_name"
:
"
Mechanical
Engineering"
"
dataset_name"
:
"
Mechanical
-
Engineering"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_mechanical_engineering"
"
task"
:
"
kmmlu_mechanical_engineering"
lm_eval/tasks/kmmlu/kmmlu_nondestructive_testing.yaml
View file @
cda25fef
"
dataset_name"
:
"
Nondestructive
Testing"
"
dataset_name"
:
"
Nondestructive
-
Testing"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_nondestructive_testing"
"
task"
:
"
kmmlu_nondestructive_testing"
lm_eval/tasks/kmmlu/kmmlu_political_science_and_sociology.yaml
View file @
cda25fef
"
dataset_name"
:
"
Political
Science
and
Sociology"
"
dataset_name"
:
"
Political
-
Science
-
and
-
Sociology"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_political_science_and_sociology"
"
task"
:
"
kmmlu_political_science_and_sociology"
lm_eval/tasks/kmmlu/kmmlu_public_safety.yaml
View file @
cda25fef
"
dataset_name"
:
"
Public
Safety"
"
dataset_name"
:
"
Public
-
Safety"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_public_safety"
"
task"
:
"
kmmlu_public_safety"
lm_eval/tasks/kmmlu/kmmlu_railway_and_automotive_engineering.yaml
View file @
cda25fef
"
dataset_name"
:
"
Railway
and
Automotive
Engineering"
"
dataset_name"
:
"
Railway
-
and
-
Automotive
-
Engineering"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_railway_and_automotive_engineering"
"
task"
:
"
kmmlu_railway_and_automotive_engineering"
lm_eval/tasks/kmmlu/kmmlu_real_estate.yaml
View file @
cda25fef
"
dataset_name"
:
"
Real
Estate"
"
dataset_name"
:
"
Real
-
Estate"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_real_estate"
"
task"
:
"
kmmlu_real_estate"
lm_eval/tasks/kmmlu/kmmlu_refrigerating_machinery.yaml
View file @
cda25fef
"
dataset_name"
:
"
Refrigerating
Machinery"
"
dataset_name"
:
"
Refrigerating
-
Machinery"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_refrigerating_machinery"
"
task"
:
"
kmmlu_refrigerating_machinery"
lm_eval/tasks/kmmlu/kmmlu_social_welfare.yaml
View file @
cda25fef
"
dataset_name"
:
"
Social
Welfare"
"
dataset_name"
:
"
Social
-
Welfare"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_social_welfare"
"
task"
:
"
kmmlu_social_welfare"
lm_eval/tasks/kmmlu/kmmlu_telecommunications_and_wireless_technology.yaml
View file @
cda25fef
"
dataset_name"
:
"
Telecommunications
and
Wireless
Technology"
"
dataset_name"
:
"
Telecommunications
-
and
-
Wireless
-
Technology"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_telecommunications_and_wireless_technology"
"
task"
:
"
kmmlu_telecommunications_and_wireless_technology"
Prev
1
2
3
4
5
6
7
8
9
10
…
13
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment