Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
5888a695
Unverified
Commit
5888a695
authored
Sep 17, 2020
by
Leo Gao
Committed by
GitHub
Sep 17, 2020
Browse files
Merge pull request #33 from zphang/retrieval
Refactor for explicit imports
parents
635a2155
515d78b3
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
34 additions
and
75 deletions
+34
-75
lm_eval/base.py
lm_eval/base.py
+0
-14
lm_eval/models/__init__.py
lm_eval/models/__init__.py
+7
-16
lm_eval/models/gpt2.py
lm_eval/models/gpt2.py
+0
-2
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+20
-17
lm_eval/tasks/coqa.py
lm_eval/tasks/coqa.py
+0
-2
lm_eval/tasks/glue.py
lm_eval/tasks/glue.py
+7
-17
lm_eval/tasks/superglue.py
lm_eval/tasks/superglue.py
+0
-7
No files found.
lm_eval/base.py
View file @
5888a695
...
@@ -122,17 +122,3 @@ class Dataset(abc.ABC):
...
@@ -122,17 +122,3 @@ class Dataset(abc.ABC):
)
+
"
\n\n
"
)
+
"
\n\n
"
example
=
self
.
doc_to_text
(
doc
,
include_target
=
False
).
strip
()
example
=
self
.
doc_to_text
(
doc
,
include_target
=
False
).
strip
()
return
description
+
labeled_examples
+
example
return
description
+
labeled_examples
+
example
class
Registry
:
def
__init__
(
self
,
registry_name
):
self
.
registry_name
=
registry_name
self
.
registry
=
{}
def
register
(
self
,
name
):
def
register_cls
(
new_cls
):
if
name
in
self
.
registry
:
raise
ValueError
(
'Cannot register duplicate ({})'
.
format
(
self
.
registry_name
,
name
))
self
.
registry
[
name
]
=
new_cls
return
new_cls
return
register_cls
lm_eval/models/__init__.py
View file @
5888a695
import
importlib
from
.
import
gpt2
import
os
from
.
import
gpt3
from
lm_eval.base
import
Registry
MODEL_REGISTRY
=
Registry
(
registry_name
=
"models"
)
MODEL_REGISTRY
=
{
# Load all modules in models directory to populate registry
"gpt2"
:
gpt2
.
GPT2LM
,
models_dir
=
os
.
path
.
dirname
(
__file__
)
"gpt3"
:
gpt3
.
GPT3LM
,
for
file
in
os
.
listdir
(
models_dir
):
}
path
=
os
.
path
.
join
(
models_dir
,
file
)
if
(
not
file
.
startswith
(
'_'
)
and
not
file
.
startswith
(
'.'
)
and
(
file
.
endswith
(
'.py'
)
or
os
.
path
.
isdir
(
path
))
):
module_name
=
file
[:
file
.
find
(
'.py'
)]
if
file
.
endswith
(
'.py'
)
else
file
module
=
importlib
.
import_module
(
'lm_eval.models.'
+
module_name
)
def
get_model
(
model_name
):
def
get_model
(
model_name
):
return
MODEL_REGISTRY
.
registry
[
model_name
]
return
MODEL_REGISTRY
[
model_name
]
lm_eval/models/gpt2.py
View file @
5888a695
...
@@ -3,10 +3,8 @@ import torch
...
@@ -3,10 +3,8 @@ import torch
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
lm_eval.base
import
LM
from
lm_eval.base
import
LM
from
lm_eval
import
utils
from
lm_eval
import
utils
from
.
import
MODEL_REGISTRY
@
MODEL_REGISTRY
.
register
(
"gpt2"
)
class
GPT2LM
(
LM
):
class
GPT2LM
(
LM
):
def
__init__
(
self
,
device
=
"cpu"
):
def
__init__
(
self
,
device
=
"cpu"
):
self
.
device
=
torch
.
device
(
device
)
self
.
device
=
torch
.
device
(
device
)
...
...
lm_eval/tasks/__init__.py
View file @
5888a695
import
importlib
from
.
import
superglue
import
os
from
.
import
glue
from
lm_eval.base
import
Registry
TASK_REGISTRY
=
Registry
(
registry_name
=
"tasks"
)
TASK_REGISTRY
=
{
# Load all modules in models directory to populate registry
"cola"
:
glue
.
CoLA
,
tasks_dir
=
os
.
path
.
dirname
(
__file__
)
"mnli"
:
glue
.
MNLI
,
for
file
in
os
.
listdir
(
tasks_dir
):
"mrpc"
:
glue
.
MRPC
,
path
=
os
.
path
.
join
(
tasks_dir
,
file
)
"rte"
:
glue
.
RTE
,
if
(
"qnli"
:
glue
.
QNLI
,
not
file
.
startswith
(
'_'
)
"qqp"
:
glue
.
QQP
,
and
not
file
.
startswith
(
'.'
)
"stsb"
:
glue
.
STSB
,
and
(
file
.
endswith
(
'.py'
)
or
os
.
path
.
isdir
(
path
))
"sst"
:
glue
.
SST
,
):
"wnli"
:
glue
.
WNLI
,
module_name
=
file
[:
file
.
find
(
'.py'
)]
if
file
.
endswith
(
'.py'
)
else
file
"boolq"
:
superglue
.
BoolQ
,
module
=
importlib
.
import_module
(
'lm_eval.tasks.'
+
module_name
)
"commitmentbank"
:
superglue
.
CommitmentBank
,
"copa"
:
superglue
.
Copa
,
"wic"
:
superglue
.
WordsInContext
,
"wsc"
:
superglue
.
WinogradSchemaChallenge
,
}
ALL_TASKS
=
sorted
(
list
(
TASK_REGISTRY
.
registry
))
ALL_TASKS
=
sorted
(
list
(
TASK_REGISTRY
))
def
get_task
(
task_name
):
def
get_task
(
task_name
):
return
TASK_REGISTRY
.
registry
[
task_name
]
return
TASK_REGISTRY
[
task_name
]
def
get_task_dict
(
task_name_list
):
def
get_task_dict
(
task_name_list
):
...
...
lm_eval/tasks/coqa.py
View file @
5888a695
import
json
import
json
import
random
import
random
from
lm_eval.base
import
Dataset
from
lm_eval.base
import
Dataset
from
.
import
TASK_REGISTRY
@
TASK_REGISTRY
.
register
(
"coqa"
)
class
CoQA
(
Dataset
):
class
CoQA
(
Dataset
):
def
has_training_docs
(
self
):
def
has_training_docs
(
self
):
return
True
return
True
...
...
lm_eval/tasks/glue.py
View file @
5888a695
...
@@ -3,7 +3,6 @@ from scipy.stats import pearsonr, spearmanr
...
@@ -3,7 +3,6 @@ from scipy.stats import pearsonr, spearmanr
from
sklearn.metrics
import
f1_score
,
matthews_corrcoef
from
sklearn.metrics
import
f1_score
,
matthews_corrcoef
from
tqdm
import
auto
as
tqdm_lib
from
tqdm
import
auto
as
tqdm_lib
from
.
common
import
NLP_TASK
,
simple_accuracy_metric
,
yesno
from
.
common
import
NLP_TASK
,
simple_accuracy_metric
,
yesno
from
.
import
TASK_REGISTRY
def
get_accuracy_and_f1
(
preds
,
golds
):
def
get_accuracy_and_f1
(
preds
,
golds
):
...
@@ -23,7 +22,6 @@ def get_accuracy_and_f1(preds, golds):
...
@@ -23,7 +22,6 @@ def get_accuracy_and_f1(preds, golds):
}
}
@
TASK_REGISTRY
.
register
(
"cola"
)
class
CoLA
(
NLP_TASK
):
class
CoLA
(
NLP_TASK
):
NLP_PATH
=
"glue"
NLP_PATH
=
"glue"
NLP_NAME
=
"cola"
NLP_NAME
=
"cola"
...
@@ -66,7 +64,6 @@ class CoLA(NLP_TASK):
...
@@ -66,7 +64,6 @@ class CoLA(NLP_TASK):
}
}
@
TASK_REGISTRY
.
register
(
"mnli"
)
class
MNLI
(
NLP_TASK
):
class
MNLI
(
NLP_TASK
):
NLP_PATH
=
"glue"
NLP_PATH
=
"glue"
NLP_NAME
=
"mnli"
NLP_NAME
=
"mnli"
...
@@ -110,15 +107,14 @@ class MNLI(NLP_TASK):
...
@@ -110,15 +107,14 @@ class MNLI(NLP_TASK):
num_fewshot
=
num_fewshot
,
num_fewshot
=
num_fewshot
,
)
)
probs
=
np
.
array
([
probs
=
np
.
array
([
self
.
lm
.
loglikelihood
(
ctx
,
' True'
),
lm
.
loglikelihood
(
ctx
,
' True'
),
self
.
lm
.
loglikelihood
(
ctx
,
' Neither'
),
lm
.
loglikelihood
(
ctx
,
' Neither'
),
self
.
lm
.
loglikelihood
(
ctx
,
' False'
),
lm
.
loglikelihood
(
ctx
,
' False'
),
])
])
preds
.
append
(
np
.
argmax
(
probs
))
preds
.
append
(
np
.
argmax
(
probs
))
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
@
TASK_REGISTRY
.
register
(
"mrpc"
)
class
MRPC
(
NLP_TASK
):
class
MRPC
(
NLP_TASK
):
NLP_PATH
=
"glue"
NLP_PATH
=
"glue"
NLP_NAME
=
"mrpc"
NLP_NAME
=
"mrpc"
...
@@ -157,7 +153,6 @@ class MRPC(NLP_TASK):
...
@@ -157,7 +153,6 @@ class MRPC(NLP_TASK):
return
get_accuracy_and_f1
(
preds
=
preds
,
golds
=
golds
)
return
get_accuracy_and_f1
(
preds
=
preds
,
golds
=
golds
)
@
TASK_REGISTRY
.
register
(
"rte"
)
class
RTE
(
NLP_TASK
):
class
RTE
(
NLP_TASK
):
NLP_PATH
=
"glue"
NLP_PATH
=
"glue"
NLP_NAME
=
"rte"
NLP_NAME
=
"rte"
...
@@ -195,7 +190,6 @@ class RTE(NLP_TASK):
...
@@ -195,7 +190,6 @@ class RTE(NLP_TASK):
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
@
TASK_REGISTRY
.
register
(
"qnli"
)
class
QNLI
(
NLP_TASK
):
class
QNLI
(
NLP_TASK
):
NLP_PATH
=
"glue"
NLP_PATH
=
"glue"
NLP_NAME
=
"qnli"
NLP_NAME
=
"qnli"
...
@@ -229,11 +223,10 @@ class QNLI(NLP_TASK):
...
@@ -229,11 +223,10 @@ class QNLI(NLP_TASK):
provide_description
=
provide_description
,
provide_description
=
provide_description
,
num_fewshot
=
num_fewshot
,
num_fewshot
=
num_fewshot
,
)
)
preds
.
append
(
self
.
lm
.
loglikelihood
(
ctx
,
' False'
)
>
self
.
lm
.
loglikelihood
(
ctx
,
' True'
))
preds
.
append
(
lm
.
loglikelihood
(
ctx
,
' False'
)
>
lm
.
loglikelihood
(
ctx
,
' True'
))
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
@
TASK_REGISTRY
.
register
(
"qqp"
)
class
QQP
(
NLP_TASK
):
class
QQP
(
NLP_TASK
):
NLP_PATH
=
"glue"
NLP_PATH
=
"glue"
NLP_NAME
=
"qqp"
NLP_NAME
=
"qqp"
...
@@ -272,7 +265,6 @@ class QQP(NLP_TASK):
...
@@ -272,7 +265,6 @@ class QQP(NLP_TASK):
return
get_accuracy_and_f1
(
preds
=
preds
,
golds
=
golds
)
return
get_accuracy_and_f1
(
preds
=
preds
,
golds
=
golds
)
@
TASK_REGISTRY
.
register
(
"stsb"
)
class
STSB
(
NLP_TASK
):
class
STSB
(
NLP_TASK
):
NLP_PATH
=
"glue"
NLP_PATH
=
"glue"
NLP_NAME
=
"stsb"
NLP_NAME
=
"stsb"
...
@@ -330,7 +322,6 @@ class STSB(NLP_TASK):
...
@@ -330,7 +322,6 @@ class STSB(NLP_TASK):
}
}
@
TASK_REGISTRY
.
register
(
"sst"
)
class
SST
(
NLP_TASK
):
class
SST
(
NLP_TASK
):
NLP_PATH
=
"glue"
NLP_PATH
=
"glue"
NLP_NAME
=
"sst2"
NLP_NAME
=
"sst2"
...
@@ -368,7 +359,6 @@ class SST(NLP_TASK):
...
@@ -368,7 +359,6 @@ class SST(NLP_TASK):
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
@
TASK_REGISTRY
.
register
(
"wnli"
)
class
WNLI
(
NLP_TASK
):
class
WNLI
(
NLP_TASK
):
NLP_PATH
=
"glue"
NLP_PATH
=
"glue"
NLP_NAME
=
"wnli"
NLP_NAME
=
"wnli"
...
@@ -404,9 +394,9 @@ class WNLI(NLP_TASK):
...
@@ -404,9 +394,9 @@ class WNLI(NLP_TASK):
num_fewshot
=
num_fewshot
,
num_fewshot
=
num_fewshot
,
)
)
probs
=
np
.
array
([
probs
=
np
.
array
([
self
.
lm
.
loglikelihood
(
ctx
,
' True'
),
lm
.
loglikelihood
(
ctx
,
' True'
),
self
.
lm
.
loglikelihood
(
ctx
,
' Neither'
),
lm
.
loglikelihood
(
ctx
,
' Neither'
),
self
.
lm
.
loglikelihood
(
ctx
,
' False'
),
lm
.
loglikelihood
(
ctx
,
' False'
),
])
])
preds
.
append
(
np
.
argmax
(
probs
))
preds
.
append
(
np
.
argmax
(
probs
))
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
lm_eval/tasks/superglue.py
View file @
5888a695
import
numpy
as
np
import
numpy
as
np
from
tqdm
import
auto
as
tqdm_lib
from
tqdm
import
auto
as
tqdm_lib
from
.
common
import
NLP_TASK
,
simple_accuracy_metric
,
yesno
from
.
common
import
NLP_TASK
,
simple_accuracy_metric
,
yesno
from
.
import
TASK_REGISTRY
@
TASK_REGISTRY
.
register
(
"boolq"
)
class
BoolQ
(
NLP_TASK
):
class
BoolQ
(
NLP_TASK
):
NLP_PATH
=
"super_glue"
NLP_PATH
=
"super_glue"
NLP_NAME
=
"boolq"
NLP_NAME
=
"boolq"
...
@@ -38,7 +36,6 @@ class BoolQ(NLP_TASK):
...
@@ -38,7 +36,6 @@ class BoolQ(NLP_TASK):
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
@
TASK_REGISTRY
.
register
(
"cb"
)
class
CommitmentBank
(
NLP_TASK
):
class
CommitmentBank
(
NLP_TASK
):
NLP_PATH
=
"super_glue"
NLP_PATH
=
"super_glue"
NLP_NAME
=
"cb"
NLP_NAME
=
"cb"
...
@@ -82,7 +79,6 @@ class CommitmentBank(NLP_TASK):
...
@@ -82,7 +79,6 @@ class CommitmentBank(NLP_TASK):
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
@
TASK_REGISTRY
.
register
(
"copa"
)
class
Copa
(
NLP_TASK
):
class
Copa
(
NLP_TASK
):
NLP_PATH
=
"super_glue"
NLP_PATH
=
"super_glue"
NLP_NAME
=
"copa"
NLP_NAME
=
"copa"
...
@@ -124,8 +120,6 @@ class Copa(NLP_TASK):
...
@@ -124,8 +120,6 @@ class Copa(NLP_TASK):
return
choice
[
0
].
lower
()
+
choice
[
1
:]
return
choice
[
0
].
lower
()
+
choice
[
1
:]
@
TASK_REGISTRY
.
register
(
"wic"
)
class
WordsInContext
(
NLP_TASK
):
class
WordsInContext
(
NLP_TASK
):
NLP_PATH
=
"super_glue"
NLP_PATH
=
"super_glue"
NLP_NAME
=
"wic"
NLP_NAME
=
"wic"
...
@@ -163,7 +157,6 @@ class WordsInContext(NLP_TASK):
...
@@ -163,7 +157,6 @@ class WordsInContext(NLP_TASK):
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
return
simple_accuracy_metric
(
preds
=
preds
,
golds
=
golds
)
@
TASK_REGISTRY
.
register
(
"wsc"
)
class
WinogradSchemaChallenge
(
NLP_TASK
):
class
WinogradSchemaChallenge
(
NLP_TASK
):
NLP_PATH
=
"super_glue"
NLP_PATH
=
"super_glue"
NLP_NAME
=
"wsc"
NLP_NAME
=
"wsc"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment