gaoqiong / lm-evaluation-harness

Commit f88bb827, authored Sep 07, 2020 by Jason Phang
lib
Parent: cf80f340

Showing 15 changed files with 22 additions and 64 deletions (+22 −64).
hf.py                            +0  −43
lm_eval/__init__.py              +0  −0
lm_eval/base.py                  +0  −0
lm_eval/models/__init__.py       +2  −2
lm_eval/models/dummy.py          +1  −3
lm_eval/models/gpt2.py           +2  −2
lm_eval/models/gpt3.py           +4  −3
lm_eval/tasks/__init__.py        +2  −2
lm_eval/tasks/common.py          +0  −0
lm_eval/tasks/coqa.py            +1  −1
lm_eval/tasks/coqa_evaluate.py   +0  −0
lm_eval/tasks/glue.py            +0  −0
lm_eval/tasks/superglue.py       +0  −0
lm_eval/utils.py                 +0  −0
main.py                          +10 −8
hf.py (deleted, 100644 → 0)

```python
import base
import nlp


def yesno(x):
    if x:
        return 'yes'
    else:
        return 'no'


def mean(x):
    return sum(x) / len(x)


class BoolQ(base.Dataset):
    def __init__(self):
        self.dataset = nlp.load_dataset('boolq')

    def training_docs(self):
        yield from self.dataset['train']

    def validation_docs(self):
        yield from self.dataset['validation']

    def test_docs(self):
        return []

    def fewshot_prefix(self):
        return "Read the following passages and answer each question with a yes or a no."

    def doc_to_text(self, doc, include_target=True):
        return f"{doc['passage']}\nquestion: {doc['question']}\nanswer: " \
            + (yesno(doc['answer']) if include_target else "")

    def evaluate(self, docs, lm, provide_description, num_fewshot):
        acc = []
        for doc in docs:
            ctx = '\n\n'.join(map(self.doc_to_text, self.fewshot_examples(k=num_fewshot))) + '\n\n'
            ctx += self.doc_to_text(doc, include_target=False).strip()
            ctx = ((self.fewshot_description() + "\n\n") if provide_description else "") + ctx
            ans = lm.loglikelihood(ctx, 'yes') > lm.loglikelihood(ctx, 'no')
            acc.append(int(ans == doc['answer']))
        return mean(acc)
```
(no newline at end of file)
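For context on the pattern this deleted file used: the task builds a prompt from few-shot examples plus the query, then picks whichever answer string the model assigns the higher log-likelihood. A minimal runnable sketch of that comparison, with a hypothetical stand-in `DummyLM` in place of a real scorer:

```python
# Sketch of the yes/no scoring idiom from the deleted BoolQ.evaluate above.
# DummyLM is hypothetical; the harness scores with an actual language model.
class DummyLM:
    def loglikelihood(self, context, continuation):
        # Pretend 'yes' is always slightly more likely than 'no'.
        return -1.0 if continuation == 'yes' else -2.0

lm = DummyLM()
doc = {'passage': 'The sky is blue.', 'question': 'is the sky blue', 'answer': True}
ctx = f"{doc['passage']}\nquestion: {doc['question']}\nanswer:"
pred = lm.loglikelihood(ctx, 'yes') > lm.loglikelihood(ctx, 'no')
print(int(pred == doc['answer']))  # 1 when the comparison matches the gold label
```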
lm_eval/__init__.py (new file, 0 → 100644, empty)

base.py → lm_eval/base.py (file moved)
models/__init__.py → lm_eval/models/__init__.py

```diff
 import importlib
 import os
-from ..base import Registry
+from lm_eval.base import Registry

 MODEL_REGISTRY = Registry(registry_name="models")

 # Load all modules in models directory to populate registry
 ...
@@ -13,7 +13,7 @@ for file in os.listdir(models_dir):
         and (file.endswith('.py') or os.path.isdir(path))
     ):
         module_name = file[:file.find('.py')] if file.endswith('.py') else file
-        module = importlib.import_module('lm_evaluation_harness.models.' + module_name)
+        module = importlib.import_module('lm_eval.models.' + module_name)


 def get_model(model_name):
     ...
```
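The auto-import loop above is what populates MODEL_REGISTRY: importing each submodule triggers its registration side effects. The Registry class itself lives in lm_eval/base.py and is not shown in this diff, so the following is only a sketch of how a decorator-based registry of this kind typically works; the method names and shapes here are assumptions, not the harness's actual API:

```python
# Hypothetical sketch of a decorator-based registry; lm_eval.base.Registry
# is not shown in this commit, so the details below are assumed.
class Registry:
    def __init__(self, registry_name):
        self.registry_name = registry_name
        self.registry = {}

    def register(self, name):
        def decorator(cls):
            self.registry[name] = cls  # record the class under its public name
            return cls
        return decorator

    def get(self, name):
        return self.registry[name]


MODEL_REGISTRY = Registry(registry_name="models")

@MODEL_REGISTRY.register("dummy")
class DummyModel:
    pass

print(MODEL_REGISTRY.get("dummy"))  # <class '__main__.DummyModel'>
```

Under this reading, the import path fix ('lm_evaluation_harness.models.' → 'lm_eval.models.') is what lets the discovery loop find the modules after the move into the lm_eval/ package.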
models/dummy.py → lm_eval/models/dummy.py

```diff
-import transformers
-import torch
-from ..base import LM
+from lm_eval.base import LM
 from . import MODEL_REGISTRY
 ...
```
models/gpt2.py → lm_eval/models/gpt2.py

```diff
 import transformers
 import torch
 import torch.nn.functional as F
-from ..base import LM
-from .. import utils
+from lm_eval.base import LM
+from lm_eval import utils
 from . import MODEL_REGISTRY
 ...
```
models/gpt3.py → lm_eval/models/gpt3.py

```diff
 import os
 import openai
 import transformers
-from ..base import LM
-from .. import utils
+from lm_eval.base import LM
+from lm_eval import utils
 from . import MODEL_REGISTRY
 ...
@@ -15,7 +15,7 @@ class GPT3LM(LM):
     openai.api_key = os.environ["OPENAI_API_SECRET_KEY"]

     @classmethod
-    def create_from_args(cls, arg_string):
+    def create_from_arg_string(cls, arg_string):
         args = utils.simple_parse_args_string(arg_string)
         return cls(engine=args.get("engine", "davinci"))
 ...
@@ -37,6 +37,7 @@ class GPT3LM(LM):
         response = openai.Completion.create(
             engine=self.engine,
             prompt=full_text,
             echo=True,
             max_tokens=0,
             temperature=0.0,
+            logprobs=0,
         )
 ...
```
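The added logprobs=0 matters for scoring: combined with echo=True and max_tokens=0, the (legacy, pre-1.0) OpenAI Completions API generates nothing and instead echoes the prompt back with per-token log-probabilities, which is exactly what a log-likelihood comparison needs. A sketch of reading those values back, assuming the legacy openai<1.0 client and its response shape; the helper name is illustrative:

```python
# Sketch: score a text's log-likelihood with the legacy (pre-1.0) OpenAI
# Completions API, matching the echo/max_tokens=0/logprobs=0 combination above.
import openai

def sequence_logprob(engine, text):
    response = openai.Completion.create(
        engine=engine,
        prompt=text,
        echo=True,        # return logprobs for the prompt tokens themselves
        max_tokens=0,     # generate nothing
        temperature=0.0,
        logprobs=0,       # per-token logprobs, no alternative tokens
    )
    token_logprobs = response["choices"][0]["logprobs"]["token_logprobs"]
    # The first token has no conditional logprob (None); skip it when summing.
    return sum(lp for lp in token_logprobs if lp is not None)
```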
tasks/__init__.py → lm_eval/tasks/__init__.py

```diff
 import importlib
 import os
-from ..base import Registry
+from lm_eval.base import Registry

 TASK_REGISTRY = Registry(registry_name="tasks")

 # Load all modules in models directory to populate registry
 ...
@@ -13,7 +13,7 @@ for file in os.listdir(tasks_dir):
         and (file.endswith('.py') or os.path.isdir(path))
     ):
         module_name = file[:file.find('.py')] if file.endswith('.py') else file
-        module = importlib.import_module('lm_evaluation_harness.tasks.' + module_name)
+        module = importlib.import_module('lm_eval.tasks.' + module_name)

 ALL_TASKS = sorted(list(TASK_REGISTRY.registry))
 ...
```
tasks/common.py → lm_eval/tasks/common.py (file moved)

tasks/coqa.py → lm_eval/tasks/coqa.py

```diff
 import json
 import random
-from ..base import Dataset
+from lm_eval.base import Dataset
 from . import TASK_REGISTRY
 ...
```

tasks/coqa-evaluate-v1.0.py → lm_eval/tasks/coqa_evaluate.py (file moved and renamed)

tasks/glue.py → lm_eval/tasks/glue.py (file moved)

tasks/superglue.py → lm_eval/tasks/superglue.py (file moved)

utils.py → lm_eval/utils.py (file moved)
main.py

```diff
 import argparse
 import json
-import models
-import tasks
+from lm_eval import models, tasks


 def parse_args():
     parser = argparse.ArgumentParser()
 ...
@@ -10,32 +10,34 @@ def parse_args():
     parser.add_argument('--model_args', default="")
     parser.add_argument('--tasks', default="all_tasks")
     parser.add_argument('--provide_description', action="store_true")
-    parser.add_argument('--new_fewshot', action="store_true")
+    parser.add_argument('--num_fewshot', type=int, default=1)
     return parser.parse_args()


 def main():
     args = parse_args()
-    model = models.get_model(args.model).create_from_arg_string(args.model_args)
+    lm = models.get_model(args.model).create_from_arg_string(args.model_args)
     if args.tasks == "all_tasks":
         task_names = tasks.ALL_TASKS
     else:
         task_names = args.tasks.split(",")
-    task_list = {
+    task_dict = {
         task_name: tasks.get_task(task_name)()
         for task_name in task_names
     }
     results = {}
-    for task_name, task in task_list:
+    for task_name, task in task_dict.items():
         if not task.has_validation_docs():
             continue
         result = task.evaluate(
             docs=task.validation_docs(),
             lm=lm,
             provide_description=args.provide_description,
-            num_fewshot=args.new_fewshot,
+            num_fewshot=args.num_fewshot,
         )
         results[task_name] = result
     print(json.dumps(results, indent=2))


 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
```
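Two of the main.py changes are behavioral fixes rather than renames. Iterating a dict directly yields only its keys, so the old `for task_name, task in task_list:` would raise ValueError at runtime; `.items()` yields the (name, task) pairs. And `--num_fewshot` becomes a proper integer (the old `--new_fewshot` was a boolean store_true flag, yet its value was passed as a shot count). A quick illustration of the loop fix:

```python
# Iterating a dict yields keys only; unpacking a string key into two names
# raises ValueError. .items() yields (key, value) pairs, as main.py now does.
task_dict = {"boolq": object()}

try:
    for task_name, task in task_dict:
        pass
except ValueError as e:
    print("old loop fails:", e)

for task_name, task in task_dict.items():
    print("new loop ok:", task_name)
```

After this commit a run would look something like `python main.py --model gpt2 --tasks boolq --num_fewshot 1`, where the available model and task names depend on what the registries contain (the names here are illustrative).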