Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
c3f724cf
Commit
c3f724cf
authored
Feb 08, 2021
by
Leo Gao
Browse files
Change glue and superglue prompts
parent
1050109b
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
41 additions
and
31 deletions
+41
-31
lm_eval/tasks/glue.py
lm_eval/tasks/glue.py
+17
-17
lm_eval/tasks/superglue.py
lm_eval/tasks/superglue.py
+12
-12
lm_eval/utils.py
lm_eval/utils.py
+11
-1
main.py
main.py
+1
-1
No files found.
lm_eval/tasks/glue.py
View file @
c3f724cf
...
...
@@ -3,7 +3,7 @@ from lm_eval.base import rf, mean, f1_score, matthews_corrcoef
from
scipy.stats
import
pearsonr
,
spearmanr
from
tqdm
import
auto
as
tqdm_lib
from
.
common
import
HFTask
,
yesno
from
..utils
import
general_detokenize
# Single-Sentence Tasks
...
...
@@ -22,10 +22,10 @@ class CoLA(HFTask):
return
True
def
fewshot_description
(
self
):
return
"Does this sentence make sense?
:
\t
True or False
?
"
return
"Does this sentence make sense?
(
True or False
)
"
def
doc_to_text
(
self
,
doc
):
return
"
Sentence: {}
\n
Answer:"
.
format
(
doc
[
"sentence"
])
return
"
{}
\n
Question: Does this sentence make sense?
\n
Answer:"
.
format
(
doc
[
"sentence"
])
def
doc_to_target
(
self
,
doc
):
return
" {}"
.
format
({
1
:
"True"
,
0
:
"False"
}[
doc
[
"label"
]])
...
...
@@ -71,8 +71,8 @@ class SST(HFTask):
return
"Indicate if each sentence is Positive or Negative."
def
doc_to_text
(
self
,
doc
):
return
"
sentence:
\t
{}
\t
\n
a
nswer:"
.
format
(
doc
[
"sentence"
],
return
"
{}
\n
Question: Is this sentence Positive or Negative?
\n
A
nswer:"
.
format
(
general_detokenize
(
doc
[
"sentence"
]
)
,
)
def
doc_to_target
(
self
,
doc
):
...
...
@@ -127,9 +127,9 @@ class MNLI(HFTask):
return
self
.
data
[
"test_matched"
]
def
doc_to_text
(
self
,
doc
):
return
"{}
\n
q
uestion:
\t
{}
\t
True, False or Neither?
\n
a
nswer:"
.
format
(
return
"{}
\n
Q
uestion:
{}
True, False or Neither?
\n
A
nswer:"
.
format
(
doc
[
"premise"
],
doc
[
"hypothesis"
],
doc
[
"hypothesis"
]
+
(
''
if
doc
[
"hypothesis"
].
endswith
(
'.'
)
else
'.'
)
,
)
def
doc_to_target
(
self
,
doc
):
...
...
@@ -187,7 +187,7 @@ class QNLI(HFTask):
return
True
def
doc_to_text
(
self
,
doc
):
return
"
question:
\t
{}
\n
response:
\t
{}
\n
Does this answer the question
, Yes or No?
:"
.
format
(
return
"
{}
\n
{}
\n
Question:
Does this
response
answer the question
?
\n
Answer
:"
.
format
(
doc
[
"question"
],
doc
[
"sentence"
],
)
...
...
@@ -235,7 +235,7 @@ class WNLI(HFTask):
return
True
def
doc_to_text
(
self
,
doc
):
return
"{}
\n
q
uestion:
\t
{}
\t
True, False or Neither?
\n
a
nswer:"
.
format
(
return
"{}
\n
Q
uestion:
{}
True, False or Neither?
\n
A
nswer:"
.
format
(
doc
[
"sentence1"
],
doc
[
"sentence2"
],
)
...
...
@@ -284,7 +284,7 @@ class RTE(HFTask):
return
True
def
doc_to_text
(
self
,
doc
):
return
"{}
\n
q
uestion:
\t
{}
\t
True or False?
\n
a
nswer:"
.
format
(
return
"{}
\n
Q
uestion:
{}
True or False?
\n
A
nswer:"
.
format
(
doc
[
"sentence1"
],
doc
[
"sentence2"
],
)
...
...
@@ -338,17 +338,17 @@ class MRPC(HFTask):
return
"Indicate if both sentences mean the same thing."
def
doc_to_text
(
self
,
doc
):
return
"
s
entence 1:
\t
{}
\n
s
entence 2:
\t
{}
\n
a
nswer:"
.
format
(
doc
[
"sentence1"
],
doc
[
"sentence2"
],
return
"
S
entence 1:
{}
\n
S
entence 2:
{}
\n
Question: Do both sentences mean the same thing?
\n
A
nswer:"
.
format
(
general_detokenize
(
doc
[
"sentence1"
]
)
,
general_detokenize
(
doc
[
"sentence2"
]
)
,
)
def
doc_to_target
(
self
,
doc
):
return
" {}"
.
format
(
yesno
(
doc
[
"label"
]))
def
construct_requests
(
self
,
doc
,
ctx
):
ll_yes
,
_
=
rf
.
loglikelihood
(
ctx
,
"
y
es"
)
ll_no
,
_
=
rf
.
loglikelihood
(
ctx
,
"
n
o"
)
ll_yes
,
_
=
rf
.
loglikelihood
(
ctx
,
"
Y
es"
)
ll_no
,
_
=
rf
.
loglikelihood
(
ctx
,
"
N
o"
)
return
ll_yes
,
ll_no
def
process_results
(
self
,
doc
,
results
):
...
...
@@ -390,7 +390,7 @@ class QQP(HFTask):
return
"Indicate if both questions ask the same thing."
def
doc_to_text
(
self
,
doc
):
return
"
q
uestion 1:
\t
{}
\n
q
uestion 2:
\t
{}
\n
a
nswer:"
.
format
(
return
"
Q
uestion 1:
{}
\n
Q
uestion 2:
{}
\n
Question: Do both questions ask the same thing?
\n
A
nswer:"
.
format
(
doc
[
"question1"
],
doc
[
"question2"
],
)
...
...
@@ -443,7 +443,7 @@ class STSB(HFTask):
"where 5 means identical and 0 means unrelated."
def
doc_to_text
(
self
,
doc
):
return
"sentence 1:
\t
{}
\n
sentence 2:
\t
{}
\n
a
nswer:"
.
format
(
return
"sentence 1:
{}
\n
sentence 2:
{}
\n
A
nswer:"
.
format
(
doc
[
"sentence1"
],
doc
[
"sentence2"
],
)
...
...
lm_eval/tasks/superglue.py
View file @
c3f724cf
...
...
@@ -28,7 +28,7 @@ class BoolQ(HFTask):
return
"Read the following passages and answer each question with a yes or a no."
def
doc_to_text
(
self
,
doc
):
return
f
"
{
doc
[
'passage'
]
}
\n
q
uestion:
{
doc
[
'question'
]
}
\n
a
nswer:"
return
f
"
{
doc
[
'passage'
]
}
\n
Q
uestion:
{
doc
[
'question'
]
}
\n
A
nswer:"
def
doc_to_target
(
self
,
doc
):
return
" "
+
yesno
(
doc
[
'label'
])
...
...
@@ -80,7 +80,7 @@ class CommitmentBank(HFTask):
"to the truth of the hypothesis. The three possible labels are true, false or neither."
def
doc_to_text
(
self
,
doc
):
return
"{}
\n
q
uestion: {}
t
rue,
f
alse or
n
either?
\n
a
nswer:"
.
format
(
return
"{}
\n
Q
uestion: {}
. T
rue,
F
alse or
N
either?
\n
A
nswer:"
.
format
(
doc
[
"premise"
],
doc
[
"hypothesis"
],
)
...
...
@@ -89,12 +89,12 @@ class CommitmentBank(HFTask):
# True = entailment
# False = contradiction
# Neither = neutral
return
" {}"
.
format
({
0
:
"
t
rue"
,
1
:
"
n
either"
,
2
:
"
f
alse"
}[
doc
[
"label"
]])
return
" {}"
.
format
({
0
:
"
T
rue"
,
1
:
"
N
either"
,
2
:
"
F
alse"
}[
doc
[
"label"
]])
def
construct_requests
(
self
,
doc
,
ctx
):
ll_true
,
_
=
rf
.
loglikelihood
(
ctx
,
'
t
rue'
)
ll_neither
,
_
=
rf
.
loglikelihood
(
ctx
,
'
n
either'
)
ll_false
,
_
=
rf
.
loglikelihood
(
ctx
,
'
f
alse'
)
ll_true
,
_
=
rf
.
loglikelihood
(
ctx
,
'
T
rue'
)
ll_neither
,
_
=
rf
.
loglikelihood
(
ctx
,
'
N
either'
)
ll_false
,
_
=
rf
.
loglikelihood
(
ctx
,
'
F
alse'
)
return
ll_true
,
ll_neither
,
ll_false
...
...
@@ -214,15 +214,15 @@ class MultiRC(HFTask):
return
"READING COMPREHENSION ANSWER KEY"
def
doc_to_text
(
self
,
doc
):
return
f
"
{
doc
[
'paragraph'
]
}
\n
\n
{
doc
[
'question'
]
}
\n
"
return
f
"
{
doc
[
'paragraph'
]
}
\n
Question:
{
doc
[
'question'
]
}
\n
Answer:
"
def
doc_to_target
(
self
,
doc
):
return
self
.
format_answer
(
answer
=
doc
[
"answer"
],
label
=
doc
[
"label"
])
@
staticmethod
def
format_answer
(
answer
,
label
):
label_str
=
"
True
"
if
label
else
"
False
"
return
f
"
[
{
label_str
}
]
{
answer
}
"
label_str
=
"
Yes
"
if
label
else
"
No
"
return
f
"
{
label_str
}
,
{
answer
}
"
def
construct_requests
(
self
,
doc
,
ctx
):
true_choice
=
self
.
format_answer
(
answer
=
doc
[
"answer"
],
label
=
True
)
...
...
@@ -364,8 +364,8 @@ class WordsInContext(HFTask):
return
""
def
doc_to_text
(
self
,
doc
):
return
"
{}
\n
{}
\n
Question: Is the word '{}' used in the same way in the"
\
" two sentences above?
\n
a
nswer:"
.
format
(
return
"
Sentence 1: {}
\n
Sentence 2:
{}
\n
Question: Is the word '{}' used in the same way in the"
\
" two sentences above?
\n
A
nswer:"
.
format
(
doc
[
"sentence1"
],
doc
[
"sentence2"
],
doc
[
"sentence1"
][
doc
[
"start1"
]:
doc
[
"end1"
]],
...
...
@@ -438,7 +438,7 @@ class SGWinogradSchemaChallenge(HFTask):
# NOTE: HuggingFace span indices are word-based not character-based.
pre
=
" "
.
join
(
raw_passage
.
split
()[:
doc
[
"span2_index"
]])
post
=
raw_passage
[
len
(
pre
)
+
len
(
doc
[
"span2_text"
])
+
1
:]
passage
=
pre
+
" *{}*"
.
format
(
doc
[
'span2_text'
])
+
post
passage
=
general_detokenize
(
pre
+
" *{}*"
.
format
(
doc
[
'span2_text'
])
+
post
)
noun
=
doc
[
"span1_text"
]
pronoun
=
doc
[
"span2_text"
]
text
=
(
...
...
lm_eval/utils.py
View file @
c3f724cf
import
os
import
re
class
ExitCodeError
(
Exception
):
...
...
@@ -39,4 +40,13 @@ def chunks(iter, n):
yield
arr
arr
=
[]
if
arr
:
yield
arr
\ No newline at end of file
if
arr
:
yield
arr
def general_detokenize(string):
    """Undo common whitespace artifacts left by word-level tokenization.

    The following rewrites are applied in order:

    * ``" n't"`` is glued back onto the preceding word (``do n't`` -> ``don't``).
    * The space before ``)`` and the space after ``(`` are removed.
    * Spaces directly adjacent to a double quote are removed.
    * A single space before ``'``, ``.`` or ``,`` is removed.

    Args:
        string: The tokenized text to clean up.

    Returns:
        The text with the spacing rewrites above applied.
    """
    string = string.replace(" n't", "n't")
    string = string.replace(" )", ")")
    string = string.replace("( ", "(")
    # NOTE(review): the rendered diff shows both quote literals as a bare
    # `\"` on each side, which would make these two calls no-ops. The space
    # placement is reconstructed by analogy with the parenthesis rules
    # above -- confirm against the repository. As written, a space on either
    # side of ANY double quote is dropped, not only the inner side.
    string = string.replace("\" ", "\"")
    string = string.replace(" \"", "\"")
    # Pull apostrophes, periods and commas back onto the preceding token.
    string = re.sub(r" (['.,])", r"\1", string)
    return string
\ No newline at end of file
main.py
View file @
c3f724cf
...
...
@@ -16,7 +16,7 @@ def parse_args():
parser
.
add_argument
(
'--model_args'
,
default
=
""
)
parser
.
add_argument
(
'--tasks'
,
default
=
"all_tasks"
)
parser
.
add_argument
(
'--provide_description'
,
action
=
"store_true"
)
parser
.
add_argument
(
'--num_fewshot'
,
type
=
int
,
default
=
1
)
parser
.
add_argument
(
'--num_fewshot'
,
type
=
int
,
default
=
0
)
parser
.
add_argument
(
'--seed'
,
type
=
int
,
default
=
1234
)
parser
.
add_argument
(
'--output_path'
,
default
=
None
)
parser
.
add_argument
(
'--limit'
,
type
=
int
,
default
=
None
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment