Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
786d612e
Commit
786d612e
authored
May 18, 2023
by
bzantium
Browse files
modify prompt for better evaluation
parent
1f8b7f79
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
11 additions
and
19 deletions
+11
-19
lm_eval/tasks/kobest.py
lm_eval/tasks/kobest.py
+11
-19
No files found.
lm_eval/tasks/kobest.py
View file @
786d612e
...
@@ -50,13 +50,13 @@ class BoolQ(Task):
...
@@ -50,13 +50,13 @@ class BoolQ(Task):
return
self
.
dataset
[
"test"
]
return
self
.
dataset
[
"test"
]
def
doc_to_text
(
self
,
doc
):
def
doc_to_text
(
self
,
doc
):
return
"
{} 질문: {} 답변:
"
.
format
(
doc
[
"paragraph"
],
doc
[
"question"
])
return
"
다음 지문을 읽고 질문에 대하여 틀리면 아니 맞으면 예로 답하시오.
\n\n
지문: {}
\n
질문: {}
\n
정답:
"
.
format
(
doc
[
"paragraph"
],
doc
[
"question"
])
def
doc_to_target
(
self
,
doc
):
def
doc_to_target
(
self
,
doc
):
return
" {}"
.
format
({
0
:
"아니
오
"
,
1
:
"예"
}[
doc
[
"label"
]])
return
" {}"
.
format
({
0
:
"아니"
,
1
:
"예"
}[
doc
[
"label"
]])
def
construct_requests
(
self
,
doc
,
ctx
):
def
construct_requests
(
self
,
doc
,
ctx
):
ll_no
,
_
=
rf
.
loglikelihood
(
ctx
,
" 아니
오
"
)
ll_no
,
_
=
rf
.
loglikelihood
(
ctx
,
" 아니"
)
ll_yes
,
_
=
rf
.
loglikelihood
(
ctx
,
" 예"
)
ll_yes
,
_
=
rf
.
loglikelihood
(
ctx
,
" 예"
)
return
ll_no
,
ll_yes
return
ll_no
,
ll_yes
...
@@ -125,8 +125,8 @@ class COPA(Task):
...
@@ -125,8 +125,8 @@ class COPA(Task):
return
" "
+
correct_choice
return
" "
+
correct_choice
def
construct_requests
(
self
,
doc
,
ctx
):
def
construct_requests
(
self
,
doc
,
ctx
):
ll_choice1
,
_
=
rf
.
loglikelihood
(
ctx
,
" "
+
doc
[
"alternative_1"
])
ll_choice1
,
_
=
rf
.
loglikelihood
(
ctx
,
" "
+
doc
[
"alternative_1"
])
ll_choice2
,
_
=
rf
.
loglikelihood
(
ctx
,
" "
+
doc
[
"alternative_2"
])
ll_choice2
,
_
=
rf
.
loglikelihood
(
ctx
,
" "
+
doc
[
"alternative_2"
])
return
ll_choice1
,
ll_choice2
return
ll_choice1
,
ll_choice2
...
@@ -177,13 +177,13 @@ class WiC(Task):
...
@@ -177,13 +177,13 @@ class WiC(Task):
return
self
.
dataset
[
"test"
]
return
self
.
dataset
[
"test"
]
def
doc_to_text
(
self
,
doc
):
def
doc_to_text
(
self
,
doc
):
return
"
문장1: {} 문장2: {}
두 문장에서 {}가 같은
뜻으
로 쓰였
나?
"
.
format
(
doc
[
"context_1"
],
doc
[
"context_2"
],
doc
[
"word"
])
return
"
다음
두 문장에서
단어 '
{}
'
가
다른 의미로 쓰였으면 아니,
같은
의미
로 쓰였
으면 예로 답하시오.
\n\n
문장1: {}
\n
문장2: {}
\n
정답:
"
.
format
(
doc
[
"context_1"
],
doc
[
"context_2"
],
doc
[
"word"
])
def
doc_to_target
(
self
,
doc
):
def
doc_to_target
(
self
,
doc
):
return
" {}"
.
format
({
0
:
"아니
오
"
,
1
:
"예"
}[
doc
[
"label"
]])
return
" {}"
.
format
({
0
:
"
아니"
,
1
:
"
예"
}[
doc
[
"label"
]])
def
construct_requests
(
self
,
doc
,
ctx
):
def
construct_requests
(
self
,
doc
,
ctx
):
ll_no
,
_
=
rf
.
loglikelihood
(
ctx
,
" 아니
오
"
)
ll_no
,
_
=
rf
.
loglikelihood
(
ctx
,
" 아니"
)
ll_yes
,
_
=
rf
.
loglikelihood
(
ctx
,
" 예"
)
ll_yes
,
_
=
rf
.
loglikelihood
(
ctx
,
" 예"
)
return
ll_no
,
ll_yes
return
ll_no
,
ll_yes
...
@@ -248,28 +248,20 @@ class HellaSwag(MultipleChoiceTask):
...
@@ -248,28 +248,20 @@ class HellaSwag(MultipleChoiceTask):
def
process_results
(
self
,
doc
,
results
):
def
process_results
(
self
,
doc
,
results
):
pred
=
np
.
argmax
(
results
)
pred
=
np
.
argmax
(
results
)
gold
=
doc
[
"gold"
]
gold
=
doc
[
"gold"
]
acc
=
1.
if
np
.
argmax
(
results
)
==
gold
else
0.
completion_len
=
np
.
array
([
float
(
len
(
i
))
for
i
in
doc
[
"choices"
]])
acc_norm
=
1.
if
np
.
argmax
(
results
/
completion_len
)
==
gold
else
0.
return
{
return
{
"acc"
:
acc
,
"acc"
:
pred
==
gold
,
"acc_norm"
:
acc_norm
,
"macro_f1"
:
(
gold
,
pred
)
"macro_f1"
:
(
gold
,
pred
)
}
}
def
higher_is_better
(
self
):
def
higher_is_better
(
self
):
return
{
return
{
"acc"
:
True
,
"acc"
:
True
,
"acc_norm"
:
True
,
"macro_f1"
:
True
"macro_f1"
:
True
}
}
def
aggregation
(
self
):
def
aggregation
(
self
):
return
{
return
{
"acc"
:
mean
,
"acc"
:
mean
,
"acc_norm"
:
mean
,
"macro_f1"
:
macro_f1_score
"macro_f1"
:
macro_f1_score
}
}
...
@@ -300,10 +292,10 @@ class SentiNeg(Task):
...
@@ -300,10 +292,10 @@ class SentiNeg(Task):
return
self
.
dataset
[
"test"
]
return
self
.
dataset
[
"test"
]
def
doc_to_text
(
self
,
doc
):
def
doc_to_text
(
self
,
doc
):
return
"문장: {}
긍부정
:"
.
format
(
doc
[
"sentence"
])
return
"
다음 문장에서 느껴지는 감정을 답하시오.
\n\n
문장: {}
\n
정답
:"
.
format
(
doc
[
"sentence"
])
def
doc_to_target
(
self
,
doc
):
def
doc_to_target
(
self
,
doc
):
return
"
{}"
.
format
({
0
:
"부정"
,
1
:
"긍정"
}[
doc
[
"label"
]])
return
"{}"
.
format
({
0
:
"
부정"
,
1
:
"
긍정"
}[
doc
[
"label"
]])
def
construct_requests
(
self
,
doc
,
ctx
):
def
construct_requests
(
self
,
doc
,
ctx
):
ll_no
,
_
=
rf
.
loglikelihood
(
ctx
,
" 부정"
)
ll_no
,
_
=
rf
.
loglikelihood
(
ctx
,
" 부정"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment