Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
e28e1c8b
Commit
e28e1c8b
authored
Feb 05, 2025
by
Baber
Browse files
add judge filter
parent
bbfab74a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
8 additions
and
2 deletions
+8
-2
lm_eval/filters/judge.py
lm_eval/filters/judge.py
+8
-2
No files found.
lm_eval/filters/judge.py
View file @
e28e1c8b
...
@@ -3,6 +3,7 @@ import os
...
@@ -3,6 +3,7 @@ import os
from
lm_eval.api.filter
import
Filter
from
lm_eval.api.filter
import
Filter
from
lm_eval.api.registry
import
register_filter
from
lm_eval.api.registry
import
register_filter
from
lm_eval.models.openai_completions
import
LocalChatCompletion
from
lm_eval.models.openai_completions
import
LocalChatCompletion
from
lm_eval.utils
import
eval_logger
@
register_filter
(
"judge"
)
@
register_filter
(
"judge"
)
...
@@ -53,7 +54,7 @@ class JudgeFilter(Filter):
...
@@ -53,7 +54,7 @@ class JudgeFilter(Filter):
Your response must be exactly "yes", "no", or "unknown", with no additional explanation!
Your response must be exactly "yes", "no", or "unknown", with no additional explanation!
"""
"""
def
__init__
(
self
,
url
,
**
kwargs
)
->
None
:
def
__init__
(
self
,
url
,
model
,
**
kwargs
)
->
None
:
"""
"""
pass a string `regex` to run `re.compile(r"regex")` on.
pass a string `regex` to run `re.compile(r"regex")` on.
`fallback` defines the output returned if no matches for the regex are located.
`fallback` defines the output returned if no matches for the regex are located.
...
@@ -61,7 +62,12 @@ class JudgeFilter(Filter):
...
@@ -61,7 +62,12 @@ class JudgeFilter(Filter):
assert
os
.
environ
.
get
(
"AI_API_KEY"
)
is
not
None
,
(
assert
os
.
environ
.
get
(
"AI_API_KEY"
)
is
not
None
,
(
"Please set the AI_API_KEY environment variable to use the JudgeFilter (can be empty string)"
"Please set the AI_API_KEY environment variable to use the JudgeFilter (can be empty string)"
)
)
self
.
model
=
LocalChatCompletion
(
base_url
=
url
,
**
kwargs
)
eval_logger
.
info
(
"Pass num_concurrent=N to --metadata to set the number of concurrent requests for the JudgeFilter"
)
self
.
model
=
LocalChatCompletion
(
base_url
=
url
,
pretrained
=
model
,
num_concurrent
=
2
,
**
kwargs
)
def
apply
(
self
,
resps
:
list
[
list
[
str
]],
docs
:
list
[
dict
])
->
list
[
list
[
str
]]:
def
apply
(
self
,
resps
:
list
[
list
[
str
]],
docs
:
list
[
dict
])
->
list
[
list
[
str
]]:
inputs
=
[
inputs
=
[
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment