gaoqiong / lm-evaluation-harness · Commits

Commit b6f38ac8
Authored Jul 18, 2025 by Baber
Commit message: remove prompt-source for now
Parent: bd028848
Showing 2 changed files with 40 additions and 38 deletions:

  lm_eval/api/task.py     +39 -37
  lm_eval/config/task.py  +1 -1
lm_eval/api/task.py (+39 -37)
@@ -31,7 +31,6 @@ from lm_eval.caching.cache import load_from_cache, save_to_cache
 from lm_eval.config.metric import MetricConfig
 from lm_eval.config.task import TaskConfig
 from lm_eval.filters import build_filter_ensemble
-from lm_eval.prompts import get_prompt


 ALL_OUTPUT_TYPES = [
@@ -421,12 +420,12 @@ class Task(abc.ABC):
         return getattr(self._config, key, None)

     @classmethod
-    def count_bytes(cls, doc) -> int:
+    def count_bytes(cls, doc: str) -> int:
         """Used for byte-level perplexity metrics in rolling loglikelihood"""
         return len(doc.encode("utf-8"))

     @classmethod
-    def count_words(cls, doc) -> int:
+    def count_words(cls, doc: str) -> int:
         """Downstream loglikelihood_rolling perplexity tasks with custom word boundaries should override this!"""
         return len(re.split(r"\s+", doc))
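For context on how these per-document counts feed rolling-loglikelihood perplexity metrics, here is a minimal, self-contained sketch. It is illustrative only; the aggregation function and variable names below are assumptions, not harness code. The idea: sum the negative log-likelihood over all documents and divide by the total word or UTF-8 byte count.

    import math
    import re


    def count_bytes(doc: str) -> int:
        # Same counting rule as Task.count_bytes above: UTF-8 byte length.
        return len(doc.encode("utf-8"))


    def count_words(doc: str) -> int:
        # Same counting rule as Task.count_words above: whitespace-split tokens.
        return len(re.split(r"\s+", doc))


    def aggregate_perplexities(loglikelihoods: list[float], docs: list[str]) -> dict:
        # Illustrative aggregation: divide total negative log-likelihood (nats)
        # by total words / bytes to get word perplexity, byte perplexity, and
        # bits-per-byte.
        total_nll = -sum(loglikelihoods)
        total_words = sum(count_words(d) for d in docs)
        total_bytes = sum(count_bytes(d) for d in docs)
        return {
            "word_perplexity": math.exp(total_nll / total_words),
            "byte_perplexity": math.exp(total_nll / total_bytes),
            "bits_per_byte": total_nll / (math.log(2) * total_bytes),
        }


    if __name__ == "__main__":
        docs = ["The quick brown fox.", "jumps over the lazy dog"]
        # Hypothetical per-document log-likelihoods summed over tokens.
        print(aggregate_perplexities([-12.3, -15.1], docs))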
@@ -647,13 +646,13 @@ class ConfigurableTask(Task):
         self._filters = self.config.get_filters

-        if self.config.use_prompt is not None:
-            eval_logger.info(f"loading prompt {self.config.use_prompt}")
-            self.prompt = get_prompt(
-                self.config.use_prompt, self.DATASET_PATH, self.DATASET_NAME
-            )
-        else:
-            self.prompt = None
+        # if self.config.use_prompt is not None:
+        #     eval_logger.info(f"loading prompt {self.config.use_prompt}")
+        #     self.prompt = get_prompt(
+        #         self.config.use_prompt, self.DATASET_PATH, self.DATASET_NAME
+        #     )
+        # else:
+        #     self.prompt = None

         if (
             self.config.fewshot_cfg.num_fewshot() > 0
@@ -666,7 +665,7 @@ class ConfigurableTask(Task):
             self.task_docs = self.eval_docs

         # Test One Doc
-        self.features = list(self.task_docs.features.keys())
+        self.features: list[str] = list(self.task_docs.features.keys())
         self.multiple_input = 0
         self.multiple_target = 0
         test_doc = self.task_docs[0]
@@ -1012,10 +1011,12 @@ class ConfigurableTask(Task):
         """
         return doc

-    def doc_to_text(self, doc: dict, doc_to_text: Union[int, str, Callable] = None):
-        if self.prompt is not None:
-            doc_to_text = self.prompt
-        elif doc_to_text is not None:
+    def doc_to_text(
+        self, doc: dict, doc_to_text: Union[int, str, Callable, None] = None
+    ):
+        # if self.prompt is not None:
+        #     doc_to_text = self.prompt
+        if doc_to_text is not None:
             doc_to_text = doc_to_text
         else:
             doc_to_text = self.config.doc_to_text
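The signature change above also makes the implicit-Optional default explicit by adding None to the Union. A small sketch of the difference (not repository code; the function names are placeholders):

    from typing import Callable, Optional, Union

    # Old style: the default is None, but the annotation does not say so
    # ("implicit Optional"); strict type checkers reject this.
    def to_text_old(doc: dict, doc_to_text: Union[int, str, Callable] = None): ...

    # New style used in the commit: None is part of the declared Union.
    def to_text_new(doc: dict, doc_to_text: Union[int, str, Callable, None] = None): ...

    # Equivalent spelling with Optional.
    def to_text_alt(doc: dict, doc_to_text: Optional[Union[int, str, Callable]] = None): ...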
@@ -1037,21 +1038,21 @@ class ConfigurableTask(Task):
         elif callable(doc_to_text):
             return doc_to_text(doc)
         # Used when applying a Promptsource template
-        elif hasattr(doc_to_text, "apply"):
-            applied_prompt = doc_to_text.apply(doc)
-            if len(applied_prompt) == 2:
-                return applied_prompt[0]
-            else:
-                eval_logger.warning("Applied prompt returns empty string")
-                return self.config.fewshot_delimiter
+        # elif hasattr(doc_to_text, "apply"):
+        #     applied_prompt = doc_to_text.apply(doc)
+        #     if len(applied_prompt) == 2:
+        #         return applied_prompt[0]
+        #     else:
+        #         eval_logger.warning("Applied prompt returns empty string")
+        #         return self.config.fewshot_delimiter
         else:
             print(type(doc_to_text))
             raise TypeError

     def doc_to_target(self, doc: dict, doc_to_target=None) -> Union[int, str, list]:
-        if self.prompt is not None:
-            doc_to_target = self.prompt
-        elif doc_to_target is not None:
+        # if self.prompt is not None:
+        #     doc_to_target = self.prompt
+        if doc_to_target is not None:
             doc_to_target = doc_to_target
         else:
             doc_to_target = self.config.doc_to_target
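The branch disabled in this hunk (and the matching one for doc_to_target in the next hunk) duck-types a Promptsource-style template: any object with an apply(doc) method returning a two-element [input_text, target] list. A hypothetical stand-in showing that protocol (FakeTemplate and render are illustrative names, not part of the harness or the promptsource package):

    class FakeTemplate:
        # Hypothetical stand-in: apply(doc) returns [input_text, target].
        def apply(self, doc: dict) -> list:
            return [f"Question: {doc['question']}\nAnswer:", doc["answer"]]


    def render(doc: dict, template) -> tuple:
        # Mirrors the disabled branch: anything exposing .apply() is treated as
        # a template; element 0 is the prompt text, element 1 the target.
        if hasattr(template, "apply"):
            applied = template.apply(doc)
            if len(applied) == 2:
                return applied[0], applied[1]
        raise TypeError(f"unsupported template type: {type(template)!r}")


    print(render({"question": "2 + 2?", "answer": "4"}, FakeTemplate()))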
@@ -1083,26 +1084,27 @@ class ConfigurableTask(Task):
             return doc_to_target
         elif callable(doc_to_target):
             return doc_to_target(doc)
-        # Used when applying a Promptsource template
-        elif hasattr(doc_to_target, "apply"):
-            applied_prompt = doc_to_target.apply(doc)
-            if len(applied_prompt) == 2:
-                return applied_prompt[1]
-            else:
-                eval_logger.warning("Applied prompt returns empty string")
-                return self.config.fewshot_delimiter
+        # # Used when applying a Promptsource template
+        # elif hasattr(doc_to_target, "apply"):
+        #     applied_prompt = doc_to_target.apply(doc)
+        #     if len(applied_prompt) == 2:
+        #         return applied_prompt[1]
+        #     else:
+        #         eval_logger.warning("Applied prompt returns empty string")
+        #         return self.config.fewshot_delimiter
         else:
             raise TypeError

     def doc_to_choice(
-        self, doc: dict, doc_to_choice: Union[str, list, dict] = None
+        self, doc: dict, doc_to_choice: Union[str, list, dict, None] = None
     ) -> List[str]:
-        if self.prompt is not None:
-            doc_to_choice = self.prompt
-        elif doc_to_choice is not None:
+        # if self.prompt is not None:
+        #     doc_to_choice = self.prompt
+        if doc_to_choice is not None:
             doc_to_choice = doc_to_choice
         elif self.config.doc_to_choice is None:
             eval_logger.error("doc_to_choice was called but not set in config")
+            doc_to_choice = None
         else:
             doc_to_choice = self.config.doc_to_choice
lm_eval/config/task.py (+1 -1)
@@ -167,7 +167,7 @@ class TaskConfig(dict):
     )
     # by default, not used in the code. allows for users to pass arbitrary info to tasks

     _metric_list: list[MetricConfig] = field(default_factory=list)
-    _filter_list: list[FilterConfig] = None
+    _filter_list: list[FilterConfig] = field(default_factory=list)
     # ds_cfg: DatasetConfig = field(init=False)
     fewshot_cfg: FewshotConfig = field(init=False)
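The one-line change above replaces a None default with field(default_factory=list). A short sketch of why that matters for dataclasses (TaskConfigSketch and the FilterConfig stand-in here are illustrative, not the harness's classes):

    from dataclasses import dataclass, field


    @dataclass
    class FilterConfig:
        # Stand-in for lm_eval's FilterConfig, only for this illustration.
        name: str = "none"


    @dataclass
    class TaskConfigSketch:
        # `= []` is rejected by dataclasses (a mutable default would be shared
        # across instances), and `= None` forces downstream `is None` checks
        # before iterating. `field(default_factory=list)` gives every instance
        # its own empty list.
        _filter_list: list[FilterConfig] = field(default_factory=list)


    a, b = TaskConfigSketch(), TaskConfigSketch()
    a._filter_list.append(FilterConfig("take_first"))
    assert b._filter_list == []      # no shared state between instances
    for f in a._filter_list:         # no None-check needed
        print(f.name)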