Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
a541bfb9
Commit
a541bfb9
authored
Jun 27, 2024
by
Nathan Habib
Browse files
cleanup
parent
62a7b945
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
27 additions
and
23 deletions
+27
-23
lm_eval/__main__.py
lm_eval/__main__.py
+10
-4
lm_eval/api/task.py
lm_eval/api/task.py
+17
-19
No files found.
lm_eval/__main__.py
View file @
a541bfb9
...
@@ -348,11 +348,17 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
...
@@ -348,11 +348,17 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
# Respect user's value passed in via CLI, otherwise default to True and add to comma-separated model args
# Respect user's value passed in via CLI, otherwise default to True and add to comma-separated model args
if
args
.
trust_remote_code
:
if
args
.
trust_remote_code
:
os
.
environ
[
"HF_DATASETS_TRUST_REMOTE_CODE"
]
=
str
(
args
.
trust_remote_code
)
eval_logger
.
info
(
args
.
model_args
=
(
"Passed `--trust_remote_code`, setting environment variable `HF_DATASETS_TRUST_REMOTE_CODE=true`"
args
.
model_args
+
f
",trust_remote_code=
{
os
.
environ
[
'HF_DATASETS_TRUST_REMOTE_CODE'
]
}
"
)
)
# HACK: import datasets and override its HF_DATASETS_TRUST_REMOTE_CODE value internally,
# because it's already been determined based on the prior env var before launching our
# script--`datasets` gets imported by lm_eval internally before these lines can update the env.
import
datasets
datasets
.
config
.
HF_DATASETS_TRUST_REMOTE_CODE
=
True
args
.
model_args
=
args
.
model_args
+
",trust_remote_code=True"
eval_logger
.
info
(
f
"Selected Tasks:
{
task_names
}
"
)
eval_logger
.
info
(
f
"Selected Tasks:
{
task_names
}
"
)
...
...
lm_eval/api/task.py
View file @
a541bfb9
...
@@ -67,9 +67,9 @@ class TaskConfig(dict):
...
@@ -67,9 +67,9 @@ class TaskConfig(dict):
training_split
:
Optional
[
str
]
=
None
training_split
:
Optional
[
str
]
=
None
validation_split
:
Optional
[
str
]
=
None
validation_split
:
Optional
[
str
]
=
None
test_split
:
Optional
[
str
]
=
None
test_split
:
Optional
[
str
]
=
None
fewshot_split
:
Optional
[
fewshot_split
:
Optional
[
str
]
=
(
str
None
# TODO: assert that this not None if num_fewshot > 0. (?) assert if this is same split as one evaling (?)
]
=
None
# TODO: assert that this not None if num_fewshot > 0. (?) assert if this is same split as one evaling (?
)
)
# formatting / prompting options.
# formatting / prompting options.
# see docs/advanced_task_guide.md for more info
# see docs/advanced_task_guide.md for more info
process_docs
:
Optional
[
Callable
]
=
None
process_docs
:
Optional
[
Callable
]
=
None
...
@@ -92,9 +92,9 @@ class TaskConfig(dict):
...
@@ -92,9 +92,9 @@ class TaskConfig(dict):
filter_list
:
Optional
[
Union
[
str
,
list
]]
=
None
filter_list
:
Optional
[
Union
[
str
,
list
]]
=
None
should_decontaminate
:
bool
=
False
should_decontaminate
:
bool
=
False
doc_to_decontamination_query
:
Optional
[
str
]
=
None
doc_to_decontamination_query
:
Optional
[
str
]
=
None
metadata
:
Optional
[
metadata
:
Optional
[
dict
]
=
(
dict
None
# by default, not used in the code. allows for users to pass arbitrary info to tasks
]
=
None
# by default, not used in the code. allows for users to pass arbitrary info to tasks
)
def
__post_init__
(
self
)
->
None
:
def
__post_init__
(
self
)
->
None
:
if
self
.
generation_kwargs
is
not
None
:
if
self
.
generation_kwargs
is
not
None
:
...
@@ -229,9 +229,9 @@ class Task(abc.ABC):
...
@@ -229,9 +229,9 @@ class Task(abc.ABC):
self
.
_config
:
TaskConfig
=
TaskConfig
({
**
config
})
if
config
else
TaskConfig
()
self
.
_config
:
TaskConfig
=
TaskConfig
({
**
config
})
if
config
else
TaskConfig
()
self
.
_filters
=
[
build_filter_ensemble
(
"none"
,
[[
"take_first"
,
None
]])]
self
.
_filters
=
[
build_filter_ensemble
(
"none"
,
[[
"take_first"
,
None
]])]
self
.
fewshot_rnd
:
Optional
[
self
.
fewshot_rnd
:
Optional
[
random
.
Random
]
=
(
random
.
Random
None
# purposely induce errors in case of improper usage
]
=
None
# purposely induce errors in case of improper usage
)
def
download
(
def
download
(
self
,
self
,
...
@@ -368,14 +368,14 @@ class Task(abc.ABC):
...
@@ -368,14 +368,14 @@ class Task(abc.ABC):
def
build_all_requests
(
def
build_all_requests
(
self
,
self
,
*
,
*
,
limit
=
None
,
limit
:
Union
[
int
,
None
]
=
None
,
rank
=
None
,
rank
:
int
=
None
,
world_size
=
None
,
world_size
:
int
=
None
,
cache_requests
=
False
,
cache_requests
:
bool
=
False
,
rewrite_requests_cache
=
False
,
rewrite_requests_cache
:
bool
=
False
,
system_instruction
=
None
,
system_instruction
:
Optional
[
str
]
=
None
,
apply_chat_template
=
False
,
apply_chat_template
:
bool
=
False
,
fewshot_as_multiturn
=
False
,
fewshot_as_multiturn
:
bool
=
False
,
lm
=
None
,
lm
=
None
,
)
->
None
:
)
->
None
:
"""Build a set of Instances for a task, and store them in task.instances"""
"""Build a set of Instances for a task, and store them in task.instances"""
...
@@ -1050,8 +1050,6 @@ class ConfigurableTask(Task):
...
@@ -1050,8 +1050,6 @@ class ConfigurableTask(Task):
System instruction to be applied to the prompt.
System instruction to be applied to the prompt.
:param apply_chat_template: bool
:param apply_chat_template: bool
Whether to apply the chat template to the fewshot context.
Whether to apply the chat template to the fewshot context.
:param tokenizer:
The tokenizer to use for applying the chat template.
:param fewshot_as_multiturn: bool
:param fewshot_as_multiturn: bool
Whether to provide the fewshot examples as a multiturn conversation or a single user turn.
Whether to provide the fewshot examples as a multiturn conversation or a single user turn.
:param lm:
:param lm:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment