Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
e3fee7ea
Commit
e3fee7ea
authored
Jul 21, 2025
by
Baber
Browse files
nit
parent
3e3a0d8f
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
81 additions
and
105 deletions
+81
-105
lm_eval/api/task.py
lm_eval/api/task.py
+81
-105
No files found.
lm_eval/api/task.py
View file @
e3fee7ea
from
__future__
import
annotations
import
abc
import
ast
import
logging
...
...
@@ -8,12 +10,7 @@ from copy import deepcopy
from
typing
import
(
TYPE_CHECKING
,
Any
,
Dict
,
List
,
Literal
,
Optional
,
Tuple
,
Union
,
)
import
datasets
...
...
@@ -54,23 +51,23 @@ class Task(abc.ABC):
{"question": ..., question, answer)
"""
VERSION
:
Optional
[
Union
[
int
,
str
]]
=
None
VERSION
:
int
|
str
|
None
=
None
# The name of the `Task` benchmark as denoted in the HuggingFace datasets Hub
# or a path to a custom `datasets` loading script.
DATASET_PATH
:
Optional
[
str
]
=
None
DATASET_PATH
:
str
|
None
=
None
# The name of a subset within `DATASET_PATH`.
DATASET_NAME
:
Optional
[
str
]
=
None
DATASET_NAME
:
str
|
None
=
None
OUTPUT_TYPE
:
Optional
[
OutputType
]
=
None
OUTPUT_TYPE
:
OutputType
|
None
=
None
def
__init__
(
self
,
data_dir
:
Optional
[
str
]
=
None
,
cache_dir
:
Optional
[
str
]
=
None
,
download_mode
:
Optional
[
datasets
.
DownloadMode
]
=
None
,
config
:
Optional
[
Mapping
]
=
None
,
# Union[dict, TaskConfig]
data_dir
:
str
|
None
=
None
,
cache_dir
:
str
|
None
=
None
,
download_mode
:
datasets
.
DownloadMode
|
None
=
None
,
config
:
Mapping
|
None
=
None
,
# Union[dict, TaskConfig]
)
->
None
:
"""
:param data_dir: str
...
...
@@ -94,21 +91,21 @@ class Task(abc.ABC):
Fresh download and fresh dataset.
"""
self
.
download
(
data_dir
,
cache_dir
,
download_mode
)
self
.
_training_docs
:
Optional
[
list
]
=
None
self
.
_fewshot_docs
:
Optional
[
list
]
=
None
self
.
_instances
:
Optional
[
L
ist
[
Instance
]
]
=
None
self
.
_training_docs
:
list
|
None
=
None
self
.
_fewshot_docs
:
list
|
None
=
None
self
.
_instances
:
l
ist
[
Instance
]
|
None
=
None
self
.
_config
:
TaskConfig
=
TaskConfig
.
from_yaml
({
**
config
})
self
.
_filters
=
[
build_filter_ensemble
(
"none"
,
[(
"take_first"
,
None
)])]
self
.
fewshot_rnd
:
Optional
[
random
.
Random
]
=
(
self
.
fewshot_rnd
:
random
.
Random
|
None
=
(
None
# purposely induce errors in case of improper usage
)
def
download
(
self
,
data_dir
:
Optional
[
str
]
=
None
,
cache_dir
:
Optional
[
str
]
=
None
,
data_dir
:
str
|
None
=
None
,
cache_dir
:
str
|
None
=
None
,
download_mode
=
None
,
)
->
None
:
"""Downloads and returns the task dataset.
...
...
@@ -235,7 +232,7 @@ class Task(abc.ABC):
pass
@
abc
.
abstractmethod
def
doc_to_target
(
self
,
doc
:
dict
)
->
Union
[
str
,
int
]
:
def
doc_to_target
(
self
,
doc
:
dict
)
->
str
|
int
:
pass
# not an abstractmethod because not every language-only task has to implement this
...
...
@@ -251,16 +248,16 @@ class Task(abc.ABC):
def
build_all_requests
(
self
,
*
,
limit
:
Union
[
int
,
None
]
=
None
,
samples
:
Optional
[
L
ist
[
int
]
]
=
None
,
limit
:
int
|
None
=
None
,
samples
:
l
ist
[
int
]
|
None
=
None
,
rank
:
int
=
0
,
world_size
:
int
=
1
,
cache_requests
:
bool
=
False
,
rewrite_requests_cache
:
bool
=
False
,
system_instruction
:
Optional
[
str
]
=
None
,
system_instruction
:
str
|
None
=
None
,
apply_chat_template
:
bool
=
False
,
fewshot_as_multiturn
:
bool
=
False
,
chat_template
:
Optional
[
Callable
]
=
None
,
chat_template
:
Callable
|
None
=
None
,
tokenizer_name
:
str
=
""
,
)
->
None
:
"""Build a set of Instances for a task, and store them in task.instances"""
...
...
@@ -362,7 +359,7 @@ class Task(abc.ABC):
save_to_cache
(
file_name
=
cache_key
,
obj
=
instances
)
@
abc
.
abstractmethod
def
construct_requests
(
self
,
doc
:
dict
,
ctx
:
Union
[
list
[
dict
]
,
str
]
,
**
kwargs
):
def
construct_requests
(
self
,
doc
:
dict
,
ctx
:
list
[
dict
]
|
str
,
**
kwargs
):
"""Uses RequestFactory to construct Requests and returns an iterable of
Requests which will be sent to the LM.
...
...
@@ -402,7 +399,7 @@ class Task(abc.ABC):
A dictionary where keys are the names of submetrics and values are
functions that aggregate a list of metric scores
"""
pass
return
True
@
deprecated
(
"not used anymore"
)
def
higher_is_better
(
self
):
...
...
@@ -411,7 +408,7 @@ class Task(abc.ABC):
A dictionary where keys are the names of submetrics and values are
whether a higher value of the submetric is better
"""
pass
return
True
def
get_config
(
self
,
key
:
str
)
->
Any
:
return
getattr
(
self
.
_config
,
key
,
None
)
...
...
@@ -485,7 +482,7 @@ class Task(abc.ABC):
example
=
self
.
doc_to_text
(
doc
)
return
description
+
labeled_examples
+
example
def
apply_filters
(
self
)
->
Optional
[
L
ist
[
Instance
]
]
:
def
apply_filters
(
self
)
->
l
ist
[
Instance
]
|
None
:
"""Iterates over FilterEnsembles and applies them to instances"""
if
hasattr
(
self
,
"_filters"
):
for
f
in
self
.
_filters
:
...
...
@@ -530,13 +527,13 @@ class Task(abc.ABC):
self
.
_config
.
metric_list
=
[
MetricConfig
(
name
=
metric_name
)]
self
.
_config
.
process_results
=
lambda
*
args
:
{
"bypass"
:
0
}
def
set_fewshot_seed
(
self
,
seed
:
Optional
[
int
]
=
None
)
->
None
:
def
set_fewshot_seed
(
self
,
seed
:
int
|
None
=
None
)
->
None
:
self
.
fewshot_rnd
=
random
.
Random
(
seed
)
if
hasattr
(
self
,
"sampler"
):
self
.
sampler
.
rnd
=
self
.
fewshot_rnd
@
property
def
eval_docs
(
self
)
->
Union
[
datasets
.
Dataset
,
Iterable
[
dict
]
]
:
def
eval_docs
(
self
)
->
datasets
.
Dataset
|
Iterable
[
dict
]:
if
self
.
has_test_docs
():
return
self
.
test_docs
()
elif
self
.
has_validation_docs
():
...
...
@@ -550,13 +547,13 @@ class Task(abc.ABC):
self
,
*
,
rank
:
int
=
0
,
limit
:
Union
[
int
,
None
]
=
None
,
limit
:
int
|
None
=
None
,
world_size
:
int
=
1
,
samples
:
Optional
[
L
ist
[
int
]
]
=
None
,
)
->
Iterator
[
T
uple
[
int
,
Any
]]:
samples
:
l
ist
[
int
]
|
None
=
None
,
)
->
Iterator
[
t
uple
[
int
,
Any
]]:
if
samples
:
n
=
len
(
self
.
eval_docs
)
assert
all
(
[
e
<
n
for
e
in
samples
]
),
(
assert
all
(
e
<
n
for
e
in
samples
),
(
f
"Elements of --samples should be in the interval [0,k-1] where k is the number of total examples. In this case, k=
{
n
}
."
)
eval_logger
.
info
(
...
...
@@ -589,7 +586,7 @@ class ConfigurableTask(Task):
data_dir
=
None
,
cache_dir
=
None
,
download_mode
=
None
,
config
:
Optional
[
dict
]
=
None
,
config
:
dict
|
None
=
None
,
)
->
None
:
# Get pre-configured attributes
self
.
_config
=
self
.
CONFIG
...
...
@@ -607,9 +604,8 @@ class ConfigurableTask(Task):
"Must pass a config to ConfigurableTask, either in cls.CONFIG or `config` kwarg"
)
if
isinstance
(
self
.
config
.
metadata
,
dict
):
if
"version"
in
self
.
config
.
metadata
:
self
.
VERSION
=
self
.
config
.
metadata
[
"version"
]
if
isinstance
(
self
.
config
.
metadata
,
dict
)
and
"version"
in
self
.
config
.
metadata
:
self
.
VERSION
=
self
.
config
.
metadata
[
"version"
]
if
self
.
config
.
output_type
is
not
None
:
if
self
.
config
.
output_type
not
in
ALL_OUTPUT_TYPES
:
...
...
@@ -695,18 +691,13 @@ class ConfigurableTask(Task):
else
:
test_target
=
str
(
test_target
)
if
test_choice
is
not
None
:
check_choices
=
test_choice
else
:
check_choices
=
[
test_target
]
check_choices
=
test_choice
if
test_choice
is
not
None
else
[
test_target
]
if
self
.
config
.
doc_to_choice
is
not
None
:
for
choice
in
check_choices
:
choice_has_whitespace
=
True
if
choice
[
0
].
isspace
()
else
False
choice_has_whitespace
=
choice
[
0
].
isspace
()
delimiter_has_whitespace
=
(
True
if
self
.
config
.
target_delimiter
.
rstrip
()
self
.
config
.
target_delimiter
.
rstrip
()
!=
self
.
config
.
target_delimiter
else
False
)
if
delimiter_has_whitespace
and
choice_has_whitespace
:
...
...
@@ -718,9 +709,7 @@ class ConfigurableTask(Task):
f
'Both target_delimiter "
{
self
.
config
.
target_delimiter
}
" and target choice: "
{
choice
}
" do not have whitespace, ignore if the language you are evaluating on does not require/use whitespace'
)
def
download
(
self
,
dataset_kwargs
:
Optional
[
Dict
[
str
,
Any
]]
=
None
,
**
kwargs
)
->
None
:
def
download
(
self
,
dataset_kwargs
:
dict
[
str
,
Any
]
|
None
=
None
,
**
kwargs
)
->
None
:
self
.
config
.
dataset_kwargs
,
self
.
config
.
metadata
=
(
self
.
config
.
dataset_kwargs
or
{},
self
.
config
.
metadata
or
{},
...
...
@@ -739,24 +728,15 @@ class ConfigurableTask(Task):
)
def
has_training_docs
(
self
)
->
bool
:
if
self
.
config
.
training_split
is
not
None
:
return
True
else
:
return
False
return
self
.
config
.
training_split
is
not
None
def
has_validation_docs
(
self
)
->
bool
:
if
self
.
config
.
validation_split
is
not
None
:
return
True
else
:
return
False
return
self
.
config
.
validation_split
is
not
None
def
has_test_docs
(
self
)
->
bool
:
if
self
.
config
.
test_split
is
not
None
:
return
True
else
:
return
False
return
self
.
config
.
test_split
is
not
None
def
training_docs
(
self
)
->
Optional
[
datasets
.
Dataset
]
:
def
training_docs
(
self
)
->
datasets
.
Dataset
|
None
:
if
self
.
has_training_docs
():
if
self
.
config
.
process_docs
is
not
None
:
return
self
.
config
.
process_docs
(
...
...
@@ -764,7 +744,7 @@ class ConfigurableTask(Task):
)
return
self
.
dataset
[
self
.
config
.
training_split
]
def
validation_docs
(
self
)
->
Optional
[
datasets
.
Dataset
]
:
def
validation_docs
(
self
)
->
datasets
.
Dataset
|
None
:
if
self
.
has_validation_docs
():
if
self
.
config
.
process_docs
is
not
None
:
return
self
.
config
.
process_docs
(
...
...
@@ -772,7 +752,7 @@ class ConfigurableTask(Task):
)
return
self
.
dataset
[
self
.
config
.
validation_split
]
def
test_docs
(
self
)
->
Optional
[
datasets
.
Dataset
]
:
def
test_docs
(
self
)
->
datasets
.
Dataset
|
None
:
if
self
.
has_test_docs
():
if
self
.
config
.
process_docs
is
not
None
:
return
self
.
config
.
process_docs
(
self
.
dataset
[
self
.
config
.
test_split
])
...
...
@@ -785,22 +765,25 @@ class ConfigurableTask(Task):
return
docs
# Fallback to parent implementation
if
_num_fewshot
:
=
self
.
config
.
num_fewshot
:
if
isinstance
(
_num_fewshot
,
int
)
and
_num_fewshot
>
0
:
eval_logger
.
warning
(
f
"[Task:
{
self
.
config
.
task
}
] "
"num_fewshot > 0 but no fewshot source configured. "
"Using preconfigured rule."
)
if
(
(
_num_fewshot
:
=
self
.
config
.
num_fewshot
)
and
isinstance
(
_num_fewshot
,
int
)
and
_num_fewshot
>
0
):
eval_logger
.
warning
(
f
"[Task:
{
self
.
config
.
task
}
] "
"num_fewshot > 0 but no fewshot source configured. "
"Using preconfigured rule."
)
return
super
().
fewshot_docs
()
@
staticmethod
def
append_target_question
(
labeled_examples
:
L
ist
[
D
ict
[
str
,
str
]],
labeled_examples
:
l
ist
[
d
ict
[
str
,
str
]],
question
:
str
,
fewshot_as_multiturn
:
bool
=
False
,
gen_prefix
:
Optional
[
str
]
=
None
,
gen_prefix
:
str
|
None
=
None
,
)
->
None
:
"""Adds a target question to the labeled examples list.
If fewshot_as_multiturn is True, or labeled_examples is empty, or the last entry is a system turn, appends the question as a new user entry.
...
...
@@ -824,12 +807,12 @@ class ConfigurableTask(Task):
self
,
doc
:
dict
,
num_fewshot
:
int
,
system_instruction
:
Optional
[
str
]
=
None
,
system_instruction
:
str
|
None
=
None
,
apply_chat_template
:
bool
=
False
,
fewshot_as_multiturn
:
bool
=
False
,
chat_template
:
Optional
[
Callable
]
=
None
,
gen_prefix
:
Optional
[
str
]
=
None
,
)
->
Union
[
str
,
L
ist
[
str
]
,
None
]
:
chat_template
:
Callable
|
None
=
None
,
gen_prefix
:
str
|
None
=
None
,
)
->
str
|
l
ist
[
str
]
|
None
:
"""Returns a fewshot context string that is made up of a prepended description
(if provided), the `num_fewshot` number of examples, and an appended prompt example.
...
...
@@ -850,10 +833,7 @@ class ConfigurableTask(Task):
:returns: str
The fewshot context.
"""
if
apply_chat_template
:
labeled_examples
=
[]
else
:
labeled_examples
=
""
labeled_examples
=
[]
if
apply_chat_template
else
""
# get task description
if
description
:
=
self
.
config
.
description
:
...
...
@@ -923,7 +903,7 @@ class ConfigurableTask(Task):
labeled_examples_list
.
append
(
chat_template
(
chat
,
add_generation_prompt
=
False
if
gen_prefix
else
True
,
add_generation_prompt
=
not
gen_prefix
,
)
)
return
labeled_examples_list
...
...
@@ -947,7 +927,7 @@ class ConfigurableTask(Task):
# return lm.apply_chat_template(labeled_examples)
return
chat_template
(
labeled_examples
,
add_generation_prompt
=
False
if
gen_prefix
else
True
,
add_generation_prompt
=
not
gen_prefix
,
)
else
:
prefix
=
(
...
...
@@ -968,7 +948,7 @@ class ConfigurableTask(Task):
else
:
return
labeled_examples
+
str
(
example
)
+
prefix
def
apply_filters
(
self
)
->
Optional
[
L
ist
[
Instance
]
]
:
def
apply_filters
(
self
)
->
l
ist
[
Instance
]
|
None
:
"""Iterates over FilterEnsembles and applies them to instances"""
if
hasattr
(
self
,
"_filters"
):
for
f
in
self
.
_filters
:
...
...
@@ -1008,9 +988,7 @@ class ConfigurableTask(Task):
"""
return
doc
def
doc_to_text
(
self
,
doc
:
dict
,
doc_to_text
:
Union
[
int
,
str
,
Callable
,
None
]
=
None
):
def
doc_to_text
(
self
,
doc
:
dict
,
doc_to_text
:
int
|
str
|
Callable
|
None
=
None
):
# if self.prompt is not None:
# doc_to_text = self.prompt
if
doc_to_text
is
not
None
:
...
...
@@ -1046,9 +1024,7 @@ class ConfigurableTask(Task):
print
(
type
(
doc_to_text
))
raise
TypeError
def
doc_to_target
(
self
,
doc
:
dict
,
doc_to_target
=
None
)
->
Union
[
int
,
str
,
list
[
int
]]:
def
doc_to_target
(
self
,
doc
:
dict
,
doc_to_target
=
None
)
->
int
|
str
|
list
[
int
]:
# if self.prompt is not None:
# doc_to_target = self.prompt
if
doc_to_target
is
not
None
:
...
...
@@ -1097,8 +1073,8 @@ class ConfigurableTask(Task):
def
doc_to_choice
(
self
,
doc
:
dict
,
doc_to_choice
:
Union
[
str
,
list
,
dict
,
Callable
[...,
list
[
str
]]
,
None
]
=
None
,
)
->
L
ist
[
str
]:
doc_to_choice
:
str
|
list
|
dict
|
Callable
[...,
list
[
str
]]
|
None
=
None
,
)
->
l
ist
[
str
]:
# if self.prompt is not None:
# doc_to_choice = self.prompt
if
doc_to_choice
is
not
None
:
...
...
@@ -1125,7 +1101,7 @@ class ConfigurableTask(Task):
else
:
raise
TypeError
def
doc_to_image
(
self
,
doc
:
dict
,
doc_to_image
=
None
)
->
Union
[
int
,
str
,
list
,
None
]
:
def
doc_to_image
(
self
,
doc
:
dict
,
doc_to_image
=
None
)
->
int
|
str
|
list
|
None
:
if
doc_to_image
is
not
None
:
doc_to_image
=
doc_to_image
elif
self
.
config
.
doc_to_image
is
not
None
:
...
...
@@ -1148,7 +1124,7 @@ class ConfigurableTask(Task):
else
:
return
None
def
doc_to_audio
(
self
,
doc
:
Any
,
doc_to_audio
=
None
)
->
Union
[
int
,
str
,
list
,
None
]
:
def
doc_to_audio
(
self
,
doc
:
Any
,
doc_to_audio
=
None
)
->
int
|
str
|
list
|
None
:
if
doc_to_audio
is
not
None
:
doc_to_audio
=
doc_to_audio
elif
self
.
config
.
doc_to_audio
is
not
None
:
...
...
@@ -1171,7 +1147,7 @@ class ConfigurableTask(Task):
else
:
return
None
def
doc_to_prefix
(
self
,
doc
:
dict
)
->
Optional
[
str
]
:
def
doc_to_prefix
(
self
,
doc
:
dict
)
->
str
|
None
:
if
(
gen_prefix
:
=
self
.
config
.
gen_prefix
)
is
not
None
:
if
gen_prefix
in
self
.
features
:
return
doc
[
gen_prefix
]
...
...
@@ -1181,7 +1157,7 @@ class ConfigurableTask(Task):
def
construct_requests
(
self
,
doc
:
dict
,
ctx
:
str
,
**
kwargs
)
->
Union
[
L
ist
[
Instance
]
,
Instance
]
:
)
->
l
ist
[
Instance
]
|
Instance
:
apply_chat_template
=
kwargs
.
pop
(
"apply_chat_template"
,
False
)
chat_template
:
Callable
|
None
=
kwargs
.
pop
(
"chat_template"
,
None
)
...
...
@@ -1317,7 +1293,7 @@ class ConfigurableTask(Task):
elif
self
.
OUTPUT_TYPE
==
"multiple_choice"
:
lls
,
is_greedy
=
zip
(
*
results
)
# retrieve choices in
L
ist[str] form, to compute choice lengths, etc.
# retrieve choices in
l
ist[str] form, to compute choice lengths, etc.
choices
=
self
.
doc_to_choice
(
doc
)
completion_len
=
np
.
array
([
float
(
len
(
i
))
for
i
in
choices
])
...
...
@@ -1364,7 +1340,7 @@ class ConfigurableTask(Task):
if
self
.
multiple_target
:
acc
=
1.0
if
pred
in
gold
else
0.0
acc_norm
=
1.0
if
pred_norm
in
gold
else
0.0
exact_match
=
int
(
any
(
[
is_greedy
[
i
]
if
i
!=
-
100
else
0
for
i
in
gold
]
))
exact_match
=
int
(
any
(
is_greedy
[
i
]
if
i
!=
-
100
else
0
for
i
in
gold
))
else
:
acc
=
1.0
if
pred
==
gold
else
0.0
acc_norm
=
1.0
if
pred_norm
==
gold
else
0.0
...
...
@@ -1406,7 +1382,7 @@ class ConfigurableTask(Task):
# it assumes that doc_to_target returns a number.
choices
=
self
.
doc_to_choice
(
doc
)
gold
=
choices
[
gold
]
for
metric
in
self
.
_metric_fn_list
.
keys
()
:
for
metric
in
self
.
_metric_fn_list
:
try
:
result_score
=
self
.
_metric_fn_list
[
metric
](
references
=
[
gold
]
if
not
isinstance
(
gold
,
list
)
else
gold
,
...
...
@@ -1440,7 +1416,7 @@ class ConfigurableTask(Task):
return
getattr
(
self
.
_config
,
key
,
None
)
@
property
def
task_name
(
self
)
->
Optional
[
str
]
:
def
task_name
(
self
)
->
str
|
None
:
return
getattr
(
self
.
config
,
"task"
,
None
)
def
__repr__
(
self
):
...
...
@@ -1458,7 +1434,7 @@ class MultipleChoiceTask(Task):
def
doc_to_target
(
self
,
doc
:
dict
)
->
str
:
return
" "
+
doc
[
"choices"
][
doc
[
"gold"
]]
def
construct_requests
(
self
,
doc
:
dict
,
ctx
:
str
,
**
kwargs
)
->
L
ist
[
Instance
]:
def
construct_requests
(
self
,
doc
:
dict
,
ctx
:
str
,
**
kwargs
)
->
l
ist
[
Instance
]:
# TODO: add mutual info here?
return
[
Instance
(
...
...
@@ -1471,7 +1447,7 @@ class MultipleChoiceTask(Task):
for
i
,
choice
in
enumerate
(
doc
[
"choices"
])
]
def
process_results
(
self
,
doc
:
dict
,
results
:
Iterable
[
T
uple
[
float
,
bool
]])
->
dict
:
def
process_results
(
self
,
doc
:
dict
,
results
:
Iterable
[
t
uple
[
float
,
bool
]])
->
dict
:
results
=
[
res
[
0
]
for
res
in
results
]
# only retain loglikelihoods, discard is_greedy TODO: do we need is_greedy anywhere?
...
...
@@ -1505,7 +1481,7 @@ class PerplexityTask(Task):
def
has_training_docs
(
self
)
->
bool
:
return
False
def
fewshot_examples
(
self
,
k
:
int
,
rnd
)
->
L
ist
:
def
fewshot_examples
(
self
,
k
:
int
,
rnd
)
->
l
ist
:
if
k
!=
0
:
raise
ValueError
(
"The number of fewshot examples must be 0 for perplexity tasks."
...
...
@@ -1536,7 +1512,7 @@ class PerplexityTask(Task):
def
doc_to_target
(
self
,
doc
):
return
doc
def
construct_requests
(
self
,
doc
:
dict
,
ctx
:
Optional
[
str
]
,
**
kwargs
):
def
construct_requests
(
self
,
doc
:
dict
,
ctx
:
str
|
None
,
**
kwargs
):
if
bool
(
ctx
):
raise
ValueError
...
...
@@ -1548,7 +1524,7 @@ class PerplexityTask(Task):
**
kwargs
,
)
def
process_results
(
self
,
doc
:
dict
,
results
:
T
uple
[
float
])
->
dict
:
def
process_results
(
self
,
doc
:
dict
,
results
:
t
uple
[
float
])
->
dict
:
(
loglikelihood
,)
=
results
words
=
self
.
count_words
(
self
.
doc_to_target
(
doc
))
bytes_
=
self
.
count_bytes
(
self
.
doc_to_target
(
doc
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment