gaoqiong / lm-evaluation-harness · Commits

Commit 2106fbeb, authored Jan 15, 2025 by Baber

    Merge branch 'main' into mathvista

    # Conflicts:
    #   lm_eval/models/openai_completions.py

Parents: 4354fe46, 703fbffd
Changes: 574 · Showing 14 changed files on this page, with 148 additions and 29 deletions (+148 -29)
lm_eval/tasks/xquad/xquad_hi.yaml                 +4   -0
lm_eval/tasks/xquad/xquad_ro.yaml                 +4   -0
lm_eval/tasks/xquad/xquad_ru.yaml                 +4   -0
lm_eval/tasks/xquad/xquad_th.yaml                 +4   -0
lm_eval/tasks/xquad/xquad_tr.yaml                 +4   -0
lm_eval/tasks/xquad/xquad_vi.yaml                 +4   -0
lm_eval/tasks/xquad/xquad_zh.yaml                 +4   -0
lm_eval/utils.py                                  +10  -5
pyproject.toml                                    +7   -2
scripts/clean_training_data/generate_13_grams.py  +1   -1
scripts/zeno_visualize.py                         +25  -13
tests/models/test_api.py                          +3   -3
tests/models/test_gptqmodel.py                    +54  -0
tests/test_tasks.py                               +20  -5
lm_eval/tasks/xquad/xquad_hi.yaml (new file, mode 100755)

include: xquad_common_yaml
task: xquad_hi
dataset_name: xquad.hi
doc_to_text: "प्रसंग: {{context}}\n\nसवाल: {{question}}\n\nउत्तर:"
lm_eval/tasks/xquad/xquad_ro.yaml (new file, mode 100755)

include: xquad_common_yaml
task: xquad_ro
dataset_name: xquad.ro
doc_to_text: "Context: {{context}}\n\nÎntrebare: {{question}}\n\nRăspuns:"
lm_eval/tasks/xquad/xquad_ru.yaml (new file, mode 100755)

include: xquad_common_yaml
task: xquad_ru
dataset_name: xquad.ru
doc_to_text: "Контекст: {{context}}\n\nВопрос: {{question}}\n\nОтвет:"
lm_eval/tasks/xquad/xquad_th.yaml (new file, mode 100755)

include: xquad_common_yaml
task: xquad_th
dataset_name: xquad.th
doc_to_text: "บริบท: {{context}}\n\nคำถาม: {{question}}\n\nคำตอบ:"
lm_eval/tasks/xquad/xquad_tr.yaml (new file, mode 100755)

include: xquad_common_yaml
task: xquad_tr
dataset_name: xquad.tr
doc_to_text: "Bağlam: {{context}}\n\nSoru: {{question}}\n\nCevap:"
lm_eval/tasks/xquad/xquad_vi.yaml (new file, mode 100755)

include: xquad_common_yaml
task: xquad_vi
dataset_name: xquad.vi
doc_to_text: "Bối cảnh: {{context}}\n\nCâu hỏi: {{question}}\n\nTrả lời:"
lm_eval/tasks/xquad/xquad_zh.yaml (new file, mode 100755)

include: xquad_common_yaml
task: xquad_zh
dataset_name: xquad.zh
doc_to_text: "语境: {{context}}\n\n问题: {{question}}\n\n回答:"
lm_eval/utils.py

@@ -10,7 +10,7 @@ import os
 import re
 from dataclasses import asdict, is_dataclass
 from itertools import islice
-from typing import Any, Callable, List
+from typing import Any, Callable, Generator, List, Tuple

 import numpy as np
 import yaml

@@ -104,7 +104,8 @@ def simple_parse_args_string(args_string):
         return {}
     arg_list = [arg for arg in args_string.split(",") if arg]
-    args_dict = {
-        k: handle_arg_string(v) for k, v in [arg.split("=") for arg in arg_list]
-    }
+    args_dict = {
+        kv[0]: handle_arg_string("=".join(kv[1:]))
+        for kv in [arg.split("=") for arg in arg_list]
+    }
     return args_dict

@@ -201,7 +202,9 @@ def get_sample_results_filenames(filenames: List[str]) -> List[str]:
     return [f for f in filenames if "/samples_" in f and ".json" in f]


-def get_rolling_token_windows(token_list, prefix_token, max_seq_len, context_len):
+def get_rolling_token_windows(
+    token_list: List[int], prefix_token: int, max_seq_len: int, context_len: int
+) -> Generator[Tuple[List[int], List[int]], None, None]:
     """
     - context_len allows for a rolling window context, allowing each prediction window to potentially
       condition on some context

@@ -228,7 +231,7 @@ def get_rolling_token_windows(token_list, prefix_token, max_seq_len, context_len
     # Special handling for first window: predict all tokens
     first_seq_len = min(max_seq_len, len(token_list))
-    yield ([prefix_token] + token_list[: first_seq_len - 1], token_list[:first_seq_len])
+    yield [prefix_token] + token_list[: first_seq_len - 1], token_list[:first_seq_len]
     predicted += first_seq_len

     while predicted < len(token_list):

@@ -242,7 +245,9 @@ def get_rolling_token_windows(token_list, prefix_token, max_seq_len, context_len
         predicted += window_pred_len


-def make_disjoint_window(pair):
+def make_disjoint_window(
+    pair: Tuple[List[int], List[int]],
+) -> Tuple[List[int], List[int]]:
     """Takes output from get_rolling_token_windows and makes the context not overlap with the continuation"""
     a, b = pair
     return a[: len(a) - (len(b) - 1)], b
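The simple_parse_args_string change is behavioral, not just cosmetic: the old dict comprehension unpacked exactly two values per arg.split("="), so any value that itself contained an "=" raised ValueError. A rough sketch of the before/after behavior, with handle_arg_string reduced to an identity for brevity (the real helper also coerces bools and numbers):

# Rough sketch of the parsing change; handle_arg_string is simplified away.
def parse_old(args_string: str) -> dict:
    arg_list = [arg for arg in args_string.split(",") if arg]
    # Fails with ValueError when a value itself contains "=".
    return {k: v for k, v in [arg.split("=") for arg in arg_list]}

def parse_new(args_string: str) -> dict:
    arg_list = [arg for arg in args_string.split(",") if arg]
    # Everything after the first "=" is kept as the value.
    return {kv[0]: "=".join(kv[1:]) for kv in [arg.split("=") for arg in arg_list]}

print(parse_new("pretrained=EleutherAI/pythia-160m,dtype=float32"))
# {'pretrained': 'EleutherAI/pythia-160m', 'dtype': 'float32'}
print(parse_new("pretrained=m,extra=key=value"))
# {'pretrained': 'm', 'extra': 'key=value'}
# parse_old("pretrained=m,extra=key=value") raises ValueError instead.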
pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "lm_eval"
-version = "0.4.5"
+version = "0.4.7"
 authors = [
     {name="EleutherAI", email="contact@eleuther.ai"}
 ]

@@ -16,7 +16,7 @@ classifiers = [
     "License :: OSI Approved :: MIT License",
     "Operating System :: OS Independent",
 ]
-requires-python = ">=3.8"
+requires-python = ">=3.9"
 license = { "text" = "MIT" }
 dependencies = [
     "accelerate>=0.26.0",

@@ -62,6 +62,7 @@ dev = ["pytest", "pytest-cov", "pytest-xdist", "pre-commit", "mypy"]
 deepsparse = ["deepsparse-nightly[llm]>=1.8.0.20240404"]
 gptq = ["auto-gptq[triton]>=0.6.0"]
 hf_transfer = ["hf_transfer"]
+ibm_watsonx_ai = ["ibm_watsonx_ai>=1.1.22"]
 ifeval = ["langdetect", "immutabledict", "nltk>=3.9.1"]
 neuronx = ["optimum[neuronx]"]
 mamba = ["mamba_ssm", "causal-conv1d==1.0.2"]

@@ -75,12 +76,15 @@ testing = ["pytest", "pytest-cov", "pytest-xdist"]
 vllm = ["vllm>=0.4.2"]
 zeno = ["pandas", "zeno-client"]
 wandb = ["wandb>=0.16.3", "pandas", "numpy"]
+gptqmodel = ["gptqmodel>=1.0.9"]
+japanese_leaderboard = ["emoji==2.14.0", "neologdn==0.5.3", "fugashi[unidic-lite]", "rouge_score>=0.1.2"]
 all = [
     "lm_eval[anthropic]",
     "lm_eval[dev]",
     "lm_eval[deepsparse]",
     "lm_eval[gptq]",
     "lm_eval[hf_transfer]",
+    "lm_eval[ibm_watsonx_ai]",
     "lm_eval[ifeval]",
     "lm_eval[mamba]",
     "lm_eval[math]",

@@ -93,6 +97,7 @@ all = [
     "lm_eval[vllm]",
     "lm_eval[zeno]",
     "lm_eval[wandb]",
+    "lm_eval[japanese_leaderboard]",
 ]

 [tool.ruff.lint]
scripts/clean_training_data/generate_13_grams.py

@@ -55,7 +55,7 @@ def yield_pile(start_offsets=None, checkpoint_offset=None):
         print(
             "We expect the pile archives to be in the 'pile' directory, but this was not found."
         )
-        raise Exception("Pile directory not found.")
+        raise FileNotFoundError("Pile directory not found.")
     files = list(sorted(glob.glob(os.path.join(directory, "*.jsonl.zst*"))))
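Raising FileNotFoundError instead of a bare Exception lets callers trap the missing-directory case specifically. A hypothetical caller-side sketch, assuming it runs inside this script where yield_pile is defined; handle_document is a made-up stand-in:

# Hypothetical handling enabled by the narrower exception type.
# handle_document is a made-up stand-in for whatever consumes each document.
try:
    for doc in yield_pile():
        handle_document(doc)
except FileNotFoundError:
    print("Place the Pile archives in the 'pile' directory and retry.")
    raise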
scripts/zeno_visualize.py

@@ -109,13 +109,14 @@ def main():
         if model_index == 0:  # Only need to assemble data for the first model
             metrics = []
             for metric in config["metric_list"]:
-                metrics.append(
-                    ZenoMetric(
-                        name=metric["metric"],
-                        type="mean",
-                        columns=[metric["metric"]],
-                    )
-                )
+                if metric.get("aggregation") == "mean":
+                    metrics.append(
+                        ZenoMetric(
+                            name=metric["metric"],
+                            type="mean",
+                            columns=[metric["metric"]],
+                        )
+                    )
             project = client.create_project(
                 name=args.project_name + (f"_{task}" if len(tasks) > 1 else ""),
                 view="text-classification",

@@ -168,7 +169,11 @@ def generate_dataset(
     Returns:
         pd.Dataframe: A dataframe that is ready to be uploaded to Zeno.
     """
-    ids = [x["doc_id"] for x in data]
+    ids = (
+        [x["doc_id"] for x in data]
+        if not config.get("filter_list")
+        else [f"{x['doc_id']}.{x['filter']}" for x in data]
+    )
     labels = [x["target"] for x in data]
     instance = [""] * len(ids)

@@ -190,6 +195,7 @@ def generate_dataset(
     return pd.DataFrame(
         {
             "id": ids,
+            "doc_id": [x["doc_id"] for x in data],
             "data": instance,
             "input_len": [len(x) for x in instance],
             "labels": labels,

@@ -208,8 +214,15 @@ def generate_system_df(data, config):
     Returns:
         pd.Dataframe: A dataframe that is ready to be uploaded to Zeno as a system.
     """
-    ids = [x["doc_id"] for x in data]
+    ids = (
+        [x["doc_id"] for x in data]
+        if not config.get("filter_list")
+        else [f"{x['doc_id']}.{x['filter']}" for x in data]
+    )
     system_dict = {"id": ids}
+    system_dict["doc_id"] = [x["doc_id"] for x in data]
+    if config.get("filter_list"):
+        system_dict["filter"] = [x["filter"] for x in data]
     system_dict["output"] = [""] * len(ids)
     if config["output_type"] == "loglikelihood":

@@ -228,11 +241,10 @@ def generate_system_df(data, config):
     system_dict["output"] = [str(x["filtered_resps"][0]) for x in data]
     system_dict["output_length"] = [len(str(x["filtered_resps"][0])) for x in data]
-    metrics = {}
-    for metric in config["metric_list"]:
-        if "aggregation" in metric and metric["aggregation"] == "mean":
-            metrics[metric["metric"]] = [x[metric["metric"]] for x in data]
+    metrics = {
+        metric["metric"]: [x[metric["metric"]] for x in data]
+        for metric in config["metric_list"]
+    }
     system_dict.update(metrics)
     system_df = pd.DataFrame(system_dict)
     return system_df
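The recurring ids expression in generate_dataset and generate_system_df exists because a task with a filter_list yields one row per (document, filter) pair, and Zeno needs unique row ids; suffixing the filter name keeps those rows distinct. A small sketch of the effect, with invented sample data:

# Sketch: id disambiguation when a task defines filter_list (sample data invented).
data = [
    {"doc_id": 0, "filter": "strict-match"},
    {"doc_id": 0, "filter": "flexible-extract"},
    {"doc_id": 1, "filter": "strict-match"},
    {"doc_id": 1, "filter": "flexible-extract"},
]
ids = [f"{x['doc_id']}.{x['filter']}" for x in data]
print(ids)
# ['0.strict-match', '0.flexible-extract', '1.strict-match', '1.flexible-extract']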
tests/models/test_api.py

@@ -63,13 +63,13 @@ def test_create_payload_loglikelihood(api):
         (
             ["Hello, how are"],
             True,
-            {"max_gen_toks": 100, "temperature": 0.7},
+            {"max_gen_toks": 100, "temperature": 0.7, "until": ["hi"]},
             {
                 "prompt": "Hello, how are",
                 "model": "gpt-3.5-turbo",
                 "max_tokens": 100,
                 "temperature": 0.7,
-                "stop": ["<|endoftext|>"],
+                "stop": ["hi"],
                 "seed": 1234,
             },
         ),

@@ -82,7 +82,7 @@ def test_create_payload_loglikelihood(api):
                 "model": "gpt-3.5-turbo",
                 "max_tokens": 256,
                 "temperature": 0,
-                "stop": ["<|endoftext|>"],
+                "stop": [],
                 "seed": 1234,
             },
         ),
tests/models/test_gptqmodel.py (new file, mode 100644)

from typing import List

import pytest

import lm_eval


def assert_less_than(value, threshold, desc):
    if value is not None:
        assert float(value) < threshold, f"{desc} should be less than {threshold}"


@pytest.mark.skip(reason="requires CUDA")
class Test_GPTQModel:
    gptqmodel = pytest.importorskip("gptqmodel", minversion="1.0.9")
    MODEL_ID = "ModelCloud/Opt-125-GPTQ-4bit-10-25-2024"

    def test_gptqmodel(self) -> None:
        acc = "acc"
        acc_norm = "acc_norm"
        acc_value = None
        acc_norm_value = None

        task = "arc_easy"
        model_args = f"pretrained={self.MODEL_ID},gptqmodel=True"
        tasks: List[str] = [task]
        results = lm_eval.simple_evaluate(
            model="hf",
            model_args=model_args,
            tasks=tasks,
            device="cuda",
        )

        column = "results"
        dic = results.get(column, {}).get(task)  # per-task metrics dict
        if dic is not None:
            if "alias" in dic:
                _ = dic.pop("alias")
            items = sorted(dic.items())
            for k, v in items:
                # Keys look like "acc,none"; split off the filter name.
                m, _, f = k.partition(",")
                if m.endswith("_stderr"):
                    continue
                if m == acc:
                    acc_value = "%.4f" % v if isinstance(v, float) else v
                if m == acc_norm:
                    acc_norm_value = "%.4f" % v if isinstance(v, float) else v

        assert_less_than(acc_value, 0.43, "acc")
        assert_less_than(acc_norm_value, 0.39, "acc_norm")
tests/test_tasks.py

 import os
 from itertools import islice

+import datasets
 import pytest

 import lm_eval.tasks as tasks

@@ -10,6 +11,7 @@ from lm_eval.evaluator_utils import get_task_list
 from .utils import new_tasks

+datasets.config.HF_DATASETS_TRUST_REMOTE_CODE = True
 os.environ["TOKENIZERS_PARALLELISM"] = "false"

 task_manager = tasks.TaskManager()

 # Default Task

@@ -77,10 +79,17 @@ class TestNewTasks:
         )
         _array = [task.doc_to_text(doc) for doc in arr]
         # space convention; allow txt to have length 0 for perplexity-like tasks since the model tacks an <|endoftext|> on
-        assert all(
-            isinstance(x, str) and (x[-1] != " " if len(x) != 0 else True)
-            for x in _array
-        )
+        target_delimiter: str = task.config.target_delimiter
+        if not task.multiple_input:
+            for x in _array:
+                assert isinstance(x, str)
+                assert (
+                    (x[-1].isspace() is False if len(x) > 0 else True)
+                    if target_delimiter.isspace()
+                    else True
+                ), "doc_to_text ends in a whitespace and target delimiter also a whitespace"
+        else:
+            pass

     def test_create_choices(self, task_class, limit):
         task = task_class

@@ -121,5 +130,11 @@ class TestNewTasks:
             if task.has_test_docs()
             else list(islice(task.validation_docs(), limit))
         )
-        requests = [task.construct_requests(doc, task.doc_to_text(doc)) for doc in arr]
+        # ctx is "" for multiple input tasks
+        requests = [
+            task.construct_requests(
+                doc=doc, ctx="" if task.multiple_input else task.doc_to_text(doc)
+            )
+            for doc in arr
+        ]
         assert len(requests) == limit if limit else True