Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
86ec5a53
"docs/zh_CN/TrainingService/FrameworkControllerMode.rst" did not exist on "53b565e48136431dd8c775be1611ff4bc1eae154"
Commit
86ec5a53
authored
Jan 23, 2024
by
lintangsutawika
Browse files
testing returning to python task list
parent
cea47848
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
19 additions
and
16 deletions
+19
-16
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+12
-9
lm_eval/tasks/scrolls/task.py
lm_eval/tasks/scrolls/task.py
+6
-6
lm_eval/tasks/squadv2/task.py
lm_eval/tasks/squadv2/task.py
+1
-1
No files found.
lm_eval/tasks/__init__.py
View file @
86ec5a53
...
@@ -13,15 +13,17 @@ from lm_eval.api.task import TaskConfig, Task, ConfigurableTask
...
@@ -13,15 +13,17 @@ from lm_eval.api.task import TaskConfig, Task, ConfigurableTask
import
logging
import
logging
# import python tasks
# import python tasks
from
.squadv2.task
import
SQuAD2
import
squadv2
from
.scrolls.task
import
(
import
scrolls
QuALITY
,
python_tasks
=
{
NarrativeQA
,
"squadv2"
:
squadv2
.
task
.
SQuAD2
,
ContractNLI
,
"scrolls_quality"
:
scrolls
.
task
.
QuALITY
,
GovReport
,
"scrolls_narrativeqa"
:
scrolls
.
task
.
NarrativeQA
,
SummScreenFD
,
"scrolls_contractnli"
:
scrolls
.
task
.
ContractNLI
,
QMSum
,
"scrolls_govreport"
:
scrolls
.
task
.
GovReport
,
)
"scrolls_summscreenfd"
:
scrolls
.
task
.
SummScreenFD
,
"scrolls_qmsum"
:
scrolls
.
task
.
QMSum
,
}
eval_logger
=
utils
.
eval_logger
eval_logger
=
utils
.
eval_logger
...
@@ -41,6 +43,7 @@ class TaskManager(abc.ABC):
...
@@ -41,6 +43,7 @@ class TaskManager(abc.ABC):
self
.
ALL_TASKS
=
self
.
initialize_tasks
(
self
.
ALL_TASKS
=
self
.
initialize_tasks
(
include_path
=
include_path
include_path
=
include_path
)
)
# + {k:v, "type":"task" for k,v in python_tasks.items()}
def
initialize_tasks
(
self
,
include_path
=
None
):
def
initialize_tasks
(
self
,
include_path
=
None
):
...
...
lm_eval/tasks/scrolls/task.py
View file @
86ec5a53
...
@@ -337,7 +337,7 @@ class Qasper(_SCROLLSTask):
...
@@ -337,7 +337,7 @@ class Qasper(_SCROLLSTask):
)
)
@
register_task
(
"scrolls_quality"
)
#
@register_task("scrolls_quality")
class
QuALITY
(
_SCROLLSMultipleChoiceTask
):
class
QuALITY
(
_SCROLLSMultipleChoiceTask
):
"""QuALITY: Question Answering with Long Input Texts, Yes!
"""QuALITY: Question Answering with Long Input Texts, Yes!
https://arxiv.org/abs/2112.08608
https://arxiv.org/abs/2112.08608
...
@@ -366,7 +366,7 @@ class QuALITY(_SCROLLSMultipleChoiceTask):
...
@@ -366,7 +366,7 @@ class QuALITY(_SCROLLSMultipleChoiceTask):
return
[
doc
]
return
[
doc
]
@
register_task
(
"scrolls_narrativeqa"
)
#
@register_task("scrolls_narrativeqa")
class
NarrativeQA
(
_SCROLLSTask
):
class
NarrativeQA
(
_SCROLLSTask
):
"""The NarrativeQA Reading Comprehension Challenge
"""The NarrativeQA Reading Comprehension Challenge
https://arxiv.org/abs/1712.07040
https://arxiv.org/abs/1712.07040
...
@@ -400,7 +400,7 @@ class NarrativeQA(_SCROLLSTask):
...
@@ -400,7 +400,7 @@ class NarrativeQA(_SCROLLSTask):
)
)
@
register_task
(
"scrolls_contractnli"
)
#
@register_task("scrolls_contractnli")
class
ContractNLI
(
_SCROLLSMultipleChoiceTask
):
class
ContractNLI
(
_SCROLLSMultipleChoiceTask
):
"""ContractNLI: A Dataset for Document-level Natural Language Inference for Contracts
"""ContractNLI: A Dataset for Document-level Natural Language Inference for Contracts
https://arxiv.org/abs/1712.07040
https://arxiv.org/abs/1712.07040
...
@@ -419,7 +419,7 @@ class ContractNLI(_SCROLLSMultipleChoiceTask):
...
@@ -419,7 +419,7 @@ class ContractNLI(_SCROLLSMultipleChoiceTask):
return
f
"
{
doc
[
'text'
]
}
\n\n
Hypothesis:
{
doc
[
'question'
]
}
\n
Conclusion:"
return
f
"
{
doc
[
'text'
]
}
\n\n
Hypothesis:
{
doc
[
'question'
]
}
\n
Conclusion:"
@
register_task
(
"scrolls_govreport"
)
#
@register_task("scrolls_govreport")
class
GovReport
(
_SCROLLSSummaryTask
):
class
GovReport
(
_SCROLLSSummaryTask
):
"""Efficient Attentions for Long Document Summarization
"""Efficient Attentions for Long Document Summarization
https://arxiv.org/abs/2104.02112
https://arxiv.org/abs/2104.02112
...
@@ -433,7 +433,7 @@ class GovReport(_SCROLLSSummaryTask):
...
@@ -433,7 +433,7 @@ class GovReport(_SCROLLSSummaryTask):
DATASET_NAME
=
"gov_report"
DATASET_NAME
=
"gov_report"
@
register_task
(
"scrolls_summscreenfd"
)
#
@register_task("scrolls_summscreenfd")
class
SummScreenFD
(
_SCROLLSSummaryTask
):
class
SummScreenFD
(
_SCROLLSSummaryTask
):
"""SummScreen: A Dataset for Abstractive Screenplay Summarization
"""SummScreen: A Dataset for Abstractive Screenplay Summarization
https://arxiv.org/abs/2104.07091
https://arxiv.org/abs/2104.07091
...
@@ -442,7 +442,7 @@ class SummScreenFD(_SCROLLSSummaryTask):
...
@@ -442,7 +442,7 @@ class SummScreenFD(_SCROLLSSummaryTask):
DATASET_NAME
=
"summ_screen_fd"
DATASET_NAME
=
"summ_screen_fd"
@
register_task
(
"scrolls_qmsum"
)
#
@register_task("scrolls_qmsum")
class
QMSum
(
_SCROLLSSummaryTask
):
class
QMSum
(
_SCROLLSSummaryTask
):
"""QMSum: A New Benchmark for Query-based Multi-domain
"""QMSum: A New Benchmark for Query-based Multi-domain
Meeting Summarization
Meeting Summarization
...
...
lm_eval/tasks/squadv2/task.py
View file @
86ec5a53
...
@@ -47,7 +47,7 @@ def _squad_agg(key, items):
...
@@ -47,7 +47,7 @@ def _squad_agg(key, items):
return
_squad_metric
(
predictions
=
predictions
,
references
=
references
).
get
(
key
,
0
)
return
_squad_metric
(
predictions
=
predictions
,
references
=
references
).
get
(
key
,
0
)
@
register_task
(
"squadv2"
)
#
@register_task("squadv2")
class
SQuAD2
(
Task
):
class
SQuAD2
(
Task
):
VERSION
=
3
VERSION
=
3
DATASET_PATH
=
"squad_v2"
DATASET_PATH
=
"squad_v2"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment