gaoqiong / lm-evaluation-harness · Commits

Commit ff8903f2, authored Jul 05, 2023 by haileyschoelkopf
remove stale templates
parent e071dd5e
Showing 2 changed files with 0 additions and 219 deletions:

    templates/new_multiple_choice_task.py  +0 −78
    templates/new_task.py                  +0 −141
templates/new_multiple_choice_task.py  deleted  (file mode 100644 → 0)
# TODO: Remove all TODO comments once the implementation is complete.
"""
TODO: Add the Paper Title on this line.
TODO: Add the paper's PDF URL (preferably from arXiv) on this line.

TODO: Write a Short Description of the task.

Homepage: TODO: Add the URL to the task's Homepage here.
"""
from lm_eval.base import MultipleChoiceTask


# TODO: Add the BibTeX citation for the task.
_CITATION = """
"""


# TODO: Replace `NewTask` with the name of your Task.
class NewTask(MultipleChoiceTask):
    VERSION = 0

    # TODO: Add the `DATASET_PATH` string. This will be the name of the `Task`
    # dataset as denoted in HuggingFace `datasets`.
    DATASET_PATH = ""

    # TODO: Add the `DATASET_NAME` string. This is the name of a subset within
    # `DATASET_PATH`. If there aren't specific subsets you need, leave this as
    # `None`.
    DATASET_NAME = None

    def has_training_docs(self):
        # TODO: Fill in the return with `True` if the Task has training data;
        # else `False`.
        return False

    def has_validation_docs(self):
        # TODO: Fill in the return with `True` if the Task has validation data;
        # else `False`.
        return False

    def has_test_docs(self):
        # TODO: Fill in the return with `True` if the Task has test data;
        # else `False`.
        return False

    def training_docs(self):
        if self.has_training_docs():
            # We cache training documents in `self._training_docs` for faster
            # few-shot processing. If the data is too large to fit in memory,
            # return the training data as a generator instead of a list.
            if self._training_docs is None:
                # TODO: Return the training document generator from `self.dataset`.
                # In most cases you can leave this as is unless the dataset split
                # is named differently than the default `"train"`.
                self._training_docs = list(
                    map(self._process_doc, self.dataset["train"])
                )
            return self._training_docs

    def validation_docs(self):
        if self.has_validation_docs():
            # TODO: Return the validation document generator from `self.dataset`.
            # In most cases you can leave this as is unless the dataset split
            # is named differently than the default `"validation"`.
            return map(self._process_doc, self.dataset["validation"])

    def test_docs(self):
        if self.has_test_docs():
            # TODO: Return the test document generator from `self.dataset`.
            # In most cases you can leave this as is unless the dataset split
            # is named differently than the default `"test"`.
            return map(self._process_doc, self.dataset["test"])

    def _process_doc(self, doc):
        # TODO: Process the documents into a dictionary with the following keys:
        return {
            "query": "",  # The query prompt.
            "choices": [],  # The list of choices.
            "gold": 0,  # The integer used to index into the correct element of `"choices"`.
        }

    def doc_to_text(self, doc):
        # TODO: Format the query prompt portion of the document example.
        return doc["query"]
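For reference, here is a minimal sketch of how the template above might be filled in. The dataset path "some_org/example_mc" and the "question"/"endings"/"label" field names are hypothetical placeholders, not a real dataset; the point is only the shape of the subclass. In this version of the harness, `MultipleChoiceTask` supplies `doc_to_target`, `construct_requests`, and the metric plumbing once `_process_doc` returns the `query`/`choices`/`gold` dictionary.

from lm_eval.base import MultipleChoiceTask


class ExampleMCTask(MultipleChoiceTask):
    VERSION = 0
    DATASET_PATH = "some_org/example_mc"  # hypothetical HF dataset path
    DATASET_NAME = None

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return False

    def training_docs(self):
        # Cache the processed training split for few-shot sampling.
        if self._training_docs is None:
            self._training_docs = list(
                map(self._process_doc, self.dataset["train"])
            )
        return self._training_docs

    def validation_docs(self):
        return map(self._process_doc, self.dataset["validation"])

    def _process_doc(self, doc):
        return {
            "query": "Question: " + doc["question"] + "\nAnswer:",
            "choices": doc["endings"],  # one string per answer option
            "gold": int(doc["label"]),  # index of the correct option
        }

    def doc_to_text(self, doc):
        return doc["query"]

Because the base class scores each entry of `"choices"` by log-likelihood internally, a multiple-choice subclass only has to describe its data.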
templates/new_task.py  deleted  (file mode 100644 → 0)
# TODO: Remove all TODO comments once the implementation is complete.
"""
TODO: Add the Paper Title on this line.
TODO: Add the paper's PDF URL (preferably from arXiv) on this line.

TODO: Write a Short Description of the task.

Homepage: TODO: Add the URL to the task's Homepage here.
"""
from lm_eval.base import Task


# TODO: Add the BibTeX citation for the task.
_CITATION = """
"""


# TODO: Replace `NewTask` with the name of your Task.
class NewTask(Task):
    VERSION = 0

    # TODO: Add the `DATASET_PATH` string. This will be the name of the `Task`
    # dataset as denoted in HuggingFace `datasets`.
    DATASET_PATH = ""

    # TODO: Add the `DATASET_NAME` string. This is the name of a subset within
    # `DATASET_PATH`. If there aren't specific subsets you need, leave this as
    # `None`.
    DATASET_NAME = None

    def has_training_docs(self):
        # TODO: Fill in the return with `True` if the Task has training data;
        # else `False`.
        return False

    def has_validation_docs(self):
        # TODO: Fill in the return with `True` if the Task has validation data;
        # else `False`.
        return False

    def has_test_docs(self):
        # TODO: Fill in the return with `True` if the Task has test data;
        # else `False`.
        return False

    def training_docs(self):
        if self.has_training_docs():
            # We cache training documents in `self._training_docs` for faster
            # few-shot processing. If the data is too large to fit in memory,
            # return the training data as a generator instead of a list.
            if self._training_docs is None:
                # TODO: Return the training document generator from `self.dataset`.
                # If you need to process the data, `map` over the documents with
                # the custom processing function, `self._process_doc`. E.g.
                # `map(self._process_doc, self.dataset["train"])`
                # In most cases you can leave this as is unless the dataset split
                # is named differently than the default `"train"`.
                self._training_docs = list(self.dataset["train"])
            return self._training_docs

    def validation_docs(self):
        if self.has_validation_docs():
            # TODO: Return the validation document generator from `self.dataset`.
            # If you need to process the data, `map` over the documents with the
            # custom processing function, `self._process_doc`. E.g.
            # `map(self._process_doc, self.dataset["validation"])`
            # In most cases you can leave this as is unless the dataset split
            # is named differently than the default `"validation"`.
            return self.dataset["validation"]

    def test_docs(self):
        if self.has_test_docs():
            # TODO: Return the test document generator from `self.dataset`.
            # If you need to process the data, `map` over the documents with the
            # custom processing function, `self._process_doc`. E.g.
            # `map(self._process_doc, self.dataset["test"])`
            # In most cases you can leave this as is unless the dataset split
            # is named differently than the default `"test"`.
            return self.dataset["test"]

    def _process_doc(self, doc):
        # TODO: Process (detokenize, strip, replace etc.) each individual `doc`
        # with this function. You can map this across the docs in each available
        # dataset split. See the TODOs in `training_docs`, `validation_docs`, and
        # `test_docs` for snippets.
        # NOTE: DELETE THIS FUNCTION IF UNUSED.
        return doc

    def doc_to_text(self, doc):
        # TODO: Format the query prompt portion of the document example.
        return ""

    def doc_to_target(self, doc):
        # TODO: Fill in the `target` ("gold answer") variable.
        # The prepended `" "` is required to space out the `doc_to_text` and
        # `doc_to_target` strings.
        target = ""
        return " " + target

    def construct_requests(self, doc, ctx):
        """Uses RequestFactory to construct Requests and returns an iterable of
        Requests which will be sent to the LM.

        :param doc:
            The document as returned from training_docs, validation_docs, or
            test_docs.
        :param ctx: str
            The context string, generated by fewshot_context. This includes the
            natural language description, as well as the few-shot examples, and
            the question part of the document for `doc`.
        """
        # TODO: Construct your language model requests with the request factory,
        # `rf`, and return them as an iterable.
        return []

    def process_results(self, doc, results):
        """Take a single document and the LM results and evaluates, returning a
        dict where keys are the names of submetrics and values are the values of
        the metric for that one document.

        :param doc:
            The document as returned from training_docs, validation_docs, or
            test_docs.
        :param results:
            The results of the requests created in construct_requests.
        """
        # TODO: For each (sub)metric in the task evaluation, add a key-value pair
        # with the metric name as key and the corresponding metric result as value
        # for the current `doc`.
        return {}

    def aggregation(self):
        """
        :returns: {str: [metric_score] -> float}
            A dictionary where keys are the names of submetrics and values are
            functions that aggregate a list of metric scores.
        """
        # TODO: For each (sub)metric in the task evaluation, add a key-value pair
        # with the metric name as key and an aggregation function as value which
        # determines how to combine results from each document in the dataset.
        # Check `lm_eval.metrics` to find built-in aggregation functions.
        return {}

    def higher_is_better(self):
        # TODO: For each (sub)metric in the task evaluation, add a key-value pair
        # with the metric name as key and a `bool` value determining whether or
        # not higher values of that metric are deemed better.
        return {}
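Similarly, a minimal sketch of completing this generic template for a hypothetical binary yes/no task scored by accuracy. The dataset path and the "passage"/"answer" field names are assumptions for illustration; the `rf` request factory lives in `lm_eval.base` and `mean` in `lm_eval.metrics` in the harness version these templates target.

from lm_eval.base import Task, rf
from lm_eval.metrics import mean


class ExampleYesNoTask(Task):
    VERSION = 0
    DATASET_PATH = "some_org/example_yesno"  # hypothetical HF dataset path
    DATASET_NAME = None

    def has_training_docs(self):
        return False

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return False

    def validation_docs(self):
        return self.dataset["validation"]

    def doc_to_text(self, doc):
        return doc["passage"] + "\nQuestion: Is this claim true?\nAnswer:"

    def doc_to_target(self, doc):
        # Leading space separates the prompt from the gold continuation.
        return " " + ("yes" if doc["answer"] else "no")

    def construct_requests(self, doc, ctx):
        # One loglikelihood request per candidate continuation.
        ll_yes, _ = rf.loglikelihood(ctx, " yes")
        ll_no, _ = rf.loglikelihood(ctx, " no")
        return ll_yes, ll_no

    def process_results(self, doc, results):
        ll_yes, ll_no = results
        prediction = ll_yes > ll_no
        return {"acc": float(prediction == bool(doc["answer"]))}

    def aggregation(self):
        return {"acc": mean}

    def higher_is_better(self):
        return {"acc": True}

Comparing the two log-likelihoods makes this a two-way forced choice: `process_results` emits per-document accuracy, `aggregation` averages it over the split, and `higher_is_better` records the direction of improvement.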