"vscode:/vscode.git/clone" did not exist on "b3955af465fcf765110b98102c6bb824fc0b2f49"
Commit 62df55d1 authored by Konrad's avatar Konrad
Browse files

initial chat template

parent 885f48d6
......@@ -162,6 +162,12 @@ def setup_parser() -> argparse.ArgumentParser:
default=False,
help="If True, write out all model outputs and documents for per-sample measurement and post-hoc analysis. Use with --output_path.",
)
parser.add_argument(
"--apply_chat_template",
action="store_true",
default=False,
help="If True, applies the chat template to the prompt",
)
parser.add_argument(
"--show_config",
action="store_true",
......@@ -357,6 +363,7 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
check_integrity=args.check_integrity,
write_out=args.write_out,
log_samples=args.log_samples,
apply_chat_template=args.apply_chat_template,
gen_kwargs=args.gen_kwargs,
task_manager=task_manager,
verbosity=args.verbosity,
......
......@@ -63,9 +63,54 @@ class ContextSampler:
)
+ self.fewshot_delimiter
)
return labeled_examples
def get_chat_context(
    self,
    doc,
    num_fewshot,
    chat_history=None,
):
    """Build a chat-format fewshot context as a list of user/assistant turns.

    :param doc:
        The document being evaluated; it is excluded from the drawn
        fewshot examples to avoid leaking the answer.
    :param num_fewshot: int
        Number of fewshot examples to append to the history.
    :param chat_history: list, optional
        Existing chat messages to extend. Defaults to a fresh list.
    :returns: list
        The (extended) chat history of {"role", "content"} dicts.
    """
    # NOTE: a `[]` default here would be the classic mutable-default bug —
    # the same list would be shared and grow across calls.
    if chat_history is None:
        chat_history = []
    # draw an extra fewshot sample if using same split as evaluating on
    n_samples = (
        num_fewshot + 1
        if self.config.fewshot_split == self.config.test_split
        else num_fewshot
    )
    # draw `n_samples` docs from fewshot_docs
    fewshotex = self.sample(n_samples)
    # get rid of the doc that's the one we're evaluating, if it's in the fewshot
    # TODO: should we just stop people from using fewshot from same split as evaluating?
    selected_docs = [x for x in fewshotex if x != doc][:num_fewshot]
    # use a distinct loop name so we don't shadow the `doc` parameter
    for fewshot_doc in selected_docs:
        text = self.doc_to_text(fewshot_doc)
        if self.config.doc_to_choice is None or isinstance(text, str):
            user_content = text
        else:
            # `doc_to_text` returned an index into the choice list
            user_content = self.doc_to_choice(fewshot_doc)[text]
        chat_history.append({"role": "user", "content": user_content})

        target = self.doc_to_target(fewshot_doc)
        if isinstance(target, list):
            assistant_content = str(target[0])
        elif self.config.doc_to_choice is None or isinstance(target, str):
            assistant_content = target
        else:
            # `doc_to_target` returned an index into the choice list
            assistant_content = str(self.doc_to_choice(fewshot_doc)[target])
        chat_history.append({"role": "assistant", "content": assistant_content})
    return chat_history
def sample(self, n):
"""
Draw `n` samples from our fewshot docs. This method should be overridden by subclasses.
......
......@@ -373,6 +373,8 @@ class Task(abc.ABC):
world_size=None,
cache_requests=False,
rewrite_requests_cache=False,
apply_chat_template=False,
tokenizer=None,
) -> None:
"""Build a set of Instances for a task, and store them in task.instances"""
......@@ -421,6 +423,8 @@ class Task(abc.ABC):
fewshot_ctx = self.fewshot_context(
doc,
0 if self.config.num_fewshot is None else self.config.num_fewshot,
apply_chat_template,
tokenizer,
)
# TODO: we should override self.config.repeats if doing greedy gen so users don't waste time+compute
......@@ -957,8 +961,32 @@ class ConfigurableTask(Task):
)
return super().fewshot_docs()
def convert_chat_history_to_string(self, chat_history: list, tokenizer=None) -> str:
    """Render a chat history as a single prompt string.

    :param chat_history: list
        Messages of the form {"role": ..., "content": ...} to render.
    :param tokenizer:
        Optional tokenizer; when given, its chat template is applied. When
        None, turns are joined as "role: content" using the sampler's
        fewshot_delimiter.
    :returns: str
        The rendered prompt.
    """
    if tokenizer:
        # Defer formatting to the model's own chat template.
        return tokenizer.apply_chat_template(
            chat_history, tokenize=False, add_generation_prompt=True
        )
    delim = self.sampler.fewshot_delimiter
    turns = (f"{turn['role']}: {turn['content']}" for turn in chat_history)
    return delim + "".join(rendered + delim for rendered in turns)
@utils.positional_deprecated
def fewshot_context(self, doc: str, num_fewshot: int) -> str:
def fewshot_context(
self,
doc: str,
num_fewshot: int,
apply_chat_template: bool = False,
tokenizer=None,
) -> str:
"""Returns a fewshot context string that is made up of a prepended description
(if provided), the `num_fewshot` number of examples, and an appended prompt example.
......@@ -966,19 +994,57 @@ class ConfigurableTask(Task):
The document as returned from training_docs, validation_docs, or test_docs.
:param num_fewshot: int
The number of fewshot examples to provide in the returned context string.
:param apply_chat_template: bool
Whether to apply the chat template to the fewshot context.
:param tokenizer:
The tokenizer to use for applying the chat template.
:returns: str
The fewshot context.
"""
if description := self.config.description:
description = utils.apply_template(self.config.description, doc)
chat_history = []
if num_fewshot == 0:
# always prepend the (possibly empty) task description
if apply_chat_template:
chat_history.append({"role": "system", "content": description})
else:
labeled_examples = description
else:
labeled_examples = description + self.sampler.get_context(doc, num_fewshot)
if apply_chat_template:
chat_history = self.sampler.get_chat_context(
doc, num_fewshot, chat_history
)
else:
labeled_examples = description + self.sampler.get_context(
doc, num_fewshot
)
example = self.doc_to_text(doc)
if apply_chat_template:
if not self.multiple_input:
if isinstance(example, str):
chat_history.append({"role": "user", "content": example})
elif isinstance(example, list):
chat_histories_list = []
for ex in example:
chat = deepcopy(chat_history)
chat.append({"role": "user", "content": ex})
chat_histories_list.append(
self.convert_chat_history_to_string(chat, tokenizer)
)
return chat_histories_list
elif isinstance(example, int):
if self.config.doc_to_choice is not None:
choices = self.doc_to_choice(doc)
chat_history.append(
{"role": "user", "content": choices[example]}
)
else:
chat_history.append({"role": "user", "content": str(example)})
return self.convert_chat_history_to_string(chat_history, tokenizer)
else:
if self.multiple_input:
return labeled_examples
else:
......
......@@ -55,6 +55,7 @@ def simple_evaluate(
check_integrity: bool = False,
write_out: bool = False,
log_samples: bool = True,
apply_chat_template: bool = False,
gen_kwargs: Optional[str] = None,
task_manager: Optional[TaskManager] = None,
verbosity: str = "INFO",
......@@ -99,6 +100,8 @@ def simple_evaluate(
If True, write out an example document and model input for checking task integrity
:param log_samples: bool
If True, write out all model outputs and documents for per-sample measurement and post-hoc analysis
:param apply_chat_template: bool
If True, apply chat template to the prompt
:param gen_kwargs: str
String arguments for model generation
Ignored for all tasks with loglikelihood output_type
......@@ -262,6 +265,7 @@ def simple_evaluate(
bootstrap_iters=bootstrap_iters,
write_out=write_out,
log_samples=log_samples,
apply_chat_template=apply_chat_template,
verbosity=verbosity,
)
......@@ -317,6 +321,7 @@ def evaluate(
bootstrap_iters: Optional[int] = 100000,
write_out: bool = False,
log_samples: bool = True,
apply_chat_template: bool = False,
verbosity: str = "INFO",
):
"""Instantiate and evaluate a model on a list of tasks.
......@@ -333,6 +338,8 @@ def evaluate(
If True, write out an example document and model input for checking task integrity
:param log_samples: bool
If True, write out all model outputs and documents for per-sample measurement and post-hoc analysis
:param apply_chat_template: bool
If True, apply chat template to the prompt
:return
Dictionary of results
"""
......@@ -362,6 +369,8 @@ def evaluate(
world_size=lm.world_size,
cache_requests=cache_requests,
rewrite_requests_cache=rewrite_requests_cache,
apply_chat_template=apply_chat_template,
tokenizer=lm.tokenizer,
)
eval_logger.debug(
f"Task: {task_output.task_name}; number of requests on this rank: {len(task.instances)}"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment