"vscode:/vscode.git/clone" did not exist on "5fa42993b7670b4a9c48cf9d5a73f88a630b2d78"
Commit 921c4d62 authored by Konrad

fewshot as multiturn

parent 3369f887
@@ -48,6 +48,8 @@ This mode supports a number of command-line arguments, the details of which can
- `--apply_chat_template`: If this flag is on, a chat template is applied to the prompt. For Hugging Face models, the chat template is taken from the tokenizer; if the tokenizer does not have one, a default template is applied. For other models, a generic chat template is used.
- `--fewshot_as_multiturn`: If this flag is on, the fewshot examples are treated as a multi-turn conversation: questions are provided as user turns and answers as assistant responses. Requires `--num_fewshot` to be greater than 0 and `--apply_chat_template` to be on; see the Python sketch after this list for the equivalent keyword arguments.
- `--predict_only`: Generates the model outputs without computing metrics. Use with `--log_samples` to retrieve decoded results.
- `--seed`: Set the seed for Python's `random`, NumPy and PyTorch. Accepts a comma-separated list of three values for the `random`, NumPy and PyTorch seeds, respectively, or a single integer to set the same seed for all three. Each value is either an integer or `None` to leave that seed unset. The default is `0,1234,1234` (for backward compatibility). For example, `--seed 0,None,8` sets `random.seed(0)` and `torch.manual_seed(8)` while leaving the NumPy seed unset because the second value is `None`, and `--seed 42` sets all three seeds to 42.
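The same switches are exposed as keyword arguments on the Python entry point (see the `simple_evaluate` changes further below). A minimal sketch of driving this from Python rather than the CLI; the checkpoint and task names are placeholders, and the call assumes the public `lm_eval.simple_evaluate` API:

```python
import lm_eval

# Sketch only: the checkpoint and task below are placeholders, not part of this commit.
results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=EleutherAI/pythia-160m",  # placeholder checkpoint
    tasks=["gsm8k"],                                 # placeholder task name
    num_fewshot=5,                 # must be greater than 0 for fewshot_as_multiturn
    apply_chat_template=True,      # required whenever fewshot_as_multiturn is set
    fewshot_as_multiturn=True,     # fewshot examples rendered as user/assistant turns
)
print(results["results"])
```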
@@ -174,6 +174,12 @@ def setup_parser() -> argparse.ArgumentParser:
default=False,
help="If True, applies the chat template to the prompt",
)
parser.add_argument(
"--fewshot_as_multiturn",
action="store_true",
default=False,
help="If True, uses the fewshot as a multi-turn conversation",
)
parser.add_argument(
"--show_config",
action="store_true",
@@ -282,6 +288,11 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
"Specify --output_path if providing --log_samples or --predict_only"
)
if args.fewshot_as_multiturn and args.apply_chat_template is False:
raise ValueError(
"If fewshot_as_multiturn is set, apply_chat_template must be set to True."
)
if args.include_path is not None:
eval_logger.info(f"Including path: {args.include_path}")
task_manager = TaskManager(args.verbosity, include_path=args.include_path)
@@ -371,6 +382,7 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
log_samples=args.log_samples,
system_instruction=args.system_instruction,
apply_chat_template=args.apply_chat_template,
fewshot_as_multiturn=args.fewshot_as_multiturn,
gen_kwargs=args.gen_kwargs,
task_manager=task_manager,
verbosity=args.verbosity,
@@ -35,40 +35,32 @@ class ContextSampler:
        # TODO: should we just stop people from using fewshot from same split as evaluating?
        selected_docs = [x for x in fewshotex if x != doc][:num_fewshot]

        labeled_examples = (
            self.fewshot_delimiter.join(
                [
                    # TODO: is separating doc_to_text and doc_to_target by one space always desired?
                    (
                        self.doc_to_text(doc)
                        if (
                            self.config.doc_to_choice is None
                            or isinstance(self.doc_to_text(doc), str)
                        )
                        else self.doc_to_choice(doc)[self.doc_to_text(doc)]
                    )
                    + self.target_delimiter
                    + (
                        str(self.doc_to_target(doc)[0])
                        if isinstance(self.doc_to_target(doc), list)
                        else self.doc_to_target(doc)
                        if (
                            self.config.doc_to_choice is None
                            or isinstance(self.doc_to_target(doc), str)
                        )
                        else str(self.doc_to_choice(doc)[self.doc_to_target(doc)])
                    )
                    for doc in selected_docs
                ]
            )
            + self.fewshot_delimiter
        )
        labeled_examples = ""
        for doc in selected_docs:
            doc_content = self.doc_to_text(doc)
            doc_target = self.doc_to_target(doc)
            labeled_examples += (
                doc_content
                if self.config.doc_to_choice is None or isinstance(doc_content, str)
                else self.doc_to_choice(doc)[doc_content]
            )
            labeled_examples += self.target_delimiter
            labeled_examples += (
                str(doc_target[0])
                if isinstance(doc_target, list)
                else doc_target
                if self.config.doc_to_choice is None or isinstance(doc_target, str)
                else str(self.doc_to_choice(doc)[doc_target])
            )
            labeled_examples += self.fewshot_delimiter

        return labeled_examples

    def get_chat_context(
        self,
        doc,
        num_fewshot,
        fewshot_as_multiturn: bool = False,
        chat_history: list = [],
    ):
        # draw an extra fewshot sample if using same split as evaluating on
@@ -84,31 +76,36 @@ class ContextSampler:
        # TODO: should we just stop people from using fewshot from same split as evaluating?
        selected_docs = [x for x in fewshotex if x != doc][:num_fewshot]

        for doc in selected_docs:
            chat_history.append(
                {
                    "role": "user",
                    "content": self.doc_to_text(doc)
                    if (
                        self.config.doc_to_choice is None
                        or isinstance(self.doc_to_text(doc), str)
                    )
                    else self.doc_to_choice(doc)[self.doc_to_text(doc)],
                }
            )
            chat_history.append(
                {
                    "role": "assistant",
                    "content": str(self.doc_to_target(doc)[0])
                    if isinstance(self.doc_to_target(doc), list)
                    else self.doc_to_target(doc)
                    if (
                        self.config.doc_to_choice is None
                        or isinstance(self.doc_to_target(doc), str)
                    )
                    else str(self.doc_to_choice(doc)[self.doc_to_target(doc)]),
                }
            )
        if fewshot_as_multiturn:
            for doc in selected_docs:
                doc_content = self.doc_to_text(doc)
                doc_target = self.doc_to_target(doc)
                chat_history.append(
                    {
                        "role": "user",
                        "content": doc_content
                        if self.config.doc_to_choice is None
                        or isinstance(doc_content, str)
                        else self.doc_to_choice(doc)[doc_content],
                    }
                )
                chat_history.append(
                    {
                        "role": "assistant",
                        "content": str(doc_target[0])
                        if isinstance(doc_target, list)
                        else doc_target
                        if self.config.doc_to_choice is None
                        or isinstance(doc_target, str)
                        else str(self.doc_to_choice(doc)[doc_target]),
                    }
                )
        else:
            # get fewshot context as one user turn
            chat_history.append(
                {"role": "user", "content": self.get_context(doc, num_fewshot)}
            )

        return chat_history

    def sample(self, n):
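For reference, a sketch of the two shapes the chat history can take for the same two fewshot examples, assuming a toy arithmetic task with a `" "` target delimiter and a `"\n\n"` fewshot delimiter (all invented for illustration; the question under evaluation is appended separately, outside this sampler):

```python
# fewshot_as_multiturn=True: get_chat_context emits one user/assistant pair per example.
multiturn_history = [
    {"role": "user", "content": "What is 2 + 2?"},   # invented fewshot question
    {"role": "assistant", "content": "4"},           # invented fewshot answer
    {"role": "user", "content": "What is 9 - 3?"},
    {"role": "assistant", "content": "6"},
]

# fewshot_as_multiturn=False: the same examples are flattened by get_context() into a
# single user turn, joined with the assumed target and fewshot delimiters.
single_turn_history = [
    {"role": "user", "content": "What is 2 + 2? 4\n\nWhat is 9 - 3? 6\n\n"},
]
```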
@@ -375,6 +375,7 @@ class Task(abc.ABC):
rewrite_requests_cache=False,
system_instruction=None,
apply_chat_template=False,
fewshot_as_multiturn=False,
tokenizer=None,
) -> None:
"""Build a set of Instances for a task, and store them in task.instances"""
@@ -426,6 +427,7 @@
0 if self.config.num_fewshot is None else self.config.num_fewshot,
system_instruction,
apply_chat_template,
fewshot_as_multiturn,
tokenizer,
)
@@ -988,6 +990,7 @@ class ConfigurableTask(Task):
num_fewshot: int,
system_instruction: str = None,
apply_chat_template: bool = False,
fewshot_as_multiturn: bool = False,
tokenizer=None,
) -> str:
"""Returns a fewshot context string that is made up of a prepended description
@@ -1001,6 +1004,8 @@
System instruction to be applied to the prompt.
:param apply_chat_template: bool
Whether to apply the chat template to the fewshot context.
:param fewshot_as_multiturn: bool
Whether to provide the fewshot examples as a multiturn conversation or a single user turn.
:param tokenizer:
The tokenizer to use for applying the chat template.
:returns: str
@@ -1039,7 +1044,7 @@
if num_fewshot > 0:
if apply_chat_template:
labeled_examples = self.sampler.get_chat_context(
doc, num_fewshot, labeled_examples
doc, num_fewshot, fewshot_as_multiturn, labeled_examples
)
else:
labeled_examples += self.sampler.get_context(doc, num_fewshot)
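Downstream, `apply_chat_template` is what turns such a chat history into a prompt string using the model's tokenizer. A hedged sketch of that general mechanism with the Hugging Face API; the checkpoint is a placeholder and the harness's exact call site may differ:

```python
from transformers import AutoTokenizer

# Placeholder checkpoint; any tokenizer that ships a chat template behaves similarly.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")

chat_history = [
    {"role": "user", "content": "What is 2 + 2?"},
    {"role": "assistant", "content": "4"},
    {"role": "user", "content": "What is 9 - 3?"},
]

# Render the turns into a single prompt string, leaving the template's
# assistant prefix open so the model generates the final answer.
prompt = tokenizer.apply_chat_template(
    chat_history, tokenize=False, add_generation_prompt=True
)
print(prompt)
```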
@@ -57,6 +57,7 @@ def simple_evaluate(
log_samples: bool = True,
system_instruction: str = None,
apply_chat_template: bool = False,
fewshot_as_multiturn: bool = False,
gen_kwargs: Optional[str] = None,
task_manager: Optional[TaskManager] = None,
verbosity: str = "INFO",
@@ -105,6 +106,8 @@ def simple_evaluate(
System instruction to be applied to the prompt
:param apply_chat_template: bool
If True, apply chat template to the prompt
:param fewshot_as_multiturn: bool
Whether to provide the fewshot examples as a multiturn conversation or a single user turn.
:param gen_kwargs: str
String arguments for model generation
Ignored for all tasks with loglikelihood output_type
@@ -270,6 +273,7 @@ def simple_evaluate(
log_samples=log_samples,
system_instruction=system_instruction,
apply_chat_template=apply_chat_template,
fewshot_as_multiturn=fewshot_as_multiturn,
verbosity=verbosity,
)
@@ -327,6 +331,7 @@ def evaluate(
log_samples: bool = True,
system_instruction: str = None,
apply_chat_template: bool = False,
fewshot_as_multiturn: bool = False,
verbosity: str = "INFO",
):
"""Instantiate and evaluate a model on a list of tasks.
@@ -347,6 +352,8 @@ def evaluate(
System instruction to be applied to the prompt
:param apply_chat_template: bool
If True, apply chat template to the prompt
:param fewshot_as_multiturn: bool
Whether to provide the fewshot examples as a multiturn conversation or a single user turn.
:return
Dictionary of results
"""
@@ -378,6 +385,7 @@ def evaluate(
rewrite_requests_cache=rewrite_requests_cache,
system_instruction=system_instruction,
apply_chat_template=apply_chat_template,
fewshot_as_multiturn=fewshot_as_multiturn,
tokenizer=lm.tokenizer if hasattr(lm, "tokenizer") else None,
)
eval_logger.debug(