Commit 9dfb58a3 authored by Konrad's avatar Konrad
Browse files

system instruction

parent cd9e4540
...@@ -44,6 +44,8 @@ This mode supports a number of command-line arguments, the details of which can ...@@ -44,6 +44,8 @@ This mode supports a number of command-line arguments, the details of which can
- `--include_path` : Accepts a path to a folder. If passed, then all YAML files containing `lm-eval` compatible task configurations will be added to the task registry as available tasks. Used for when one is writing config files for their own task in a folder other than `lm_eval/tasks/`
- `--system_instruction`: Specifies a system instruction string to prepend to the prompt.
- `--apply_chat_template` : If this flag is on, a chat template will be applied to the prompt. For Hugging Face models, the chat template is taken from the tokenizer, if the tokenizer does not have a chat template, a default one will be applied. For other models, a generic chat template is used. - `--apply_chat_template` : If this flag is on, a chat template will be applied to the prompt. For Hugging Face models, the chat template is taken from the tokenizer, if the tokenizer does not have a chat template, a default one will be applied. For other models, a generic chat template is used.
- `--predict_only`: Generates the model outputs without computing metrics. Use with `--log_samples` to retrieve decoded results. - `--predict_only`: Generates the model outputs without computing metrics. Use with `--log_samples` to retrieve decoded results.
......
...@@ -162,6 +162,12 @@ def setup_parser() -> argparse.ArgumentParser: ...@@ -162,6 +162,12 @@ def setup_parser() -> argparse.ArgumentParser:
default=False, default=False,
help="If True, write out all model outputs and documents for per-sample measurement and post-hoc analysis. Use with --output_path.", help="If True, write out all model outputs and documents for per-sample measurement and post-hoc analysis. Use with --output_path.",
) )
parser.add_argument(
"--system_instruction",
type=str,
default=None,
help="System instruction to be used in the prompt",
)
parser.add_argument( parser.add_argument(
"--apply_chat_template", "--apply_chat_template",
action="store_true", action="store_true",
...@@ -363,6 +369,7 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None: ...@@ -363,6 +369,7 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
check_integrity=args.check_integrity, check_integrity=args.check_integrity,
write_out=args.write_out, write_out=args.write_out,
log_samples=args.log_samples, log_samples=args.log_samples,
system_instruction=args.system_instruction,
apply_chat_template=args.apply_chat_template, apply_chat_template=args.apply_chat_template,
gen_kwargs=args.gen_kwargs, gen_kwargs=args.gen_kwargs,
task_manager=task_manager, task_manager=task_manager,
......
...@@ -373,6 +373,7 @@ class Task(abc.ABC): ...@@ -373,6 +373,7 @@ class Task(abc.ABC):
world_size=None, world_size=None,
cache_requests=False, cache_requests=False,
rewrite_requests_cache=False, rewrite_requests_cache=False,
system_instruction=None,
apply_chat_template=False, apply_chat_template=False,
tokenizer=None, tokenizer=None,
) -> None: ) -> None:
...@@ -423,6 +424,7 @@ class Task(abc.ABC): ...@@ -423,6 +424,7 @@ class Task(abc.ABC):
fewshot_ctx = self.fewshot_context( fewshot_ctx = self.fewshot_context(
doc, doc,
0 if self.config.num_fewshot is None else self.config.num_fewshot, 0 if self.config.num_fewshot is None else self.config.num_fewshot,
system_instruction,
apply_chat_template, apply_chat_template,
tokenizer, tokenizer,
) )
...@@ -984,6 +986,7 @@ class ConfigurableTask(Task): ...@@ -984,6 +986,7 @@ class ConfigurableTask(Task):
self, self,
doc: str, doc: str,
num_fewshot: int, num_fewshot: int,
system_instruction: str = "",
apply_chat_template: bool = False, apply_chat_template: bool = False,
tokenizer=None, tokenizer=None,
) -> str: ) -> str:
...@@ -994,6 +997,8 @@ class ConfigurableTask(Task): ...@@ -994,6 +997,8 @@ class ConfigurableTask(Task):
The document as returned from training_docs, validation_docs, or test_docs. The document as returned from training_docs, validation_docs, or test_docs.
:param num_fewshot: int :param num_fewshot: int
The number of fewshot examples to provide in the returned context string. The number of fewshot examples to provide in the returned context string.
:param system_instruction: str
System instruction to be applied to the prompt.
:param apply_chat_template: bool :param apply_chat_template: bool
Whether to apply the chat template to the fewshot context. Whether to apply the chat template to the fewshot context.
:param tokenizer: :param tokenizer:
...@@ -1001,25 +1006,43 @@ class ConfigurableTask(Task): ...@@ -1001,25 +1006,43 @@ class ConfigurableTask(Task):
:returns: str :returns: str
The fewshot context. The fewshot context.
""" """
if apply_chat_template:
labeled_examples = []
else:
labeled_examples = ""
# get task description
if description := self.config.description: if description := self.config.description:
description = utils.apply_template(self.config.description, doc) description = utils.apply_template(self.config.description, doc)
labeled_examples = [] # create system prompt based on the provided system instruction and description
if num_fewshot == 0: if system_instruction and description:
# always prepend the (possibly empty) task description system_prompt = (
if apply_chat_template: f"{system_instruction}{self.sampler.fewshot_delimiter}{description}"
labeled_examples.append({"role": "system", "content": description}) )
elif system_instruction:
system_prompt = system_instruction
elif description:
system_prompt = description
else: else:
labeled_examples = description system_prompt = ""
# add system prompt if specified
if system_prompt:
if apply_chat_template:
labeled_examples.append({"role": "system", "content": system_prompt})
else: else:
labeled_examples = system_prompt
# if few-shot - append examples after the system prompt
if num_fewshot > 0:
if apply_chat_template: if apply_chat_template:
labeled_examples = self.sampler.get_chat_context( labeled_examples = self.sampler.get_chat_context(
doc, num_fewshot, labeled_examples doc, num_fewshot, labeled_examples
) )
else: else:
labeled_examples = description + self.sampler.get_context( labeled_examples += self.sampler.get_context(doc, num_fewshot)
doc, num_fewshot
)
example = self.doc_to_text(doc) example = self.doc_to_text(doc)
if apply_chat_template: if apply_chat_template:
......
...@@ -55,6 +55,7 @@ def simple_evaluate( ...@@ -55,6 +55,7 @@ def simple_evaluate(
check_integrity: bool = False, check_integrity: bool = False,
write_out: bool = False, write_out: bool = False,
log_samples: bool = True, log_samples: bool = True,
system_instruction: str = "",
apply_chat_template: bool = False, apply_chat_template: bool = False,
gen_kwargs: Optional[str] = None, gen_kwargs: Optional[str] = None,
task_manager: Optional[TaskManager] = None, task_manager: Optional[TaskManager] = None,
...@@ -100,6 +101,8 @@ def simple_evaluate( ...@@ -100,6 +101,8 @@ def simple_evaluate(
If True, write out an example document and model input for checking task integrity If True, write out an example document and model input for checking task integrity
:param log_samples: bool :param log_samples: bool
If True, write out all model outputs and documents for per-sample measurement and post-hoc analysis If True, write out all model outputs and documents for per-sample measurement and post-hoc analysis
:param system_instruction: str
System instruction to be applied to the prompt
:param apply_chat_template: bool :param apply_chat_template: bool
If True, apply chat template to the prompt If True, apply chat template to the prompt
:param gen_kwargs: str :param gen_kwargs: str
...@@ -265,6 +268,7 @@ def simple_evaluate( ...@@ -265,6 +268,7 @@ def simple_evaluate(
bootstrap_iters=bootstrap_iters, bootstrap_iters=bootstrap_iters,
write_out=write_out, write_out=write_out,
log_samples=log_samples, log_samples=log_samples,
system_instruction=system_instruction,
apply_chat_template=apply_chat_template, apply_chat_template=apply_chat_template,
verbosity=verbosity, verbosity=verbosity,
) )
...@@ -321,6 +325,7 @@ def evaluate( ...@@ -321,6 +325,7 @@ def evaluate(
bootstrap_iters: Optional[int] = 100000, bootstrap_iters: Optional[int] = 100000,
write_out: bool = False, write_out: bool = False,
log_samples: bool = True, log_samples: bool = True,
system_instruction: str = "",
apply_chat_template: bool = False, apply_chat_template: bool = False,
verbosity: str = "INFO", verbosity: str = "INFO",
): ):
...@@ -338,6 +343,8 @@ def evaluate( ...@@ -338,6 +343,8 @@ def evaluate(
If True, write out an example document and model input for checking task integrity If True, write out an example document and model input for checking task integrity
:param log_samples: bool :param log_samples: bool
If True, write out all model outputs and documents for per-sample measurement and post-hoc analysis If True, write out all model outputs and documents for per-sample measurement and post-hoc analysis
:param system_instruction: str
System instruction to be applied to the prompt
:param apply_chat_template: bool :param apply_chat_template: bool
If True, apply chat template to the prompt If True, apply chat template to the prompt
:return :return
...@@ -369,6 +376,7 @@ def evaluate( ...@@ -369,6 +376,7 @@ def evaluate(
world_size=lm.world_size, world_size=lm.world_size,
cache_requests=cache_requests, cache_requests=cache_requests,
rewrite_requests_cache=rewrite_requests_cache, rewrite_requests_cache=rewrite_requests_cache,
system_instruction=system_instruction,
apply_chat_template=apply_chat_template, apply_chat_template=apply_chat_template,
tokenizer=lm.tokenizer if hasattr(lm, "tokenizer") else None, tokenizer=lm.tokenizer if hasattr(lm, "tokenizer") else None,
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment