Commit 787c99ef authored by haileyschoelkopf

Merge branch 'fix-len0-continuations' of https://github.com/EleutherAI/lm-evaluation-harness into fix-len0-continuations
parents 42d54f8c d03c9fde
@@ -31,3 +31,16 @@ class Instance:
        return (
            self.arguments if isinstance(self.arguments, tuple) else (self.arguments,)
        )

    @args.setter
    def args(self, new_arguments: tuple) -> None:
        """
        Update the arguments of this instance with a new tuple.
        """
        if isinstance(new_arguments, tuple):
            assert (
                len(new_arguments) == len(self.args)
            ), "Must set new Instance arguments to a tuple of the same length as the old arguments"
            self.arguments = new_arguments
        else:
            raise ValueError("Must set new Instance args to a tuple!")
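For context, a minimal runnable sketch of the pattern this setter enables, using a simplified stand-in for the real Instance dataclass (which carries additional fields not shown in this hunk):

from dataclasses import dataclass
from typing import Tuple, Union


@dataclass
class FakeInstance:
    # simplified stand-in for lm_eval's Instance; only the arguments field is modeled here
    arguments: Union[str, Tuple[str, ...]]

    @property
    def args(self) -> tuple:
        return self.arguments if isinstance(self.arguments, tuple) else (self.arguments,)

    @args.setter
    def args(self, new_arguments: tuple) -> None:
        if isinstance(new_arguments, tuple):
            assert len(new_arguments) == len(self.args)
            self.arguments = new_arguments
        else:
            raise ValueError("Must set new Instance args to a tuple!")


req = FakeInstance(arguments=("Q: 2+2=\nA:", " 4"))
req.args = ("<chat-formatted prompt ending in> Q: 2+2=\nA:", " 4")  # allowed: same length
# req.args = "not a tuple"          # would raise ValueError
# req.args = ("only one element",)  # would fail the length assert

The length check is what lets wrap_chat_template further down rewrite the context while leaving the continuation untouched.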
@@ -96,6 +96,10 @@ class HFLM(LM):
        # PEFT and quantization options
        peft: Optional[str] = None,
        autogptq: Optional[Union[bool, str]] = False,
        # Chat templating settings
        use_chat_template: Optional[bool] = False,
        # TODO: validate a template exists in tokenizer config, if this flag is true
        system_prompt: Optional[str] = None,
        **kwargs,
    ) -> None:
        super().__init__()
@@ -241,6 +245,9 @@ class HFLM(LM):
        else:
            self.tokenizer.add_special_tokens({"pad_token": "<|pad|>"})

        self.system_prompt = system_prompt
        self.use_chat_template = use_chat_template

        self._max_length = max_length

        self.batch_schedule = 1
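A hypothetical usage sketch for the two new constructor flags; the model name, the import path, and the pretrained keyword are illustrative assumptions rather than taken from this diff:

from lm_eval.models.huggingface import HFLM  # assumed import path

lm = HFLM(
    pretrained="meta-llama/Llama-2-7b-chat-hf",       # illustrative model name
    use_chat_template=True,                           # wrap each request with the tokenizer's chat template
    system_prompt="You are a helpful assistant.",     # emitted as a "system" turn when not None
)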
@@ -678,6 +685,36 @@ class HFLM(LM):
        elif self.AUTO_MODEL_CLASS == transformers.AutoModelForSeq2SeqLM:
            return self.tokenizer.decode(tokens, skip_special_tokens=True)

    def wrap_chat_template(
        self, requests: List[Instance], generate=False
    ) -> List[Instance]:
        """
        Utility for wrapping requests in the tokenizer's chat template via its apply_chat_template() method.
        """
        # TODO: handle repeats > 1 case?
        # TODO: raise an error if system prompt not compatible with template
        new_reqs = []
        for req in requests:
            context, continuation = req.args[0].strip(), req.args[1]
            chat = []
            if self.system_prompt is not None:
                chat += [{"role": "system", "content": self.system_prompt}]
            chat += [
                {"role": "user", "content": context},
            ]
            # TODO: expose settings for chat formatting:
            # - whether some "trigger" / start of assistant response might be placed in assistant's generation for it
            # - if few-shot, should the fewshots be placed in separate convo turns? provided in user's single turn?...
            context = self.tokenizer.apply_chat_template(
                chat,
                tokenize=False,
                add_generation_prompt=True,
            )
            req.args = (context, continuation)
            new_reqs.append(req)
        return new_reqs

    def _model_call(self, inps, attn_mask=None, labels=None):
        """
        :param inps: torch.Tensor
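For intuition, a standalone sketch of what the wrapping step produces; the exact rendered string depends entirely on the chat template bundled with the tokenizer, and the model name here is only an example:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")  # example model that ships a chat template
chat = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "The capital of France is"},
]
prompt = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
print(prompt)
# roughly: "<|system|>\nYou are a helpful assistant.</s>\n<|user|>\nThe capital of France is</s>\n<|assistant|>\n"
# wrap_chat_template stores (prompt, continuation) back onto the Instance via the args setter above.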
@@ -778,6 +815,11 @@ class HFLM(LM):
        return context_enc, continuation_enc

    def loglikelihood(self, requests: List[Instance]) -> List[Tuple[float, bool]]:
        if self.use_chat_template:
            print(f"First element before prompt formatting...\n{requests[0].args}")
            requests = self.wrap_chat_template(requests)
            print(f"First element after prompt formatting...\n{requests[0].args}")

        new_reqs = []
        for context, continuation in [req.args for req in requests]:
            if context == "":
@@ -796,6 +838,8 @@ class HFLM(LM):

    def loglikelihood_rolling(self, requests: List[Instance]) -> List[float]:
        loglikelihoods = []

        # TODO: add a warning that chat templates are ignored for ppl evals
        adaptive_batch_size = None
        if self.batch_size == "auto":
            # using rolling window with maximum context
@@ -872,7 +916,6 @@ class HFLM(LM):
        disable_tqdm: bool = False,
        override_bs: int = None,
    ) -> List[Tuple[float, bool]]:
        # TODO: implement some kind of efficient-request-middleware that lumps together requests with the same context
        res = []

        def _collate(x):
@@ -1051,6 +1094,11 @@ class HFLM(LM):
        return re_ord.get_original(res)

    def generate_until(self, requests: List[Instance]) -> List[str]:
        if self.use_chat_template:
            print(f"First element before prompt formatting...\n{requests[0].args}")
            requests = self.wrap_chat_template(requests)
            print(f"First element after prompt formatting...\n{requests[0].args}")

        res = []

        def _collate(x):