Commit ca57061c authored by Leo Gao
Browse files

Patch gpt3lm

parent bc5478a1
......@@ -71,7 +71,10 @@ class GPT3LM(LM):
res = []
def _collate(x):
# Key function handed to utils.Reorderer for each request x: returns
# (token count, decoded text) for the concatenation x[0] + x[1].
# NOTE(review): the assignment directly below is overwritten before it is ever read;
# it looks like the pre-patch line of this diff (it dropped the last token) -- confirm.
toks = self.tokenizer.encode(x[0] + x[1])[:-1]
# this doesn't efficiently handle last-token differences yet, but those are kinda annoying because
# it's not guaranteed that the 100 or so logprobs we get to see actually contain all the continuations
# we care about and so we need some kind of backup for when it isn't
toks = self.tokenizer.encode(x[0] + x[1])
# The key is built from the full token list (no truncation of the final token).
return (len(toks), self.tokenizer.decode(toks))
reord = utils.Reorderer(requests, _collate)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment