"test/vscode:/vscode.git/clone" did not exist on "95f763f466d89612bb8ede5821ecc37077a39e3a"
Commit e3031e84 authored by Leo Gao
Browse files

Remove num_tokens

parent 90e50b4c
......@@ -20,16 +20,6 @@ class LM(abc.ABC):
"""
pass
@classmethod
def num_tokens(cls, string):
    """Return the number of tokens in a string, based on tokenization.

    This base implementation is a placeholder: it performs no
    tokenization and returns ``None``.

    :param string: str
        Input string
    :return: int
    """
    pass
@classmethod
def create_from_arg_string(cls, arg_string):
"""Constructor method, in case models need additional arguments
......
......@@ -5,8 +5,5 @@ from . import MODEL_REGISTRY
@MODEL_REGISTRY.register("dummy")
class DummyLM(LM):
    """Placeholder model registered under the name ``"dummy"``.

    Both methods ignore their arguments and return fixed values.
    """

    def generate(self, context, max_gen_length) -> str:
        """Return the fixed string ``"lol"``, ignoring both arguments."""
        return "lol"

    def loglikelihood(self, context, continuation) -> float:
        """Return ``0.0``, ignoring both arguments."""
        return 0.0
......@@ -28,6 +28,3 @@ class GPT2LM(LM):
logits = F.log_softmax(self.gpt2(inp)[0], dim=-1)[:, ctxlen - 1:-1] # [batch, seq, vocab]
return torch.gather(logits, 2, cont_toks.unsqueeze(-1)).squeeze(-1)
def num_tokens(self, string):
    """Return how many tokens ``self.tokenizer`` splits ``string`` into.

    :param string: str
        Input string
    :return: int
        Length of the sequence returned by ``self.tokenizer.tokenize``.
    """
    return len(self.tokenizer.tokenize(string))
......@@ -47,4 +47,3 @@ class GPT3LM(LM):
logprobs = response.choices[0]["logprobs"]["token_logprobs"]
continuation_logprobs = logprobs[ctxlen:]
return sum(continuation_logprobs)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment