Unverified commit 32cb7449, authored by Michelle, committed by GitHub
Browse files

fix auto loading gpt2 tokenizer (#5279)

parent 5d9a0ae7
......@@ -136,6 +136,19 @@ class ColossalLLM(LLM):
"""Get the identifying parameters."""
return {"n": self.n}
def get_token_ids(self, text: str) -> List[int]:
    """Tokenize ``text`` with the tokenizer attached to this LLM's API object.

    Args:
        text: The string input to tokenize.

    Returns:
        The result of ``self.api.tokenizer.encode(text)``: the ids of the
        tokens in the text, in the order they occur.
    """
    # Delegate to the Colossal LLM's own tokenizer rather than langchain's
    # cached GPT2 tokenizer.
    tokenizer = self.api.tokenizer
    return tokenizer.encode(text)
class VllmLLM(LLM):
"""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment