Unverified Commit 2a6acc88 authored by Hailey Schoelkopf's avatar Hailey Schoelkopf Committed by GitHub
Browse files

fail gracefully upon tokenizer logging failure (#2038)

parent cc2d3463
...@@ -114,14 +114,28 @@ def add_env_info(storage: Dict[str, Any]): ...@@ -114,14 +114,28 @@ def add_env_info(storage: Dict[str, Any]):
def add_tokenizer_info(storage: Dict[str, Any], lm): def add_tokenizer_info(storage: Dict[str, Any], lm):
if getattr(lm, "tokenizer", False): if getattr(lm, "tokenizer", False):
try:
tokenizer_info = { tokenizer_info = {
"tokenizer_pad_token": [lm.tokenizer.pad_token, lm.tokenizer.pad_token_id], "tokenizer_pad_token": [
"tokenizer_eos_token": [lm.tokenizer.eos_token, lm.tokenizer.eos_token_id], lm.tokenizer.pad_token,
"tokenizer_bos_token": [lm.tokenizer.bos_token, lm.tokenizer.bos_token_id], lm.tokenizer.pad_token_id,
],
"tokenizer_eos_token": [
lm.tokenizer.eos_token,
lm.tokenizer.eos_token_id,
],
"tokenizer_bos_token": [
lm.tokenizer.bos_token,
lm.tokenizer.bos_token_id,
],
"eot_token_id": getattr(lm, "eot_token_id", None), "eot_token_id": getattr(lm, "eot_token_id", None),
"max_length": getattr(lm, "max_length", None), "max_length": getattr(lm, "max_length", None),
} }
storage.update(tokenizer_info) storage.update(tokenizer_info)
except Exception as err:
logger.debug(
f"Logging detailed tokenizer info failed with {err}, skipping..."
)
# seems gguf and textsynth do not have tokenizer # seems gguf and textsynth do not have tokenizer
else: else:
logger.debug( logger.debug(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment