"test/git@developer.sourcefind.cn:gaoqiong/migraphx.git" did not exist on "6ea1e1bee5450fb4c435e38f035528be12bffe91"
Unverified commit 2a6acc88, authored by Hailey Schoelkopf, committed by GitHub
Browse files

fail gracefully upon tokenizer logging failure (#2038)

parent cc2d3463
...@@ -114,15 +114,29 @@ def add_env_info(storage: Dict[str, Any]): ...@@ -114,15 +114,29 @@ def add_env_info(storage: Dict[str, Any]):
def add_tokenizer_info(storage: Dict[str, Any], lm): def add_tokenizer_info(storage: Dict[str, Any], lm):
if getattr(lm, "tokenizer", False): if getattr(lm, "tokenizer", False):
tokenizer_info = { try:
"tokenizer_pad_token": [lm.tokenizer.pad_token, lm.tokenizer.pad_token_id], tokenizer_info = {
"tokenizer_eos_token": [lm.tokenizer.eos_token, lm.tokenizer.eos_token_id], "tokenizer_pad_token": [
"tokenizer_bos_token": [lm.tokenizer.bos_token, lm.tokenizer.bos_token_id], lm.tokenizer.pad_token,
"eot_token_id": getattr(lm, "eot_token_id", None), lm.tokenizer.pad_token_id,
"max_length": getattr(lm, "max_length", None), ],
} "tokenizer_eos_token": [
storage.update(tokenizer_info) lm.tokenizer.eos_token,
# seems gguf and textsynth do not have tokenizer lm.tokenizer.eos_token_id,
],
"tokenizer_bos_token": [
lm.tokenizer.bos_token,
lm.tokenizer.bos_token_id,
],
"eot_token_id": getattr(lm, "eot_token_id", None),
"max_length": getattr(lm, "max_length", None),
}
storage.update(tokenizer_info)
except Exception as err:
logger.debug(
f"Logging detailed tokenizer info failed with {err}, skipping..."
)
# seems gguf and textsynth do not have tokenizer
else: else:
logger.debug( logger.debug(
"LM does not have a 'tokenizer' attribute, not logging tokenizer metadata to results." "LM does not have a 'tokenizer' attribute, not logging tokenizer metadata to results."
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment