Unverified Commit 69193f71 authored by Ke Bao's avatar Ke Bao Committed by GitHub
Browse files

Filter tokenizer warning for kimi models (#12485)

parent d5b6e50f
......@@ -15,6 +15,7 @@
import contextlib
import json
import logging
import os
import tempfile
import warnings
......@@ -348,6 +349,12 @@ def get_context_length(config):
_FAST_LLAMA_TOKENIZER = "hf-internal-testing/llama-tokenizer"
# Filter warnings like: https://github.com/sgl-project/sglang/issues/8082
class TokenizerWarningsFilter(logging.Filter):
    """Logging filter that drops a known noisy tokenizer warning.

    See https://github.com/sgl-project/sglang/issues/8082 for the warning
    this suppresses.
    """

    # Substring that identifies the warning records to be discarded.
    _SUPPRESSED_FRAGMENT = "Calling super().encode with"

    def filter(self, record: logging.LogRecord) -> bool:
        """Return False (drop record) when the message contains the known warning."""
        message = record.getMessage()
        return self._SUPPRESSED_FRAGMENT not in message
def get_tokenizer(
tokenizer_name: str,
*args,
......@@ -393,6 +400,10 @@ def get_tokenizer(
clean_up_tokenization_spaces=False,
**kwargs,
)
# Filter tokenizer warnings
logging.getLogger(tokenizer.__class__.__module__).addFilter(
TokenizerWarningsFilter()
)
except TypeError as e:
# The LLaMA tokenizer causes a protobuf error in some environments.
err_msg = (
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment