Unverified Commit 69193f71 authored by Ke Bao, committed by GitHub

Filter tokenizer warning for kimi models (#12485)

parent d5b6e50f
@@ -15,6 +15,7 @@
 import contextlib
 import json
+import logging
 import os
 import tempfile
 import warnings
@@ -348,6 +349,12 @@ def get_context_length(config):
 _FAST_LLAMA_TOKENIZER = "hf-internal-testing/llama-tokenizer"
 
 
+# Filter warnings like: https://github.com/sgl-project/sglang/issues/8082
+class TokenizerWarningsFilter(logging.Filter):
+    def filter(self, record: logging.LogRecord) -> bool:
+        return "Calling super().encode with" not in record.getMessage()
+
+
 def get_tokenizer(
     tokenizer_name: str,
     *args,
@@ -393,6 +400,10 @@ def get_tokenizer(
             clean_up_tokenization_spaces=False,
             **kwargs,
         )
+        # Filter tokenizer warnings
+        logging.getLogger(tokenizer.__class__.__module__).addFilter(
+            TokenizerWarningsFilter()
+        )
     except TypeError as e:
         # The LLaMA tokenizer causes a protobuf error in some environments.
         err_msg = (
...
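
For context, here is a minimal standalone sketch of the mechanism this commit relies on: a logging.Filter attached to a logger drops any record for which filter() returns False, before the record reaches any handler. The logger name and the full warning message below are illustrative; the commit itself derives the logger name from tokenizer.__class__.__module__ and matches only the "Calling super().encode with" substring.

import logging

class TokenizerWarningsFilter(logging.Filter):
    def filter(self, record: logging.LogRecord) -> bool:
        # Returning False drops the record; True lets it through unchanged.
        return "Calling super().encode with" not in record.getMessage()

# Illustrative logger name; the real code uses tokenizer.__class__.__module__.
logger = logging.getLogger("transformers.tokenization_utils_base")
logger.addFilter(TokenizerWarningsFilter())

logger.warning("Calling super().encode with a prompt string ...")  # suppressed
logger.warning("some unrelated tokenizer warning")                 # still emitted

Attaching the filter to the module-level logger (rather than to a handler) means the noisy record is discarded at the point of logging, so it is suppressed regardless of how handlers are configured downstream.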