Unverified commit ba2ce28f, authored by Lianmin Zheng and committed by GitHub
Browse files

[Auto Sync] Update model_config.py (20251014) (#11580)


Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Hanming Lu <69857889+hanming-lu@users.noreply.github.com>
parent 98923880
...@@ -25,7 +25,7 @@ from transformers import PretrainedConfig ...@@ -25,7 +25,7 @@ from transformers import PretrainedConfig
from sglang.srt.environ import envs from sglang.srt.environ import envs
from sglang.srt.layers.quantization import QUANTIZATION_METHODS from sglang.srt.layers.quantization import QUANTIZATION_METHODS
from sglang.srt.server_args import ServerArgs from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import is_hip from sglang.srt.utils import is_hip, retry
from sglang.srt.utils.hf_transformers_utils import ( from sglang.srt.utils.hf_transformers_utils import (
get_config, get_config,
get_context_length, get_context_length,
...@@ -492,7 +492,16 @@ class ModelConfig: ...@@ -492,7 +492,16 @@ class ModelConfig:
from huggingface_hub import HfApi, hf_hub_download from huggingface_hub import HfApi, hf_hub_download
hf_api = HfApi() hf_api = HfApi()
if hf_api.file_exists(self.model_path, "hf_quant_config.json"): # Retry HF API call up to 3 times
file_exists = retry(
lambda: hf_api.file_exists(
self.model_path, "hf_quant_config.json"
),
max_retry=2,
initial_delay=1.0,
max_delay=5.0,
)
if file_exists:
# Download and parse the quantization config for remote models # Download and parse the quantization config for remote models
quant_config_file = hf_hub_download( quant_config_file = hf_hub_download(
repo_id=self.model_path, repo_id=self.model_path,
...@@ -506,7 +515,10 @@ class ModelConfig: ...@@ -506,7 +515,10 @@ class ModelConfig:
logger.warning( logger.warning(
"Offline mode is enabled, skipping hf_quant_config.json check" "Offline mode is enabled, skipping hf_quant_config.json check"
) )
pass except Exception as e:
logger.warning(
f"Failed to check hf_quant_config.json: {self.model_path} {e}"
)
elif os.path.exists(os.path.join(self.model_path, "hf_quant_config.json")): elif os.path.exists(os.path.join(self.model_path, "hf_quant_config.json")):
quant_config_file = os.path.join( quant_config_file = os.path.join(
self.model_path, "hf_quant_config.json" self.model_path, "hf_quant_config.json"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment