Unverified commit fde71262, authored by Kevin H. Luu and committed by GitHub
Browse files

[misc] Add retries with exponential backoff for HF file existence check (#13008)

parent 24313714
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
import enum import enum
import json import json
import os import os
import time
from pathlib import Path from pathlib import Path
from typing import Any, Dict, Literal, Optional, Type, Union from typing import Any, Dict, Literal, Optional, Type, Union
...@@ -100,15 +101,33 @@ def file_or_path_exists(model: Union[str, Path], config_name: str, ...@@ -100,15 +101,33 @@ def file_or_path_exists(model: Union[str, Path], config_name: str,
# NB: file_exists will only check for the existence of the config file on # NB: file_exists will only check for the existence of the config file on
# hf_hub. This will fail in offline mode. # hf_hub. This will fail in offline mode.
try:
return file_exists(model, # Call HF to check if the file exists
config_name, # 2 retries and exponential backoff
revision=revision, max_retries = 2
token=HF_TOKEN) retry_delay = 2
except huggingface_hub.errors.OfflineModeIsEnabled: for attempt in range(max_retries):
# Don't raise in offline mode, all we know is that we don't have this try:
# file cached. return file_exists(model,
return False config_name,
revision=revision,
token=HF_TOKEN)
except huggingface_hub.errors.OfflineModeIsEnabled:
# Don't raise in offline mode,
# all we know is that we don't have this
# file cached.
return False
except Exception as e:
logger.error(
"Error checking file existence: %s, retrying %d of %d", e,
attempt + 1, max_retries)
if attempt == max_retries - 1:
logger.error("Error checking file existence: %s", e)
raise
time.sleep(retry_delay)
retry_delay *= 2
continue
return False
def patch_rope_scaling(config: PretrainedConfig) -> None: def patch_rope_scaling(config: PretrainedConfig) -> None:
...@@ -193,10 +212,26 @@ def get_config( ...@@ -193,10 +212,26 @@ def get_config(
# raise an offline mode error to indicate to the user that they # raise an offline mode error to indicate to the user that they
# don't have files cached and may need to go online. # don't have files cached and may need to go online.
# This is conveniently triggered by calling file_exists(). # This is conveniently triggered by calling file_exists().
file_exists(model,
HF_CONFIG_NAME, # Call HF to check if the file exists
revision=revision, # 2 retries and exponential backoff
token=HF_TOKEN) max_retries = 2
retry_delay = 2
for attempt in range(max_retries):
try:
file_exists(model,
HF_CONFIG_NAME,
revision=revision,
token=HF_TOKEN)
except Exception as e:
logger.error(
"Error checking file existence: %s, retrying %d of %d",
e, attempt + 1, max_retries)
if attempt == max_retries:
logger.error("Error checking file existence: %s", e)
raise e
time.sleep(retry_delay)
retry_delay *= 2
raise ValueError(f"No supported config format found in {model}") raise ValueError(f"No supported config format found in {model}")
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment