Unverified commit d5d214ac, authored by Kevin H. Luu and committed by GitHub
Browse files

[1/n][CI] Load models in CI from S3 instead of HF (#13205)



Signed-off-by: <>
Co-authored-by: EC2 Default User <ec2-user@ip-172-31-20-117.us-west-2.compute.internal>
parent fd84857f
...@@ -27,6 +27,8 @@ from vllm.model_executor.layers.quantization import (QuantizationConfig, ...@@ -27,6 +27,8 @@ from vllm.model_executor.layers.quantization import (QuantizationConfig,
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils import PlaceholderModule from vllm.utils import PlaceholderModule
logger = init_logger(__name__)
try: try:
from runai_model_streamer import SafetensorsStreamer from runai_model_streamer import SafetensorsStreamer
except (ImportError, OSError): except (ImportError, OSError):
...@@ -37,8 +39,6 @@ except (ImportError, OSError): ...@@ -37,8 +39,6 @@ except (ImportError, OSError):
SafetensorsStreamer = runai_model_streamer.placeholder_attr( SafetensorsStreamer = runai_model_streamer.placeholder_attr(
"SafetensorsStreamer") "SafetensorsStreamer")
logger = init_logger(__name__)
# use system-level temp directory for file locks, so that multiple users # use system-level temp directory for file locks, so that multiple users
# can share the same lock without error. # can share the same lock without error.
# lock files in the temp directory will be automatically deleted when the # lock files in the temp directory will be automatically deleted when the
......
...@@ -144,7 +144,6 @@ def file_exists( ...@@ -144,7 +144,6 @@ def file_exists(
revision: Optional[str] = None, revision: Optional[str] = None,
token: Union[str, bool, None] = None, token: Union[str, bool, None] = None,
) -> bool: ) -> bool:
file_list = list_repo_files(repo_id, file_list = list_repo_files(repo_id,
repo_type=repo_type, repo_type=repo_type,
revision=revision, revision=revision,
...@@ -498,7 +497,7 @@ def get_sentence_transformer_tokenizer_config(model: str, ...@@ -498,7 +497,7 @@ def get_sentence_transformer_tokenizer_config(model: str,
if encoder_dict: if encoder_dict:
break break
if not encoder_dict: if not encoder_dict and not model.startswith("/"):
try: try:
# If model is on HuggingfaceHub, get the repo files # If model is on HuggingfaceHub, get the repo files
repo_files = list_repo_files(model, repo_files = list_repo_files(model,
......
...@@ -46,6 +46,8 @@ def glob(s3=None, ...@@ -46,6 +46,8 @@ def glob(s3=None,
""" """
if s3 is None: if s3 is None:
s3 = boto3.client("s3") s3 = boto3.client("s3")
if not path.endswith("/"):
path = path + "/"
bucket_name, _, paths = list_files(s3, bucket_name, _, paths = list_files(s3,
path=path, path=path,
allow_pattern=allow_pattern) allow_pattern=allow_pattern)
...@@ -109,6 +111,7 @@ class S3Model: ...@@ -109,6 +111,7 @@ class S3Model:
for sig in (signal.SIGINT, signal.SIGTERM): for sig in (signal.SIGINT, signal.SIGTERM):
existing_handler = signal.getsignal(sig) existing_handler = signal.getsignal(sig)
signal.signal(sig, self._close_by_signal(existing_handler)) signal.signal(sig, self._close_by_signal(existing_handler))
self.dir = tempfile.mkdtemp() self.dir = tempfile.mkdtemp()
def __del__(self): def __del__(self):
...@@ -140,6 +143,9 @@ class S3Model: ...@@ -140,6 +143,9 @@ class S3Model:
ignore_pattern: A list of patterns of which files not to pull. ignore_pattern: A list of patterns of which files not to pull.
""" """
if not s3_model_path.endswith("/"):
s3_model_path = s3_model_path + "/"
bucket_name, base_dir, files = list_files(self.s3, s3_model_path, bucket_name, base_dir, files = list_files(self.s3, s3_model_path,
allow_pattern, allow_pattern,
ignore_pattern) ignore_pattern)
...@@ -147,8 +153,9 @@ class S3Model: ...@@ -147,8 +153,9 @@ class S3Model:
return return
for file in files: for file in files:
destination_file = os.path.join(self.dir, destination_file = os.path.join(
file.removeprefix(base_dir)) self.dir,
file.removeprefix(base_dir).lstrip("/"))
local_dir = Path(destination_file).parent local_dir = Path(destination_file).parent
os.makedirs(local_dir, exist_ok=True) os.makedirs(local_dir, exist_ok=True)
self.s3.download_file(bucket_name, file, destination_file) self.s3.download_file(bucket_name, file, destination_file)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment