Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
be22bb6f
Unverified
Commit
be22bb6f
authored
Oct 01, 2025
by
pwschuurman
Committed by
GitHub
Oct 01, 2025
Browse files
Run:ai model streamer add GCS package support (#24909)
Signed-off-by:
Peter Schuurman
<
psch@google.com
>
parent
169313b9
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
54 additions
and
9 deletions
+54
-9
docs/models/extensions/runai_model_streamer.md
docs/models/extensions/runai_model_streamer.md
+7
-0
requirements/nightly_torch_test.txt
requirements/nightly_torch_test.txt
+1
-1
requirements/rocm.txt
requirements/rocm.txt
+2
-2
requirements/test.in
requirements/test.in
+1
-1
requirements/test.txt
requirements/test.txt
+20
-1
tests/model_executor/model_loader/runai_model_streamer/test_runai_utils.py
...tor/model_loader/runai_model_streamer/test_runai_utils.py
+23
-4
No files found.
docs/models/extensions/runai_model_streamer.md
View file @
be22bb6f
...
...
@@ -24,6 +24,13 @@ vllm serve s3://core-llm/Llama-3-8b \
--load-format
runai_streamer
```
To run a model from Google Cloud Storage, run:
```
bash
vllm serve gs://core-llm/Llama-3-8b
\
--load-format
runai_streamer
```
To run a model from an S3-compatible object store, run:
```
bash
...
...
requirements/nightly_torch_test.txt
View file @
be22bb6f
...
...
@@ -43,6 +43,6 @@ tritonclient==2.51.0
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
numba == 0.61.2; python_version > '3.9'
numpy
runai-model-streamer[s3]==0.14.0
runai-model-streamer[s3
,gcs
]==0.14.0
fastsafetensors>=0.1.10
pydantic>=2.10 # 2.9 leads to error on python 3.10
requirements/rocm.txt
View file @
be22bb6f
...
...
@@ -13,6 +13,6 @@ tensorizer==2.10.1
packaging>=24.2
setuptools>=77.0.3,<80.0.0
setuptools-scm>=8
runai-model-streamer[s3]==0.14.0
runai-model-streamer[s3
,gcs
]==0.14.0
conch-triton-kernels==1.2.1
timm>=1.0.17
\ No newline at end of file
timm>=1.0.17
requirements/test.in
View file @
be22bb6f
...
...
@@ -51,7 +51,7 @@ tritonclient==2.51.0
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
numba == 0.61.2; python_version > '3.9'
numpy
runai-model-streamer[s3]==0.14.0
runai-model-streamer[s3
,gcs
]==0.14.0
fastsafetensors>=0.1.10
pydantic>=2.10 # 2.9 leads to error on python 3.10
decord==0.6.0
...
...
requirements/test.txt
View file @
be22bb6f
...
...
@@ -251,11 +251,27 @@ gitdb==4.0.12
gitpython==3.1.44
# via mlflow-skinny
google-api-core==2.24.2
# via opencensus
# via
# google-cloud-core
# google-cloud-storage
# opencensus
google-auth==2.40.2
# via
# databricks-sdk
# google-api-core
# google-cloud-core
# google-cloud-storage
# runai-model-streamer-gcs
google-cloud-core==2.4.3
# via google-cloud-storage
google-cloud-storage==3.4.0
# via runai-model-streamer-gcs
google-crc32c==1.7.1
# via
# google-cloud-storage
# google-resumable-media
google-resumable-media==2.7.2
# via google-cloud-storage
googleapis-common-protos==1.70.0
# via google-api-core
graphene==3.4.3
...
...
@@ -890,6 +906,7 @@ requests==2.32.3
# docker
# evaluate
# google-api-core
# google-cloud-storage
# huggingface-hub
# lightly
# lm-eval
...
...
@@ -929,6 +946,8 @@ rtree==1.4.0
# via torchgeo
runai-model-streamer==0.14.0
# via -r requirements/test.in
runai-model-streamer-gcs==0.14.0
# via runai-model-streamer
runai-model-streamer-s3==0.14.0
# via runai-model-streamer
s3transfer==0.10.3
...
...
tests/model_executor/model_loader/runai_model_streamer/test_runai_utils.py
View file @
be22bb6f
...
...
@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
glob
import
hashlib
import
os
import
tempfile
...
...
@@ -9,7 +10,8 @@ import huggingface_hub.constants
from
vllm.model_executor.model_loader.weight_utils
import
(
download_weights_from_hf
)
from
vllm.transformers_utils.runai_utils
import
(
is_runai_obj_uri
,
from
vllm.transformers_utils.runai_utils
import
(
ObjectStorageModel
,
is_runai_obj_uri
,
list_safetensors
)
...
...
@@ -34,6 +36,23 @@ def test_runai_list_safetensors_local():
assert
len
(
safetensors
)
==
len
(
files
)
if
__name__
==
"__main__"
:
test_is_runai_obj_uri
()
test_runai_list_safetensors_local
()
def test_runai_pull_files_gcs(monkeypatch):
    """Pull a single object from a public GCS bucket and verify its MD5.

    The test builds an ``ObjectStorageModel`` for a ``gs://`` URL, asks it to
    pull the one file matching ``*<filename>`` from the bucket prefix, then
    hashes the file it expects to find under ``model.dir`` and compares the
    digest with a pinned value.

    Args:
        monkeypatch: pytest fixture used to set environment variables for
            the duration of this test only.
    """
    env_overrides = {
        # Presumably makes the streamer access GCS without real credentials
        # (inferred from the variable name) -- the bucket below is public.
        "RUNAI_STREAMER_GCS_USE_ANONYMOUS_CREDENTIALS": "true",
        # Bypass default project lookup by setting GOOGLE_CLOUD_PROJECT
        "GOOGLE_CLOUD_PROJECT": "fake-project",
    }
    for env_name, env_value in env_overrides.items():
        monkeypatch.setenv(env_name, env_value)

    filename = "LT08_L1GT_074061_20130309_20170505_01_T2_MTL.txt"
    gcs_bucket = "gs://gcp-public-data-landsat/LT08/01/074/061/LT08_L1GT_074061_20130309_20170505_01_T2/"
    # NOTE(review): gcs_bucket already ends with "/", so this URL contains a
    # double slash; kept as-is to preserve the original value.
    gcs_url = "/".join((gcs_bucket, filename))

    model = ObjectStorageModel(gcs_url)
    model.pull_files(gcs_bucket, allow_pattern=["*" + filename])

    # To re-generate / change URLs:
    # gsutil ls -L gs://<gcs-url> | grep "Hash (md5)" | tr -d ' ' \
    #   | cut -d":" -f2 | base64 -d | xxd -p
    expected_checksum = "f60dea775da1392434275b311b31a431"

    digest = hashlib.new("md5")
    local_path = os.path.join(model.dir, filename)
    with open(local_path, 'rb') as stream:
        # Hash in fixed-size chunks so large files are never fully in memory;
        # read() returns b'' at EOF, which ends the loop.
        while block := stream.read(4096):
            digest.update(block)
    actual_checksum = digest.hexdigest()

    assert actual_checksum == expected_checksum
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment