Add runai model streamer e2e test for GCS (#28079)

Signed-off-by: Alexis MacAskill <amacaskill@google.com>

Add runai model streamer e2e test for GCS (#28079)
Signed-off-by: Alexis MacAskill <amacaskill@google.com>
a47d94f1 · Alexis MacAskill · GitHub · e70fbc59 · a47d94f1 · a47d94f1
Unverified Commit a47d94f1 authored Nov 06, 2025 by Alexis MacAskill Committed by GitHub Nov 07, 2025
2 changed files
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -546,8 +546,11 @@ steps:
 - label: Model Executor Test # 23min
  timeout_in_minutes: 35
+  torch_nightly: true
  mirror_hardwares: [amdexperimental]
  source_file_dependencies:
+  - vllm/engine/arg_utils.py
+  - vllm/config/model.py
  - vllm/model_executor
  - tests/model_executor
  - tests/entrypoints/openai/test_tensorizer_entrypoint.py

--- a/tests/model_executor/model_loader/runai_model_streamer/test_runai_model_streamer_loader.py
+++ b/tests/model_executor/model_loader/runai_model_streamer/test_runai_model_streamer_loader.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import pytest
 from vllm import SamplingParams
 from vllm.config.load import LoadConfig
 from vllm.model_executor.model_loader import get_model_loader
 load_format = "runai_streamer"
 test_model = "openai-community/gpt2"
+# TODO(amacaskill): Replace with a GKE-owned GCS bucket.
+test_gcs_model = "gs://vertex-model-garden-public-us/codegemma/codegemma-2b/"
 prompts = [
    "Hello, my name is",
@@ -32,3 +36,16 @@ def test_runai_model_loader_download_files(vllm_runner):
    with vllm_runner(test_model, load_format=load_format) as llm:
        deserialized_outputs = llm.generate(prompts, sampling_params)
        assert deserialized_outputs
+def test_runai_model_loader_download_files_gcs(
+    vllm_runner, monkeypatch: pytest.MonkeyPatch
+):
+    monkeypatch.setenv("GOOGLE_CLOUD_PROJECT", "fake-project")
+    monkeypatch.setenv("RUNAI_STREAMER_GCS_USE_ANONYMOUS_CREDENTIALS", "true")
+    monkeypatch.setenv(
+        "CLOUD_STORAGE_EMULATOR_ENDPOINT", "https://storage.googleapis.com"
+    )
+    with vllm_runner(test_gcs_model, load_format=load_format) as llm:
+        deserialized_outputs = llm.generate(prompts, sampling_params)
+        assert deserialized_outputs