"deploy/inference-gateway/vscode:/vscode.git/clone" did not exist on "4581fccb5fa519a2e9292fbc291c97ba3492551c"
Unverified commit ad9d656b, authored by Zhengxu Chen, committed by GitHub
Browse files

[multimodal][test] Reduce memory utilization for test_siglip to avoid OOM (#29504)


Signed-off-by: zhxchen17 <zhxchen17@fb.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
parent f37e8938
......@@ -37,7 +37,12 @@ def _run_test(
dtype: str,
) -> None:
with vllm_runner(
model, runner="pooling", dtype=dtype, enforce_eager=True, max_model_len=64
model,
runner="pooling",
dtype=dtype,
enforce_eager=True,
max_model_len=64,
gpu_memory_utilization=0.7,
) as vllm_model:
vllm_outputs = vllm_model.embed(input_texts, images=input_images)
......@@ -134,6 +139,7 @@ def test_models_text_image_no_crash(
dtype=dtype,
enforce_eager=True,
max_model_len=64,
gpu_memory_utilization=0.7,
) as vllm_model:
with pytest.raises(ValueError, match="not both"):
vllm_model.embed(texts, images=images)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment