Unverified commit 8cef1376, authored by Cyrus Leung, committed by GitHub
Browse files

[Chore] Update more locations to use `attention_config.backend` (#31153)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent a37328fc
...@@ -104,7 +104,6 @@ def run_benchmark_with_batch_invariant( ...@@ -104,7 +104,6 @@ def run_benchmark_with_batch_invariant(
random.seed(seed) random.seed(seed)
# Set environment variables # Set environment variables
os.environ["VLLM_ATTENTION_BACKEND"] = backend
if batch_invariant: if batch_invariant:
os.environ["VLLM_BATCH_INVARIANT"] = "1" os.environ["VLLM_BATCH_INVARIANT"] = "1"
else: else:
...@@ -140,6 +139,7 @@ def run_benchmark_with_batch_invariant( ...@@ -140,6 +139,7 @@ def run_benchmark_with_batch_invariant(
max_model_len=max_model_len, max_model_len=max_model_len,
dtype="bfloat16", dtype="bfloat16",
tensor_parallel_size=tp_size, tensor_parallel_size=tp_size,
attention_config={"backend": backend},
enable_prefix_caching=False, enable_prefix_caching=False,
) )
init_time = time.perf_counter() - start_init init_time = time.perf_counter() - start_init
......
...@@ -557,7 +557,8 @@ def test_rms_group_quant( ...@@ -557,7 +557,8 @@ def test_rms_group_quant(
# To capture subprocess logs, we need to know whether spawn or fork is used. # To capture subprocess logs, we need to know whether spawn or fork is used.
# Force spawn as it is more general. # Force spawn as it is more general.
monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn") monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn")
monkeypatch.setenv("VLLM_ATTENTION_BACKEND", backend.name)
model_kwargs["attention_config"] = {"backend": backend.name}
compilation_config = CompilationConfig( compilation_config = CompilationConfig(
# Testing properties # Testing properties
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment