Unverified commit eb1629da, authored by Andreas Karatzas, committed by GitHub
Browse files

[ROCm][CI] Fix AITER test flakiness by using explicit attention backend (#32346)


Signed-off-by: Andreas Karatzas <akaratza@amd.com>
Signed-off-by: Matthew Wong <Matthew.Wong2@amd.com>
Co-authored-by: Matthew Wong <Matthew.Wong2@amd.com>
parent 019e2c3b
...@@ -866,7 +866,7 @@ steps: ...@@ -866,7 +866,7 @@ steps:
- label: Language Models Tests (Standard) - label: Language Models Tests (Standard)
timeout_in_minutes: 25 timeout_in_minutes: 25
mirror_hardwares: [amdexperimental] mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1 agent_pool: mi325_1
# grade: Blocking # grade: Blocking
torch_nightly: true torch_nightly: true
......
...@@ -160,8 +160,12 @@ def test_models( ...@@ -160,8 +160,12 @@ def test_models(
tokenizer_name=model_info.tokenizer or model, tokenizer_name=model_info.tokenizer or model,
tokenizer_mode=model_info.tokenizer_mode, tokenizer_mode=model_info.tokenizer_mode,
trust_remote_code=model_info.trust_remote_code, trust_remote_code=model_info.trust_remote_code,
max_num_seqs=2, # Remove the effects of batch variance on ROCm since batch invariance
# is not yet supported.
# See: https://github.com/vllm-project/vllm/issues/27433
max_num_seqs=1 if current_platform.is_rocm() else 2,
enable_prompt_embeds=use_prompt_embeds, enable_prompt_embeds=use_prompt_embeds,
compilation_config={"cudagraph_capture_sizes": [1, 2]},
) as vllm_model: ) as vllm_model:
vllm_outputs = vllm_model.generate_greedy_logprobs( vllm_outputs = vllm_model.generate_greedy_logprobs(
example_prompts, max_tokens, num_logprobs example_prompts, max_tokens, num_logprobs
......
...@@ -45,14 +45,14 @@ ...@@ -45,14 +45,14 @@
}, },
"16": { "16": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16, "BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1, "GROUP_SIZE_M": 1,
"num_warps": 4, "num_warps": 2,
"num_stages": 2, "num_stages": 2,
"waves_per_eu": 0, "waves_per_eu": 0,
"matrix_instr_nonkdim": 16, "matrix_instr_nonkdim": 16,
"kpack": 2 "kpack": 1
}, },
"24": { "24": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment