Unverified commit c19d8482, authored by Ke Bao, committed by GitHub

Adjust flashinfer workspace size for Qwen2 models (#2879)

parent 80002562
@@ -84,6 +84,10 @@ class FlashInferAttnBackend(AttentionBackend):
         self.num_wrappers = 1
         self.dispatch_reason = None
 
+        # Qwen2 models require higher flashinfer workspace size
+        if "Qwen2ForCausalLM" in model_runner.model_config.hf_config.architectures:
+            global_config.flashinfer_workspace_size = 512 * 1024 * 1024
+
         # Allocate buffers
         self.workspace_buffer = torch.empty(
             global_config.flashinfer_workspace_size,
...
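
For context, a minimal runnable sketch of the logic this change introduces. Only the Qwen2 architecture check and the 512 MiB override come from the diff; the GlobalConfig stand-in, its 384 MiB default, the helper name, and the buffer's dtype/device are assumptions for illustration, not sglang's exact definitions.

    import torch

    # Hypothetical stand-in for sglang's global config object; the 384 MiB
    # default is an assumed baseline, not the library's actual value.
    class GlobalConfig:
        def __init__(self):
            self.flashinfer_workspace_size = 384 * 1024 * 1024

    global_config = GlobalConfig()

    def allocate_flashinfer_workspace(architectures, device="cuda"):
        # Qwen2 models need a larger flashinfer workspace, so the global
        # setting is raised to 512 MiB before the buffer is allocated.
        if "Qwen2ForCausalLM" in architectures:
            global_config.flashinfer_workspace_size = 512 * 1024 * 1024

        # Allocate the workspace buffer at the (possibly enlarged) size.
        # dtype and device here are assumptions; the diff truncates the
        # torch.empty(...) call after the size argument.
        return torch.empty(
            global_config.flashinfer_workspace_size,
            dtype=torch.uint8,
            device=device,
        )

    # Usage: the architectures list comes from
    # model_runner.model_config.hf_config.architectures in the real backend.
    # workspace = allocate_flashinfer_workspace(["Qwen2ForCausalLM"])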