Unverified commit 4fa7ce46, authored by Roberto L. Castro and committed by GitHub
Browse files

[Feature] Add SM103 (Blackwell Ultra) Support to vLLM (#30484)


Signed-off-by: LopezCastroRoberto <robertol.c510@gmail.com>
Signed-off-by: Roberto L. Castro <38211239+LopezCastroRoberto@users.noreply.github.com>
Co-authored-by: youkaichao <youkaichao@gmail.com>
parent 57e9bf18
......@@ -420,7 +420,7 @@ class FlashMLASparseMetadataBuilder(AttentionMetadataBuilder[FlashMLASparseMetad
max_num_sm_parts = int(
max((sm_count // 2) / h_k // (cdiv(h_q // h_k, 2 * 64) * s_q), 1)
)
if current_platform.is_device_capability(100):
if current_platform.is_device_capability_family(100):
max_num_sm_parts *= 2
self.tile_scheduler_metadata_buffer = torch.empty(
# TileSchedulerMetaDataSize = 8
......@@ -719,7 +719,7 @@ class FlashMLASparseImpl(MLACommonBaseImpl[FlashMLASparseMetadata]):
self.softmax_scale = scale
assert indexer is not None
self.topk_indices_buffer = indexer.topk_indices_buffer
self.padding = 128 if current_platform.is_device_capability(100) else 64
self.padding = 128 if current_platform.is_device_capability_family(100) else 64
if kv_cache_dtype == "fp8_ds_mla":
# Reserve workspace during initialization
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment