Unverified Commit e26fef83 authored by co63oc's avatar co63oc Committed by GitHub
Browse files

fix some typos (#24616)


Signed-off-by: default avatarco63oc <co63oc@users.noreply.github.com>
parent c1eda615
......@@ -23,7 +23,7 @@ class TestSetting:
fullgraph: bool
# we cannot afford testing the full Catesian product
# we cannot afford testing the full Cartesian product
# of all models and all levels
@pytest.mark.parametrize(
"test_setting",
......
......@@ -345,7 +345,7 @@ def test_mamba_chunk_scan_cont_batch_prefill_chunking(chunk_size, seqlens):
# in the mamba2 ssd kernels, by comparing concatenation (in the sequence
# dimension) of chunked results with the full sequence result.
# It is different from test_mamba_chunk_scan_cont_batch by:
# 1. Not using the naive torch implementaion (ssd_minimal_discrete) to get
# 1. Not using the naive torch implementation (ssd_minimal_discrete) to get
# reference outputs. Instead, it compares chunked kernel outputs to full
# sequence kernel outputs. This is the most straightforward way to
# assert chunked prefill correctness.
......
......@@ -179,7 +179,7 @@ def chunk_local_cumsum_vector(
def grid(meta):
return (triton.cdiv(meta['S'], meta['BS']), NT, B * H)
# keep cummulative normalizer in fp32
# keep cumulative normalizer in fp32
# this kernel is equivalent to
# g = g.view(B, H, NT, BT, -1).cumsum(-2).view(B, H, T, -1)
chunk_local_cumsum_vector_kernel[grid](g_org,
......
......@@ -1322,7 +1322,7 @@ class MLACommonImpl(MLAAttentionImpl[M], Generic[M]):
k_scale: torch.Tensor,
dcp_world_size: int,
):
assert k_scale is None, "DCP not support sacled kvcache now."
assert k_scale is None, "DCP not support scaled kvcache now."
assert attn_metadata.prefill is not None
prefill_metadata = attn_metadata.prefill
assert prefill_metadata.chunked_context is not None
......
......@@ -112,9 +112,9 @@ class BlockTable:
# tokens.
virtual_block_offsets = positions % virtual_block_size
mask = virtual_block_offsets % self.dcp_world_size == self.dcp_rank
# Calcuate local block_offsets
# Calculate local block_offsets
block_offsets = virtual_block_offsets // self.dcp_world_size
# Calcuate slot_mapping
# Calculate slot_mapping
slot_mapping = block_numbers * self.block_size + block_offsets
# Write final slots, use -1 for not-local
self.slot_mapping_np[:req_indices.shape[0]] = np.where(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment