Unverified Commit 3bdd4266 authored by Wilson Wu, committed by GitHub
Browse files

Fix typos in comments across multiple files (#30345)


Signed-off-by: Wilson Wu <iwilsonwu@gmail.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
parent 06462392
...@@ -186,7 +186,7 @@ struct AttentionMetadata { ...@@ -186,7 +186,7 @@ struct AttentionMetadata {
// - Intermediate outputs: q_tile_size * head_dim * output_buffer_elem_size + 2 // - Intermediate outputs: q_tile_size * head_dim * output_buffer_elem_size + 2
// * q_tile_size * 4, partial output, max + sum (float) // * q_tile_size * 4, partial output, max + sum (float)
// Reduction scratchpad contains: // Reduction scratchpad contains:
// - flags: bool array to indicate wether the split is finished // - flags: bool array to indicate whether the split is finished
// - outputs: split_num * q_tile_size * head_dim * output_buffer_elem_size // - outputs: split_num * q_tile_size * head_dim * output_buffer_elem_size
// - max, sum: 2 * split_num * q_tile_size * 4 // - max, sum: 2 * split_num * q_tile_size * 4
class AttentionScratchPad { class AttentionScratchPad {
......
...@@ -617,7 +617,7 @@ struct MacheteCollectiveMma { ...@@ -617,7 +617,7 @@ struct MacheteCollectiveMma {
// Same as upstream, should be kept the same when possible, not formatted for // Same as upstream, should be kept the same when possible, not formatted for
// easier comparison // easier comparison
// with `SwapAB ? N : M -> M` since we dont support SwapAB // with `SwapAB ? N : M -> M` since we don't support SwapAB
// clang-format off // clang-format off
template<class ProblemShape> template<class ProblemShape>
static bool static bool
......
...@@ -22,7 +22,7 @@ python tools/install_nixl_from_source_ubuntu.py ...@@ -22,7 +22,7 @@ python tools/install_nixl_from_source_ubuntu.py
NixlConnector uses NIXL library for underlying communication, which supports multiple transport backends. UCX (Unified Communication X) is the primary default transport library used by NIXL. Configure transport environment variables: NixlConnector uses NIXL library for underlying communication, which supports multiple transport backends. UCX (Unified Communication X) is the primary default transport library used by NIXL. Configure transport environment variables:
```bash ```bash
# Example UCX configuration, adjust according to your enviroment # Example UCX configuration, adjust according to your environment
export UCX_TLS=all # or specify specific transports like "rc,ud,sm,^cuda_ipc" ..etc export UCX_TLS=all # or specify specific transports like "rc,ud,sm,^cuda_ipc" ..etc
export UCX_NET_DEVICES=all # or specify network devices like "mlx5_0:1,mlx5_1:1" export UCX_NET_DEVICES=all # or specify network devices like "mlx5_0:1,mlx5_1:1"
``` ```
......
...@@ -881,7 +881,7 @@ class FusedMoE(CustomOp): ...@@ -881,7 +881,7 @@ class FusedMoE(CustomOp):
# Record that the clone will be used by shared_experts_stream # Record that the clone will be used by shared_experts_stream
# to avoid gc issue from deallocation of hidden_states_clone # to avoid gc issue from deallocation of hidden_states_clone
# For more details: https://docs.pytorch.org/docs/stable/generated/torch.Tensor.record_stream.html # noqa: E501 # For more details: https://docs.pytorch.org/docs/stable/generated/torch.Tensor.record_stream.html # noqa: E501
# NOTE: We dont need shared_output.record_stream(current_stream()) # NOTE: We don't need shared_output.record_stream(current_stream())
# because we synch the streams before using shared_output. # because we synch the streams before using shared_output.
hidden_states_clone.record_stream(self.shared_experts_stream) hidden_states_clone.record_stream(self.shared_experts_stream)
......
...@@ -28,7 +28,7 @@ class CompressedTensorsW4A16Fp4(CompressedTensorsScheme): ...@@ -28,7 +28,7 @@ class CompressedTensorsW4A16Fp4(CompressedTensorsScheme):
@classmethod @classmethod
def get_min_capability(cls) -> int: def get_min_capability(cls) -> int:
# dont restrict as emulations # don't restrict as emulations
return 80 return 80
def create_weights( def create_weights(
......
...@@ -4871,7 +4871,7 @@ class GPUModelRunner( ...@@ -4871,7 +4871,7 @@ class GPUModelRunner(
# we need to adjust the cudagraph sizes to be a multiple of the uniform # we need to adjust the cudagraph sizes to be a multiple of the uniform
# decode query length to avoid: https://github.com/vllm-project/vllm/issues/28207 # decode query length to avoid: https://github.com/vllm-project/vllm/issues/28207
# temp-fix: https://github.com/vllm-project/vllm/issues/28207#issuecomment-3504004536 # temp-fix: https://github.com/vllm-project/vllm/issues/28207#issuecomment-3504004536
# Will be removed in the near future when we have seperate cudagraph capture # Will be removed in the near future when we have separate cudagraph capture
# sizes for decode and mixed prefill-decode. # sizes for decode and mixed prefill-decode.
if ( if (
cudagraph_mode.decode_mode() == CUDAGraphMode.FULL cudagraph_mode.decode_mode() == CUDAGraphMode.FULL
......
...@@ -135,7 +135,7 @@ class AttentionGroup: ...@@ -135,7 +135,7 @@ class AttentionGroup:
kv_cache_spec: KVCacheSpec kv_cache_spec: KVCacheSpec
kv_cache_group_id: int kv_cache_group_id: int
# When ubatching is enabled we will have a metadata builder for each ubatch # When ubatching is enabled we will have a metadata builder for each ubatch
# so that if they use internal persistant buffers for cudagraphs, and they # so that if they use internal persistent buffers for cudagraphs, and they
# won't have to worry about conflicting with the other ubatches. # won't have to worry about conflicting with the other ubatches.
metadata_builders: list[AttentionMetadataBuilder] = field( metadata_builders: list[AttentionMetadataBuilder] = field(
default_factory=lambda: [] default_factory=lambda: []
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment