"examples/offline_inference_neuron_int8_quantization.py" did not exist on "e90fc21f2eda7e53f692398ee2c0cb5a0ac19693"
Unverified commit 4bf53362, authored by Hongbin Guo, committed by GitHub
Browse files

[Doc] Fix duplicate words in comments (#36713)


Signed-off-by: Hongbin10 <jdmjdm1998@163.com>
parent 5f77ef15
...@@ -264,7 +264,7 @@ class DefaultMoERunner(MoERunner): ...@@ -264,7 +264,7 @@ class DefaultMoERunner(MoERunner):
) )
# Record that the shared_experts_input will be used in the # Record that the shared_experts_input will be used in the
# shared_experts_stream to to avoid gc issue from # shared_experts_stream to avoid gc issue from
# deallocation. For more details: # deallocation. For more details:
# https://docs.pytorch.org/docs/stable/generated/torch.Tensor.record_stream.html # noqa: E501 # https://docs.pytorch.org/docs/stable/generated/torch.Tensor.record_stream.html # noqa: E501
# NOTE: We don't need shared_output.record_stream(current_stream()) # NOTE: We don't need shared_output.record_stream(current_stream())
......
...@@ -50,7 +50,7 @@ def swap_w13_to_w31(x: torch.Tensor) -> torch.Tensor: ...@@ -50,7 +50,7 @@ def swap_w13_to_w31(x: torch.Tensor) -> torch.Tensor:
def rotate_weights_for_fi_trtllm_fp8_per_tensor_moe( def rotate_weights_for_fi_trtllm_fp8_per_tensor_moe(
gemm1_weights: torch.Tensor, gemm2_weights: torch.Tensor, is_gated_activation: bool gemm1_weights: torch.Tensor, gemm2_weights: torch.Tensor, is_gated_activation: bool
): ):
"""Shuffle weights for for FI TRT-LLM Format""" """Shuffle weights for FI TRT-LLM Format"""
from flashinfer import reorder_rows_for_gated_act_gemm, shuffle_matrix_a from flashinfer import reorder_rows_for_gated_act_gemm, shuffle_matrix_a
epilogue_tile_m = 128 epilogue_tile_m = 128
......
...@@ -57,7 +57,7 @@ class SequenceClassificationMixin(SupportsCrossEncoding, VllmModelForPooling): ...@@ -57,7 +57,7 @@ class SequenceClassificationMixin(SupportsCrossEncoding, VllmModelForPooling):
pooler_config = vllm_config.model_config.pooler_config pooler_config = vllm_config.model_config.pooler_config
assert pooler_config is not None assert pooler_config is not None
# Certain information about the the model and classifier can only be # Certain information about the model and classifier can only be
# inferred from the `ForSequenceClassification` class. Therefore, we # inferred from the `ForSequenceClassification` class. Therefore, we
# instantiate it on the "meta" device to avoid allocating GPU memory. # instantiate it on the "meta" device to avoid allocating GPU memory.
with torch.device("meta"): with torch.device("meta"):
......
...@@ -952,7 +952,7 @@ class OpenCVDynamicOpenPanguVideoBackend(VideoLoader, OpenCVVideoBackendMixin): ...@@ -952,7 +952,7 @@ class OpenCVDynamicOpenPanguVideoBackend(VideoLoader, OpenCVVideoBackendMixin):
frame_recovery=frame_recovery, frame_recovery=frame_recovery,
) )
# Use transformers transformers.video_utils.VideoMetadata format # Use transformers.video_utils.VideoMetadata format
metadata = cls.create_hf_metadata( metadata = cls.create_hf_metadata(
source=source, source=source,
video_backend="opencv_dynamic", video_backend="opencv_dynamic",
......
...@@ -44,7 +44,7 @@ def maybe_serialize_tool_calls(request: "MistralChatCompletionRequest"): ...@@ -44,7 +44,7 @@ def maybe_serialize_tool_calls(request: "MistralChatCompletionRequest"):
# SEE: https://github.com/vllm-project/vllm/pull/9951 # SEE: https://github.com/vllm-project/vllm/pull/9951
# Credits go to: @gcalmettes # Credits go to: @gcalmettes
# NOTE: There is currently a bug in pydantic where attributes # NOTE: There is currently a bug in pydantic where attributes
# declared as iterables are replaced in in the instances by # declared as iterables are replaced in the instances by
# pydantic-core ValidatorIterator instance. In particular, this # pydantic-core ValidatorIterator instance. In particular, this
# affects tool_calls defined in ChatCompletionAssistantMessageParam # affects tool_calls defined in ChatCompletionAssistantMessageParam
# model: # model:
......
...@@ -1055,6 +1055,6 @@ def init_worker_distributed_environment( ...@@ -1055,6 +1055,6 @@ def init_worker_distributed_environment(
parallel_config.decode_context_parallel_size, parallel_config.decode_context_parallel_size,
) )
# Init ec connector here before KV caches caches init # Init ec connector here before KV caches init
# NOTE: We do not init KV caches for Encoder-only instance in EPD disagg mode # NOTE: We do not init KV caches for Encoder-only instance in EPD disagg mode
ensure_ec_transfer_initialized(vllm_config) ensure_ec_transfer_initialized(vllm_config)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment