Unverified Commit 2ce87935 authored by applesaucethebun, committed by GitHub

Add typo checker in pre-commit (#6179)


Co-authored-by: Brayden Zhong <b8zhong@uwaterloo.ca>
parent de167cf5
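
This excerpt shows only the resulting typo fixes; the pre-commit configuration change that registers the checker itself is not included. As a rough sketch, assuming the crate-ci/typos hook is the tool being wired in (the PR may use a different checker, release tag, or exclude list), the entry in `.pre-commit-config.yaml` would look something like this:

```yaml
# Sketch of a typo-checker hook entry; the rev and layout here are assumptions,
# not values taken from this PR.
repos:
  - repo: https://github.com/crate-ci/typos
    rev: v1.21.0  # assumed release tag; pin whatever the project actually uses
    hooks:
      - id: typos
```

With such a hook installed, `pre-commit run --all-files` flags misspellings like the ones corrected in the hunks below.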
......@@ -428,7 +428,7 @@ class Llama4DecoderLayer(nn.Module):
# Fully Connected
hidden_states = self.feed_forward(hidden_states, forward_batch)
-# TODO(ch-wan): ues reduce-scatter in MLP to avoid this scatter
+# TODO(ch-wan): use reduce-scatter in MLP to avoid this scatter
# Scatter
if self.dp_size != 1:
# important: forward batch.gathered_buffer is used both after scatter and after gather.
......
......@@ -57,7 +57,7 @@ class RobertaEmbedding(nn.Module):
input_shape = input_ids.size()
inputs_embeds = self.word_embeddings(input_ids)
-# adpated from vllm: https://github.com/vllm-project/vllm/commit/4a18fd14ba4a349291c798a16bf62fa8a9af0b6b/vllm/model_executor/models/roberta.py
+# Adapted from vllm: https://github.com/vllm-project/vllm/commit/4a18fd14ba4a349291c798a16bf62fa8a9af0b6b/vllm/model_executor/models/roberta.py
pos_list = []
token_list = []
......
......@@ -37,7 +37,7 @@ $ python3 -m sglang.bench_one_batch --correct \
--tensor-parallel-size 2 \
--disable-cuda-graph
```
-We will eanble CUDA Graph support soon.
+We will enable CUDA Graph support soon.
"""
import types
......
......@@ -590,7 +590,7 @@ def v1_generate_response(
echo = False
if (not isinstance(request, list)) and request.echo:
-# TODO: handle the case propmt is token ids
+# TODO: handle the case prompt is token ids
if isinstance(request.prompt, list) and isinstance(request.prompt[0], str):
# for the case of multiple str prompts
prompts = request.prompt
......@@ -646,7 +646,7 @@ def v1_generate_response(
finish_reason = ret_item["meta_info"]["finish_reason"]
if to_file:
-# to make the choise data json serializable
+# to make the choice data json serializable
choice_data = {
"index": 0,
"text": text,
......
......@@ -147,7 +147,7 @@ class ReasoningParser:
Args:
model_type (str): Type of model to parse reasoning from
-stream_reasoning (bool): If Flase, accumulates reasoning content until complete.
+stream_reasoning (bool): If False, accumulates reasoning content until complete.
If True, streams reasoning content as it arrives.
"""
......
......@@ -294,7 +294,7 @@ class SamplingBatchInfo:
# Set the flag to True if any of the two has custom logit processor
self.has_custom_logit_processor = True
-# Note: becasue the __len()__ operator is defined on the temperatures tensor,
+# Note: because the __len()__ operator is defined on the temperatures tensor,
# please make sure any merge operation with len(self) or len(other) is done before
# the merge operation of the temperatures tensor below.
for item in [
......
......@@ -825,7 +825,7 @@ class ServerArgs:
# Multi-node distributed serving
parser.add_argument(
"--dist-init-addr",
"--nccl-init-addr", # For backward compatbility. This will be removed in the future.
"--nccl-init-addr", # For backward compatibility. This will be removed in the future.
type=str,
help="The host address for initializing distributed backend (e.g., `192.168.0.2:25000`).",
)
......@@ -1096,7 +1096,7 @@ class ServerArgs:
parser.add_argument(
"--triton-attention-reduce-in-fp32",
action="store_true",
help="Cast the intermidiate attention results to fp32 to avoid possible crashes related to fp16."
help="Cast the intermediate attention results to fp32 to avoid possible crashes related to fp16."
"This only affects Triton attention kernels.",
)
parser.add_argument(
......@@ -1188,7 +1188,7 @@ class ServerArgs:
type=int,
default=0,
help="The number of shared_experts need to be replicated to fuse with normal experts in deepseek v3/r1, "
"set it to tp_size can get best optimized performace.",
"set it to tp_size can get best optimized performance.",
)
parser.add_argument(
"--disable-chunked-prefix-cache",
......
......@@ -82,12 +82,12 @@ class EAGLEDraftCudaGraphRunner:
self.capture()
except RuntimeError as e:
raise Exception(
f"Capture cuda graph failed: {e}\n"
f"Capture CUDA graph failed: {e}\n"
"Possible solutions:\n"
"1. set --mem-fraction-static to a smaller value (e.g., 0.8 or 0.7)\n"
"2. set --cuda-graph-max-bs to a smaller value (e.g., 16)\n"
"3. disable torch compile by not using --enable-torch-compile\n"
"4. disable cuda graph by --disable-cuda-graph. (Not recommonded. Huge perf loss)\n"
"4. disable CUDA graph by --disable-cuda-graph. (Not recommended. Huge performance loss)\n"
"Open an issue on GitHub https://github.com/sgl-project/sglang/issues/new/choose \n"
)
......@@ -149,7 +149,7 @@ class EAGLEDraftCudaGraphRunner:
# Run and capture
def run_once():
-# Backup two fileds, which will be modified in-place in `draft_forward`.
+# Backup two fields, which will be modified in-place in `draft_forward`.
output_cache_loc_backup = forward_batch.out_cache_loc
hidden_states_backup = forward_batch.spec_info.hidden_states
......
......@@ -167,12 +167,12 @@ class EagleVerifyOutput:
draft_input: EagleDraftInput
# Logit outputs from target worker
logits_output: LogitsProcessorOutput
-# Accepeted token ids including the bonus token
+# Accepted token ids including the bonus token
verified_id: torch.Tensor
-# Accepeted token length per sequence in a batch in CPU.
+# Accepted token length per sequence in a batch in CPU.
accept_length_per_req_cpu: List[int]
-# Accepeted indices from logits_output.next_token_logits
-accepeted_indices: torch.Tensor
+# Accepted indices from logits_output.next_token_logits
+accepted_indices: torch.Tensor
@dataclass
......@@ -316,7 +316,7 @@ class EagleVerifyInput:
This API updates values inside logits_output based on the accepted
tokens. I.e., logits_output.next_token_logits only contains
-accepeted token logits.
+accepted token logits.
"""
bs = self.retrive_index.shape[0]
candidates = self.draft_token.reshape(bs, self.draft_token_num)
......@@ -493,7 +493,7 @@ class EagleVerifyInput:
logits_output=logits_output,
verified_id=verified_id,
accept_length_per_req_cpu=accept_length_cpu,
-accepeted_indices=accept_index,
+accepted_indices=accept_index,
)
else:
assign_req_to_token_pool[(bs,)](
......@@ -539,7 +539,7 @@ class EagleVerifyInput:
logits_output=logits_output,
verified_id=verified_id,
accept_length_per_req_cpu=accept_length_cpu,
-accepeted_indices=accept_index,
+accepted_indices=accept_index,
)
......
......@@ -201,7 +201,7 @@ class EAGLEWorker(TpModelWorker):
self.has_prefill_wrapper_verify = False
else:
raise ValueError(
f"EAGLE is not supportted in attention backend {self.server_args.attention_backend}"
f"EAGLE is not supported in attention backend {self.server_args.attention_backend}"
)
self.draft_model_runner.draft_attn_backend = self.draft_attn_backend
......@@ -245,8 +245,8 @@ class EAGLEWorker(TpModelWorker):
Args:
batch: The batch to run forward. The state of the batch is modified as it runs.
Returns:
-A tuple of the final logit output of the target model, next tokens accepeted,
-the batch id (used for overlap schedule), and number of accepeted tokens.
+A tuple of the final logit output of the target model, next tokens accepted,
+the batch id (used for overlap schedule), and number of accepted tokens.
"""
if batch.forward_mode.is_decode():
with self.draft_tp_context(self.draft_model_runner.tp_group):
......@@ -491,11 +491,11 @@ class EAGLEWorker(TpModelWorker):
)
# Post process based on verified outputs.
-# Pick indices that we care (accepeted)
+# Pick indices that we care (accepted)
logits_output.next_token_logits = logits_output.next_token_logits[
-res.accepeted_indices
+res.accepted_indices
]
-logits_output.hidden_states = logits_output.hidden_states[res.accepeted_indices]
+logits_output.hidden_states = logits_output.hidden_states[res.accepted_indices]
# Prepare the batch for the next draft forwards.
batch.forward_mode = ForwardMode.DECODE
......@@ -597,7 +597,7 @@ class EAGLEWorker(TpModelWorker):
self.capture_for_decode(logits_output, forward_batch.spec_info)
def forward_draft_extend_after_decode(self, batch: ScheduleBatch):
-# Backup fileds that will be modified in-place
+# Backup fields that will be modified in-place
seq_lens_backup = batch.seq_lens.clone()
req_pool_indices_backup = batch.req_pool_indices
accept_length_backup = batch.spec_info.accept_length
......
......@@ -140,7 +140,7 @@ class ChatCompletionSampler(SamplerBase):
max_tokens=self.max_tokens,
)
return response.choices[0].message.content
-# NOTE: BadRequestError is triggered once for MMMU, please uncomment if you are reruning MMMU
+# NOTE: BadRequestError is triggered once for MMMU, please uncomment if you are rerunning MMMU
except openai.BadRequestError as e:
print("Bad Request Error", e)
return ""
......
......@@ -121,7 +121,7 @@ class HumanEval(Eval):
convo=convo,
metrics={
f"pass@{k}": estimate_pass_at_k([total], [correct], k)
-# this will be aggrated so no need of .mean()
+# this will be aggregated so no need of .mean()
for k in self._ks_passes
if total >= k
},
......
......@@ -370,7 +370,7 @@ def test_dtype_gen():
@sgl.function
def dtype_gen(s):
s += "Q: What is the full name of DNS?\n"
s += "A: The full nams is " + sgl.gen("str_res", dtype=str, stop="\n") + "\n"
s += "A: The full names is " + sgl.gen("str_res", dtype=str, stop="\n") + "\n"
s += "Q: Which year was DNS invented?\n"
s += "A: " + sgl.gen("int_res", dtype=int) + "\n"
s += "Q: What is the value of pi?\n"
......
......@@ -278,7 +278,7 @@ def graceful_registry(sub_module_name: str):
f"{sub_module_name} Received signal to shutdown. Performing graceful shutdown..."
)
if signum == signal.SIGTERM:
logger.info(f"{sub_module_name} recive sigterm")
logger.info(f"{sub_module_name} receive sigterm")
signal.signal(signal.SIGTERM, graceful_shutdown)
......
......@@ -25,7 +25,7 @@ pip install -e "python[all]"
pip install torch_memory_saver
pip install transformers==4.51.0 sentence_transformers accelerate peft pandas datasets timm torchaudio==2.6.0
-# For compling xgrammar kernels
+# For compiling xgrammar kernels
pip install cuda-python nvidia-cuda-nvrtc-cu12
# For lmms_evals evaluating MMMU
......
......@@ -43,7 +43,7 @@ pip install -e "python[all]"
pip install torch_memory_saver
pip install transformers==4.51.0 sentence_transformers accelerate peft pandas datasets timm torchaudio==2.6.0
-# For compling xgrammar kernels
+# For compiling xgrammar kernels
pip install cuda-python nvidia-cuda-nvrtc-cu12
# For lmms_evals evaluating MMMU
......
"""
-Convert Yi-VL config into a format useable with SGLang
+Convert Yi-VL config into a format usable with SGLang
Usage: python3 scripts/convert_yi_vl.py --model-path <path-to-model>
"""
......
......@@ -90,7 +90,7 @@ def export_nextn_layer_parameters(input_dir, output_dir, nextn_layer_id):
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Export NextN layer paramerters for DeepSeek-V3/R1"
description="Export NextN layer parameters for DeepSeek-V3/R1"
)
parser.add_argument(
"--input-dir",
......
......@@ -114,7 +114,7 @@ set(SGL_KERNEL_CUDA_FLAGS
"--expt-extended-lambda"
"--threads=32"
-# Supress warnings
+# Suppress warnings
"-Xcompiler=-Wconversion"
"-Xcompiler=-fno-strict-aliasing"
......
......@@ -87,7 +87,7 @@ Third-party libraries:
The main different Between sm80/sm87 and sm86/sm89 is the shared memory size. you can follow the link below for more information https://docs.nvidia.com/cuda/cuda-c-programming-guide/#shared-memory-8-x.
-And for sgl-kernel right now, we can build fa3 on sm80/sm86/sm89/sm90a. Thats mean if you use **A100(tested)**/A*0/**L20(tested)**/L40/L40s/**3090(tested)** you can use fa3.
+And for sgl-kernel right now, we can build fa3 on sm80/sm86/sm89/sm90a. That means if you use **A100(tested)**/A*0/**L20(tested)**/L40/L40s/**3090(tested)** you can use fa3.
### Kernel Development
......