Unverified commit b149b393, authored by Brayden Zhong, committed by GitHub

[CI] Add Ruff to the pre-commit config to remove unused imports, limited to the benchmark/docs/examples folders (#3969)
parent 31dfff7d
@@ -22,6 +22,13 @@ repos:
     rev: 5.13.2
     hooks:
       - id: isort
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.11.2
+    hooks:
+      - id: ruff
+        args: [--select=F401, --fixable=F401]
+        files: ^(benchmark/|docs/|examples/)
+        exclude: \.ipynb$
   - repo: https://github.com/psf/black
     rev: 24.10.0
     hooks:
...
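
For reference, F401 is Ruff's pyflakes-derived rule for unused imports; the `files` and `exclude` keys above restrict the hook to the benchmark/, docs/, and examples/ trees and skip notebooks. A roughly equivalent manual invocation (a sketch, assuming Ruff is installed locally; `--fix` is added here to actually apply the removals):

ruff check --select F401 --fix benchmark/ docs/ examples/

Once this config is in place, the same check can also be run through pre-commit itself:

pre-commit run ruff --all-files
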
@@ -23,7 +23,7 @@ import warnings
 from argparse import ArgumentParser
 from dataclasses import dataclass, field
 from datetime import datetime
-from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union
+from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple
 import aiohttp
 import numpy as np
...
@@ -4,8 +4,6 @@ import math
 import cudnn
 import torch
 import torch.utils.benchmark as benchmark
-import triton
-import triton.language as tl
 from flashinfer import BatchDecodeWithPagedKVCacheWrapper
 from sglang.srt.layers.attention.triton_ops.decode_attention import decode_attention_fwd
...
 import itertools
 import math
-import os
 from typing import Optional, Tuple
 import torch
...
@@ -3,7 +3,6 @@ from typing import Optional, Tuple, Union
 import torch
 import triton
-import triton.language as tl
 from flashinfer.norm import fused_add_rmsnorm, rmsnorm
 from torch import nn
 from vllm import _custom_ops as vllm_ops
...
 import itertools
 import os
-from typing import List
-import numpy as np
-import pytest
 import torch
 import triton
 import triton.language as tl
...
@@ -15,42 +15,28 @@
 import argparse
 import asyncio
 import json
-import os
 import random
 import resource
 import sys
 import time
 import traceback
-import warnings
 from argparse import ArgumentParser
-from dataclasses import dataclass, field
 from datetime import datetime
-from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple
 import aiohttp
 import numpy as np
-import requests
 from launch_server import LORA_PATH, NUM_LORAS
 from tqdm.asyncio import tqdm
-from transformers import (
-    AutoTokenizer,
-    PreTrainedTokenizer,
-    PreTrainedTokenizerBase,
-    PreTrainedTokenizerFast,
-)
+from transformers import PreTrainedTokenizerBase
 from sglang.bench_serving import (
     AIOHTTP_TIMEOUT,
-    SHAREGPT_URL,
-    BenchmarkMetrics,
     RequestFuncInput,
     RequestFuncOutput,
     calculate_metrics,
-    check_chat_template,
-    get_model,
     get_request,
     get_tokenizer,
-    parse_request_rate_range,
     remove_prefix,
     sample_random_requests,
 )
...
@@ -6,7 +6,6 @@ import time
 import numpy as np
 import pandas as pd
 import tiktoken
-from tqdm import tqdm
 from sglang.test.test_utils import (
     add_common_sglang_args_and_parse,
...
 import argparse
-import PIL.Image
 import torch
 from data_utils import save_json
 from eval_utils import (
...
@@ -5,7 +5,6 @@ import os
 import re
 import yaml
-from datasets import concatenate_datasets, load_dataset
 DOMAIN_CAT2SUB_CAT = {
     "Art and Design": ["Art", "Art_Theory", "Design", "Music"],
...
-import itertools
 import json
-import os
 import random
-import string
-import threading
 import time
 from argparse import ArgumentParser
 from pathlib import Path
-from typing import Union
 from tqdm import tqdm
...
-import os
 import weakref
 import nest_asyncio
...
-import os
 import weakref
 from sglang.utils import execute_shell_command, reserve_port
...
@@ -4,8 +4,6 @@ export OPENAI_API_KEY=sk-******
 python3 openai_example_chat.py
 """
-import json
 import sglang as sgl
...
 # NOTE: Currently this can only be run through HTTP requests.
-import json
 from concurrent.futures import ThreadPoolExecutor
 from json_decode import character_regex
...
@@ -5,11 +5,6 @@ python offline_batch_inference_vlm.py --model-path Qwen/Qwen2-VL-7B-Instruct --c
 import argparse
 import dataclasses
-import io
-import os
-import requests
-from PIL import Image
 import sglang as sgl
 from sglang.srt.conversation import chat_templates
...
@@ -12,7 +12,7 @@ import requests
 import torch
 from sglang.test.test_utils import is_in_ci
-from sglang.utils import print_highlight, terminate_process, wait_for_server
+from sglang.utils import terminate_process, wait_for_server
 if is_in_ci():
     from docs.backend.patch import launch_server_cmd
...
@@ -11,7 +11,6 @@ you should create the input.jsonl file with the following content:
 """
 import json
-import os
 import time
 import openai
...
@@ -5,7 +5,6 @@ python openai_chat.py
 """
 import openai
-from openai import OpenAI
 client = openai.Client(base_url="http://127.0.0.1:30000/v1", api_key="EMPTY")
...
@@ -9,7 +9,7 @@ import requests
 from sglang.srt.hf_transformers_utils import get_tokenizer
 from sglang.test.test_utils import is_in_ci
-from sglang.utils import print_highlight, terminate_process, wait_for_server
+from sglang.utils import terminate_process, wait_for_server
 if is_in_ci():
     from docs.backend.patch import launch_server_cmd
...
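
All of the hunks above are the mechanical effect of that one rule. As a hypothetical illustration (file name and contents invented, not part of this commit), this is the kind of line the hook flags and deletes:

# demo_unused.py -- hypothetical example
import os    # F401: imported but never used; the autofix removes this line
import json  # used below, so it is kept

print(json.dumps({"status": "ok"}))

Running `ruff check --select F401 demo_unused.py` reports something like `demo_unused.py:2:8: F401 [*] `os` imported but unused`, where the `[*]` marker indicates the violation is autofixable.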