Unverified Commit 62797440 authored by Chang Su's avatar Chang Su Committed by GitHub
Browse files

[Lint] Add `python/sglang` to ruff F401 checks and remove unused imports in files (#11685)

parent 2614adf9
......@@ -27,9 +27,9 @@ repos:
rev: v0.11.7
hooks:
- id: ruff
args: [--select=F401, --fixable=F401]
files: ^(benchmark/|docs/|examples/)
exclude: \.ipynb$|^python/sglang/srt/grpc/.*_pb2\.py$|^python/sglang/srt/grpc/.*_pb2_grpc\.py$|^python/sglang/srt/grpc/.*_pb2\.pyi$|^python/sglang/srt/grpc/.*_pb2_grpc\.pyi$
args: [--select=F401,F821, --fixable=F401]
files: ^(benchmark/|docs/|examples/|python/sglang/)
exclude: __init__\.py$|\.ipynb$|^python/sglang/srt/grpc/.*_pb2\.py$|^python/sglang/srt/grpc/.*_pb2_grpc\.py$|^python/sglang/srt/grpc/.*_pb2\.pyi$|^python/sglang/srt/grpc/.*_pb2_grpc\.pyi$
- repo: https://github.com/psf/black
rev: 24.10.0
hooks:
......
......@@ -15,7 +15,7 @@ if not is_hpu():
# ROCm does not use vllm custom allreduce
if use_vllm_custom_allreduce and not is_hip():
try:
import vllm._C
import vllm._C # noqa: F401
except ImportError as e:
logger.warning("Failed to import from vllm._C with %r", e)
else:
......
......@@ -9,7 +9,6 @@ from unittest.mock import patch
import torch
import torch.fx as fx
import sglang.srt.compilation.weak_ref_tensor_jit
from sglang.srt.compilation.compilation_config import CompilationConfig
from sglang.srt.compilation.compilation_counter import compilation_counter
......
import math
import os
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple
......
from typing import Any, List, Optional, Union
from transformers import AutoProcessor, LlamaTokenizerFast, PretrainedConfig
from transformers.feature_extraction_utils import BatchFeature
from transformers.image_utils import ImageInput
from transformers.processing_utils import ProcessingKwargs, Unpack
from transformers.tokenization_utils_base import PreTokenizedInput, TextInput
from transformers import AutoProcessor, PretrainedConfig
from transformers.processing_utils import ProcessingKwargs
try:
from transformers import Qwen2_5_VLProcessor
......
......@@ -14,17 +14,12 @@
# limitations under the License.
"""Falcon-H1 model configuration"""
import enum
from transformers.configuration_utils import PretrainedConfig
from transformers.modeling_rope_utils import rope_config_validation
from transformers.utils import logging
from sglang.srt.configs.mamba_utils import Mamba2CacheParams, Mamba2StateShape
from sglang.srt.layers.dp_attention import (
get_attention_tp_size,
get_tensor_model_parallel_world_size,
)
from sglang.srt.layers.dp_attention import get_tensor_model_parallel_world_size
logger = logging.get_logger(__name__)
......
......@@ -21,7 +21,6 @@ from transformers.modeling_rope_utils import rope_config_validation
from transformers.utils import logging
from sglang.srt.configs.mamba_utils import Mamba2CacheParams, Mamba2StateShape
from sglang.srt.distributed.utils import divide
from sglang.srt.layers.dp_attention import get_attention_tp_size
logger = logging.get_logger(__name__)
......
# SPDX-License-Identifier: Apache-2.0
import logging
from typing import Generator, List, Optional, Tuple
from typing import Generator, Optional, Tuple
from urllib.parse import urlparse
import torch
......
import logging
import os
from typing import List, Optional
from typing import List
import torch
......
......@@ -25,7 +25,7 @@ import time
from collections import deque
from dataclasses import dataclass
from http import HTTPStatus
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Type, Union
from typing import TYPE_CHECKING, List, Optional, Tuple, Type, Union
import torch
from torch.distributed import ProcessGroup
......@@ -48,10 +48,7 @@ from sglang.srt.disaggregation.utils import (
)
from sglang.srt.layers.dp_attention import get_attention_tp_size
from sglang.srt.managers.schedule_batch import FINISH_ABORT, RequestStage, ScheduleBatch
from sglang.srt.mem_cache.allocator import (
BaseTokenToKVPoolAllocator,
SWATokenToKVPoolAllocator,
)
from sglang.srt.mem_cache.allocator import BaseTokenToKVPoolAllocator
from sglang.srt.mem_cache.base_prefix_cache import BasePrefixCache
from sglang.srt.mem_cache.memory_pool import (
HybridLinearKVPool,
......@@ -61,7 +58,6 @@ from sglang.srt.mem_cache.memory_pool import (
ReqToTokenPool,
SWAKVPool,
)
from sglang.srt.model_executor.forward_batch_info import ForwardMode
from sglang.srt.utils import get_int_env_var, require_mlp_sync
from sglang.srt.utils.torch_memory_saver_adapter import TorchMemorySaverAdapter
......
......@@ -20,7 +20,6 @@ Life cycle of a request in the prefill server
from __future__ import annotations
import logging
import threading
import time
from collections import deque
from http import HTTPStatus
......@@ -54,7 +53,7 @@ from sglang.srt.mem_cache.memory_pool import (
NSATokenToKVPool,
SWAKVPool,
)
from sglang.srt.model_executor.forward_batch_info import ForwardMode, PPProxyTensors
from sglang.srt.model_executor.forward_batch_info import PPProxyTensors
from sglang.srt.utils import (
DynamicGradMode,
broadcast_pyobj,
......
......@@ -32,7 +32,7 @@ try:
ops.meta_size()
else:
# Use custom allreduce from sgl kernel (ROCM and TRT-LLM)
import sgl_kernel
import sgl_kernel # noqa: F401
custom_ar = True
except Exception:
# For CPUs
......
......@@ -4,7 +4,7 @@ import math
import os
from contextlib import contextmanager
from enum import IntEnum
from typing import Any, Callable, List, Optional, TypeVar, Union
from typing import Optional, Union
import torch
import torch.distributed as dist
......@@ -24,7 +24,7 @@ if _is_hip:
mscclpp_is_available = False
if _is_cuda:
try:
import sgl_kernel
import sgl_kernel # noqa: F401
mscclpp_is_available = True
except:
......
......@@ -9,7 +9,7 @@ from torch.distributed import ProcessGroup
from sglang.srt.distributed.device_communicators.all_reduce_utils import (
SYMM_MEM_ALL_REDUCE_MAX_SIZES,
)
from sglang.srt.utils import get_device_capability, is_cuda, is_hip
from sglang.srt.utils import is_cuda, is_hip
try:
import torch.distributed._symmetric_memory as torch_symm_mem
......
import base64
import os
import pickle
import time
from pathlib import Path
......
# SPDX-License-Identifier: Apache-2.0
# Copied from vLLM
import json
import logging
from abc import ABC, abstractmethod
from typing import Union
......
......@@ -3,7 +3,6 @@
# Adapted from vLLM: https://github.com/vllm-project/vllm/blob/1b9902806915040ac9b3029f2ab7522ec505afc3/vllm/entrypoints/harmony_utils.py
# Slight differences in processing chat messages
import datetime
import json
from collections.abc import Iterable
from typing import Literal, Optional, Union
......
......@@ -19,7 +19,6 @@ This file implements HTTP APIs for the inference engine via fastapi.
import asyncio
import dataclasses
import json
import logging
import multiprocessing as multiprocessing
import os
......
import copy
import dataclasses
import multiprocessing
import pickle
import threading
import time
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import List, Optional, Tuple
import pybase64
import requests
import torch
import torch.distributed as dist
from sglang.srt.entrypoints.EngineBase import EngineBase
from sglang.srt.entrypoints.http_server import launch_server
......
......@@ -3,8 +3,6 @@ from typing import Tuple
import torch
from sglang.srt.utils import get_bool_env_var
def balanced_packing(
weight: torch.Tensor, num_packs: int
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment