Unverified commit 62797440, authored by Chang Su, committed by GitHub

[Lint] Add `python/sglang` to ruff F401 checks and remove unused imports in files (#11685)

parent 2614adf9
@@ -27,9 +27,9 @@ repos:
     rev: v0.11.7
     hooks:
       - id: ruff
-        args: [--select=F401, --fixable=F401]
-        files: ^(benchmark/|docs/|examples/)
-        exclude: \.ipynb$|^python/sglang/srt/grpc/.*_pb2\.py$|^python/sglang/srt/grpc/.*_pb2_grpc\.py$|^python/sglang/srt/grpc/.*_pb2\.pyi$|^python/sglang/srt/grpc/.*_pb2_grpc\.pyi$
+        args: [--select=F401,F821, --fixable=F401]
+        files: ^(benchmark/|docs/|examples/|python/sglang/)
+        exclude: __init__\.py$|\.ipynb$|^python/sglang/srt/grpc/.*_pb2\.py$|^python/sglang/srt/grpc/.*_pb2_grpc\.py$|^python/sglang/srt/grpc/.*_pb2\.pyi$|^python/sglang/srt/grpc/.*_pb2_grpc\.pyi$
   - repo: https://github.com/psf/black
     rev: 24.10.0
     hooks:
...
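For context: ruff's F401 rule flags imports that are never used, and the newly added F821 flags references to undefined names; only F401 is listed as fixable, so F821 findings are report-only. A minimal, hypothetical module showing what each rule catches:

import os  # F401: `os` is imported but never used; --fixable=F401 lets ruff delete it


def greet() -> str:
    # F821: `username` is never defined or imported, so ruff reports an
    # undefined name here (calling this function would raise NameError).
    return "hello " + username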
@@ -15,7 +15,7 @@ if not is_hpu():
     # ROCm does not use vllm custom allreduce
     if use_vllm_custom_allreduce and not is_hip():
         try:
-            import vllm._C
+            import vllm._C  # noqa: F401
         except ImportError as e:
             logger.warning("Failed to import from vllm._C with %r", e)
     else:
...
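This hunk (and the sgl_kernel hunks further down) shows why the lint change needs `# noqa: F401` escapes: the import exists purely for its side effect of loading a compiled extension that registers custom ops, so the name is never referenced and an auto-fix would otherwise strip it. A sketch of the general pattern, using a hypothetical extension module `my_native_ext`:

import logging

logger = logging.getLogger(__name__)

try:
    # Side-effect import: loading the module registers its compiled kernels.
    # The noqa marker tells ruff the apparently unused import is intentional.
    import my_native_ext  # noqa: F401

    native_ext_available = True
except ImportError as e:
    logger.warning("Failed to import my_native_ext with %r", e)
    native_ext_available = False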
@@ -9,7 +9,6 @@ from unittest.mock import patch
 import torch
 import torch.fx as fx

-import sglang.srt.compilation.weak_ref_tensor_jit
 from sglang.srt.compilation.compilation_config import CompilationConfig
 from sglang.srt.compilation.compilation_counter import compilation_counter
...
 import math
-import os
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Tuple
...
-from typing import Any, List, Optional, Union
-from transformers import AutoProcessor, LlamaTokenizerFast, PretrainedConfig
-from transformers.feature_extraction_utils import BatchFeature
-from transformers.image_utils import ImageInput
-from transformers.processing_utils import ProcessingKwargs, Unpack
-from transformers.tokenization_utils_base import PreTokenizedInput, TextInput
+from transformers import AutoProcessor, PretrainedConfig
+from transformers.processing_utils import ProcessingKwargs

 try:
     from transformers import Qwen2_5_VLProcessor
...
@@ -14,17 +14,12 @@
 # limitations under the License.
 """Falcon-H1 model configuration"""

-import enum
 from transformers.configuration_utils import PretrainedConfig
-from transformers.modeling_rope_utils import rope_config_validation
 from transformers.utils import logging

 from sglang.srt.configs.mamba_utils import Mamba2CacheParams, Mamba2StateShape
-from sglang.srt.layers.dp_attention import (
-    get_attention_tp_size,
-    get_tensor_model_parallel_world_size,
-)
+from sglang.srt.layers.dp_attention import get_tensor_model_parallel_world_size

 logger = logging.get_logger(__name__)
...
@@ -21,7 +21,6 @@ from transformers.modeling_rope_utils import rope_config_validation
 from transformers.utils import logging

 from sglang.srt.configs.mamba_utils import Mamba2CacheParams, Mamba2StateShape
-from sglang.srt.distributed.utils import divide
 from sglang.srt.layers.dp_attention import get_attention_tp_size

 logger = logging.get_logger(__name__)
...
 # SPDX-License-Identifier: Apache-2.0
 import logging
-from typing import Generator, List, Optional, Tuple
+from typing import Generator, Optional, Tuple
 from urllib.parse import urlparse

 import torch
...
 import logging
 import os
-from typing import List, Optional
+from typing import List

 import torch
...
@@ -25,7 +25,7 @@ import time
 from collections import deque
 from dataclasses import dataclass
 from http import HTTPStatus
-from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Type, Union
+from typing import TYPE_CHECKING, List, Optional, Tuple, Type, Union

 import torch
 from torch.distributed import ProcessGroup
@@ -48,10 +48,7 @@ from sglang.srt.disaggregation.utils import (
 )
 from sglang.srt.layers.dp_attention import get_attention_tp_size
 from sglang.srt.managers.schedule_batch import FINISH_ABORT, RequestStage, ScheduleBatch
-from sglang.srt.mem_cache.allocator import (
-    BaseTokenToKVPoolAllocator,
-    SWATokenToKVPoolAllocator,
-)
+from sglang.srt.mem_cache.allocator import BaseTokenToKVPoolAllocator
 from sglang.srt.mem_cache.base_prefix_cache import BasePrefixCache
 from sglang.srt.mem_cache.memory_pool import (
     HybridLinearKVPool,
@@ -61,7 +58,6 @@ from sglang.srt.mem_cache.memory_pool import (
     ReqToTokenPool,
     SWAKVPool,
 )
-from sglang.srt.model_executor.forward_batch_info import ForwardMode
 from sglang.srt.utils import get_int_env_var, require_mlp_sync
 from sglang.srt.utils.torch_memory_saver_adapter import TorchMemorySaverAdapter
...
@@ -20,7 +20,6 @@ Life cycle of a request in the prefill server
 from __future__ import annotations

 import logging
-import threading
 import time
 from collections import deque
 from http import HTTPStatus
@@ -54,7 +53,7 @@ from sglang.srt.mem_cache.memory_pool import (
     NSATokenToKVPool,
     SWAKVPool,
 )
-from sglang.srt.model_executor.forward_batch_info import ForwardMode, PPProxyTensors
+from sglang.srt.model_executor.forward_batch_info import PPProxyTensors
 from sglang.srt.utils import (
     DynamicGradMode,
     broadcast_pyobj,
...
@@ -32,7 +32,7 @@ try:
         ops.meta_size()
     else:
         # Use custom allreduce from sgl kernel (ROCM and TRT-LLM)
-        import sgl_kernel
+        import sgl_kernel  # noqa: F401
     custom_ar = True
 except Exception:
     # For CPUs
...
@@ -4,7 +4,7 @@ import math
 import os
 from contextlib import contextmanager
 from enum import IntEnum
-from typing import Any, Callable, List, Optional, TypeVar, Union
+from typing import Optional, Union

 import torch
 import torch.distributed as dist
@@ -24,7 +24,7 @@ if _is_hip:
 mscclpp_is_available = False
 if _is_cuda:
     try:
-        import sgl_kernel
+        import sgl_kernel  # noqa: F401
         mscclpp_is_available = True
     except:
...
@@ -9,7 +9,7 @@ from torch.distributed import ProcessGroup
 from sglang.srt.distributed.device_communicators.all_reduce_utils import (
     SYMM_MEM_ALL_REDUCE_MAX_SIZES,
 )
-from sglang.srt.utils import get_device_capability, is_cuda, is_hip
+from sglang.srt.utils import is_cuda, is_hip

 try:
     import torch.distributed._symmetric_memory as torch_symm_mem
...
 import base64
-import os
 import pickle
 import time
 from pathlib import Path
...
 # SPDX-License-Identifier: Apache-2.0
 # Copied from vLLM
-import json
 import logging
 from abc import ABC, abstractmethod
 from typing import Union
...
@@ -3,7 +3,6 @@
 # Adapted from vLLM: https://github.com/vllm-project/vllm/blob/1b9902806915040ac9b3029f2ab7522ec505afc3/vllm/entrypoints/harmony_utils.py
 # Slight differences in processing chat messages
 import datetime
-import json
 from collections.abc import Iterable
 from typing import Literal, Optional, Union
...
@@ -19,7 +19,6 @@ This file implements HTTP APIs for the inference engine via fastapi.
 import asyncio
 import dataclasses
-import json
 import logging
 import multiprocessing as multiprocessing
 import os
...
-import copy
-import dataclasses
 import multiprocessing
-import pickle
-import threading
 import time
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import List, Optional, Tuple

-import pybase64
 import requests
 import torch
-import torch.distributed as dist

 from sglang.srt.entrypoints.EngineBase import EngineBase
 from sglang.srt.entrypoints.http_server import launch_server
...
@@ -3,8 +3,6 @@ from typing import Tuple
 import torch

-from sglang.srt.utils import get_bool_env_var
-
 def balanced_packing(
     weight: torch.Tensor, num_packs: int
...
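Most hunks above are mechanical deletions of genuinely unused imports. The `__init__\.py$` exclusion added in the first hunk is the other side of the same coin: package `__init__.py` files routinely import names only to re-export them, which looks unused to a per-file F401 check. A hypothetical sketch of that re-export pattern (package and module names invented for illustration):

# mypackage/__init__.py
# These imports define the package's public API rather than being used in
# this file, so a naive F401 auto-fix would break `from mypackage import ...`.
from mypackage.engine import Engine  # noqa: F401
from mypackage.server import launch_server  # noqa: F401

__all__ = ["Engine", "launch_server"]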