"docs/basic_usage/deepseek.md" did not exist on "b39532587bc9328a42f99b522cb9997a210a32b2"
Unverified Commit 62797440 authored by Chang Su, committed by GitHub

[Lint] Add `python/sglang` to ruff F401 checks and remove unused imports in files (#11685)

parent 2614adf9
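Ruff's F401 rule flags names that are imported but never referenced. This commit handles the hits in two ways: imports that are genuinely dead are deleted, while imports that exist only for their side effects (availability probes, kernel registration, fallback bindings) are kept and annotated with `# noqa: F401` so the linter skips that line. A minimal illustration of both cases (file and names invented, not from this commit):

```python
# example.py -- hypothetical module, for illustration only
import json  # F401: "json" is never referenced below; ruff says delete it

# Imported only to check that the optional dependency is installed;
# the binding is intentionally unused, so F401 is suppressed instead.
try:
    import numpy  # noqa: F401

    HAS_NUMPY = True
except ImportError:
    HAS_NUMPY = False
```

With `python/sglang` added to the lint configuration, a check like `ruff check --select F401 python/sglang` would report the first import and accept the second.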
...
@@ -6,11 +6,7 @@ from typing import List
 from sglang.srt.entrypoints.openai.protocol import Tool
 from sglang.srt.function_call.base_format_detector import BaseFormatDetector
-from sglang.srt.function_call.core_types import (
-    StreamingParseResult,
-    StructureInfo,
-    _GetInfoFunc,
-)
+from sglang.srt.function_call.core_types import StreamingParseResult, _GetInfoFunc
 from sglang.srt.function_call.ebnf_composer import EBNFComposer

 logger = logging.getLogger(__name__)
...
 import json
 import re
 from typing import List
 from sglang.srt.entrypoints.openai.protocol import Tool
...
 import json
 from json import JSONDecodeError, JSONDecoder
 from json.decoder import WHITESPACE
 from typing import Any, List, Literal, Optional, Tuple, Union
...
@@ -70,7 +70,7 @@ def compile_proto(proto_file: Path, output_dir: Path, verbose: bool = True) -> b
     # Check if grpc_tools is available
     try:
-        import grpc_tools.protoc
+        import grpc_tools.protoc  # noqa: F401
     except ImportError:
         print("Error: grpcio-tools not installed")
         print(
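The `# noqa: F401` above marks an availability probe: the import exists only to raise `ImportError` when `grpcio-tools` is missing, so the bound name is never used. A sketch of the same pattern as a standalone helper (the function name and docstring are invented):

```python
def grpc_tools_available() -> bool:
    """Report whether grpcio-tools is importable (illustrative helper)."""
    try:
        # The import itself is the test; the name is deliberately unused,
        # which is why F401 is silenced rather than the import deleted.
        import grpc_tools.protoc  # noqa: F401
    except ImportError:
        return False
    return True
```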
...
@@ -27,7 +27,6 @@ from sglang.srt.managers.io_struct import (
     TokenizedEmbeddingReqInput,
     TokenizedGenerateReqInput,
 )
-from sglang.srt.managers.scheduler import is_health_check_generate_req
 from sglang.srt.server_args import PortArgs, ServerArgs
 from sglang.srt.utils import get_zmq_socket, kill_process_tree
 from sglang.utils import get_exception_traceback
...
@@ -380,4 +380,7 @@ if not (
     logger.info(
         "sgl-kernel is not available on Non-NV, Non-AMD platforms or Non-AMX CPUs. Fallback to other kernel libraries."
     )
-    from vllm.model_executor.layers.activation import GeluAndMul, SiluAndMul
+    from vllm.model_executor.layers.activation import (  # noqa: F401
+        GeluAndMul,
+        SiluAndMul,
+    )
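Here the vllm activations are fallback kernels bound into the module namespace for use by other code, so the names look unused to ruff even though deleting them would break importers. A self-contained sketch of that fallback-binding idea, assuming a `sgl_kernel.silu_and_mul` fast path that may not exist in your environment:

```python
import math


def _silu_and_mul_fallback(x: float, y: float) -> float:
    # Pure-Python stand-in for an accelerated SiLU-and-multiply kernel.
    return (x / (1.0 + math.exp(-x))) * y


try:
    from sgl_kernel import silu_and_mul  # noqa: F401  (fast path, if installed)
except ImportError:
    # Bind the fallback under the public name so downstream modules can
    # still do `from this_module import silu_and_mul`.
    silu_and_mul = _silu_and_mul_fallback
```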
...
@@ -20,7 +20,6 @@ if TYPE_CHECKING:
     from sglang.srt.layers.radix_attention import RadixAttention
     from sglang.srt.model_executor.model_runner import ModelRunner
...
 import os
 import numpy as np
...
 from __future__ import annotations
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Optional, Union
+from typing import TYPE_CHECKING, Optional
 import torch
...
@@ -2,7 +2,6 @@
 # -*- coding: utf-8 -*-
 # Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
 import warnings
 from typing import Optional
 import torch
...
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
-from typing import Optional, Tuple
+from typing import Optional
 import torch
 import triton
...
@@ -3,9 +3,7 @@
 # Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
 import torch
 import torch.nn.functional as F
 import triton
 import triton.language as tl
 from sglang.srt.layers.attention.fla.utils import tensor_cache
...
@@ -5,7 +5,6 @@
 # This backward pass is faster for dimensions up to 8k, but after that it's much slower due to register spilling.
 # The models we train have hidden dim up to 8k anyway (e.g. Llama 70B), so this is fine.
 import math
 import torch
 import torch.nn.functional as F
...
@@ -9,8 +9,6 @@ import triton
 import triton.language as tl
 from sglang.srt.layers.attention.fla.index import prepare_chunk_indices
 from sglang.srt.layers.attention.fla.op import safe_exp
 from sglang.srt.layers.attention.fla.utils import check_shared_mem

 @triton.heuristics({"IS_VARLEN": lambda args: args["cu_seqlens"] is not None})
...
@@ -50,7 +50,6 @@ if is_flashinfer_available():
         fast_decode_plan,
     )
     from flashinfer.cascade import merge_state
     from flashinfer.decode import _get_range_buf, get_seq_lens

 class WrapperDispatch(Enum):
...
-from typing import Optional, Union
+from typing import Optional
 import torch
...
 from dataclasses import astuple, dataclass
 from functools import lru_cache
 from typing import Optional, Union
 import torch
 import torch.nn.functional as F
 from sglang.srt.layers.attention.base_attn_backend import AttentionBackend
 from sglang.srt.layers.attention.fla.chunk import chunk_gated_delta_rule
...
@@ -14,7 +14,7 @@ if TYPE_CHECKING:
 class IntelAMXAttnBackend(AttentionBackend):
     def __init__(self, model_runner: ModelRunner):
-        import sgl_kernel
+        import sgl_kernel  # noqa: F401

         super().__init__()
         self.forward_metadata = None
...
@@ -4,7 +4,6 @@
 from typing import List, Optional, Union
 import numpy as np
 import torch
 import triton
 import triton.language as tl
...
@@ -10,7 +10,6 @@
 import torch
 import triton
 import triton.language as tl
 from einops import rearrange
 from packaging import version
...
@@ -13,7 +13,7 @@ def is_mla_preprocess_enabled() -> bool:
 if is_mla_preprocess_enabled():
-    import sgl_kernel_npu
+    import sgl_kernel_npu  # noqa: F401
     import torch_npu

     torch.npu.config.allow_internal_format = True
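On the NPU path, `import sgl_kernel_npu` appears to exist purely for its import-time side effects (registering custom kernels), so nothing from it is referenced by name afterwards and F401 must be silenced rather than the import removed. A toy, runnable version of the side-effect-import pattern (all names invented):

```python
# Toy registry: the "module body" function below stands in for code that
# runs when `import npu_kernels` executes in the real pattern.
KERNELS: dict[str, object] = {}


def _module_body_of_npu_kernels() -> None:
    # In the real pattern this runs at import time, which is why the
    # importing file keeps `import sgl_kernel_npu  # noqa: F401`.
    KERNELS["fused_rms_norm"] = lambda x: x


_module_body_of_npu_kernels()
assert "fused_rms_norm" in KERNELS
```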