Unverified Commit 01bfb22b authored by SangBin Cho's avatar SangBin Cho Committed by GitHub
Browse files

[CI] Try introducing isort. (#3495)

parent e67c295b
from vllm.attention.backends.abstract import AttentionBackend, AttentionMetadata
from vllm.attention.backends.abstract import (AttentionBackend,
AttentionMetadata)
from vllm.attention.layer import Attention
from vllm.attention.selector import get_attn_backend
......
......@@ -7,12 +7,13 @@ flashinfer for all the attention operations.
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple, Type
from flash_attn import flash_attn_varlen_func
import torch
from flash_attn import flash_attn_varlen_func
from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
AttentionMetadata)
from vllm.attention.ops.paged_attn import PagedAttention, PagedAttentionMetadata
from vllm.attention.ops.paged_attn import (PagedAttention,
PagedAttentionMetadata)
class FlashAttentionBackend(AttentionBackend):
......
......@@ -11,7 +11,8 @@ from xformers.ops.fmha.attn_bias import (AttentionBias,
from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
AttentionMetadata)
from vllm.attention.ops.paged_attn import PagedAttention, PagedAttentionMetadata
from vllm.attention.ops.paged_attn import (PagedAttention,
PagedAttentionMetadata)
from vllm.logger import init_logger
from vllm.utils import is_hip
......
......@@ -3,8 +3,7 @@ from typing import Dict, List, Optional, Tuple
import torch
from vllm._C import cache_ops
from vllm._C import ops
from vllm._C import cache_ops, ops
from vllm.attention.ops.prefix_prefill import context_attention_fwd
# Should be the same as PARTITION_SIZE in `paged_attention_v2_launcher`.
......
......@@ -13,11 +13,13 @@ logger = init_logger(__name__)
def get_attn_backend(dtype: torch.dtype) -> AttentionBackend:
if _can_use_flash_attn(dtype):
logger.info("Using FlashAttention backend.")
from vllm.attention.backends.flash_attn import FlashAttentionBackend # noqa: F401
from vllm.attention.backends.flash_attn import ( # noqa: F401
FlashAttentionBackend)
return FlashAttentionBackend
else:
logger.info("Using XFormers backend.")
from vllm.attention.backends.xformers import XFormersBackend # noqa: F401
from vllm.attention.backends.xformers import ( # noqa: F401
XFormersBackend)
return XFormersBackend
......
from typing import TYPE_CHECKING, Optional, Union, ClassVar
from dataclasses import dataclass
import json
import os
from packaging.version import Version
from dataclasses import dataclass
from typing import TYPE_CHECKING, ClassVar, Optional, Union
import json
import torch
from packaging.version import Version
from transformers import PretrainedConfig
from vllm.logger import init_logger
from vllm.transformers_utils.config import get_config
from vllm.utils import get_cpu_memory, is_hip, is_neuron, get_nvcc_cuda_version
from vllm.utils import get_cpu_memory, get_nvcc_cuda_version, is_hip, is_neuron
if TYPE_CHECKING:
from ray.util.placement_group import PlacementGroup
......@@ -103,7 +103,8 @@ class ModelConfig:
if os.environ.get("VLLM_USE_MODELSCOPE", "False").lower() == "true":
# download model from ModelScope hub,
# lazy import so that modelscope is not required for normal use.
from modelscope.hub.snapshot_download import snapshot_download # pylint: disable=C
# pylint: disable=C.
from modelscope.hub.snapshot_download import snapshot_download
if not os.path.exists(model):
model_path = snapshot_download(model_id=model,
......
"""A block manager that manages token blocks."""
import enum
from abc import ABC, abstractmethod
from itertools import count, takewhile
from os.path import commonprefix
from typing import Dict, List, Optional, Set, Tuple
from abc import ABC, abstractmethod
from vllm.block import BlockTable, PhysicalTokenBlock
from vllm.core.evictor import EvictionPolicy, Evictor, make_evictor
from vllm.logger import init_logger
from vllm.sequence import Sequence, SequenceGroup, SequenceStatus
from vllm.utils import Device
from vllm.core.evictor import Evictor, EvictionPolicy, make_evictor
from vllm.logger import init_logger
logger = init_logger(__name__)
......
import enum
from typing import OrderedDict
from abc import ABC, abstractmethod, abstractproperty
from typing import OrderedDict
from vllm.block import PhysicalTokenBlock
......
from collections import deque
import enum
import time
from typing import Deque, Dict, Iterable, List, Optional, Tuple, Union, Set
from collections import deque
from typing import Deque, Dict, Iterable, List, Optional, Set, Tuple, Union
from vllm.config import CacheConfig, LoRAConfig, SchedulerConfig
from vllm.core.block_manager import AllocStatus, BlockSpaceManager
from vllm.core.policy import PolicyFactory
from vllm.lora.request import LoRARequest
from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.sequence import (Sequence, SequenceData, SequenceGroup,
SequenceGroupMetadata, SequenceStatus)
......
......@@ -3,9 +3,8 @@ import dataclasses
from dataclasses import dataclass
from typing import Optional, Tuple
from vllm.config import (CacheConfig, DeviceConfig, ModelConfig,
ParallelConfig, SchedulerConfig, LoRAConfig,
TokenizerPoolConfig)
from vllm.config import (CacheConfig, DeviceConfig, LoRAConfig, ModelConfig,
ParallelConfig, SchedulerConfig, TokenizerPoolConfig)
@dataclass
......
......@@ -2,17 +2,17 @@ import asyncio
import os
import time
from functools import partial
from typing import (Callable, Dict, Iterable, List, Optional, Set, Tuple, Type,
Union, AsyncIterator)
from typing import (AsyncIterator, Callable, Dict, Iterable, List, Optional,
Set, Tuple, Type, Union)
from transformers import PreTrainedTokenizer
from vllm.lora.request import LoRARequest
from vllm.config import ModelConfig
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.llm_engine import LLMEngine
from vllm.engine.ray_utils import initialize_ray_cluster, ray
from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.outputs import RequestOutput
from vllm.sampling_params import SamplingParams
......
......@@ -4,22 +4,22 @@ from typing import Iterable, List, Optional, Tuple, Type, Union
from transformers import PreTrainedTokenizer
import vllm
from vllm.lora.request import LoRARequest
from vllm.config import (CacheConfig, DeviceConfig, ModelConfig,
ParallelConfig, SchedulerConfig, LoRAConfig)
from vllm.config import (CacheConfig, DeviceConfig, LoRAConfig, ModelConfig,
ParallelConfig, SchedulerConfig)
from vllm.core.scheduler import Scheduler, SchedulerOutputs
from vllm.engine.arg_utils import EngineArgs
from vllm.executor.executor_base import ExecutorBase
from vllm.engine.metrics import StatLogger, Stats
from vllm.engine.ray_utils import initialize_ray_cluster
from vllm.executor.executor_base import ExecutorBase
from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.outputs import RequestOutput
from vllm.sampling_params import SamplingParams
from vllm.sequence import (SamplerOutput, Sequence, SequenceGroup,
SequenceGroupOutput, SequenceOutput, SequenceStatus)
from vllm.transformers_utils.detokenizer import Detokenizer
from vllm.transformers_utils.tokenizer_group import (BaseTokenizerGroup,
get_tokenizer_group)
from vllm.transformers_utils.detokenizer import Detokenizer
from vllm.utils import Counter
logger = init_logger(__name__)
......
from vllm.logger import init_logger
from prometheus_client import (Counter, Gauge, Histogram, Info, REGISTRY,
disable_created_metrics)
import time
import numpy as np
from typing import Dict, List
from dataclasses import dataclass
from typing import Dict, List
import numpy as np
from prometheus_client import (REGISTRY, Counter, Gauge, Histogram, Info,
disable_created_metrics)
from vllm.logger import init_logger
logger = init_logger(__name__)
......
import pickle
from typing import Optional, List, Tuple
from typing import List, Optional, Tuple
from vllm.config import ParallelConfig
from vllm.logger import init_logger
from vllm.utils import is_hip, set_cuda_visible_devices, get_ip
from vllm.utils import get_ip, is_hip, set_cuda_visible_devices
logger = init_logger(__name__)
......
......@@ -11,9 +11,9 @@ import json
import ssl
from typing import AsyncGenerator
import uvicorn
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, Response, StreamingResponse
import uvicorn
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
......
......@@ -3,9 +3,9 @@ from typing import List, Optional, Union
from tqdm import tqdm
from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
from vllm.lora.request import LoRARequest
from vllm.engine.arg_utils import EngineArgs
from vllm.engine.llm_engine import LLMEngine
from vllm.lora.request import LoRARequest
from vllm.outputs import RequestOutput
from vllm.sampling_params import SamplingParams
from vllm.utils import Counter
......
import asyncio
from contextlib import asynccontextmanager
import os
import importlib
import inspect
import os
from contextlib import asynccontextmanager
from http import HTTPStatus
from prometheus_client import make_asgi_app
import fastapi
import uvicorn
from http import HTTPStatus
from fastapi import Request
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, StreamingResponse, Response
from fastapi.responses import JSONResponse, Response, StreamingResponse
from prometheus_client import make_asgi_app
import vllm
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.entrypoints.openai.protocol import (CompletionRequest,
ChatCompletionRequest,
ErrorResponse)
from vllm.logger import init_logger
from vllm.entrypoints.openai.cli_args import make_arg_parser
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
CompletionRequest, ErrorResponse)
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
from vllm.logger import init_logger
TIMEOUT_KEEP_ALIVE = 5 # seconds
......
......@@ -3,12 +3,11 @@
import time
from typing import Dict, List, Literal, Optional, Union
import torch
from pydantic import BaseModel, Field, model_validator
from vllm.utils import random_uuid
from vllm.sampling_params import SamplingParams
import torch
from vllm.utils import random_uuid
class ErrorResponse(BaseModel):
......
import time
import codecs
import time
from typing import AsyncGenerator, AsyncIterator, List, Optional, Union
from fastapi import Request
from typing import AsyncGenerator, AsyncIterator, Optional, List, Union
from vllm.logger import init_logger
from vllm.utils import random_uuid
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest, ChatCompletionResponse,
ChatCompletionResponseChoice, ChatCompletionResponseStreamChoice,
ChatCompletionStreamResponse, ChatMessage, DeltaMessage, ErrorResponse,
UsageInfo)
from vllm.outputs import RequestOutput
from vllm.entrypoints.openai.serving_engine import OpenAIServing, LoRA
from vllm.entrypoints.openai.serving_engine import LoRA, OpenAIServing
from vllm.logger import init_logger
from vllm.model_executor.guided_decoding import (
get_guided_decoding_logits_processor)
from vllm.outputs import RequestOutput
from vllm.utils import random_uuid
logger = init_logger(__name__)
......
import asyncio
import time
from typing import (AsyncGenerator, AsyncIterator, Callable, Dict, List,
Optional, Tuple)
from fastapi import Request
from typing import (AsyncGenerator, AsyncIterator, Callable, List, Optional,
Dict, Tuple)
from vllm.logger import init_logger
from vllm.utils import random_uuid
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.entrypoints.openai.protocol import (
CompletionRequest,
CompletionResponse,
CompletionResponseChoice,
CompletionResponseStreamChoice,
CompletionStreamResponse,
LogProbs,
UsageInfo,
)
from vllm.outputs import RequestOutput
from vllm.entrypoints.openai.serving_engine import OpenAIServing, LoRA
from vllm.entrypoints.openai.protocol import (CompletionRequest,
CompletionResponse,
CompletionResponseChoice,
CompletionResponseStreamChoice,
CompletionStreamResponse,
LogProbs, UsageInfo)
from vllm.entrypoints.openai.serving_engine import LoRA, OpenAIServing
from vllm.logger import init_logger
from vllm.model_executor.guided_decoding import (
get_guided_decoding_logits_processor)
from vllm.outputs import RequestOutput
from vllm.utils import random_uuid
logger = init_logger(__name__)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment