Unverified Commit 01bfb22b authored by SangBin Cho's avatar SangBin Cho Committed by GitHub
Browse files

[CI] Try introducing isort. (#3495)

parent e67c295b
from vllm.attention.backends.abstract import AttentionBackend, AttentionMetadata from vllm.attention.backends.abstract import (AttentionBackend,
AttentionMetadata)
from vllm.attention.layer import Attention from vllm.attention.layer import Attention
from vllm.attention.selector import get_attn_backend from vllm.attention.selector import get_attn_backend
......
...@@ -7,12 +7,13 @@ flashinfer for all the attention operations. ...@@ -7,12 +7,13 @@ flashinfer for all the attention operations.
from dataclasses import dataclass from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple, Type from typing import Dict, List, Optional, Tuple, Type
from flash_attn import flash_attn_varlen_func
import torch import torch
from flash_attn import flash_attn_varlen_func
from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl, from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
AttentionMetadata) AttentionMetadata)
from vllm.attention.ops.paged_attn import PagedAttention, PagedAttentionMetadata from vllm.attention.ops.paged_attn import (PagedAttention,
PagedAttentionMetadata)
class FlashAttentionBackend(AttentionBackend): class FlashAttentionBackend(AttentionBackend):
......
...@@ -11,7 +11,8 @@ from xformers.ops.fmha.attn_bias import (AttentionBias, ...@@ -11,7 +11,8 @@ from xformers.ops.fmha.attn_bias import (AttentionBias,
from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl, from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
AttentionMetadata) AttentionMetadata)
from vllm.attention.ops.paged_attn import PagedAttention, PagedAttentionMetadata from vllm.attention.ops.paged_attn import (PagedAttention,
PagedAttentionMetadata)
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.utils import is_hip from vllm.utils import is_hip
......
...@@ -3,8 +3,7 @@ from typing import Dict, List, Optional, Tuple ...@@ -3,8 +3,7 @@ from typing import Dict, List, Optional, Tuple
import torch import torch
from vllm._C import cache_ops from vllm._C import cache_ops, ops
from vllm._C import ops
from vllm.attention.ops.prefix_prefill import context_attention_fwd from vllm.attention.ops.prefix_prefill import context_attention_fwd
# Should be the same as PARTITION_SIZE in `paged_attention_v2_launcher`. # Should be the same as PARTITION_SIZE in `paged_attention_v2_launcher`.
......
...@@ -13,11 +13,13 @@ logger = init_logger(__name__) ...@@ -13,11 +13,13 @@ logger = init_logger(__name__)
def get_attn_backend(dtype: torch.dtype) -> AttentionBackend: def get_attn_backend(dtype: torch.dtype) -> AttentionBackend:
if _can_use_flash_attn(dtype): if _can_use_flash_attn(dtype):
logger.info("Using FlashAttention backend.") logger.info("Using FlashAttention backend.")
from vllm.attention.backends.flash_attn import FlashAttentionBackend # noqa: F401 from vllm.attention.backends.flash_attn import ( # noqa: F401
FlashAttentionBackend)
return FlashAttentionBackend return FlashAttentionBackend
else: else:
logger.info("Using XFormers backend.") logger.info("Using XFormers backend.")
from vllm.attention.backends.xformers import XFormersBackend # noqa: F401 from vllm.attention.backends.xformers import ( # noqa: F401
XFormersBackend)
return XFormersBackend return XFormersBackend
......
from typing import TYPE_CHECKING, Optional, Union, ClassVar import json
from dataclasses import dataclass
import os import os
from packaging.version import Version from dataclasses import dataclass
from typing import TYPE_CHECKING, ClassVar, Optional, Union
import json
import torch import torch
from packaging.version import Version
from transformers import PretrainedConfig from transformers import PretrainedConfig
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.transformers_utils.config import get_config from vllm.transformers_utils.config import get_config
from vllm.utils import get_cpu_memory, is_hip, is_neuron, get_nvcc_cuda_version from vllm.utils import get_cpu_memory, get_nvcc_cuda_version, is_hip, is_neuron
if TYPE_CHECKING: if TYPE_CHECKING:
from ray.util.placement_group import PlacementGroup from ray.util.placement_group import PlacementGroup
...@@ -103,7 +103,8 @@ class ModelConfig: ...@@ -103,7 +103,8 @@ class ModelConfig:
if os.environ.get("VLLM_USE_MODELSCOPE", "False").lower() == "true": if os.environ.get("VLLM_USE_MODELSCOPE", "False").lower() == "true":
# download model from ModelScope hub, # download model from ModelScope hub,
# lazy import so that modelscope is not required for normal use. # lazy import so that modelscope is not required for normal use.
from modelscope.hub.snapshot_download import snapshot_download # pylint: disable=C # pylint: disable=C.
from modelscope.hub.snapshot_download import snapshot_download
if not os.path.exists(model): if not os.path.exists(model):
model_path = snapshot_download(model_id=model, model_path = snapshot_download(model_id=model,
......
"""A block manager that manages token blocks.""" """A block manager that manages token blocks."""
import enum import enum
from abc import ABC, abstractmethod
from itertools import count, takewhile from itertools import count, takewhile
from os.path import commonprefix from os.path import commonprefix
from typing import Dict, List, Optional, Set, Tuple from typing import Dict, List, Optional, Set, Tuple
from abc import ABC, abstractmethod
from vllm.block import BlockTable, PhysicalTokenBlock from vllm.block import BlockTable, PhysicalTokenBlock
from vllm.core.evictor import EvictionPolicy, Evictor, make_evictor
from vllm.logger import init_logger
from vllm.sequence import Sequence, SequenceGroup, SequenceStatus from vllm.sequence import Sequence, SequenceGroup, SequenceStatus
from vllm.utils import Device from vllm.utils import Device
from vllm.core.evictor import Evictor, EvictionPolicy, make_evictor
from vllm.logger import init_logger
logger = init_logger(__name__) logger = init_logger(__name__)
......
import enum import enum
from typing import OrderedDict
from abc import ABC, abstractmethod, abstractproperty from abc import ABC, abstractmethod, abstractproperty
from typing import OrderedDict
from vllm.block import PhysicalTokenBlock from vllm.block import PhysicalTokenBlock
......
from collections import deque
import enum import enum
import time import time
from typing import Deque, Dict, Iterable, List, Optional, Tuple, Union, Set from collections import deque
from typing import Deque, Dict, Iterable, List, Optional, Set, Tuple, Union
from vllm.config import CacheConfig, LoRAConfig, SchedulerConfig from vllm.config import CacheConfig, LoRAConfig, SchedulerConfig
from vllm.core.block_manager import AllocStatus, BlockSpaceManager from vllm.core.block_manager import AllocStatus, BlockSpaceManager
from vllm.core.policy import PolicyFactory from vllm.core.policy import PolicyFactory
from vllm.lora.request import LoRARequest
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.sequence import (Sequence, SequenceData, SequenceGroup, from vllm.sequence import (Sequence, SequenceData, SequenceGroup,
SequenceGroupMetadata, SequenceStatus) SequenceGroupMetadata, SequenceStatus)
......
...@@ -3,9 +3,8 @@ import dataclasses ...@@ -3,9 +3,8 @@ import dataclasses
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Tuple from typing import Optional, Tuple
from vllm.config import (CacheConfig, DeviceConfig, ModelConfig, from vllm.config import (CacheConfig, DeviceConfig, LoRAConfig, ModelConfig,
ParallelConfig, SchedulerConfig, LoRAConfig, ParallelConfig, SchedulerConfig, TokenizerPoolConfig)
TokenizerPoolConfig)
@dataclass @dataclass
......
...@@ -2,17 +2,17 @@ import asyncio ...@@ -2,17 +2,17 @@ import asyncio
import os import os
import time import time
from functools import partial from functools import partial
from typing import (Callable, Dict, Iterable, List, Optional, Set, Tuple, Type, from typing import (AsyncIterator, Callable, Dict, Iterable, List, Optional,
Union, AsyncIterator) Set, Tuple, Type, Union)
from transformers import PreTrainedTokenizer from transformers import PreTrainedTokenizer
from vllm.lora.request import LoRARequest
from vllm.config import ModelConfig from vllm.config import ModelConfig
from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.llm_engine import LLMEngine from vllm.engine.llm_engine import LLMEngine
from vllm.engine.ray_utils import initialize_ray_cluster, ray from vllm.engine.ray_utils import initialize_ray_cluster, ray
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.outputs import RequestOutput from vllm.outputs import RequestOutput
from vllm.sampling_params import SamplingParams from vllm.sampling_params import SamplingParams
......
...@@ -4,22 +4,22 @@ from typing import Iterable, List, Optional, Tuple, Type, Union ...@@ -4,22 +4,22 @@ from typing import Iterable, List, Optional, Tuple, Type, Union
from transformers import PreTrainedTokenizer from transformers import PreTrainedTokenizer
import vllm import vllm
from vllm.lora.request import LoRARequest from vllm.config import (CacheConfig, DeviceConfig, LoRAConfig, ModelConfig,
from vllm.config import (CacheConfig, DeviceConfig, ModelConfig, ParallelConfig, SchedulerConfig)
ParallelConfig, SchedulerConfig, LoRAConfig)
from vllm.core.scheduler import Scheduler, SchedulerOutputs from vllm.core.scheduler import Scheduler, SchedulerOutputs
from vllm.engine.arg_utils import EngineArgs from vllm.engine.arg_utils import EngineArgs
from vllm.executor.executor_base import ExecutorBase
from vllm.engine.metrics import StatLogger, Stats from vllm.engine.metrics import StatLogger, Stats
from vllm.engine.ray_utils import initialize_ray_cluster from vllm.engine.ray_utils import initialize_ray_cluster
from vllm.executor.executor_base import ExecutorBase
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.outputs import RequestOutput from vllm.outputs import RequestOutput
from vllm.sampling_params import SamplingParams from vllm.sampling_params import SamplingParams
from vllm.sequence import (SamplerOutput, Sequence, SequenceGroup, from vllm.sequence import (SamplerOutput, Sequence, SequenceGroup,
SequenceGroupOutput, SequenceOutput, SequenceStatus) SequenceGroupOutput, SequenceOutput, SequenceStatus)
from vllm.transformers_utils.detokenizer import Detokenizer
from vllm.transformers_utils.tokenizer_group import (BaseTokenizerGroup, from vllm.transformers_utils.tokenizer_group import (BaseTokenizerGroup,
get_tokenizer_group) get_tokenizer_group)
from vllm.transformers_utils.detokenizer import Detokenizer
from vllm.utils import Counter from vllm.utils import Counter
logger = init_logger(__name__) logger = init_logger(__name__)
......
from vllm.logger import init_logger
from prometheus_client import (Counter, Gauge, Histogram, Info, REGISTRY,
disable_created_metrics)
import time import time
import numpy as np
from typing import Dict, List
from dataclasses import dataclass from dataclasses import dataclass
from typing import Dict, List
import numpy as np
from prometheus_client import (REGISTRY, Counter, Gauge, Histogram, Info,
disable_created_metrics)
from vllm.logger import init_logger
logger = init_logger(__name__) logger = init_logger(__name__)
......
import pickle import pickle
from typing import List, Optional, Tuple
from typing import Optional, List, Tuple
from vllm.config import ParallelConfig from vllm.config import ParallelConfig
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.utils import is_hip, set_cuda_visible_devices, get_ip from vllm.utils import get_ip, is_hip, set_cuda_visible_devices
logger = init_logger(__name__) logger = init_logger(__name__)
......
...@@ -11,9 +11,9 @@ import json ...@@ -11,9 +11,9 @@ import json
import ssl import ssl
from typing import AsyncGenerator from typing import AsyncGenerator
import uvicorn
from fastapi import FastAPI, Request from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, Response, StreamingResponse from fastapi.responses import JSONResponse, Response, StreamingResponse
import uvicorn
from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.engine.async_llm_engine import AsyncLLMEngine
......
...@@ -3,9 +3,9 @@ from typing import List, Optional, Union ...@@ -3,9 +3,9 @@ from typing import List, Optional, Union
from tqdm import tqdm from tqdm import tqdm
from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
from vllm.lora.request import LoRARequest
from vllm.engine.arg_utils import EngineArgs from vllm.engine.arg_utils import EngineArgs
from vllm.engine.llm_engine import LLMEngine from vllm.engine.llm_engine import LLMEngine
from vllm.lora.request import LoRARequest
from vllm.outputs import RequestOutput from vllm.outputs import RequestOutput
from vllm.sampling_params import SamplingParams from vllm.sampling_params import SamplingParams
from vllm.utils import Counter from vllm.utils import Counter
......
import asyncio import asyncio
from contextlib import asynccontextmanager
import os
import importlib import importlib
import inspect import inspect
import os
from contextlib import asynccontextmanager
from http import HTTPStatus
from prometheus_client import make_asgi_app
import fastapi import fastapi
import uvicorn import uvicorn
from http import HTTPStatus
from fastapi import Request from fastapi import Request
from fastapi.exceptions import RequestValidationError from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, StreamingResponse, Response from fastapi.responses import JSONResponse, Response, StreamingResponse
from prometheus_client import make_asgi_app
import vllm import vllm
from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.entrypoints.openai.protocol import (CompletionRequest,
ChatCompletionRequest,
ErrorResponse)
from vllm.logger import init_logger
from vllm.entrypoints.openai.cli_args import make_arg_parser from vllm.entrypoints.openai.cli_args import make_arg_parser
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
CompletionRequest, ErrorResponse)
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
from vllm.logger import init_logger
TIMEOUT_KEEP_ALIVE = 5 # seconds TIMEOUT_KEEP_ALIVE = 5 # seconds
......
...@@ -3,12 +3,11 @@ ...@@ -3,12 +3,11 @@
import time import time
from typing import Dict, List, Literal, Optional, Union from typing import Dict, List, Literal, Optional, Union
import torch
from pydantic import BaseModel, Field, model_validator from pydantic import BaseModel, Field, model_validator
from vllm.utils import random_uuid
from vllm.sampling_params import SamplingParams from vllm.sampling_params import SamplingParams
from vllm.utils import random_uuid
import torch
class ErrorResponse(BaseModel): class ErrorResponse(BaseModel):
......
import time
import codecs import codecs
import time
from typing import AsyncGenerator, AsyncIterator, List, Optional, Union
from fastapi import Request from fastapi import Request
from typing import AsyncGenerator, AsyncIterator, Optional, List, Union
from vllm.logger import init_logger
from vllm.utils import random_uuid
from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest, ChatCompletionResponse, ChatCompletionRequest, ChatCompletionResponse,
ChatCompletionResponseChoice, ChatCompletionResponseStreamChoice, ChatCompletionResponseChoice, ChatCompletionResponseStreamChoice,
ChatCompletionStreamResponse, ChatMessage, DeltaMessage, ErrorResponse, ChatCompletionStreamResponse, ChatMessage, DeltaMessage, ErrorResponse,
UsageInfo) UsageInfo)
from vllm.outputs import RequestOutput from vllm.entrypoints.openai.serving_engine import LoRA, OpenAIServing
from vllm.entrypoints.openai.serving_engine import OpenAIServing, LoRA from vllm.logger import init_logger
from vllm.model_executor.guided_decoding import ( from vllm.model_executor.guided_decoding import (
get_guided_decoding_logits_processor) get_guided_decoding_logits_processor)
from vllm.outputs import RequestOutput
from vllm.utils import random_uuid
logger = init_logger(__name__) logger = init_logger(__name__)
......
import asyncio import asyncio
import time import time
from typing import (AsyncGenerator, AsyncIterator, Callable, Dict, List,
Optional, Tuple)
from fastapi import Request from fastapi import Request
from typing import (AsyncGenerator, AsyncIterator, Callable, List, Optional,
Dict, Tuple)
from vllm.logger import init_logger
from vllm.utils import random_uuid
from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.protocol import (CompletionRequest,
CompletionRequest, CompletionResponse,
CompletionResponse, CompletionResponseChoice,
CompletionResponseChoice, CompletionResponseStreamChoice,
CompletionResponseStreamChoice, CompletionStreamResponse,
CompletionStreamResponse, LogProbs, UsageInfo)
LogProbs, from vllm.entrypoints.openai.serving_engine import LoRA, OpenAIServing
UsageInfo, from vllm.logger import init_logger
)
from vllm.outputs import RequestOutput
from vllm.entrypoints.openai.serving_engine import OpenAIServing, LoRA
from vllm.model_executor.guided_decoding import ( from vllm.model_executor.guided_decoding import (
get_guided_decoding_logits_processor) get_guided_decoding_logits_processor)
from vllm.outputs import RequestOutput
from vllm.utils import random_uuid
logger = init_logger(__name__) logger = init_logger(__name__)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment