Unverified commit 01bfb22b, authored by SangBin Cho and committed by GitHub
Browse files

[CI] Try introducing isort. (#3495)

parent e67c295b
...@@ -3,16 +3,16 @@ import json ...@@ -3,16 +3,16 @@ import json
from dataclasses import dataclass from dataclasses import dataclass
from http import HTTPStatus from http import HTTPStatus
from typing import Dict, List, Optional, Union from typing import Dict, List, Optional, Union
from vllm.logger import init_logger
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.entrypoints.openai.protocol import (CompletionRequest, from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
ChatCompletionRequest, CompletionRequest, ErrorResponse,
ErrorResponse, LogProbs, LogProbs, ModelCard, ModelList,
ModelCard, ModelList,
ModelPermission) ModelPermission)
from vllm.logger import init_logger
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.sequence import Logprob from vllm.sequence import Logprob
from vllm.transformers_utils.tokenizer import get_tokenizer
logger = init_logger(__name__) logger = init_logger(__name__)
......
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Dict, List, Optional from typing import Dict, List, Optional
from vllm.config import (CacheConfig, DeviceConfig, ModelConfig, from vllm.config import (CacheConfig, DeviceConfig, LoRAConfig, ModelConfig,
ParallelConfig, SchedulerConfig, LoRAConfig) ParallelConfig, SchedulerConfig)
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.sequence import SamplerOutput, SequenceGroupMetadata from vllm.sequence import SamplerOutput, SequenceGroupMetadata
......
from typing import Dict, List, Optional from typing import Dict, List, Optional
from vllm.lora.request import LoRARequest from vllm.config import (CacheConfig, DeviceConfig, LoRAConfig, ModelConfig,
from vllm.config import (CacheConfig, DeviceConfig, ModelConfig, ParallelConfig, SchedulerConfig)
ParallelConfig, SchedulerConfig, LoRAConfig)
from vllm.executor.executor_base import ExecutorAsyncBase, ExecutorBase from vllm.executor.executor_base import ExecutorAsyncBase, ExecutorBase
from vllm.executor.utils import check_block_size_valid from vllm.executor.utils import check_block_size_valid
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.sequence import SamplerOutput, SequenceGroupMetadata from vllm.sequence import SamplerOutput, SequenceGroupMetadata
from vllm.utils import (get_ip, get_open_port, get_distributed_init_method, from vllm.utils import (get_distributed_init_method, get_ip, get_open_port,
make_async) make_async)
logger = init_logger(__name__) logger = init_logger(__name__)
......
from typing import Dict, List, Optional from typing import Dict, List, Optional
from vllm.lora.request import LoRARequest from vllm.config import (CacheConfig, DeviceConfig, LoRAConfig, ModelConfig,
from vllm.config import (CacheConfig, DeviceConfig, ModelConfig, ParallelConfig, SchedulerConfig)
ParallelConfig, SchedulerConfig, LoRAConfig)
from vllm.executor.executor_base import ExecutorBase from vllm.executor.executor_base import ExecutorBase
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.sequence import SamplerOutput, SequenceGroupMetadata from vllm.sequence import SamplerOutput, SequenceGroupMetadata
logger = init_logger(__name__) logger = init_logger(__name__)
......
import asyncio import asyncio
import copy import copy
from collections import defaultdict
import os import os
import pickle import pickle
from collections import defaultdict
from typing import TYPE_CHECKING, Any, Dict, List, Optional from typing import TYPE_CHECKING, Any, Dict, List, Optional
from vllm.config import (CacheConfig, DeviceConfig, ModelConfig, from vllm.config import (CacheConfig, DeviceConfig, LoRAConfig, ModelConfig,
ParallelConfig, SchedulerConfig, LoRAConfig) ParallelConfig, SchedulerConfig)
from vllm.engine.ray_utils import RayWorkerVllm, ray from vllm.engine.ray_utils import RayWorkerVllm, ray
from vllm.executor.executor_base import ExecutorAsyncBase, ExecutorBase from vllm.executor.executor_base import ExecutorAsyncBase, ExecutorBase
from vllm.executor.utils import check_block_size_valid from vllm.executor.utils import check_block_size_valid
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.sequence import SamplerOutput, SequenceGroupMetadata from vllm.sequence import SamplerOutput, SequenceGroupMetadata
from vllm.utils import (set_cuda_visible_devices, get_ip, get_open_port, from vllm.utils import (get_distributed_init_method, get_ip, get_open_port,
get_distributed_init_method, make_async) make_async, set_cuda_visible_devices)
if ray is not None: if ray is not None:
from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy
...@@ -343,7 +343,7 @@ class RayGPUExecutor(ExecutorBase): ...@@ -343,7 +343,7 @@ class RayGPUExecutor(ExecutorBase):
raise ValueError(f"Ray version {required_version} or greater is " raise ValueError(f"Ray version {required_version} or greater is "
f"required, but found {current_version}") f"required, but found {current_version}")
from ray.dag import MultiOutputNode, InputNode from ray.dag import InputNode, MultiOutputNode
assert self.parallel_config.worker_use_ray assert self.parallel_config.worker_use_ray
# Right now, compiled DAG requires at least 1 arg. We send # Right now, compiled DAG requires at least 1 arg. We send
......
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
# https://github.com/skypilot-org/skypilot/blob/86dc0f6283a335e4aa37b3c10716f90999f48ab6/sky/sky_logging.py # https://github.com/skypilot-org/skypilot/blob/86dc0f6283a335e4aa37b3c10716f90999f48ab6/sky/sky_logging.py
"""Logging configuration for vLLM.""" """Logging configuration for vLLM."""
import logging import logging
import sys
import os import os
import sys
VLLM_CONFIGURE_LOGGING = int(os.getenv("VLLM_CONFIGURE_LOGGING", "1")) VLLM_CONFIGURE_LOGGING = int(os.getenv("VLLM_CONFIGURE_LOGGING", "1"))
......
...@@ -10,18 +10,16 @@ from transformers import PretrainedConfig ...@@ -10,18 +10,16 @@ from transformers import PretrainedConfig
from vllm.config import LoRAConfig from vllm.config import LoRAConfig
from vllm.lora.punica import add_lora, add_lora_slice, bgmv from vllm.lora.punica import add_lora, add_lora_slice, bgmv
from vllm.model_executor.parallel_utils.communication_op import (
tensor_model_parallel_all_gather,
tensor_model_parallel_all_reduce,
tensor_model_parallel_gather,
)
from vllm.model_executor.layers.linear import (ColumnParallelLinear, from vllm.model_executor.layers.linear import (ColumnParallelLinear,
RowParallelLinear, MergedColumnParallelLinear,
QKVParallelLinear, QKVParallelLinear,
MergedColumnParallelLinear) RowParallelLinear)
from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.logits_processor import LogitsProcessor
from vllm.model_executor.layers.vocab_parallel_embedding import ( from vllm.model_executor.layers.vocab_parallel_embedding import (
VocabParallelEmbedding, ParallelLMHead) ParallelLMHead, VocabParallelEmbedding)
from vllm.model_executor.parallel_utils.communication_op import (
tensor_model_parallel_all_gather, tensor_model_parallel_all_reduce,
tensor_model_parallel_gather)
from vllm.model_executor.parallel_utils.parallel_state import ( from vllm.model_executor.parallel_utils.parallel_state import (
get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size)
from vllm.model_executor.parallel_utils.utils import ( from vllm.model_executor.parallel_utils.utils import (
......
from typing import List, Optional from typing import List, Optional
import torch import torch
from vllm.utils import is_pin_memory_available from vllm.utils import is_pin_memory_available
......
...@@ -4,19 +4,18 @@ import logging ...@@ -4,19 +4,18 @@ import logging
import math import math
import os import os
import re import re
from typing import (Callable, Dict, Hashable, List, Optional, Tuple, Type) from typing import Callable, Dict, Hashable, List, Optional, Tuple, Type
import safetensors.torch import safetensors.torch
import torch import torch
from torch import nn from torch import nn
from vllm.config import LoRAConfig from vllm.config import LoRAConfig
from vllm.utils import LRUCache, is_pin_memory_available
from vllm.lora.layers import (BaseLayerWithLoRA, LoRAMapping, from_layer, from vllm.lora.layers import (BaseLayerWithLoRA, LoRAMapping, from_layer,
from_layer_logits_processor) from_layer_logits_processor)
from vllm.lora.lora import LoRALayerWeights, PackedLoRALayerWeights from vllm.lora.lora import LoRALayerWeights, PackedLoRALayerWeights
from vllm.lora.utils import parse_fine_tuned_lora_name, replace_submodule from vllm.lora.utils import parse_fine_tuned_lora_name, replace_submodule
from vllm.utils import LRUCache, is_pin_memory_available
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
......
...@@ -4,11 +4,11 @@ from typing import Any, Dict, List, Optional, Set, Type ...@@ -4,11 +4,11 @@ from typing import Any, Dict, List, Optional, Set, Type
import torch import torch
from vllm.config import LoRAConfig
from vllm.lora.layers import LoRAMapping
from vllm.lora.models import (LoRAModel, LoRAModelManager, from vllm.lora.models import (LoRAModel, LoRAModelManager,
LRUCacheLoRAModelManager, create_lora_manager) LRUCacheLoRAModelManager, create_lora_manager)
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.lora.layers import LoRAMapping
from vllm.config import LoRAConfig
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
......
...@@ -5,16 +5,16 @@ from enum import Enum ...@@ -5,16 +5,16 @@ from enum import Enum
from functools import lru_cache from functools import lru_cache
from json import dumps as json_dumps from json import dumps as json_dumps
from re import escape as regex_escape from re import escape as regex_escape
from typing import Union, Tuple from typing import Tuple, Union
from pydantic import BaseModel from pydantic import BaseModel
from transformers import PreTrainedTokenizerBase from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.protocol import (CompletionRequest, from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
ChatCompletionRequest) CompletionRequest)
from vllm.model_executor.guided_logits_processors import (JSONLogitsProcessor, from vllm.model_executor.guided_logits_processors import (CFGLogitsProcessor,
RegexLogitsProcessor, JSONLogitsProcessor,
CFGLogitsProcessor) RegexLogitsProcessor)
class GuidedDecodingMode(Enum): class GuidedDecodingMode(Enum):
......
...@@ -16,13 +16,13 @@ ...@@ -16,13 +16,13 @@
import json import json
import math import math
from collections import defaultdict from collections import defaultdict
from typing import Union, DefaultDict, Dict, List, Optional, Callable from typing import Callable, DefaultDict, Dict, List, Optional, Union
import torch import torch
from outlines.fsm.fsm import CFGFSM, RegexFSM
from outlines.fsm.json_schema import build_regex_from_schema
from pydantic import BaseModel from pydantic import BaseModel
from transformers import PreTrainedTokenizerBase from transformers import PreTrainedTokenizerBase
from outlines.fsm.fsm import RegexFSM, CFGFSM
from outlines.fsm.json_schema import build_regex_from_schema
class BaseLogitsProcessor: class BaseLogitsProcessor:
......
from vllm.model_executor.layers.fused_moe.fused_moe import ( from vllm.model_executor.layers.fused_moe.fused_moe import (
fused_moe, fused_moe, get_config_file_name)
get_config_file_name,
)
__all__ = [ __all__ = [
"fused_moe", "fused_moe",
......
...@@ -5,14 +5,14 @@ import torch ...@@ -5,14 +5,14 @@ import torch
import torch.nn.functional as F import torch.nn.functional as F
from torch.nn.parameter import Parameter from torch.nn.parameter import Parameter
from vllm.logger import init_logger
from vllm.model_executor.parallel_utils.communication_op import (
tensor_model_parallel_all_gather, tensor_model_parallel_all_reduce)
from vllm.model_executor.parallel_utils.parallel_state import ( from vllm.model_executor.parallel_utils.parallel_state import (
get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size)
from vllm.model_executor.parallel_utils.communication_op import (
tensor_model_parallel_all_reduce, tensor_model_parallel_all_gather)
from vllm.model_executor.parallel_utils.utils import ( from vllm.model_executor.parallel_utils.utils import (
divide, split_tensor_along_last_dim) divide, split_tensor_along_last_dim)
from vllm.model_executor.utils import set_weight_attrs from vllm.model_executor.utils import set_weight_attrs
from vllm.logger import init_logger
logger = init_logger(__name__) logger = init_logger(__name__)
......
from typing import Optional, Union
import torch import torch
import triton import triton
import triton.language as tl import triton.language as tl
from typing import Optional, Union
def seeded_uniform( def seeded_uniform(
*size, *size,
......
import math import math
from typing import Tuple, Optional from typing import Optional, Tuple
import torch import torch
import triton import triton
......
from typing import Type from typing import Type
from vllm.model_executor.layers.quantization.awq import AWQConfig
from vllm.model_executor.layers.quantization.base_config import ( from vllm.model_executor.layers.quantization.base_config import (
QuantizationConfig) QuantizationConfig)
from vllm.model_executor.layers.quantization.awq import AWQConfig
from vllm.model_executor.layers.quantization.gptq import GPTQConfig from vllm.model_executor.layers.quantization.gptq import GPTQConfig
from vllm.model_executor.layers.quantization.squeezellm import SqueezeLLMConfig
from vllm.model_executor.layers.quantization.marlin import MarlinConfig from vllm.model_executor.layers.quantization.marlin import MarlinConfig
from vllm.model_executor.layers.quantization.squeezellm import SqueezeLLMConfig
_QUANTIZATION_CONFIG_REGISTRY = { _QUANTIZATION_CONFIG_REGISTRY = {
"awq": AWQConfig, "awq": AWQConfig,
......
import enum import enum
from enum import Enum from enum import Enum
from typing import Any, Dict, List, Optional
from fractions import Fraction from fractions import Fraction
from typing import Any, Dict, List, Optional
import torch import torch
from torch.nn.parameter import Parameter from torch.nn.parameter import Parameter
......
...@@ -4,7 +4,8 @@ import torch ...@@ -4,7 +4,8 @@ import torch
from torch.nn.parameter import Parameter from torch.nn.parameter import Parameter
from vllm._C import ops from vllm._C import ops
from vllm.model_executor.layers.linear import LinearMethodBase, set_weight_attrs from vllm.model_executor.layers.linear import (LinearMethodBase,
set_weight_attrs)
from vllm.model_executor.layers.quantization.base_config import ( from vllm.model_executor.layers.quantization.base_config import (
QuantizationConfig) QuantizationConfig)
......
from typing import Tuple, Optional
from functools import cached_property from functools import cached_property
from typing import Optional, Tuple
import torch import torch
import torch.nn as nn
import torch.jit import torch.jit
import torch.nn as nn
class RejectionSampler(nn.Module): class RejectionSampler(nn.Module):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment