Unverified commit 01bfb22b, authored by SangBin Cho and committed by GitHub
Browse files

[CI] Try introducing isort. (#3495)

parent e67c295b
...@@ -110,8 +110,8 @@ def _get_model_architecture(config: PretrainedConfig) -> Type[nn.Module]: ...@@ -110,8 +110,8 @@ def _get_model_architecture(config: PretrainedConfig) -> Type[nn.Module]:
def get_neuron_model(model_config: ModelConfig, def get_neuron_model(model_config: ModelConfig,
parallel_config: ParallelConfig, parallel_config: ParallelConfig,
scheduler_config: SchedulerConfig) -> nn.Module: scheduler_config: SchedulerConfig) -> nn.Module:
from transformers_neuronx.config import (NeuronConfig, from transformers_neuronx.config import (ContinuousBatchingConfig,
ContinuousBatchingConfig) NeuronConfig)
# Create a model instance. # Create a model instance.
model = NeuronCasualLM(model_config.hf_config) model = NeuronCasualLM(model_config.hf_config)
......
...@@ -5,14 +5,11 @@ import torch ...@@ -5,14 +5,11 @@ import torch
from torch.distributed import ProcessGroup from torch.distributed import ProcessGroup
from vllm.model_executor.parallel_utils import cupy_utils from vllm.model_executor.parallel_utils import cupy_utils
from vllm.model_executor.parallel_utils.parallel_state import (
get_tensor_model_parallel_rank,
get_tensor_model_parallel_world_size,
get_tensor_model_parallel_group,
is_cupy_nccl_enabled_for_all_reduce,
)
from vllm.model_executor.parallel_utils.custom_all_reduce import ( from vllm.model_executor.parallel_utils.custom_all_reduce import (
custom_all_reduce) custom_all_reduce)
from vllm.model_executor.parallel_utils.parallel_state import (
get_tensor_model_parallel_group, get_tensor_model_parallel_rank,
get_tensor_model_parallel_world_size, is_cupy_nccl_enabled_for_all_reduce)
def tensor_model_parallel_all_reduce(input_: torch.Tensor) -> torch.Tensor: def tensor_model_parallel_all_reduce(input_: torch.Tensor) -> torch.Tensor:
......
...@@ -6,11 +6,12 @@ import torch.distributed as dist ...@@ -6,11 +6,12 @@ import torch.distributed as dist
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.model_executor.parallel_utils.parallel_state import ( from vllm.model_executor.parallel_utils.parallel_state import (
get_tensor_model_parallel_world_size, get_tensor_model_parallel_rank) get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size)
try: try:
from vllm._C import custom_ar
import pynvml import pynvml
from vllm._C import custom_ar
except ImportError: except ImportError:
# For AMD GPUs # For AMD GPUs
custom_ar = None custom_ar = None
......
import random
from dataclasses import dataclass from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple from typing import Dict, List, Optional, Tuple
import torch import torch
import random
from vllm.model_executor.layers.ops.sample import ( from vllm.model_executor.layers.ops.sample import get_num_triton_sampler_splits
get_num_triton_sampler_splits)
from vllm.sampling_params import SamplingParams, SamplingType from vllm.sampling_params import SamplingParams, SamplingType
from vllm.sequence import SequenceData from vllm.sequence import SequenceData
from vllm.utils import is_pin_memory_available from vllm.utils import is_pin_memory_available
......
"""Utilities for downloading and initializing model weights.""" """Utilities for downloading and initializing model weights."""
import filelock import fnmatch
import glob import glob
import hashlib import hashlib
import fnmatch
import json import json
import os import os
from collections import defaultdict from collections import defaultdict
from typing import Any, Iterator, List, Optional, Tuple from typing import Any, Iterator, List, Optional, Tuple
from huggingface_hub import snapshot_download, HfFileSystem import filelock
import numpy as np import numpy as np
from safetensors.torch import load_file, save_file, safe_open
import torch import torch
from huggingface_hub import HfFileSystem, snapshot_download
from safetensors.torch import load_file, safe_open, save_file
from tqdm.auto import tqdm from tqdm.auto import tqdm
from vllm.config import ModelConfig from vllm.config import ModelConfig
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.model_executor.layers.quantization import (get_quantization_config, from vllm.model_executor.layers.quantization import (QuantizationConfig,
QuantizationConfig) get_quantization_config)
logger = init_logger(__name__) logger = init_logger(__name__)
......
from typing import List, Optional
import time import time
from typing import List, Optional
from vllm.sequence import (PromptLogprobs, SampleLogprobs, SequenceGroup,
SequenceStatus, RequestMetrics)
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.sequence import (PromptLogprobs, RequestMetrics, SampleLogprobs,
SequenceGroup, SequenceStatus)
class CompletionOutput: class CompletionOutput:
......
...@@ -2,14 +2,15 @@ ...@@ -2,14 +2,15 @@
import copy import copy
import enum import enum
from dataclasses import dataclass from dataclasses import dataclass
from typing import Dict, List, Optional, Union, TYPE_CHECKING from typing import TYPE_CHECKING, Dict, List, Optional, Union
from vllm.block import LogicalTokenBlock from vllm.block import LogicalTokenBlock
from vllm.sampling_params import SamplingParams
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.sampling_params import SamplingParams
if TYPE_CHECKING: if TYPE_CHECKING:
import torch import torch
from vllm.spec_decode.metrics import SpecDecodeWorkerMetrics from vllm.spec_decode.metrics import SpecDecodeWorkerMetrics
......
from typing import Iterator, List, Tuple, Optional, Dict
from itertools import chain, count from itertools import chain, count
from typing import Dict, Iterator, List, Optional, Tuple
import torch import torch
from vllm.sequence import (SamplerOutput, SequenceGroupMetadata, SequenceData) from vllm.sequence import SamplerOutput, SequenceData, SequenceGroupMetadata
from vllm.worker.worker import Worker from vllm.spec_decode.interfaces import (SpeculativeProposals,
from vllm.spec_decode.util import (nvtx_range, sampler_output_to_torch, SpeculativeScorer, SpeculativeScores)
get_all_seq_ids, from vllm.spec_decode.util import (get_all_seq_ids, nvtx_range,
sampler_output_to_torch,
split_batch_by_proposal_len) split_batch_by_proposal_len)
from vllm.spec_decode.interfaces import (SpeculativeScorer, from vllm.worker.worker import Worker
SpeculativeProposals,
SpeculativeScores)
SeqId = int SeqId = int
TargetSeqId = int TargetSeqId = int
......
from typing import List, Tuple, Optional, Dict
from dataclasses import dataclass
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple
import torch import torch
......
import torch import time
from dataclasses import dataclass from dataclasses import dataclass
from typing import Callable, Optional
import torch
from vllm.model_executor.layers.rejection_sampler import RejectionSampler from vllm.model_executor.layers.rejection_sampler import RejectionSampler
from typing import Optional
from vllm.utils import is_pin_memory_available from vllm.utils import is_pin_memory_available
import time
from typing import Callable
@dataclass @dataclass
......
from typing import List, Dict, Optional, Tuple
import copy import copy
from typing import Dict, List, Optional, Tuple
import torch import torch
from vllm.sequence import SamplerOutput, SequenceGroupMetadata from vllm.sequence import SamplerOutput, SequenceGroupMetadata
from vllm.worker.worker import Worker
from vllm.spec_decode.interfaces import (SpeculativeProposals, from vllm.spec_decode.interfaces import (SpeculativeProposals,
SpeculativeProposer) SpeculativeProposer)
from vllm.spec_decode.util import sampler_output_to_torch from vllm.spec_decode.util import sampler_output_to_torch
from vllm.worker.worker import Worker
class MultiStepWorker(Worker): class MultiStepWorker(Worker):
......
from typing import List, Tuple, Optional, Dict
from functools import cached_property from functools import cached_property
from typing import Dict, List, Optional, Tuple
import torch import torch
from vllm.spec_decode.metrics import AsyncMetricsCollector from vllm.config import CacheConfig
from vllm.model_executor.layers.rejection_sampler import RejectionSampler
from vllm.sequence import (SamplerOutput, SequenceGroupMetadata, from vllm.sequence import (SamplerOutput, SequenceGroupMetadata,
SequenceGroupOutput, SequenceOutput) SequenceGroupOutput, SequenceOutput)
from vllm.worker.worker import Worker from vllm.spec_decode.batch_expansion import BatchExpansionTop1Scorer
from vllm.spec_decode.interfaces import (SpeculativeProposals,
SpeculativeScorer, SpeculativeScores)
from vllm.spec_decode.metrics import AsyncMetricsCollector
from vllm.spec_decode.multi_step_worker import MultiStepWorker from vllm.spec_decode.multi_step_worker import MultiStepWorker
from vllm.model_executor.layers.rejection_sampler import RejectionSampler from vllm.spec_decode.util import (get_all_seq_ids, nvtx_range,
from vllm.config import CacheConfig
from vllm.spec_decode.util import (nvtx_range, get_all_seq_ids,
split_batch_by_proposal_len) split_batch_by_proposal_len)
from vllm.spec_decode.interfaces import SpeculativeProposals, SpeculativeScores from vllm.worker.worker import Worker
from vllm.spec_decode.batch_expansion import BatchExpansionTop1Scorer
from vllm.spec_decode.interfaces import SpeculativeScorer
class SpecDecodeWorker: class SpecDecodeWorker:
......
import torch
from typing import List, Tuple
from vllm.sequence import SequenceGroupMetadata, SamplerOutput
from contextlib import contextmanager from contextlib import contextmanager
from itertools import chain from itertools import chain
from typing import List, Tuple
import torch
from vllm.sequence import SamplerOutput, SequenceGroupMetadata
SeqId = int SeqId = int
......
from vllm.transformers_utils.configs.chatglm import ChatGLMConfig from vllm.transformers_utils.configs.chatglm import ChatGLMConfig
from vllm.transformers_utils.configs.mpt import MPTConfig
# RWConfig is for the original tiiuae/falcon-40b(-instruct) and # RWConfig is for the original tiiuae/falcon-40b(-instruct) and
# tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the # tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the
# `FalconConfig` class from the official HuggingFace transformers library. # `FalconConfig` class from the official HuggingFace transformers library.
from vllm.transformers_utils.configs.falcon import RWConfig from vllm.transformers_utils.configs.falcon import RWConfig
from vllm.transformers_utils.configs.jais import JAISConfig from vllm.transformers_utils.configs.jais import JAISConfig
from vllm.transformers_utils.configs.mpt import MPTConfig
__all__ = [ __all__ = [
"ChatGLMConfig", "ChatGLMConfig",
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
"""A HuggingFace-style model configuration.""" """A HuggingFace-style model configuration."""
import warnings import warnings
from typing import Any, Dict, Optional, Union from typing import Any, Dict, Optional, Union
from transformers import PretrainedConfig from transformers import PretrainedConfig
attn_config_defaults: Dict = { attn_config_defaults: Dict = {
......
from typing import List, Dict, Optional from typing import Dict, List, Optional
from transformers import PreTrainedTokenizer from transformers import PreTrainedTokenizer
from vllm.sequence import Sequence, Logprob, SequenceGroup, SamplingParams
from vllm.transformers_utils.tokenizer import (detokenize_incrementally, from vllm.sequence import Logprob, SamplingParams, Sequence, SequenceGroup
convert_prompt_ids_to_tokens) from vllm.transformers_utils.tokenizer import (convert_prompt_ids_to_tokens,
detokenize_incrementally)
from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import ( from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import (
BaseTokenizerGroup) BaseTokenizerGroup)
......
...@@ -5,8 +5,8 @@ from transformers import (AutoTokenizer, PreTrainedTokenizer, ...@@ -5,8 +5,8 @@ from transformers import (AutoTokenizer, PreTrainedTokenizer,
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.utils import make_async
from vllm.transformers_utils.tokenizers import * from vllm.transformers_utils.tokenizers import *
from vllm.utils import make_async
logger = init_logger(__name__) logger = init_logger(__name__)
......
from typing import Optional from typing import Optional
from vllm.config import TokenizerPoolConfig from vllm.config import TokenizerPoolConfig
from vllm.engine.ray_utils import ray
from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import ( from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import (
BaseTokenizerGroup) BaseTokenizerGroup)
from vllm.transformers_utils.tokenizer_group.tokenizer_group import ( from vllm.transformers_utils.tokenizer_group.tokenizer_group import (
TokenizerGroup) TokenizerGroup)
from vllm.engine.ray_utils import ray
if ray: if ray:
from vllm.transformers_utils.tokenizer_group.ray_tokenizer_group import ( from vllm.transformers_utils.tokenizer_group.ray_tokenizer_group import (
......
...@@ -2,16 +2,16 @@ import asyncio ...@@ -2,16 +2,16 @@ import asyncio
import os import os
from typing import List, Optional from typing import List, Optional
from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy
from transformers import PreTrainedTokenizer from transformers import PreTrainedTokenizer
from vllm.config import TokenizerPoolConfig from vllm.config import TokenizerPoolConfig
from vllm.lora.request import LoRARequest
from vllm.engine.ray_utils import ray from vllm.engine.ray_utils import ray
from vllm.lora.request import LoRARequest
from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import ( from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import (
BaseTokenizerGroup) BaseTokenizerGroup)
from vllm.transformers_utils.tokenizer_group.tokenizer_group import ( from vllm.transformers_utils.tokenizer_group.tokenizer_group import (
TokenizerGroup) TokenizerGroup)
from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy
class RayTokenizerGroupPool(BaseTokenizerGroup): class RayTokenizerGroupPool(BaseTokenizerGroup):
......
...@@ -4,11 +4,11 @@ from transformers import PreTrainedTokenizer ...@@ -4,11 +4,11 @@ from transformers import PreTrainedTokenizer
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.transformers_utils.tokenizer import (get_lora_tokenizer, from vllm.transformers_utils.tokenizer import (get_lora_tokenizer,
get_lora_tokenizer_async) get_lora_tokenizer_async,
get_tokenizer)
from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import ( from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import (
BaseTokenizerGroup) BaseTokenizerGroup)
from vllm.utils import LRUCache from vllm.utils import LRUCache
from vllm.transformers_utils.tokenizer import get_tokenizer
class TokenizerGroup(BaseTokenizerGroup): class TokenizerGroup(BaseTokenizerGroup):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment