Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
01bfb22b
Unverified
Commit
01bfb22b
authored
Mar 25, 2024
by
SangBin Cho
Committed by
GitHub
Mar 25, 2024
Browse files
[CI] Try introducing isort. (#3495)
parent
e67c295b
Changes
144
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
67 additions
and
63 deletions
+67
-63
vllm/model_executor/neuron_model_loader.py
vllm/model_executor/neuron_model_loader.py
+2
-2
vllm/model_executor/parallel_utils/communication_op.py
vllm/model_executor/parallel_utils/communication_op.py
+3
-6
vllm/model_executor/parallel_utils/custom_all_reduce.py
vllm/model_executor/parallel_utils/custom_all_reduce.py
+3
-2
vllm/model_executor/sampling_metadata.py
vllm/model_executor/sampling_metadata.py
+2
-3
vllm/model_executor/weight_utils.py
vllm/model_executor/weight_utils.py
+6
-6
vllm/outputs.py
vllm/outputs.py
+3
-3
vllm/sequence.py
vllm/sequence.py
+3
-2
vllm/spec_decode/batch_expansion.py
vllm/spec_decode/batch_expansion.py
+7
-8
vllm/spec_decode/interfaces.py
vllm/spec_decode/interfaces.py
+2
-2
vllm/spec_decode/metrics.py
vllm/spec_decode/metrics.py
+5
-4
vllm/spec_decode/multi_step_worker.py
vllm/spec_decode/multi_step_worker.py
+2
-2
vllm/spec_decode/spec_decode_worker.py
vllm/spec_decode/spec_decode_worker.py
+9
-9
vllm/spec_decode/util.py
vllm/spec_decode/util.py
+5
-3
vllm/transformers_utils/configs/__init__.py
vllm/transformers_utils/configs/__init__.py
+1
-1
vllm/transformers_utils/configs/mpt.py
vllm/transformers_utils/configs/mpt.py
+1
-0
vllm/transformers_utils/detokenizer.py
vllm/transformers_utils/detokenizer.py
+6
-4
vllm/transformers_utils/tokenizer.py
vllm/transformers_utils/tokenizer.py
+1
-1
vllm/transformers_utils/tokenizer_group/__init__.py
vllm/transformers_utils/tokenizer_group/__init__.py
+2
-1
vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py
...transformers_utils/tokenizer_group/ray_tokenizer_group.py
+2
-2
vllm/transformers_utils/tokenizer_group/tokenizer_group.py
vllm/transformers_utils/tokenizer_group/tokenizer_group.py
+2
-2
No files found.
vllm/model_executor/neuron_model_loader.py
View file @
01bfb22b
...
@@ -110,8 +110,8 @@ def _get_model_architecture(config: PretrainedConfig) -> Type[nn.Module]:
...
@@ -110,8 +110,8 @@ def _get_model_architecture(config: PretrainedConfig) -> Type[nn.Module]:
def
get_neuron_model
(
model_config
:
ModelConfig
,
def
get_neuron_model
(
model_config
:
ModelConfig
,
parallel_config
:
ParallelConfig
,
parallel_config
:
ParallelConfig
,
scheduler_config
:
SchedulerConfig
)
->
nn
.
Module
:
scheduler_config
:
SchedulerConfig
)
->
nn
.
Module
:
from
transformers_neuronx.config
import
(
Neuron
Config
,
from
transformers_neuronx.config
import
(
ContinuousBatching
Config
,
ContinuousBatching
Config
)
Neuron
Config
)
# Create a model instance.
# Create a model instance.
model
=
NeuronCasualLM
(
model_config
.
hf_config
)
model
=
NeuronCasualLM
(
model_config
.
hf_config
)
...
...
vllm/model_executor/parallel_utils/communication_op.py
View file @
01bfb22b
...
@@ -5,14 +5,11 @@ import torch
...
@@ -5,14 +5,11 @@ import torch
from
torch.distributed
import
ProcessGroup
from
torch.distributed
import
ProcessGroup
from
vllm.model_executor.parallel_utils
import
cupy_utils
from
vllm.model_executor.parallel_utils
import
cupy_utils
from
vllm.model_executor.parallel_utils.parallel_state
import
(
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_group
,
is_cupy_nccl_enabled_for_all_reduce
,
)
from
vllm.model_executor.parallel_utils.custom_all_reduce
import
(
from
vllm.model_executor.parallel_utils.custom_all_reduce
import
(
custom_all_reduce
)
custom_all_reduce
)
from
vllm.model_executor.parallel_utils.parallel_state
import
(
get_tensor_model_parallel_group
,
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
,
is_cupy_nccl_enabled_for_all_reduce
)
def
tensor_model_parallel_all_reduce
(
input_
:
torch
.
Tensor
)
->
torch
.
Tensor
:
def
tensor_model_parallel_all_reduce
(
input_
:
torch
.
Tensor
)
->
torch
.
Tensor
:
...
...
vllm/model_executor/parallel_utils/custom_all_reduce.py
View file @
01bfb22b
...
@@ -6,11 +6,12 @@ import torch.distributed as dist
...
@@ -6,11 +6,12 @@ import torch.distributed as dist
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.parallel_utils.parallel_state
import
(
from
vllm.model_executor.parallel_utils.parallel_state
import
(
get_tensor_model_parallel_
world_size
,
get_tensor_model_parallel_
rank
)
get_tensor_model_parallel_
rank
,
get_tensor_model_parallel_
world_size
)
try
:
try
:
from
vllm._C
import
custom_ar
import
pynvml
import
pynvml
from
vllm._C
import
custom_ar
except
ImportError
:
except
ImportError
:
# For AMD GPUs
# For AMD GPUs
custom_ar
=
None
custom_ar
=
None
...
...
vllm/model_executor/sampling_metadata.py
View file @
01bfb22b
import
random
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
typing
import
Dict
,
List
,
Optional
,
Tuple
from
typing
import
Dict
,
List
,
Optional
,
Tuple
import
torch
import
torch
import
random
from
vllm.model_executor.layers.ops.sample
import
(
from
vllm.model_executor.layers.ops.sample
import
get_num_triton_sampler_splits
get_num_triton_sampler_splits
)
from
vllm.sampling_params
import
SamplingParams
,
SamplingType
from
vllm.sampling_params
import
SamplingParams
,
SamplingType
from
vllm.sequence
import
SequenceData
from
vllm.sequence
import
SequenceData
from
vllm.utils
import
is_pin_memory_available
from
vllm.utils
import
is_pin_memory_available
...
...
vllm/model_executor/weight_utils.py
View file @
01bfb22b
"""Utilities for downloading and initializing model weights."""
"""Utilities for downloading and initializing model weights."""
import
f
ilelock
import
f
nmatch
import
glob
import
glob
import
hashlib
import
hashlib
import
fnmatch
import
json
import
json
import
os
import
os
from
collections
import
defaultdict
from
collections
import
defaultdict
from
typing
import
Any
,
Iterator
,
List
,
Optional
,
Tuple
from
typing
import
Any
,
Iterator
,
List
,
Optional
,
Tuple
from
huggingface_hub
import
snapshot_download
,
HfFileSystem
import
filelock
import
numpy
as
np
import
numpy
as
np
from
safetensors.torch
import
load_file
,
save_file
,
safe_open
import
torch
import
torch
from
huggingface_hub
import
HfFileSystem
,
snapshot_download
from
safetensors.torch
import
load_file
,
safe_open
,
save_file
from
tqdm.auto
import
tqdm
from
tqdm.auto
import
tqdm
from
vllm.config
import
ModelConfig
from
vllm.config
import
ModelConfig
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.quantization
import
(
get_q
uantization
_c
onfig
,
from
vllm.model_executor.layers.quantization
import
(
Q
uantization
C
onfig
,
Q
uantization
C
onfig
)
get_q
uantization
_c
onfig
)
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/outputs.py
View file @
01bfb22b
from
typing
import
List
,
Optional
import
time
import
time
from
typing
import
List
,
Optional
from
vllm.sequence
import
(
PromptLogprobs
,
SampleLogprobs
,
SequenceGroup
,
SequenceStatus
,
RequestMetrics
)
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.sequence
import
(
PromptLogprobs
,
RequestMetrics
,
SampleLogprobs
,
SequenceGroup
,
SequenceStatus
)
class
CompletionOutput
:
class
CompletionOutput
:
...
...
vllm/sequence.py
View file @
01bfb22b
...
@@ -2,14 +2,15 @@
...
@@ -2,14 +2,15 @@
import
copy
import
copy
import
enum
import
enum
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
typing
import
Dict
,
List
,
Optional
,
Union
,
TYPE_CHECKING
from
typing
import
TYPE_CHECKING
,
Dict
,
List
,
Optional
,
Union
from
vllm.block
import
LogicalTokenBlock
from
vllm.block
import
LogicalTokenBlock
from
vllm.sampling_params
import
SamplingParams
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.sampling_params
import
SamplingParams
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
import
torch
import
torch
from
vllm.spec_decode.metrics
import
SpecDecodeWorkerMetrics
from
vllm.spec_decode.metrics
import
SpecDecodeWorkerMetrics
...
...
vllm/spec_decode/batch_expansion.py
View file @
01bfb22b
from
typing
import
Iterator
,
List
,
Tuple
,
Optional
,
Dict
from
itertools
import
chain
,
count
from
itertools
import
chain
,
count
from
typing
import
Dict
,
Iterator
,
List
,
Optional
,
Tuple
import
torch
import
torch
from
vllm.sequence
import
(
SamplerOutput
,
SequenceGroupMetadata
,
SequenceData
)
from
vllm.sequence
import
SamplerOutput
,
SequenceData
,
SequenceGroupMetadata
from
vllm.worker.worker
import
Worker
from
vllm.spec_decode.interfaces
import
(
SpeculativeProposals
,
from
vllm.spec_decode.util
import
(
nvtx_range
,
sampler_output_to_torch
,
SpeculativeScorer
,
SpeculativeScores
)
get_all_seq_ids
,
from
vllm.spec_decode.util
import
(
get_all_seq_ids
,
nvtx_range
,
sampler_output_to_torch
,
split_batch_by_proposal_len
)
split_batch_by_proposal_len
)
from
vllm.spec_decode.interfaces
import
(
SpeculativeScorer
,
from
vllm.worker.worker
import
Worker
SpeculativeProposals
,
SpeculativeScores
)
SeqId
=
int
SeqId
=
int
TargetSeqId
=
int
TargetSeqId
=
int
...
...
vllm/spec_decode/interfaces.py
View file @
01bfb22b
from
typing
import
List
,
Tuple
,
Optional
,
Dict
from
dataclasses
import
dataclass
from
abc
import
ABC
,
abstractmethod
from
abc
import
ABC
,
abstractmethod
from
dataclasses
import
dataclass
from
typing
import
Dict
,
List
,
Optional
,
Tuple
import
torch
import
torch
...
...
vllm/spec_decode/metrics.py
View file @
01bfb22b
import
t
orch
import
t
ime
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
typing
import
Callable
,
Optional
import
torch
from
vllm.model_executor.layers.rejection_sampler
import
RejectionSampler
from
vllm.model_executor.layers.rejection_sampler
import
RejectionSampler
from
typing
import
Optional
from
vllm.utils
import
is_pin_memory_available
from
vllm.utils
import
is_pin_memory_available
import
time
from
typing
import
Callable
@
dataclass
@
dataclass
...
...
vllm/spec_decode/multi_step_worker.py
View file @
01bfb22b
from
typing
import
List
,
Dict
,
Optional
,
Tuple
import
copy
import
copy
from
typing
import
Dict
,
List
,
Optional
,
Tuple
import
torch
import
torch
from
vllm.sequence
import
SamplerOutput
,
SequenceGroupMetadata
from
vllm.sequence
import
SamplerOutput
,
SequenceGroupMetadata
from
vllm.worker.worker
import
Worker
from
vllm.spec_decode.interfaces
import
(
SpeculativeProposals
,
from
vllm.spec_decode.interfaces
import
(
SpeculativeProposals
,
SpeculativeProposer
)
SpeculativeProposer
)
from
vllm.spec_decode.util
import
sampler_output_to_torch
from
vllm.spec_decode.util
import
sampler_output_to_torch
from
vllm.worker.worker
import
Worker
class
MultiStepWorker
(
Worker
):
class
MultiStepWorker
(
Worker
):
...
...
vllm/spec_decode/spec_decode_worker.py
View file @
01bfb22b
from
typing
import
List
,
Tuple
,
Optional
,
Dict
from
functools
import
cached_property
from
functools
import
cached_property
from
typing
import
Dict
,
List
,
Optional
,
Tuple
import
torch
import
torch
from
vllm.spec_decode.metrics
import
AsyncMetricsCollector
from
vllm.config
import
CacheConfig
from
vllm.model_executor.layers.rejection_sampler
import
RejectionSampler
from
vllm.sequence
import
(
SamplerOutput
,
SequenceGroupMetadata
,
from
vllm.sequence
import
(
SamplerOutput
,
SequenceGroupMetadata
,
SequenceGroupOutput
,
SequenceOutput
)
SequenceGroupOutput
,
SequenceOutput
)
from
vllm.worker.worker
import
Worker
from
vllm.spec_decode.batch_expansion
import
BatchExpansionTop1Scorer
from
vllm.spec_decode.interfaces
import
(
SpeculativeProposals
,
SpeculativeScorer
,
SpeculativeScores
)
from
vllm.spec_decode.metrics
import
AsyncMetricsCollector
from
vllm.spec_decode.multi_step_worker
import
MultiStepWorker
from
vllm.spec_decode.multi_step_worker
import
MultiStepWorker
from
vllm.model_executor.layers.rejection_sampler
import
RejectionSampler
from
vllm.spec_decode.util
import
(
get_all_seq_ids
,
nvtx_range
,
from
vllm.config
import
CacheConfig
from
vllm.spec_decode.util
import
(
nvtx_range
,
get_all_seq_ids
,
split_batch_by_proposal_len
)
split_batch_by_proposal_len
)
from
vllm.spec_decode.interfaces
import
SpeculativeProposals
,
SpeculativeScores
from
vllm.worker.worker
import
Worker
from
vllm.spec_decode.batch_expansion
import
BatchExpansionTop1Scorer
from
vllm.spec_decode.interfaces
import
SpeculativeScorer
class
SpecDecodeWorker
:
class
SpecDecodeWorker
:
...
...
vllm/spec_decode/util.py
View file @
01bfb22b
import
torch
from
typing
import
List
,
Tuple
from
vllm.sequence
import
SequenceGroupMetadata
,
SamplerOutput
from
contextlib
import
contextmanager
from
contextlib
import
contextmanager
from
itertools
import
chain
from
itertools
import
chain
from
typing
import
List
,
Tuple
import
torch
from
vllm.sequence
import
SamplerOutput
,
SequenceGroupMetadata
SeqId
=
int
SeqId
=
int
...
...
vllm/transformers_utils/configs/__init__.py
View file @
01bfb22b
from
vllm.transformers_utils.configs.chatglm
import
ChatGLMConfig
from
vllm.transformers_utils.configs.chatglm
import
ChatGLMConfig
from
vllm.transformers_utils.configs.mpt
import
MPTConfig
# RWConfig is for the original tiiuae/falcon-40b(-instruct) and
# RWConfig is for the original tiiuae/falcon-40b(-instruct) and
# tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the
# tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the
# `FalconConfig` class from the official HuggingFace transformers library.
# `FalconConfig` class from the official HuggingFace transformers library.
from
vllm.transformers_utils.configs.falcon
import
RWConfig
from
vllm.transformers_utils.configs.falcon
import
RWConfig
from
vllm.transformers_utils.configs.jais
import
JAISConfig
from
vllm.transformers_utils.configs.jais
import
JAISConfig
from
vllm.transformers_utils.configs.mpt
import
MPTConfig
__all__
=
[
__all__
=
[
"ChatGLMConfig"
,
"ChatGLMConfig"
,
...
...
vllm/transformers_utils/configs/mpt.py
View file @
01bfb22b
...
@@ -4,6 +4,7 @@
...
@@ -4,6 +4,7 @@
"""A HuggingFace-style model configuration."""
"""A HuggingFace-style model configuration."""
import
warnings
import
warnings
from
typing
import
Any
,
Dict
,
Optional
,
Union
from
typing
import
Any
,
Dict
,
Optional
,
Union
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
attn_config_defaults
:
Dict
=
{
attn_config_defaults
:
Dict
=
{
...
...
vllm/transformers_utils/detokenizer.py
View file @
01bfb22b
from
typing
import
List
,
Dict
,
Optional
from
typing
import
Dict
,
List
,
Optional
from
transformers
import
PreTrainedTokenizer
from
transformers
import
PreTrainedTokenizer
from
vllm.sequence
import
Sequence
,
Logprob
,
SequenceGroup
,
SamplingParams
from
vllm.transformers_utils.tokenizer
import
(
detokenize_incrementally
,
from
vllm.sequence
import
Logprob
,
SamplingParams
,
Sequence
,
SequenceGroup
convert_prompt_ids_to_tokens
)
from
vllm.transformers_utils.tokenizer
import
(
convert_prompt_ids_to_tokens
,
detokenize_incrementally
)
from
vllm.transformers_utils.tokenizer_group.base_tokenizer_group
import
(
from
vllm.transformers_utils.tokenizer_group.base_tokenizer_group
import
(
BaseTokenizerGroup
)
BaseTokenizerGroup
)
...
...
vllm/transformers_utils/tokenizer.py
View file @
01bfb22b
...
@@ -5,8 +5,8 @@ from transformers import (AutoTokenizer, PreTrainedTokenizer,
...
@@ -5,8 +5,8 @@ from transformers import (AutoTokenizer, PreTrainedTokenizer,
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.utils
import
make_async
from
vllm.transformers_utils.tokenizers
import
*
from
vllm.transformers_utils.tokenizers
import
*
from
vllm.utils
import
make_async
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/transformers_utils/tokenizer_group/__init__.py
View file @
01bfb22b
from
typing
import
Optional
from
typing
import
Optional
from
vllm.config
import
TokenizerPoolConfig
from
vllm.config
import
TokenizerPoolConfig
from
vllm.engine.ray_utils
import
ray
from
vllm.transformers_utils.tokenizer_group.base_tokenizer_group
import
(
from
vllm.transformers_utils.tokenizer_group.base_tokenizer_group
import
(
BaseTokenizerGroup
)
BaseTokenizerGroup
)
from
vllm.transformers_utils.tokenizer_group.tokenizer_group
import
(
from
vllm.transformers_utils.tokenizer_group.tokenizer_group
import
(
TokenizerGroup
)
TokenizerGroup
)
from
vllm.engine.ray_utils
import
ray
if
ray
:
if
ray
:
from
vllm.transformers_utils.tokenizer_group.ray_tokenizer_group
import
(
from
vllm.transformers_utils.tokenizer_group.ray_tokenizer_group
import
(
...
...
vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py
View file @
01bfb22b
...
@@ -2,16 +2,16 @@ import asyncio
...
@@ -2,16 +2,16 @@ import asyncio
import
os
import
os
from
typing
import
List
,
Optional
from
typing
import
List
,
Optional
from
ray.util.scheduling_strategies
import
NodeAffinitySchedulingStrategy
from
transformers
import
PreTrainedTokenizer
from
transformers
import
PreTrainedTokenizer
from
vllm.config
import
TokenizerPoolConfig
from
vllm.config
import
TokenizerPoolConfig
from
vllm.lora.request
import
LoRARequest
from
vllm.engine.ray_utils
import
ray
from
vllm.engine.ray_utils
import
ray
from
vllm.lora.request
import
LoRARequest
from
vllm.transformers_utils.tokenizer_group.base_tokenizer_group
import
(
from
vllm.transformers_utils.tokenizer_group.base_tokenizer_group
import
(
BaseTokenizerGroup
)
BaseTokenizerGroup
)
from
vllm.transformers_utils.tokenizer_group.tokenizer_group
import
(
from
vllm.transformers_utils.tokenizer_group.tokenizer_group
import
(
TokenizerGroup
)
TokenizerGroup
)
from
ray.util.scheduling_strategies
import
NodeAffinitySchedulingStrategy
class
RayTokenizerGroupPool
(
BaseTokenizerGroup
):
class
RayTokenizerGroupPool
(
BaseTokenizerGroup
):
...
...
vllm/transformers_utils/tokenizer_group/tokenizer_group.py
View file @
01bfb22b
...
@@ -4,11 +4,11 @@ from transformers import PreTrainedTokenizer
...
@@ -4,11 +4,11 @@ from transformers import PreTrainedTokenizer
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.transformers_utils.tokenizer
import
(
get_lora_tokenizer
,
from
vllm.transformers_utils.tokenizer
import
(
get_lora_tokenizer
,
get_lora_tokenizer_async
)
get_lora_tokenizer_async
,
get_tokenizer
)
from
vllm.transformers_utils.tokenizer_group.base_tokenizer_group
import
(
from
vllm.transformers_utils.tokenizer_group.base_tokenizer_group
import
(
BaseTokenizerGroup
)
BaseTokenizerGroup
)
from
vllm.utils
import
LRUCache
from
vllm.utils
import
LRUCache
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
class
TokenizerGroup
(
BaseTokenizerGroup
):
class
TokenizerGroup
(
BaseTokenizerGroup
):
...
...
Prev
1
…
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment