Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
01bfb22b
"vscode:/vscode.git/clone" did not exist on "a892b259b4503526511d9ec17d5f8961e46969a6"
Unverified
Commit
01bfb22b
authored
Mar 25, 2024
by
SangBin Cho
Committed by
GitHub
Mar 25, 2024
Browse files
[CI] Try introducing isort. (#3495)
parent
e67c295b
Changes
144
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
67 additions
and
63 deletions
+67
-63
vllm/model_executor/neuron_model_loader.py
vllm/model_executor/neuron_model_loader.py
+2
-2
vllm/model_executor/parallel_utils/communication_op.py
vllm/model_executor/parallel_utils/communication_op.py
+3
-6
vllm/model_executor/parallel_utils/custom_all_reduce.py
vllm/model_executor/parallel_utils/custom_all_reduce.py
+3
-2
vllm/model_executor/sampling_metadata.py
vllm/model_executor/sampling_metadata.py
+2
-3
vllm/model_executor/weight_utils.py
vllm/model_executor/weight_utils.py
+6
-6
vllm/outputs.py
vllm/outputs.py
+3
-3
vllm/sequence.py
vllm/sequence.py
+3
-2
vllm/spec_decode/batch_expansion.py
vllm/spec_decode/batch_expansion.py
+7
-8
vllm/spec_decode/interfaces.py
vllm/spec_decode/interfaces.py
+2
-2
vllm/spec_decode/metrics.py
vllm/spec_decode/metrics.py
+5
-4
vllm/spec_decode/multi_step_worker.py
vllm/spec_decode/multi_step_worker.py
+2
-2
vllm/spec_decode/spec_decode_worker.py
vllm/spec_decode/spec_decode_worker.py
+9
-9
vllm/spec_decode/util.py
vllm/spec_decode/util.py
+5
-3
vllm/transformers_utils/configs/__init__.py
vllm/transformers_utils/configs/__init__.py
+1
-1
vllm/transformers_utils/configs/mpt.py
vllm/transformers_utils/configs/mpt.py
+1
-0
vllm/transformers_utils/detokenizer.py
vllm/transformers_utils/detokenizer.py
+6
-4
vllm/transformers_utils/tokenizer.py
vllm/transformers_utils/tokenizer.py
+1
-1
vllm/transformers_utils/tokenizer_group/__init__.py
vllm/transformers_utils/tokenizer_group/__init__.py
+2
-1
vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py
...transformers_utils/tokenizer_group/ray_tokenizer_group.py
+2
-2
vllm/transformers_utils/tokenizer_group/tokenizer_group.py
vllm/transformers_utils/tokenizer_group/tokenizer_group.py
+2
-2
No files found.
vllm/model_executor/neuron_model_loader.py
View file @
01bfb22b
...
...
@@ -110,8 +110,8 @@ def _get_model_architecture(config: PretrainedConfig) -> Type[nn.Module]:
def
get_neuron_model
(
model_config
:
ModelConfig
,
parallel_config
:
ParallelConfig
,
scheduler_config
:
SchedulerConfig
)
->
nn
.
Module
:
from
transformers_neuronx.config
import
(
Neuron
Config
,
ContinuousBatching
Config
)
from
transformers_neuronx.config
import
(
ContinuousBatching
Config
,
Neuron
Config
)
# Create a model instance.
model
=
NeuronCasualLM
(
model_config
.
hf_config
)
...
...
vllm/model_executor/parallel_utils/communication_op.py
View file @
01bfb22b
...
...
@@ -5,14 +5,11 @@ import torch
from
torch.distributed
import
ProcessGroup
from
vllm.model_executor.parallel_utils
import
cupy_utils
from
vllm.model_executor.parallel_utils.parallel_state
import
(
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_group
,
is_cupy_nccl_enabled_for_all_reduce
,
)
from
vllm.model_executor.parallel_utils.custom_all_reduce
import
(
custom_all_reduce
)
from
vllm.model_executor.parallel_utils.parallel_state
import
(
get_tensor_model_parallel_group
,
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
,
is_cupy_nccl_enabled_for_all_reduce
)
def
tensor_model_parallel_all_reduce
(
input_
:
torch
.
Tensor
)
->
torch
.
Tensor
:
...
...
vllm/model_executor/parallel_utils/custom_all_reduce.py
View file @
01bfb22b
...
...
@@ -6,11 +6,12 @@ import torch.distributed as dist
from
vllm.logger
import
init_logger
from
vllm.model_executor.parallel_utils.parallel_state
import
(
get_tensor_model_parallel_
world_size
,
get_tensor_model_parallel_
rank
)
get_tensor_model_parallel_
rank
,
get_tensor_model_parallel_
world_size
)
try
:
from
vllm._C
import
custom_ar
import
pynvml
from
vllm._C
import
custom_ar
except
ImportError
:
# For AMD GPUs
custom_ar
=
None
...
...
vllm/model_executor/sampling_metadata.py
View file @
01bfb22b
import
random
from
dataclasses
import
dataclass
from
typing
import
Dict
,
List
,
Optional
,
Tuple
import
torch
import
random
from
vllm.model_executor.layers.ops.sample
import
(
get_num_triton_sampler_splits
)
from
vllm.model_executor.layers.ops.sample
import
get_num_triton_sampler_splits
from
vllm.sampling_params
import
SamplingParams
,
SamplingType
from
vllm.sequence
import
SequenceData
from
vllm.utils
import
is_pin_memory_available
...
...
vllm/model_executor/weight_utils.py
View file @
01bfb22b
"""Utilities for downloading and initializing model weights."""
import
f
ilelock
import
f
nmatch
import
glob
import
hashlib
import
fnmatch
import
json
import
os
from
collections
import
defaultdict
from
typing
import
Any
,
Iterator
,
List
,
Optional
,
Tuple
from
huggingface_hub
import
snapshot_download
,
HfFileSystem
import
filelock
import
numpy
as
np
from
safetensors.torch
import
load_file
,
save_file
,
safe_open
import
torch
from
huggingface_hub
import
HfFileSystem
,
snapshot_download
from
safetensors.torch
import
load_file
,
safe_open
,
save_file
from
tqdm.auto
import
tqdm
from
vllm.config
import
ModelConfig
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.quantization
import
(
get_q
uantization
_c
onfig
,
Q
uantization
C
onfig
)
from
vllm.model_executor.layers.quantization
import
(
Q
uantization
C
onfig
,
get_q
uantization
_c
onfig
)
logger
=
init_logger
(
__name__
)
...
...
vllm/outputs.py
View file @
01bfb22b
from
typing
import
List
,
Optional
import
time
from
typing
import
List
,
Optional
from
vllm.sequence
import
(
PromptLogprobs
,
SampleLogprobs
,
SequenceGroup
,
SequenceStatus
,
RequestMetrics
)
from
vllm.lora.request
import
LoRARequest
from
vllm.sequence
import
(
PromptLogprobs
,
RequestMetrics
,
SampleLogprobs
,
SequenceGroup
,
SequenceStatus
)
class
CompletionOutput
:
...
...
vllm/sequence.py
View file @
01bfb22b
...
...
@@ -2,14 +2,15 @@
import
copy
import
enum
from
dataclasses
import
dataclass
from
typing
import
Dict
,
List
,
Optional
,
Union
,
TYPE_CHECKING
from
typing
import
TYPE_CHECKING
,
Dict
,
List
,
Optional
,
Union
from
vllm.block
import
LogicalTokenBlock
from
vllm.sampling_params
import
SamplingParams
from
vllm.lora.request
import
LoRARequest
from
vllm.sampling_params
import
SamplingParams
if
TYPE_CHECKING
:
import
torch
from
vllm.spec_decode.metrics
import
SpecDecodeWorkerMetrics
...
...
vllm/spec_decode/batch_expansion.py
View file @
01bfb22b
from
typing
import
Iterator
,
List
,
Tuple
,
Optional
,
Dict
from
itertools
import
chain
,
count
from
typing
import
Dict
,
Iterator
,
List
,
Optional
,
Tuple
import
torch
from
vllm.sequence
import
(
SamplerOutput
,
SequenceGroupMetadata
,
SequenceData
)
from
vllm.worker.worker
import
Worker
from
vllm.spec_decode.util
import
(
nvtx_range
,
sampler_output_to_torch
,
get_all_seq_ids
,
from
vllm.sequence
import
SamplerOutput
,
SequenceData
,
SequenceGroupMetadata
from
vllm.spec_decode.interfaces
import
(
SpeculativeProposals
,
SpeculativeScorer
,
SpeculativeScores
)
from
vllm.spec_decode.util
import
(
get_all_seq_ids
,
nvtx_range
,
sampler_output_to_torch
,
split_batch_by_proposal_len
)
from
vllm.spec_decode.interfaces
import
(
SpeculativeScorer
,
SpeculativeProposals
,
SpeculativeScores
)
from
vllm.worker.worker
import
Worker
SeqId
=
int
TargetSeqId
=
int
...
...
vllm/spec_decode/interfaces.py
View file @
01bfb22b
from
typing
import
List
,
Tuple
,
Optional
,
Dict
from
dataclasses
import
dataclass
from
abc
import
ABC
,
abstractmethod
from
dataclasses
import
dataclass
from
typing
import
Dict
,
List
,
Optional
,
Tuple
import
torch
...
...
vllm/spec_decode/metrics.py
View file @
01bfb22b
import
t
orch
import
t
ime
from
dataclasses
import
dataclass
from
typing
import
Callable
,
Optional
import
torch
from
vllm.model_executor.layers.rejection_sampler
import
RejectionSampler
from
typing
import
Optional
from
vllm.utils
import
is_pin_memory_available
import
time
from
typing
import
Callable
@
dataclass
...
...
vllm/spec_decode/multi_step_worker.py
View file @
01bfb22b
from
typing
import
List
,
Dict
,
Optional
,
Tuple
import
copy
from
typing
import
Dict
,
List
,
Optional
,
Tuple
import
torch
from
vllm.sequence
import
SamplerOutput
,
SequenceGroupMetadata
from
vllm.worker.worker
import
Worker
from
vllm.spec_decode.interfaces
import
(
SpeculativeProposals
,
SpeculativeProposer
)
from
vllm.spec_decode.util
import
sampler_output_to_torch
from
vllm.worker.worker
import
Worker
class
MultiStepWorker
(
Worker
):
...
...
vllm/spec_decode/spec_decode_worker.py
View file @
01bfb22b
from
typing
import
List
,
Tuple
,
Optional
,
Dict
from
functools
import
cached_property
from
typing
import
Dict
,
List
,
Optional
,
Tuple
import
torch
from
vllm.spec_decode.metrics
import
AsyncMetricsCollector
from
vllm.config
import
CacheConfig
from
vllm.model_executor.layers.rejection_sampler
import
RejectionSampler
from
vllm.sequence
import
(
SamplerOutput
,
SequenceGroupMetadata
,
SequenceGroupOutput
,
SequenceOutput
)
from
vllm.worker.worker
import
Worker
from
vllm.spec_decode.batch_expansion
import
BatchExpansionTop1Scorer
from
vllm.spec_decode.interfaces
import
(
SpeculativeProposals
,
SpeculativeScorer
,
SpeculativeScores
)
from
vllm.spec_decode.metrics
import
AsyncMetricsCollector
from
vllm.spec_decode.multi_step_worker
import
MultiStepWorker
from
vllm.model_executor.layers.rejection_sampler
import
RejectionSampler
from
vllm.config
import
CacheConfig
from
vllm.spec_decode.util
import
(
nvtx_range
,
get_all_seq_ids
,
from
vllm.spec_decode.util
import
(
get_all_seq_ids
,
nvtx_range
,
split_batch_by_proposal_len
)
from
vllm.spec_decode.interfaces
import
SpeculativeProposals
,
SpeculativeScores
from
vllm.spec_decode.batch_expansion
import
BatchExpansionTop1Scorer
from
vllm.spec_decode.interfaces
import
SpeculativeScorer
from
vllm.worker.worker
import
Worker
class
SpecDecodeWorker
:
...
...
vllm/spec_decode/util.py
View file @
01bfb22b
import
torch
from
typing
import
List
,
Tuple
from
vllm.sequence
import
SequenceGroupMetadata
,
SamplerOutput
from
contextlib
import
contextmanager
from
itertools
import
chain
from
typing
import
List
,
Tuple
import
torch
from
vllm.sequence
import
SamplerOutput
,
SequenceGroupMetadata
SeqId
=
int
...
...
vllm/transformers_utils/configs/__init__.py
View file @
01bfb22b
from
vllm.transformers_utils.configs.chatglm
import
ChatGLMConfig
from
vllm.transformers_utils.configs.mpt
import
MPTConfig
# RWConfig is for the original tiiuae/falcon-40b(-instruct) and
# tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the
# `FalconConfig` class from the official HuggingFace transformers library.
from
vllm.transformers_utils.configs.falcon
import
RWConfig
from
vllm.transformers_utils.configs.jais
import
JAISConfig
from
vllm.transformers_utils.configs.mpt
import
MPTConfig
__all__
=
[
"ChatGLMConfig"
,
...
...
vllm/transformers_utils/configs/mpt.py
View file @
01bfb22b
...
...
@@ -4,6 +4,7 @@
"""A HuggingFace-style model configuration."""
import
warnings
from
typing
import
Any
,
Dict
,
Optional
,
Union
from
transformers
import
PretrainedConfig
attn_config_defaults
:
Dict
=
{
...
...
vllm/transformers_utils/detokenizer.py
View file @
01bfb22b
from
typing
import
List
,
Dict
,
Optional
from
typing
import
Dict
,
List
,
Optional
from
transformers
import
PreTrainedTokenizer
from
vllm.sequence
import
Sequence
,
Logprob
,
SequenceGroup
,
SamplingParams
from
vllm.transformers_utils.tokenizer
import
(
detokenize_incrementally
,
convert_prompt_ids_to_tokens
)
from
vllm.sequence
import
Logprob
,
SamplingParams
,
Sequence
,
SequenceGroup
from
vllm.transformers_utils.tokenizer
import
(
convert_prompt_ids_to_tokens
,
detokenize_incrementally
)
from
vllm.transformers_utils.tokenizer_group.base_tokenizer_group
import
(
BaseTokenizerGroup
)
...
...
vllm/transformers_utils/tokenizer.py
View file @
01bfb22b
...
...
@@ -5,8 +5,8 @@ from transformers import (AutoTokenizer, PreTrainedTokenizer,
from
vllm.logger
import
init_logger
from
vllm.lora.request
import
LoRARequest
from
vllm.utils
import
make_async
from
vllm.transformers_utils.tokenizers
import
*
from
vllm.utils
import
make_async
logger
=
init_logger
(
__name__
)
...
...
vllm/transformers_utils/tokenizer_group/__init__.py
View file @
01bfb22b
from
typing
import
Optional
from
vllm.config
import
TokenizerPoolConfig
from
vllm.engine.ray_utils
import
ray
from
vllm.transformers_utils.tokenizer_group.base_tokenizer_group
import
(
BaseTokenizerGroup
)
from
vllm.transformers_utils.tokenizer_group.tokenizer_group
import
(
TokenizerGroup
)
from
vllm.engine.ray_utils
import
ray
if
ray
:
from
vllm.transformers_utils.tokenizer_group.ray_tokenizer_group
import
(
...
...
vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py
View file @
01bfb22b
...
...
@@ -2,16 +2,16 @@ import asyncio
import
os
from
typing
import
List
,
Optional
from
ray.util.scheduling_strategies
import
NodeAffinitySchedulingStrategy
from
transformers
import
PreTrainedTokenizer
from
vllm.config
import
TokenizerPoolConfig
from
vllm.lora.request
import
LoRARequest
from
vllm.engine.ray_utils
import
ray
from
vllm.lora.request
import
LoRARequest
from
vllm.transformers_utils.tokenizer_group.base_tokenizer_group
import
(
BaseTokenizerGroup
)
from
vllm.transformers_utils.tokenizer_group.tokenizer_group
import
(
TokenizerGroup
)
from
ray.util.scheduling_strategies
import
NodeAffinitySchedulingStrategy
class
RayTokenizerGroupPool
(
BaseTokenizerGroup
):
...
...
vllm/transformers_utils/tokenizer_group/tokenizer_group.py
View file @
01bfb22b
...
...
@@ -4,11 +4,11 @@ from transformers import PreTrainedTokenizer
from
vllm.lora.request
import
LoRARequest
from
vllm.transformers_utils.tokenizer
import
(
get_lora_tokenizer
,
get_lora_tokenizer_async
)
get_lora_tokenizer_async
,
get_tokenizer
)
from
vllm.transformers_utils.tokenizer_group.base_tokenizer_group
import
(
BaseTokenizerGroup
)
from
vllm.utils
import
LRUCache
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
class
TokenizerGroup
(
BaseTokenizerGroup
):
...
...
Prev
1
…
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment