Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
01bfb22b
Unverified
Commit
01bfb22b
authored
Mar 25, 2024
by
SangBin Cho
Committed by
GitHub
Mar 25, 2024
Browse files
[CI] Try introducing isort. (#3495)
parent
e67c295b
Changes
144
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
56 additions
and
59 deletions
+56
-59
vllm/entrypoints/openai/serving_engine.py
vllm/entrypoints/openai/serving_engine.py
+6
-6
vllm/executor/executor_base.py
vllm/executor/executor_base.py
+2
-2
vllm/executor/gpu_executor.py
vllm/executor/gpu_executor.py
+4
-4
vllm/executor/neuron_executor.py
vllm/executor/neuron_executor.py
+3
-3
vllm/executor/ray_gpu_executor.py
vllm/executor/ray_gpu_executor.py
+6
-6
vllm/logger.py
vllm/logger.py
+1
-1
vllm/lora/layers.py
vllm/lora/layers.py
+6
-8
vllm/lora/lora.py
vllm/lora/lora.py
+1
-0
vllm/lora/models.py
vllm/lora/models.py
+2
-3
vllm/lora/worker_manager.py
vllm/lora/worker_manager.py
+2
-2
vllm/model_executor/guided_decoding.py
vllm/model_executor/guided_decoding.py
+6
-6
vllm/model_executor/guided_logits_processors.py
vllm/model_executor/guided_logits_processors.py
+3
-3
vllm/model_executor/layers/fused_moe/__init__.py
vllm/model_executor/layers/fused_moe/__init__.py
+1
-3
vllm/model_executor/layers/linear.py
vllm/model_executor/layers/linear.py
+3
-3
vllm/model_executor/layers/ops/rand.py
vllm/model_executor/layers/ops/rand.py
+2
-2
vllm/model_executor/layers/ops/sample.py
vllm/model_executor/layers/ops/sample.py
+1
-1
vllm/model_executor/layers/quantization/__init__.py
vllm/model_executor/layers/quantization/__init__.py
+2
-2
vllm/model_executor/layers/quantization/gptq.py
vllm/model_executor/layers/quantization/gptq.py
+1
-1
vllm/model_executor/layers/quantization/marlin.py
vllm/model_executor/layers/quantization/marlin.py
+2
-1
vllm/model_executor/layers/rejection_sampler.py
vllm/model_executor/layers/rejection_sampler.py
+2
-2
No files found.
vllm/entrypoints/openai/serving_engine.py
View file @
01bfb22b
...
@@ -3,16 +3,16 @@ import json
...
@@ -3,16 +3,16 @@ import json
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
http
import
HTTPStatus
from
http
import
HTTPStatus
from
typing
import
Dict
,
List
,
Optional
,
Union
from
typing
import
Dict
,
List
,
Optional
,
Union
from
vllm.logger
import
init_logger
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
from
vllm.entrypoints.openai.protocol
import
(
CompletionRequest
,
from
vllm.entrypoints.openai.protocol
import
(
ChatCompletionRequest
,
ChatCompletionRequest
,
CompletionRequest
,
ErrorResponse
,
ErrorResponse
,
LogProbs
,
LogProbs
,
ModelCard
,
ModelList
,
ModelCard
,
ModelList
,
ModelPermission
)
ModelPermission
)
from
vllm.logger
import
init_logger
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.sequence
import
Logprob
from
vllm.sequence
import
Logprob
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/executor/executor_base.py
View file @
01bfb22b
from
abc
import
ABC
,
abstractmethod
from
abc
import
ABC
,
abstractmethod
from
typing
import
Dict
,
List
,
Optional
from
typing
import
Dict
,
List
,
Optional
from
vllm.config
import
(
CacheConfig
,
DeviceConfig
,
ModelConfig
,
from
vllm.config
import
(
CacheConfig
,
DeviceConfig
,
LoRAConfig
,
ModelConfig
,
ParallelConfig
,
SchedulerConfig
,
LoRAConfig
)
ParallelConfig
,
SchedulerConfig
)
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.sequence
import
SamplerOutput
,
SequenceGroupMetadata
from
vllm.sequence
import
SamplerOutput
,
SequenceGroupMetadata
...
...
vllm/executor/gpu_executor.py
View file @
01bfb22b
from
typing
import
Dict
,
List
,
Optional
from
typing
import
Dict
,
List
,
Optional
from
vllm.lora.request
import
LoRARequest
from
vllm.config
import
(
CacheConfig
,
DeviceConfig
,
LoRAConfig
,
ModelConfig
,
from
vllm.config
import
(
CacheConfig
,
DeviceConfig
,
ModelConfig
,
ParallelConfig
,
SchedulerConfig
)
ParallelConfig
,
SchedulerConfig
,
LoRAConfig
)
from
vllm.executor.executor_base
import
ExecutorAsyncBase
,
ExecutorBase
from
vllm.executor.executor_base
import
ExecutorAsyncBase
,
ExecutorBase
from
vllm.executor.utils
import
check_block_size_valid
from
vllm.executor.utils
import
check_block_size_valid
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.lora.request
import
LoRARequest
from
vllm.sequence
import
SamplerOutput
,
SequenceGroupMetadata
from
vllm.sequence
import
SamplerOutput
,
SequenceGroupMetadata
from
vllm.utils
import
(
get_ip
,
get_open_port
,
get_distributed_init_method
,
from
vllm.utils
import
(
get_distributed_init_method
,
get_ip
,
get_open_port
,
make_async
)
make_async
)
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/executor/neuron_executor.py
View file @
01bfb22b
from
typing
import
Dict
,
List
,
Optional
from
typing
import
Dict
,
List
,
Optional
from
vllm.lora.request
import
LoRARequest
from
vllm.config
import
(
CacheConfig
,
DeviceConfig
,
LoRAConfig
,
ModelConfig
,
from
vllm.config
import
(
CacheConfig
,
DeviceConfig
,
ModelConfig
,
ParallelConfig
,
SchedulerConfig
)
ParallelConfig
,
SchedulerConfig
,
LoRAConfig
)
from
vllm.executor.executor_base
import
ExecutorBase
from
vllm.executor.executor_base
import
ExecutorBase
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.lora.request
import
LoRARequest
from
vllm.sequence
import
SamplerOutput
,
SequenceGroupMetadata
from
vllm.sequence
import
SamplerOutput
,
SequenceGroupMetadata
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/executor/ray_gpu_executor.py
View file @
01bfb22b
import
asyncio
import
asyncio
import
copy
import
copy
from
collections
import
defaultdict
import
os
import
os
import
pickle
import
pickle
from
collections
import
defaultdict
from
typing
import
TYPE_CHECKING
,
Any
,
Dict
,
List
,
Optional
from
typing
import
TYPE_CHECKING
,
Any
,
Dict
,
List
,
Optional
from
vllm.config
import
(
CacheConfig
,
DeviceConfig
,
ModelConfig
,
from
vllm.config
import
(
CacheConfig
,
DeviceConfig
,
LoRAConfig
,
ModelConfig
,
ParallelConfig
,
SchedulerConfig
,
LoRAConfig
)
ParallelConfig
,
SchedulerConfig
)
from
vllm.engine.ray_utils
import
RayWorkerVllm
,
ray
from
vllm.engine.ray_utils
import
RayWorkerVllm
,
ray
from
vllm.executor.executor_base
import
ExecutorAsyncBase
,
ExecutorBase
from
vllm.executor.executor_base
import
ExecutorAsyncBase
,
ExecutorBase
from
vllm.executor.utils
import
check_block_size_valid
from
vllm.executor.utils
import
check_block_size_valid
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.sequence
import
SamplerOutput
,
SequenceGroupMetadata
from
vllm.sequence
import
SamplerOutput
,
SequenceGroupMetadata
from
vllm.utils
import
(
s
et_
cuda_visible_devices
,
get_ip
,
get_open_port
,
from
vllm.utils
import
(
g
et_
distributed_init_method
,
get_ip
,
get_open_port
,
get_distributed_init_method
,
make_async
)
make_async
,
set_cuda_visible_devices
)
if
ray
is
not
None
:
if
ray
is
not
None
:
from
ray.util.scheduling_strategies
import
PlacementGroupSchedulingStrategy
from
ray.util.scheduling_strategies
import
PlacementGroupSchedulingStrategy
...
@@ -343,7 +343,7 @@ class RayGPUExecutor(ExecutorBase):
...
@@ -343,7 +343,7 @@ class RayGPUExecutor(ExecutorBase):
raise
ValueError
(
f
"Ray version
{
required_version
}
or greater is "
raise
ValueError
(
f
"Ray version
{
required_version
}
or greater is "
f
"required, but found
{
current_version
}
"
)
f
"required, but found
{
current_version
}
"
)
from
ray.dag
import
MultiOut
putNode
,
In
putNode
from
ray.dag
import
In
putNode
,
MultiOut
putNode
assert
self
.
parallel_config
.
worker_use_ray
assert
self
.
parallel_config
.
worker_use_ray
# Right now, compiled DAG requires at least 1 arg. We send
# Right now, compiled DAG requires at least 1 arg. We send
...
...
vllm/logger.py
View file @
01bfb22b
...
@@ -2,8 +2,8 @@
...
@@ -2,8 +2,8 @@
# https://github.com/skypilot-org/skypilot/blob/86dc0f6283a335e4aa37b3c10716f90999f48ab6/sky/sky_logging.py
# https://github.com/skypilot-org/skypilot/blob/86dc0f6283a335e4aa37b3c10716f90999f48ab6/sky/sky_logging.py
"""Logging configuration for vLLM."""
"""Logging configuration for vLLM."""
import
logging
import
logging
import
sys
import
os
import
os
import
sys
VLLM_CONFIGURE_LOGGING
=
int
(
os
.
getenv
(
"VLLM_CONFIGURE_LOGGING"
,
"1"
))
VLLM_CONFIGURE_LOGGING
=
int
(
os
.
getenv
(
"VLLM_CONFIGURE_LOGGING"
,
"1"
))
...
...
vllm/lora/layers.py
View file @
01bfb22b
...
@@ -10,18 +10,16 @@ from transformers import PretrainedConfig
...
@@ -10,18 +10,16 @@ from transformers import PretrainedConfig
from
vllm.config
import
LoRAConfig
from
vllm.config
import
LoRAConfig
from
vllm.lora.punica
import
add_lora
,
add_lora_slice
,
bgmv
from
vllm.lora.punica
import
add_lora
,
add_lora_slice
,
bgmv
from
vllm.model_executor.parallel_utils.communication_op
import
(
tensor_model_parallel_all_gather
,
tensor_model_parallel_all_reduce
,
tensor_model_parallel_gather
,
)
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
Row
ParallelLinear
,
MergedColumn
ParallelLinear
,
QKVParallelLinear
,
QKVParallelLinear
,
MergedColumn
ParallelLinear
)
Row
ParallelLinear
)
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
VocabParallelEmbedding
,
ParallelLMHead
)
ParallelLMHead
,
VocabParallelEmbedding
)
from
vllm.model_executor.parallel_utils.communication_op
import
(
tensor_model_parallel_all_gather
,
tensor_model_parallel_all_reduce
,
tensor_model_parallel_gather
)
from
vllm.model_executor.parallel_utils.parallel_state
import
(
from
vllm.model_executor.parallel_utils.parallel_state
import
(
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
)
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
)
from
vllm.model_executor.parallel_utils.utils
import
(
from
vllm.model_executor.parallel_utils.utils
import
(
...
...
vllm/lora/lora.py
View file @
01bfb22b
from
typing
import
List
,
Optional
from
typing
import
List
,
Optional
import
torch
import
torch
from
vllm.utils
import
is_pin_memory_available
from
vllm.utils
import
is_pin_memory_available
...
...
vllm/lora/models.py
View file @
01bfb22b
...
@@ -4,19 +4,18 @@ import logging
...
@@ -4,19 +4,18 @@ import logging
import
math
import
math
import
os
import
os
import
re
import
re
from
typing
import
(
Callable
,
Dict
,
Hashable
,
List
,
Optional
,
Tuple
,
Type
)
from
typing
import
Callable
,
Dict
,
Hashable
,
List
,
Optional
,
Tuple
,
Type
import
safetensors.torch
import
safetensors.torch
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
vllm.config
import
LoRAConfig
from
vllm.config
import
LoRAConfig
from
vllm.utils
import
LRUCache
,
is_pin_memory_available
from
vllm.lora.layers
import
(
BaseLayerWithLoRA
,
LoRAMapping
,
from_layer
,
from
vllm.lora.layers
import
(
BaseLayerWithLoRA
,
LoRAMapping
,
from_layer
,
from_layer_logits_processor
)
from_layer_logits_processor
)
from
vllm.lora.lora
import
LoRALayerWeights
,
PackedLoRALayerWeights
from
vllm.lora.lora
import
LoRALayerWeights
,
PackedLoRALayerWeights
from
vllm.lora.utils
import
parse_fine_tuned_lora_name
,
replace_submodule
from
vllm.lora.utils
import
parse_fine_tuned_lora_name
,
replace_submodule
from
vllm.utils
import
LRUCache
,
is_pin_memory_available
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
...
vllm/lora/worker_manager.py
View file @
01bfb22b
...
@@ -4,11 +4,11 @@ from typing import Any, Dict, List, Optional, Set, Type
...
@@ -4,11 +4,11 @@ from typing import Any, Dict, List, Optional, Set, Type
import
torch
import
torch
from
vllm.config
import
LoRAConfig
from
vllm.lora.layers
import
LoRAMapping
from
vllm.lora.models
import
(
LoRAModel
,
LoRAModelManager
,
from
vllm.lora.models
import
(
LoRAModel
,
LoRAModelManager
,
LRUCacheLoRAModelManager
,
create_lora_manager
)
LRUCacheLoRAModelManager
,
create_lora_manager
)
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.layers
import
LoRAMapping
from
vllm.config
import
LoRAConfig
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
...
vllm/model_executor/guided_decoding.py
View file @
01bfb22b
...
@@ -5,16 +5,16 @@ from enum import Enum
...
@@ -5,16 +5,16 @@ from enum import Enum
from
functools
import
lru_cache
from
functools
import
lru_cache
from
json
import
dumps
as
json_dumps
from
json
import
dumps
as
json_dumps
from
re
import
escape
as
regex_escape
from
re
import
escape
as
regex_escape
from
typing
import
Union
,
Tuple
from
typing
import
Tuple
,
Union
from
pydantic
import
BaseModel
from
pydantic
import
BaseModel
from
transformers
import
PreTrainedTokenizerBase
from
transformers
import
PreTrainedTokenizerBase
from
vllm.entrypoints.openai.protocol
import
(
CompletionRequest
,
from
vllm.entrypoints.openai.protocol
import
(
Chat
CompletionRequest
,
Chat
CompletionRequest
)
CompletionRequest
)
from
vllm.model_executor.guided_logits_processors
import
(
JSON
LogitsProcessor
,
from
vllm.model_executor.guided_logits_processors
import
(
CFG
LogitsProcessor
,
Regex
LogitsProcessor
,
JSON
LogitsProcessor
,
CFG
LogitsProcessor
)
Regex
LogitsProcessor
)
class
GuidedDecodingMode
(
Enum
):
class
GuidedDecodingMode
(
Enum
):
...
...
vllm/model_executor/guided_logits_processors.py
View file @
01bfb22b
...
@@ -16,13 +16,13 @@
...
@@ -16,13 +16,13 @@
import
json
import
json
import
math
import
math
from
collections
import
defaultdict
from
collections
import
defaultdict
from
typing
import
Union
,
DefaultDict
,
Dict
,
List
,
Optional
,
Callable
from
typing
import
Callable
,
DefaultDict
,
Dict
,
List
,
Optional
,
Union
import
torch
import
torch
from
outlines.fsm.fsm
import
CFGFSM
,
RegexFSM
from
outlines.fsm.json_schema
import
build_regex_from_schema
from
pydantic
import
BaseModel
from
pydantic
import
BaseModel
from
transformers
import
PreTrainedTokenizerBase
from
transformers
import
PreTrainedTokenizerBase
from
outlines.fsm.fsm
import
RegexFSM
,
CFGFSM
from
outlines.fsm.json_schema
import
build_regex_from_schema
class
BaseLogitsProcessor
:
class
BaseLogitsProcessor
:
...
...
vllm/model_executor/layers/fused_moe/__init__.py
View file @
01bfb22b
from
vllm.model_executor.layers.fused_moe.fused_moe
import
(
from
vllm.model_executor.layers.fused_moe.fused_moe
import
(
fused_moe
,
fused_moe
,
get_config_file_name
)
get_config_file_name
,
)
__all__
=
[
__all__
=
[
"fused_moe"
,
"fused_moe"
,
...
...
vllm/model_executor/layers/linear.py
View file @
01bfb22b
...
@@ -5,14 +5,14 @@ import torch
...
@@ -5,14 +5,14 @@ import torch
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
torch.nn.parameter
import
Parameter
from
torch.nn.parameter
import
Parameter
from
vllm.logger
import
init_logger
from
vllm.model_executor.parallel_utils.communication_op
import
(
tensor_model_parallel_all_gather
,
tensor_model_parallel_all_reduce
)
from
vllm.model_executor.parallel_utils.parallel_state
import
(
from
vllm.model_executor.parallel_utils.parallel_state
import
(
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
)
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
)
from
vllm.model_executor.parallel_utils.communication_op
import
(
tensor_model_parallel_all_reduce
,
tensor_model_parallel_all_gather
)
from
vllm.model_executor.parallel_utils.utils
import
(
from
vllm.model_executor.parallel_utils.utils
import
(
divide
,
split_tensor_along_last_dim
)
divide
,
split_tensor_along_last_dim
)
from
vllm.model_executor.utils
import
set_weight_attrs
from
vllm.model_executor.utils
import
set_weight_attrs
from
vllm.logger
import
init_logger
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/model_executor/layers/ops/rand.py
View file @
01bfb22b
from
typing
import
Optional
,
Union
import
torch
import
torch
import
triton
import
triton
import
triton.language
as
tl
import
triton.language
as
tl
from
typing
import
Optional
,
Union
def
seeded_uniform
(
def
seeded_uniform
(
*
size
,
*
size
,
...
...
vllm/model_executor/layers/ops/sample.py
View file @
01bfb22b
import
math
import
math
from
typing
import
Tuple
,
Optional
from
typing
import
Optional
,
Tuple
import
torch
import
torch
import
triton
import
triton
...
...
vllm/model_executor/layers/quantization/__init__.py
View file @
01bfb22b
from
typing
import
Type
from
typing
import
Type
from
vllm.model_executor.layers.quantization.awq
import
AWQConfig
from
vllm.model_executor.layers.quantization.base_config
import
(
from
vllm.model_executor.layers.quantization.base_config
import
(
QuantizationConfig
)
QuantizationConfig
)
from
vllm.model_executor.layers.quantization.awq
import
AWQConfig
from
vllm.model_executor.layers.quantization.gptq
import
GPTQConfig
from
vllm.model_executor.layers.quantization.gptq
import
GPTQConfig
from
vllm.model_executor.layers.quantization.squeezellm
import
SqueezeLLMConfig
from
vllm.model_executor.layers.quantization.marlin
import
MarlinConfig
from
vllm.model_executor.layers.quantization.marlin
import
MarlinConfig
from
vllm.model_executor.layers.quantization.squeezellm
import
SqueezeLLMConfig
_QUANTIZATION_CONFIG_REGISTRY
=
{
_QUANTIZATION_CONFIG_REGISTRY
=
{
"awq"
:
AWQConfig
,
"awq"
:
AWQConfig
,
...
...
vllm/model_executor/layers/quantization/gptq.py
View file @
01bfb22b
import
enum
import
enum
from
enum
import
Enum
from
enum
import
Enum
from
typing
import
Any
,
Dict
,
List
,
Optional
from
fractions
import
Fraction
from
fractions
import
Fraction
from
typing
import
Any
,
Dict
,
List
,
Optional
import
torch
import
torch
from
torch.nn.parameter
import
Parameter
from
torch.nn.parameter
import
Parameter
...
...
vllm/model_executor/layers/quantization/marlin.py
View file @
01bfb22b
...
@@ -4,7 +4,8 @@ import torch
...
@@ -4,7 +4,8 @@ import torch
from
torch.nn.parameter
import
Parameter
from
torch.nn.parameter
import
Parameter
from
vllm._C
import
ops
from
vllm._C
import
ops
from
vllm.model_executor.layers.linear
import
LinearMethodBase
,
set_weight_attrs
from
vllm.model_executor.layers.linear
import
(
LinearMethodBase
,
set_weight_attrs
)
from
vllm.model_executor.layers.quantization.base_config
import
(
from
vllm.model_executor.layers.quantization.base_config
import
(
QuantizationConfig
)
QuantizationConfig
)
...
...
vllm/model_executor/layers/rejection_sampler.py
View file @
01bfb22b
from
typing
import
Tuple
,
Optional
from
functools
import
cached_property
from
functools
import
cached_property
from
typing
import
Optional
,
Tuple
import
torch
import
torch
import
torch.nn
as
nn
import
torch.jit
import
torch.jit
import
torch.nn
as
nn
class
RejectionSampler
(
nn
.
Module
):
class
RejectionSampler
(
nn
.
Module
):
...
...
Prev
1
2
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment