Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
01bfb22b
Unverified
Commit
01bfb22b
authored
Mar 25, 2024
by
SangBin Cho
Committed by
GitHub
Mar 25, 2024
Browse files
[CI] Try introducing isort. (#3495)
parent
e67c295b
Changes
144
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
20 additions
and
26 deletions
+20
-26
vllm/utils.py
vllm/utils.py
+8
-13
vllm/worker/cache_engine.py
vllm/worker/cache_engine.py
+1
-1
vllm/worker/model_runner.py
vllm/worker/model_runner.py
+7
-8
vllm/worker/worker.py
vllm/worker/worker.py
+4
-4
No files found.
vllm/utils.py
View file @
01bfb22b
import
asyncio
import
enum
import
enum
import
gc
import
os
import
os
import
socket
import
socket
import
subprocess
import
subprocess
import
uuid
import
uuid
import
gc
import
warnings
from
collections
import
OrderedDict
from
functools
import
lru_cache
,
partial
from
platform
import
uname
from
platform
import
uname
from
typing
import
List
,
Tuple
,
Union
,
Generic
from
typing
import
(
Any
,
Awaitable
,
Callable
,
Generic
,
Hashable
,
List
,
from
packaging.version
import
parse
,
Vers
ion
Optional
,
Tuple
,
TypeVar
,
Un
ion
)
import
psutil
import
psutil
import
torch
import
torch
import
asyncio
from
packaging.version
import
Version
,
parse
from
functools
import
partial
,
lru_cache
from
typing
import
(
Awaitable
,
Callable
,
TypeVar
,
)
from
collections
import
OrderedDict
from
typing
import
Any
,
Hashable
,
Optional
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
import
warnings
T
=
TypeVar
(
"T"
)
T
=
TypeVar
(
"T"
)
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/worker/cache_engine.py
View file @
01bfb22b
...
@@ -6,7 +6,7 @@ import torch
...
@@ -6,7 +6,7 @@ import torch
from
vllm.attention
import
get_attn_backend
from
vllm.attention
import
get_attn_backend
from
vllm.config
import
CacheConfig
,
ModelConfig
,
ParallelConfig
from
vllm.config
import
CacheConfig
,
ModelConfig
,
ParallelConfig
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.utils
import
is_pin_memory_available
,
STR_DTYPE_TO_TORCH_DTYPE
from
vllm.utils
import
STR_DTYPE_TO_TORCH_DTYPE
,
is_pin_memory_available
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/worker/model_runner.py
View file @
01bfb22b
import
contextlib
import
contextlib
import
time
import
time
from
typing
import
Dict
,
List
,
Optional
,
Tuple
,
Set
from
typing
import
Dict
,
List
,
Optional
,
Set
,
Tuple
import
numpy
as
np
import
numpy
as
np
import
torch
import
torch
import
torch.nn
as
nn
import
torch.nn
as
nn
from
vllm.attention
import
AttentionMetadata
,
get_attn_backend
from
vllm.attention
import
AttentionMetadata
,
get_attn_backend
from
vllm.config
import
(
DeviceConfig
,
Model
Config
,
LoRA
Config
,
ParallelConfig
,
from
vllm.config
import
(
DeviceConfig
,
LoRA
Config
,
Model
Config
,
ParallelConfig
,
SchedulerConfig
)
SchedulerConfig
)
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.lora.layers
import
LoRAMapping
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.worker_manager
import
LRUCacheWorkerLoRAManager
from
vllm.model_executor
import
SamplingMetadata
from
vllm.model_executor
import
SamplingMetadata
from
vllm.model_executor.model_loader
import
get_model
from
vllm.model_executor.model_loader
import
get_model
from
vllm.model_executor.parallel_utils
import
cupy_utils
from
vllm.model_executor.parallel_utils
import
cupy_utils
,
custom_all_reduce
from
vllm.model_executor.parallel_utils.communication_op
import
(
from
vllm.model_executor.parallel_utils.communication_op
import
(
broadcast_tensor_dict
)
broadcast_tensor_dict
)
from
vllm.model_executor.parallel_utils.parallel_state
import
(
from
vllm.model_executor.parallel_utils.parallel_state
import
(
with_cupy_nccl_for_all_reduce
)
with_cupy_nccl_for_all_reduce
)
from
vllm.model_executor.parallel_utils
import
custom_all_reduce
from
vllm.sampling_params
import
SamplingParams
,
SamplingType
from
vllm.sampling_params
import
SamplingParams
,
SamplingType
from
vllm.sequence
import
SamplerOutput
,
SequenceData
,
SequenceGroupMetadata
from
vllm.sequence
import
SamplerOutput
,
SequenceData
,
SequenceGroupMetadata
from
vllm.lora.worker_manager
import
LRUCacheWorkerLoRAManager
from
vllm.utils
import
(
CudaMemoryProfiler
,
async_tensor_h2d
,
from
vllm.lora.layers
import
LoRAMapping
from
vllm.lora.request
import
LoRARequest
from
vllm.utils
import
(
async_tensor_h2d
,
CudaMemoryProfiler
,
is_pin_memory_available
,
make_tensor_with_pad
,
is_pin_memory_available
,
make_tensor_with_pad
,
maybe_expand_dim
)
maybe_expand_dim
)
...
...
vllm/worker/worker.py
View file @
01bfb22b
"""A GPU worker class."""
"""A GPU worker class."""
import
gc
import
gc
import
os
import
os
from
typing
import
Dict
,
List
,
Tuple
,
Set
,
Optional
from
typing
import
Dict
,
List
,
Optional
,
Set
,
Tuple
import
torch
import
torch
import
torch.distributed
import
torch.distributed
from
vllm.config
import
(
CacheConfig
,
DeviceConfig
,
ModelConfig
,
from
vllm.config
import
(
CacheConfig
,
DeviceConfig
,
LoRAConfig
,
ModelConfig
,
ParallelConfig
,
SchedulerConfig
,
LoRAConfig
)
ParallelConfig
,
SchedulerConfig
)
from
vllm.lora.request
import
LoRARequest
from
vllm.model_executor
import
set_random_seed
from
vllm.model_executor
import
set_random_seed
from
vllm.model_executor.parallel_utils
import
cupy_utils
from
vllm.model_executor.parallel_utils
import
cupy_utils
from
vllm.model_executor.parallel_utils.communication_op
import
(
from
vllm.model_executor.parallel_utils.communication_op
import
(
...
@@ -18,7 +19,6 @@ from vllm.model_executor.parallel_utils.parallel_state import (
...
@@ -18,7 +19,6 @@ from vllm.model_executor.parallel_utils.parallel_state import (
from
vllm.sequence
import
SamplerOutput
,
SequenceGroupMetadata
from
vllm.sequence
import
SamplerOutput
,
SequenceGroupMetadata
from
vllm.worker.cache_engine
import
CacheEngine
from
vllm.worker.cache_engine
import
CacheEngine
from
vllm.worker.model_runner
import
ModelRunner
from
vllm.worker.model_runner
import
ModelRunner
from
vllm.lora.request
import
LoRARequest
class
Worker
:
class
Worker
:
...
...
Prev
1
…
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment