Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
01bfb22b
Unverified
Commit
01bfb22b
authored
Mar 25, 2024
by
SangBin Cho
Committed by
GitHub
Mar 25, 2024
Browse files
[CI] Try introducing isort. (#3495)
parent
e67c295b
Changes
144
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
20 additions
and
26 deletions
+20
-26
vllm/utils.py
vllm/utils.py
+8
-13
vllm/worker/cache_engine.py
vllm/worker/cache_engine.py
+1
-1
vllm/worker/model_runner.py
vllm/worker/model_runner.py
+7
-8
vllm/worker/worker.py
vllm/worker/worker.py
+4
-4
No files found.
vllm/utils.py
View file @
01bfb22b
import
asyncio
import
enum
import
gc
import
os
import
socket
import
subprocess
import
uuid
import
gc
import
warnings
from
collections
import
OrderedDict
from
functools
import
lru_cache
,
partial
from
platform
import
uname
from
typing
import
List
,
Tuple
,
Union
,
Generic
from
packaging.version
import
parse
,
Vers
ion
from
typing
import
(
Any
,
Awaitable
,
Callable
,
Generic
,
Hashable
,
List
,
Optional
,
Tuple
,
TypeVar
,
Un
ion
)
import
psutil
import
torch
import
asyncio
from
functools
import
partial
,
lru_cache
from
typing
import
(
Awaitable
,
Callable
,
TypeVar
,
)
from
collections
import
OrderedDict
from
typing
import
Any
,
Hashable
,
Optional
from
packaging.version
import
Version
,
parse
from
vllm.logger
import
init_logger
import
warnings
T
=
TypeVar
(
"T"
)
logger
=
init_logger
(
__name__
)
...
...
vllm/worker/cache_engine.py
View file @
01bfb22b
...
...
@@ -6,7 +6,7 @@ import torch
from
vllm.attention
import
get_attn_backend
from
vllm.config
import
CacheConfig
,
ModelConfig
,
ParallelConfig
from
vllm.logger
import
init_logger
from
vllm.utils
import
is_pin_memory_available
,
STR_DTYPE_TO_TORCH_DTYPE
from
vllm.utils
import
STR_DTYPE_TO_TORCH_DTYPE
,
is_pin_memory_available
logger
=
init_logger
(
__name__
)
...
...
vllm/worker/model_runner.py
View file @
01bfb22b
import
contextlib
import
time
from
typing
import
Dict
,
List
,
Optional
,
Tuple
,
Set
from
typing
import
Dict
,
List
,
Optional
,
Set
,
Tuple
import
numpy
as
np
import
torch
import
torch.nn
as
nn
from
vllm.attention
import
AttentionMetadata
,
get_attn_backend
from
vllm.config
import
(
DeviceConfig
,
Model
Config
,
LoRA
Config
,
ParallelConfig
,
from
vllm.config
import
(
DeviceConfig
,
LoRA
Config
,
Model
Config
,
ParallelConfig
,
SchedulerConfig
)
from
vllm.logger
import
init_logger
from
vllm.lora.layers
import
LoRAMapping
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.worker_manager
import
LRUCacheWorkerLoRAManager
from
vllm.model_executor
import
SamplingMetadata
from
vllm.model_executor.model_loader
import
get_model
from
vllm.model_executor.parallel_utils
import
cupy_utils
from
vllm.model_executor.parallel_utils
import
cupy_utils
,
custom_all_reduce
from
vllm.model_executor.parallel_utils.communication_op
import
(
broadcast_tensor_dict
)
from
vllm.model_executor.parallel_utils.parallel_state
import
(
with_cupy_nccl_for_all_reduce
)
from
vllm.model_executor.parallel_utils
import
custom_all_reduce
from
vllm.sampling_params
import
SamplingParams
,
SamplingType
from
vllm.sequence
import
SamplerOutput
,
SequenceData
,
SequenceGroupMetadata
from
vllm.lora.worker_manager
import
LRUCacheWorkerLoRAManager
from
vllm.lora.layers
import
LoRAMapping
from
vllm.lora.request
import
LoRARequest
from
vllm.utils
import
(
async_tensor_h2d
,
CudaMemoryProfiler
,
from
vllm.utils
import
(
CudaMemoryProfiler
,
async_tensor_h2d
,
is_pin_memory_available
,
make_tensor_with_pad
,
maybe_expand_dim
)
...
...
vllm/worker/worker.py
View file @
01bfb22b
"""A GPU worker class."""
import
gc
import
os
from
typing
import
Dict
,
List
,
Tuple
,
Set
,
Optional
from
typing
import
Dict
,
List
,
Optional
,
Set
,
Tuple
import
torch
import
torch.distributed
from
vllm.config
import
(
CacheConfig
,
DeviceConfig
,
ModelConfig
,
ParallelConfig
,
SchedulerConfig
,
LoRAConfig
)
from
vllm.config
import
(
CacheConfig
,
DeviceConfig
,
LoRAConfig
,
ModelConfig
,
ParallelConfig
,
SchedulerConfig
)
from
vllm.lora.request
import
LoRARequest
from
vllm.model_executor
import
set_random_seed
from
vllm.model_executor.parallel_utils
import
cupy_utils
from
vllm.model_executor.parallel_utils.communication_op
import
(
...
...
@@ -18,7 +19,6 @@ from vllm.model_executor.parallel_utils.parallel_state import (
from
vllm.sequence
import
SamplerOutput
,
SequenceGroupMetadata
from
vllm.worker.cache_engine
import
CacheEngine
from
vllm.worker.model_runner
import
ModelRunner
from
vllm.lora.request
import
LoRARequest
class
Worker
:
...
...
Prev
1
…
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment