Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
01bfb22b
Unverified
Commit
01bfb22b
authored
Mar 25, 2024
by
SangBin Cho
Committed by
GitHub
Mar 25, 2024
Browse files
[CI] Try introducing isort. (#3495)
parent
e67c295b
Changes
144
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
68 additions
and
68 deletions
+68
-68
tests/core/test_block_manager.py
tests/core/test_block_manager.py
+5
-4
tests/core/test_scheduler.py
tests/core/test_scheduler.py
+3
-2
tests/distributed/test_comm_ops.py
tests/distributed/test_comm_ops.py
+3
-5
tests/distributed/test_custom_all_reduce.py
tests/distributed/test_custom_all_reduce.py
+1
-1
tests/entrypoints/test_guided_processors.py
tests/entrypoints/test_guided_processors.py
+3
-3
tests/entrypoints/test_openai_server.py
tests/entrypoints/test_openai_server.py
+7
-8
tests/kernels/conftest.py
tests/kernels/conftest.py
+1
-0
tests/kernels/test_activation.py
tests/kernels/test_activation.py
+1
-1
tests/kernels/test_attention.py
tests/kernels/test_attention.py
+3
-4
tests/kernels/test_cache.py
tests/kernels/test_cache.py
+1
-2
tests/kernels/test_moe.py
tests/kernels/test_moe.py
+1
-1
tests/kernels/test_pos_encoding.py
tests/kernels/test_pos_encoding.py
+2
-1
tests/kernels/test_prefix_prefill.py
tests/kernels/test_prefix_prefill.py
+3
-2
tests/kernels/test_rand.py
tests/kernels/test_rand.py
+3
-2
tests/kernels/test_sampler.py
tests/kernels/test_sampler.py
+4
-4
tests/lora/conftest.py
tests/lora/conftest.py
+4
-4
tests/lora/test_layer_variation.py
tests/lora/test_layer_variation.py
+4
-2
tests/lora/test_layers.py
tests/lora/test_layers.py
+14
-18
tests/lora/test_llama.py
tests/lora/test_llama.py
+1
-0
tests/lora/test_lora_manager.py
tests/lora/test_lora_manager.py
+4
-4
No files found.
tests/core/test_block_manager.py
View file @
01bfb22b
import
pytest
import
time
import
time
from
typing
import
List
from
typing
import
List
import
pytest
from
vllm
import
SamplingParams
from
vllm
import
SamplingParams
from
vllm.block
import
PhysicalTokenBlock
from
vllm.block
import
PhysicalTokenBlock
from
vllm.core.block_manager
import
(
UncachedBlockAllocator
,
BlockSpaceManager
,
from
vllm.core.block_manager
import
(
AllocStatus
,
BlockSpaceManager
,
AllocStatus
)
UncachedBlockAllocator
)
from
vllm.sequence
import
Logprob
,
Sequence
,
SequenceGroup
,
SequenceStatus
from
vllm.utils
import
Device
from
vllm.utils
import
Device
from
vllm.sequence
import
Sequence
,
SequenceGroup
,
SequenceStatus
,
Logprob
from
.utils
import
create_dummy_prompt
from
.utils
import
create_dummy_prompt
...
...
tests/core/test_scheduler.py
View file @
01bfb22b
import
time
from
typing
import
List
from
typing
import
List
import
pytest
# noqa
import
pytest
# noqa
import
time
from
vllm.config
import
CacheConfig
,
SchedulerConfig
from
vllm.config
import
CacheConfig
,
SchedulerConfig
from
vllm.core.scheduler
import
Scheduler
from
vllm.core.scheduler
import
Scheduler
from
vllm.sequence
import
SequenceGroup
,
Logprob
from
vllm.sequence
import
Logprob
,
SequenceGroup
from
.utils
import
create_dummy_prompt
from
.utils
import
create_dummy_prompt
...
...
tests/distributed/test_comm_ops.py
View file @
01bfb22b
...
@@ -3,14 +3,12 @@
...
@@ -3,14 +3,12 @@
Run `pytest tests/distributed/test_comm_ops.py --forked`.
Run `pytest tests/distributed/test_comm_ops.py --forked`.
"""
"""
import
pytest
import
pytest
import
torch
import
ray
import
ray
import
torch
from
vllm.model_executor.parallel_utils.communication_op
import
(
from
vllm.model_executor.parallel_utils.communication_op
import
(
tensor_model_parallel_all_reduce
,
broadcast_tensor_dict
,
tensor_model_parallel_all_gather
,
tensor_model_parallel_all_gather
,
tensor_model_parallel_all_reduce
)
broadcast_tensor_dict
,
)
from
vllm.test_utils
import
(
init_test_distributed_environment
,
from
vllm.test_utils
import
(
init_test_distributed_environment
,
multi_process_tensor_parallel
)
multi_process_tensor_parallel
)
...
...
tests/distributed/test_custom_all_reduce.py
View file @
01bfb22b
import
os
import
random
import
random
import
os
import
pytest
import
pytest
import
ray
import
ray
import
torch
import
torch
...
...
tests/entrypoints/test_guided_processors.py
View file @
01bfb22b
# This unit test should be moved to a new
# This unit test should be moved to a new
# tests/test_guided_decoding directory.
# tests/test_guided_decoding directory.
from
transformers
import
AutoTokenizer
import
torch
import
torch
from
transformers
import
AutoTokenizer
from
vllm.model_executor.guided_logits_processors
import
(
Regex
LogitsProcessor
,
from
vllm.model_executor.guided_logits_processors
import
(
JSON
LogitsProcessor
,
JSON
LogitsProcessor
)
Regex
LogitsProcessor
)
TEST_SCHEMA
=
{
TEST_SCHEMA
=
{
"type"
:
"object"
,
"type"
:
"object"
,
...
...
tests/entrypoints/test_openai_server.py
View file @
01bfb22b
# imports for guided decoding tests
import
json
import
os
import
os
import
re
import
subprocess
import
subprocess
import
sys
import
time
import
time
import
sys
import
jsonschema
import
openai
# use the official client for correctness check
import
pytest
import
pytest
import
requests
# using Ray for overall ease of process management, parallel requests,
# using Ray for overall ease of process management, parallel requests,
# and debugging.
# and debugging.
import
ray
import
ray
import
openai
# use the official client for correctness check
import
requests
# downloading lora to test lora requests
# downloading lora to test lora requests
from
huggingface_hub
import
snapshot_download
from
huggingface_hub
import
snapshot_download
# imports for guided decoding tests
import
json
import
jsonschema
import
re
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
MAX_SERVER_START_WAIT_S
=
600
# wait for server to start for 60 seconds
MAX_SERVER_START_WAIT_S
=
600
# wait for server to start for 60 seconds
...
...
tests/kernels/conftest.py
View file @
01bfb22b
import
pytest
import
pytest
from
vllm.utils
import
create_kv_caches_with_random
from
vllm.utils
import
create_kv_caches_with_random
...
...
tests/kernels/test_activation.py
View file @
01bfb22b
...
@@ -2,10 +2,10 @@ from typing import Type
...
@@ -2,10 +2,10 @@ from typing import Type
import
pytest
import
pytest
import
torch
import
torch
from
allclose_default
import
get_default_atol
,
get_default_rtol
from
vllm.model_executor.layers.activation
import
(
FastGELU
,
GeluAndMul
,
from
vllm.model_executor.layers.activation
import
(
FastGELU
,
GeluAndMul
,
NewGELU
,
SiluAndMul
)
NewGELU
,
SiluAndMul
)
from
allclose_default
import
get_default_atol
,
get_default_rtol
DTYPES
=
[
torch
.
half
,
torch
.
bfloat16
,
torch
.
float
]
DTYPES
=
[
torch
.
half
,
torch
.
bfloat16
,
torch
.
float
]
NUM_TOKENS
=
[
7
,
83
,
2048
]
# Arbitrary values for testing
NUM_TOKENS
=
[
7
,
83
,
2048
]
# Arbitrary values for testing
...
...
tests/kernels/test_attention.py
View file @
01bfb22b
...
@@ -3,13 +3,12 @@ from typing import List, Optional, Tuple
...
@@ -3,13 +3,12 @@ from typing import List, Optional, Tuple
import
pytest
import
pytest
import
torch
import
torch
from
allclose_default
import
get_default_atol
,
get_default_rtol
from
xformers
import
ops
as
xops
from
xformers
import
ops
as
xops
from
xformers.ops.fmha.attn_bias
import
BlockDiagonalCausalMask
from
xformers.ops.fmha.attn_bias
import
BlockDiagonalCausalMask
from
vllm._C
import
ops
,
cache_ops
from
vllm._C
import
cache_ops
,
ops
from
vllm.utils
import
get_max_shared_memory_bytes
from
vllm.utils
import
get_max_shared_memory_bytes
,
is_hip
from
vllm.utils
import
is_hip
from
allclose_default
import
get_default_atol
,
get_default_rtol
FLOAT32_BYTES
=
torch
.
finfo
(
torch
.
float
).
bits
//
8
FLOAT32_BYTES
=
torch
.
finfo
(
torch
.
float
).
bits
//
8
# This will change depending on the compute capability.
# This will change depending on the compute capability.
...
...
tests/kernels/test_cache.py
View file @
01bfb22b
import
random
import
random
from
typing
import
Tuple
import
pytest
import
pytest
import
torch
import
torch
from
typing
import
Tuple
from
vllm._C
import
cache_ops
from
vllm._C
import
cache_ops
COPYING_DIRECTION
=
[(
'cuda'
,
'cpu'
),
(
'cuda'
,
'cuda'
),
(
'cpu'
,
'cuda'
)]
COPYING_DIRECTION
=
[(
'cuda'
,
'cpu'
),
(
'cuda'
,
'cuda'
),
(
'cpu'
,
'cuda'
)]
...
...
tests/kernels/test_moe.py
View file @
01bfb22b
...
@@ -7,8 +7,8 @@ import torch
...
@@ -7,8 +7,8 @@ import torch
from
transformers
import
MixtralConfig
from
transformers
import
MixtralConfig
from
transformers.models.mixtral.modeling_mixtral
import
MixtralSparseMoeBlock
from
transformers.models.mixtral.modeling_mixtral
import
MixtralSparseMoeBlock
from
vllm.model_executor.layers.fused_moe
import
fused_moe
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.fused_moe
import
fused_moe
from
vllm.model_executor.models.mixtral
import
MixtralMoE
from
vllm.model_executor.models.mixtral
import
MixtralMoE
...
...
tests/kernels/test_pos_encoding.py
View file @
01bfb22b
from
itertools
import
accumulate
from
typing
import
List
,
Optional
from
typing
import
List
,
Optional
import
pytest
import
pytest
import
torch
import
torch
from
allclose_default
import
get_default_atol
,
get_default_rtol
from
allclose_default
import
get_default_atol
,
get_default_rtol
from
itertools
import
accumulate
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
IS_NEOX_STYLE
=
[
True
,
False
]
IS_NEOX_STYLE
=
[
True
,
False
]
...
...
tests/kernels/test_prefix_prefill.py
View file @
01bfb22b
import
random
import
random
import
pytest
import
time
import
time
import
pytest
import
torch
import
torch
from
vllm.attention.ops.prefix_prefill
import
context_attention_fwd
from
xformers
import
ops
as
xops
from
xformers
import
ops
as
xops
from
xformers.ops.fmha.attn_bias
import
BlockDiagonalCausalFromBottomRightMask
from
xformers.ops.fmha.attn_bias
import
BlockDiagonalCausalFromBottomRightMask
from
vllm.attention.ops.prefix_prefill
import
context_attention_fwd
NUM_HEADS
=
[
64
]
NUM_HEADS
=
[
64
]
NUM_QUERIES_PER_KV
=
[
1
,
8
,
64
]
NUM_QUERIES_PER_KV
=
[
1
,
8
,
64
]
HEAD_SIZES
=
[
128
]
HEAD_SIZES
=
[
128
]
...
...
tests/kernels/test_rand.py
View file @
01bfb22b
import
torch
import
pytest
import
random
import
random
import
pytest
import
torch
from
vllm.model_executor.layers.ops.rand
import
seeded_uniform
from
vllm.model_executor.layers.ops.rand
import
seeded_uniform
from
vllm.model_executor.utils
import
set_random_seed
from
vllm.model_executor.utils
import
set_random_seed
...
...
tests/kernels/test_sampler.py
View file @
01bfb22b
import
gc
import
gc
import
torch
import
pytest
import
pytest
import
torch
import
triton
import
triton
import
triton.language
as
tl
import
triton.language
as
tl
from
vllm.model_executor.layers.ops.sample
import
(
from
vllm.model_executor.layers.ops.sample
import
(
_uniform_to_exponential
,
sample
,
get_num_triton_sampler_splits
,
MAX_TRITON_N_COLS
,
_uniform_to_exponential
,
get_num_triton_sampler_splits
,
MAX_TRITON_N_COLS
)
sample
)
from
vllm.model_executor.utils
import
set_random_seed
from
vllm.model_executor.sampling_metadata
import
SamplingTensors
from
vllm.model_executor.sampling_metadata
import
SamplingTensors
from
vllm.model_executor.utils
import
set_random_seed
SINGLE_SPLIT_VOCAB_SIZE
=
32000
# llama/mistral/mixtral vocab size
SINGLE_SPLIT_VOCAB_SIZE
=
32000
# llama/mistral/mixtral vocab size
MULTI_SPLIT_VOCAB_SIZE
=
MAX_TRITON_N_COLS
+
100
MULTI_SPLIT_VOCAB_SIZE
=
MAX_TRITON_N_COLS
+
100
...
...
tests/lora/conftest.py
View file @
01bfb22b
...
@@ -2,7 +2,7 @@ import contextlib
...
@@ -2,7 +2,7 @@ import contextlib
import
gc
import
gc
import
tempfile
import
tempfile
from
collections
import
OrderedDict
from
collections
import
OrderedDict
from
unittest.mock
import
patch
,
MagicMock
from
unittest.mock
import
MagicMock
,
patch
import
pytest
import
pytest
import
ray
import
ray
...
@@ -12,13 +12,13 @@ from huggingface_hub import snapshot_download
...
@@ -12,13 +12,13 @@ from huggingface_hub import snapshot_download
import
vllm
import
vllm
from
vllm.config
import
LoRAConfig
from
vllm.config
import
LoRAConfig
from
vllm.model_executor.layers.sampler
import
Sampler
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.model_loader
import
get_model
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
MergedColumnParallelLinear
,
MergedColumnParallelLinear
,
RowParallelLinear
)
RowParallelLinear
)
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.sampler
import
Sampler
from
vllm.model_executor.layers.vocab_parallel_embedding
import
ParallelLMHead
from
vllm.model_executor.layers.vocab_parallel_embedding
import
ParallelLMHead
from
vllm.model_executor.model_loader
import
get_model
from
vllm.model_executor.parallel_utils.parallel_state
import
(
from
vllm.model_executor.parallel_utils.parallel_state
import
(
destroy_model_parallel
,
initialize_model_parallel
)
destroy_model_parallel
,
initialize_model_parallel
)
...
...
tests/lora/test_layer_variation.py
View file @
01bfb22b
import
tempfile
from
random
import
sample
from
typing
import
List
,
Optional
from
typing
import
List
,
Optional
import
peft
import
peft
import
pytest
import
pytest
from
random
import
sample
import
tempfile
from
transformers
import
AutoModelForCausalLM
from
transformers
import
AutoModelForCausalLM
import
vllm
import
vllm
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
.conftest
import
cleanup
from
.conftest
import
cleanup
MODEL_PATH
=
"Felladrin/Llama-68M-Chat-v1"
MODEL_PATH
=
"Felladrin/Llama-68M-Chat-v1"
...
...
tests/lora/test_layers.py
View file @
01bfb22b
import
pytest
import
random
import
random
from
copy
import
deepcopy
from
copy
import
deepcopy
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
typing
import
List
,
Optional
,
Dict
,
Tuple
from
typing
import
Dict
,
List
,
Optional
,
Tuple
import
pytest
import
torch
import
torch
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
vllm.lora.layers
import
(
from
vllm.config
import
LoRAConfig
ColumnParallelLinearWithLoRA
,
from
vllm.lora.layers
import
(
BaseLayerWithLoRA
,
ColumnParallelLinearWithLoRA
,
LogitsProcessorWithLoRA
,
LoRAMapping
,
MergedColumnParallelLinearWithLoRA
,
MergedColumnParallelLinearWithLoRA
,
QKVParallelLinearWithLora
,
QKVParallelLinearWithLora
,
VocabParallelEmbeddingWithLoRA
,
RowParallelLinearWithLoRA
,
RowParallelLinearWithLoRA
,
LogitsProcessorWithLoRA
,
VocabParallelEmbeddingWithLoRA
)
LoRAMapping
,
from
vllm.lora.models
import
(
LoRALayerWeights
,
PackedLoRALayerWeights
,
BaseLayerWithLoRA
,
convert_mapping
)
)
from
vllm.lora.models
import
(
LoRALayerWeights
,
convert_mapping
,
PackedLoRALayerWeights
)
from
vllm.config
import
LoRAConfig
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
MergedColumnParallelLinear
,
MergedColumnParallelLinear
,
RowParallelLinear
,
QKVParallelLinear
,
QKVParallelLinear
)
RowParallelLinear
)
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
VocabParallelEmbedding
,
ParallelLMHead
)
ParallelLMHead
,
VocabParallelEmbedding
)
from
vllm.model_executor.utils
import
set_random_seed
from
vllm.model_executor.utils
import
set_random_seed
from
.utils
import
DummyLoRAManager
from
.utils
import
DummyLoRAManager
...
...
tests/lora/test_llama.py
View file @
01bfb22b
...
@@ -3,6 +3,7 @@ import ray
...
@@ -3,6 +3,7 @@ import ray
import
vllm
import
vllm
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
.conftest
import
cleanup
from
.conftest
import
cleanup
MODEL_PATH
=
"meta-llama/Llama-2-7b-hf"
MODEL_PATH
=
"meta-llama/Llama-2-7b-hf"
...
...
tests/lora/test_lora_manager.py
View file @
01bfb22b
...
@@ -8,11 +8,11 @@ from torch import nn
...
@@ -8,11 +8,11 @@ from torch import nn
from
vllm.config
import
LoRAConfig
from
vllm.config
import
LoRAConfig
from
vllm.lora.layers
import
(
ColumnParallelLinearWithLoRA
,
from
vllm.lora.layers
import
(
ColumnParallelLinearWithLoRA
,
Row
ParallelLinearWithLoRA
,
MergedColumn
ParallelLinearWithLoRA
,
MergedColumn
ParallelLinearWithLoRA
)
Row
ParallelLinearWithLoRA
)
from
vllm.lora.lora
import
LoRALayerWeights
,
PackedLoRALayerWeights
from
vllm.lora.lora
import
LoRALayerWeights
,
PackedLoRALayerWeights
from
vllm.lora.models
import
(
LoRAModel
,
LoRAModelManager
,
from
vllm.lora.models
import
(
LoRAMapping
,
LoRAModel
,
LoRAModelManager
,
LRUCacheLoRAModelManager
,
LoRAMapping
)
LRUCacheLoRAModelManager
)
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.worker_manager
import
(
LRUCacheWorkerLoRAManager
,
from
vllm.lora.worker_manager
import
(
LRUCacheWorkerLoRAManager
,
WorkerLoRAManager
)
WorkerLoRAManager
)
...
...
Prev
1
2
3
4
5
6
…
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment