Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
01bfb22b
Unverified
Commit
01bfb22b
authored
Mar 25, 2024
by
SangBin Cho
Committed by
GitHub
Mar 25, 2024
Browse files
[CI] Try introducing isort. (#3495)
parent
e67c295b
Changes
144
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
68 additions
and
68 deletions
+68
-68
tests/core/test_block_manager.py
tests/core/test_block_manager.py
+5
-4
tests/core/test_scheduler.py
tests/core/test_scheduler.py
+3
-2
tests/distributed/test_comm_ops.py
tests/distributed/test_comm_ops.py
+3
-5
tests/distributed/test_custom_all_reduce.py
tests/distributed/test_custom_all_reduce.py
+1
-1
tests/entrypoints/test_guided_processors.py
tests/entrypoints/test_guided_processors.py
+3
-3
tests/entrypoints/test_openai_server.py
tests/entrypoints/test_openai_server.py
+7
-8
tests/kernels/conftest.py
tests/kernels/conftest.py
+1
-0
tests/kernels/test_activation.py
tests/kernels/test_activation.py
+1
-1
tests/kernels/test_attention.py
tests/kernels/test_attention.py
+3
-4
tests/kernels/test_cache.py
tests/kernels/test_cache.py
+1
-2
tests/kernels/test_moe.py
tests/kernels/test_moe.py
+1
-1
tests/kernels/test_pos_encoding.py
tests/kernels/test_pos_encoding.py
+2
-1
tests/kernels/test_prefix_prefill.py
tests/kernels/test_prefix_prefill.py
+3
-2
tests/kernels/test_rand.py
tests/kernels/test_rand.py
+3
-2
tests/kernels/test_sampler.py
tests/kernels/test_sampler.py
+4
-4
tests/lora/conftest.py
tests/lora/conftest.py
+4
-4
tests/lora/test_layer_variation.py
tests/lora/test_layer_variation.py
+4
-2
tests/lora/test_layers.py
tests/lora/test_layers.py
+14
-18
tests/lora/test_llama.py
tests/lora/test_llama.py
+1
-0
tests/lora/test_lora_manager.py
tests/lora/test_lora_manager.py
+4
-4
No files found.
tests/core/test_block_manager.py
View file @
01bfb22b
import
pytest
import
time
from
typing
import
List
import
pytest
from
vllm
import
SamplingParams
from
vllm.block
import
PhysicalTokenBlock
from
vllm.core.block_manager
import
(
UncachedBlockAllocator
,
BlockSpaceManager
,
AllocStatus
)
from
vllm.core.block_manager
import
(
AllocStatus
,
BlockSpaceManager
,
UncachedBlockAllocator
)
from
vllm.sequence
import
Logprob
,
Sequence
,
SequenceGroup
,
SequenceStatus
from
vllm.utils
import
Device
from
vllm.sequence
import
Sequence
,
SequenceGroup
,
SequenceStatus
,
Logprob
from
.utils
import
create_dummy_prompt
...
...
tests/core/test_scheduler.py
View file @
01bfb22b
import
time
from
typing
import
List
import
pytest
# noqa
import
time
from
vllm.config
import
CacheConfig
,
SchedulerConfig
from
vllm.core.scheduler
import
Scheduler
from
vllm.sequence
import
SequenceGroup
,
Logprob
from
vllm.sequence
import
Logprob
,
SequenceGroup
from
.utils
import
create_dummy_prompt
...
...
tests/distributed/test_comm_ops.py
View file @
01bfb22b
...
...
@@ -3,14 +3,12 @@
Run `pytest tests/distributed/test_comm_ops.py --forked`.
"""
import
pytest
import
torch
import
ray
import
torch
from
vllm.model_executor.parallel_utils.communication_op
import
(
tensor_model_parallel_all_reduce
,
tensor_model_parallel_all_gather
,
broadcast_tensor_dict
,
)
broadcast_tensor_dict
,
tensor_model_parallel_all_gather
,
tensor_model_parallel_all_reduce
)
from
vllm.test_utils
import
(
init_test_distributed_environment
,
multi_process_tensor_parallel
)
...
...
tests/distributed/test_custom_all_reduce.py
View file @
01bfb22b
import
os
import
random
import
os
import
pytest
import
ray
import
torch
...
...
tests/entrypoints/test_guided_processors.py
View file @
01bfb22b
# This unit test should be moved to a new
# tests/test_guided_decoding directory.
from
transformers
import
AutoTokenizer
import
torch
from
transformers
import
AutoTokenizer
from
vllm.model_executor.guided_logits_processors
import
(
Regex
LogitsProcessor
,
JSON
LogitsProcessor
)
from
vllm.model_executor.guided_logits_processors
import
(
JSON
LogitsProcessor
,
Regex
LogitsProcessor
)
TEST_SCHEMA
=
{
"type"
:
"object"
,
...
...
tests/entrypoints/test_openai_server.py
View file @
01bfb22b
# imports for guided decoding tests
import
json
import
os
import
re
import
subprocess
import
sys
import
time
import
sys
import
jsonschema
import
openai
# use the official client for correctness check
import
pytest
import
requests
# using Ray for overall ease of process management, parallel requests,
# and debugging.
import
ray
import
openai
# use the official client for correctness check
import
requests
# downloading lora to test lora requests
from
huggingface_hub
import
snapshot_download
# imports for guided decoding tests
import
json
import
jsonschema
import
re
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
MAX_SERVER_START_WAIT_S
=
600
# wait for server to start for 60 seconds
...
...
tests/kernels/conftest.py
View file @
01bfb22b
import
pytest
from
vllm.utils
import
create_kv_caches_with_random
...
...
tests/kernels/test_activation.py
View file @
01bfb22b
...
...
@@ -2,10 +2,10 @@ from typing import Type
import
pytest
import
torch
from
allclose_default
import
get_default_atol
,
get_default_rtol
from
vllm.model_executor.layers.activation
import
(
FastGELU
,
GeluAndMul
,
NewGELU
,
SiluAndMul
)
from
allclose_default
import
get_default_atol
,
get_default_rtol
DTYPES
=
[
torch
.
half
,
torch
.
bfloat16
,
torch
.
float
]
NUM_TOKENS
=
[
7
,
83
,
2048
]
# Arbitrary values for testing
...
...
tests/kernels/test_attention.py
View file @
01bfb22b
...
...
@@ -3,13 +3,12 @@ from typing import List, Optional, Tuple
import
pytest
import
torch
from
allclose_default
import
get_default_atol
,
get_default_rtol
from
xformers
import
ops
as
xops
from
xformers.ops.fmha.attn_bias
import
BlockDiagonalCausalMask
from
vllm._C
import
ops
,
cache_ops
from
vllm.utils
import
get_max_shared_memory_bytes
from
vllm.utils
import
is_hip
from
allclose_default
import
get_default_atol
,
get_default_rtol
from
vllm._C
import
cache_ops
,
ops
from
vllm.utils
import
get_max_shared_memory_bytes
,
is_hip
FLOAT32_BYTES
=
torch
.
finfo
(
torch
.
float
).
bits
//
8
# This will change depending on the compute capability.
...
...
tests/kernels/test_cache.py
View file @
01bfb22b
import
random
from
typing
import
Tuple
import
pytest
import
torch
from
typing
import
Tuple
from
vllm._C
import
cache_ops
COPYING_DIRECTION
=
[(
'cuda'
,
'cpu'
),
(
'cuda'
,
'cuda'
),
(
'cpu'
,
'cuda'
)]
...
...
tests/kernels/test_moe.py
View file @
01bfb22b
...
...
@@ -7,8 +7,8 @@ import torch
from
transformers
import
MixtralConfig
from
transformers.models.mixtral.modeling_mixtral
import
MixtralSparseMoeBlock
from
vllm.model_executor.layers.fused_moe
import
fused_moe
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.fused_moe
import
fused_moe
from
vllm.model_executor.models.mixtral
import
MixtralMoE
...
...
tests/kernels/test_pos_encoding.py
View file @
01bfb22b
from
itertools
import
accumulate
from
typing
import
List
,
Optional
import
pytest
import
torch
from
allclose_default
import
get_default_atol
,
get_default_rtol
from
itertools
import
accumulate
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
IS_NEOX_STYLE
=
[
True
,
False
]
...
...
tests/kernels/test_prefix_prefill.py
View file @
01bfb22b
import
random
import
pytest
import
time
import
pytest
import
torch
from
vllm.attention.ops.prefix_prefill
import
context_attention_fwd
from
xformers
import
ops
as
xops
from
xformers.ops.fmha.attn_bias
import
BlockDiagonalCausalFromBottomRightMask
from
vllm.attention.ops.prefix_prefill
import
context_attention_fwd
NUM_HEADS
=
[
64
]
NUM_QUERIES_PER_KV
=
[
1
,
8
,
64
]
HEAD_SIZES
=
[
128
]
...
...
tests/kernels/test_rand.py
View file @
01bfb22b
import
torch
import
pytest
import
random
import
pytest
import
torch
from
vllm.model_executor.layers.ops.rand
import
seeded_uniform
from
vllm.model_executor.utils
import
set_random_seed
...
...
tests/kernels/test_sampler.py
View file @
01bfb22b
import
gc
import
torch
import
pytest
import
torch
import
triton
import
triton.language
as
tl
from
vllm.model_executor.layers.ops.sample
import
(
_uniform_to_exponential
,
sample
,
get_num_triton_sampler_splits
,
MAX_TRITON_N_COLS
)
from
vllm.model_executor.utils
import
set_random_seed
MAX_TRITON_N_COLS
,
_uniform_to_exponential
,
get_num_triton_sampler_splits
,
sample
)
from
vllm.model_executor.sampling_metadata
import
SamplingTensors
from
vllm.model_executor.utils
import
set_random_seed
SINGLE_SPLIT_VOCAB_SIZE
=
32000
# llama/mistral/mixtral vocab size
MULTI_SPLIT_VOCAB_SIZE
=
MAX_TRITON_N_COLS
+
100
...
...
tests/lora/conftest.py
View file @
01bfb22b
...
...
@@ -2,7 +2,7 @@ import contextlib
import
gc
import
tempfile
from
collections
import
OrderedDict
from
unittest.mock
import
patch
,
MagicMock
from
unittest.mock
import
MagicMock
,
patch
import
pytest
import
ray
...
...
@@ -12,13 +12,13 @@ from huggingface_hub import snapshot_download
import
vllm
from
vllm.config
import
LoRAConfig
from
vllm.model_executor.layers.sampler
import
Sampler
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.model_loader
import
get_model
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
MergedColumnParallelLinear
,
RowParallelLinear
)
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.sampler
import
Sampler
from
vllm.model_executor.layers.vocab_parallel_embedding
import
ParallelLMHead
from
vllm.model_executor.model_loader
import
get_model
from
vllm.model_executor.parallel_utils.parallel_state
import
(
destroy_model_parallel
,
initialize_model_parallel
)
...
...
tests/lora/test_layer_variation.py
View file @
01bfb22b
import
tempfile
from
random
import
sample
from
typing
import
List
,
Optional
import
peft
import
pytest
from
random
import
sample
import
tempfile
from
transformers
import
AutoModelForCausalLM
import
vllm
from
vllm.lora.request
import
LoRARequest
from
.conftest
import
cleanup
MODEL_PATH
=
"Felladrin/Llama-68M-Chat-v1"
...
...
tests/lora/test_layers.py
View file @
01bfb22b
import
pytest
import
random
from
copy
import
deepcopy
from
dataclasses
import
dataclass
from
typing
import
List
,
Optional
,
Dict
,
Tuple
from
typing
import
Dict
,
List
,
Optional
,
Tuple
import
pytest
import
torch
import
torch.nn.functional
as
F
from
vllm.lora.layers
import
(
ColumnParallelLinearWithLoRA
,
MergedColumnParallelLinearWithLoRA
,
QKVParallelLinearWithLora
,
VocabParallelEmbeddingWithLoRA
,
RowParallelLinearWithLoRA
,
LogitsProcessorWithLoRA
,
LoRAMapping
,
BaseLayerWithLoRA
,
)
from
vllm.lora.models
import
(
LoRALayerWeights
,
convert_mapping
,
PackedLoRALayerWeights
)
from
vllm.config
import
LoRAConfig
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.lora.layers
import
(
BaseLayerWithLoRA
,
ColumnParallelLinearWithLoRA
,
LogitsProcessorWithLoRA
,
LoRAMapping
,
MergedColumnParallelLinearWithLoRA
,
QKVParallelLinearWithLora
,
RowParallelLinearWithLoRA
,
VocabParallelEmbeddingWithLoRA
)
from
vllm.lora.models
import
(
LoRALayerWeights
,
PackedLoRALayerWeights
,
convert_mapping
)
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
MergedColumnParallelLinear
,
RowParallelLinear
,
QKVParallelLinear
)
QKVParallelLinear
,
RowParallelLinear
)
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
VocabParallelEmbedding
,
ParallelLMHead
)
ParallelLMHead
,
VocabParallelEmbedding
)
from
vllm.model_executor.utils
import
set_random_seed
from
.utils
import
DummyLoRAManager
...
...
tests/lora/test_llama.py
View file @
01bfb22b
...
...
@@ -3,6 +3,7 @@ import ray
import
vllm
from
vllm.lora.request
import
LoRARequest
from
.conftest
import
cleanup
MODEL_PATH
=
"meta-llama/Llama-2-7b-hf"
...
...
tests/lora/test_lora_manager.py
View file @
01bfb22b
...
...
@@ -8,11 +8,11 @@ from torch import nn
from
vllm.config
import
LoRAConfig
from
vllm.lora.layers
import
(
ColumnParallelLinearWithLoRA
,
Row
ParallelLinearWithLoRA
,
MergedColumn
ParallelLinearWithLoRA
)
MergedColumn
ParallelLinearWithLoRA
,
Row
ParallelLinearWithLoRA
)
from
vllm.lora.lora
import
LoRALayerWeights
,
PackedLoRALayerWeights
from
vllm.lora.models
import
(
LoRAModel
,
LoRAModelManager
,
LRUCacheLoRAModelManager
,
LoRAMapping
)
from
vllm.lora.models
import
(
LoRAMapping
,
LoRAModel
,
LoRAModelManager
,
LRUCacheLoRAModelManager
)
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.worker_manager
import
(
LRUCacheWorkerLoRAManager
,
WorkerLoRAManager
)
...
...
Prev
1
2
3
4
5
6
…
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment