Unverified commit 01bfb22b, authored by SangBin Cho, committed by GitHub
Browse files

[CI] Try introducing isort. (#3495)

parent e67c295b
import pytest
import time
from typing import List
import pytest
from vllm import SamplingParams
from vllm.block import PhysicalTokenBlock
from vllm.core.block_manager import (UncachedBlockAllocator, BlockSpaceManager,
AllocStatus)
from vllm.core.block_manager import (AllocStatus, BlockSpaceManager,
UncachedBlockAllocator)
from vllm.sequence import Logprob, Sequence, SequenceGroup, SequenceStatus
from vllm.utils import Device
from vllm.sequence import Sequence, SequenceGroup, SequenceStatus, Logprob
from .utils import create_dummy_prompt
......
import time
from typing import List
import pytest # noqa
import time
from vllm.config import CacheConfig, SchedulerConfig
from vllm.core.scheduler import Scheduler
from vllm.sequence import SequenceGroup, Logprob
from vllm.sequence import Logprob, SequenceGroup
from .utils import create_dummy_prompt
......
......@@ -3,14 +3,12 @@
Run `pytest tests/distributed/test_comm_ops.py --forked`.
"""
import pytest
import torch
import ray
import torch
from vllm.model_executor.parallel_utils.communication_op import (
tensor_model_parallel_all_reduce,
tensor_model_parallel_all_gather,
broadcast_tensor_dict,
)
broadcast_tensor_dict, tensor_model_parallel_all_gather,
tensor_model_parallel_all_reduce)
from vllm.test_utils import (init_test_distributed_environment,
multi_process_tensor_parallel)
......
import os
import random
import os
import pytest
import ray
import torch
......
# This unit test should be moved to a new
# tests/test_guided_decoding directory.
from transformers import AutoTokenizer
import torch
from transformers import AutoTokenizer
from vllm.model_executor.guided_logits_processors import (RegexLogitsProcessor,
JSONLogitsProcessor)
from vllm.model_executor.guided_logits_processors import (JSONLogitsProcessor,
RegexLogitsProcessor)
TEST_SCHEMA = {
"type": "object",
......
# imports for guided decoding tests
import json
import os
import re
import subprocess
import sys
import time
import sys
import jsonschema
import openai # use the official client for correctness check
import pytest
import requests
# using Ray for overall ease of process management, parallel requests,
# and debugging.
import ray
import openai # use the official client for correctness check
import requests
# downloading lora to test lora requests
from huggingface_hub import snapshot_download
# imports for guided decoding tests
import json
import jsonschema
import re
from vllm.transformers_utils.tokenizer import get_tokenizer
MAX_SERVER_START_WAIT_S = 600 # wait up to 600 seconds for the server to start
......
import pytest
from vllm.utils import create_kv_caches_with_random
......
......@@ -2,10 +2,10 @@ from typing import Type
import pytest
import torch
from allclose_default import get_default_atol, get_default_rtol
from vllm.model_executor.layers.activation import (FastGELU, GeluAndMul,
NewGELU, SiluAndMul)
from allclose_default import get_default_atol, get_default_rtol
DTYPES = [torch.half, torch.bfloat16, torch.float]
NUM_TOKENS = [7, 83, 2048] # Arbitrary values for testing
......
......@@ -3,13 +3,12 @@ from typing import List, Optional, Tuple
import pytest
import torch
from allclose_default import get_default_atol, get_default_rtol
from xformers import ops as xops
from xformers.ops.fmha.attn_bias import BlockDiagonalCausalMask
from vllm._C import ops, cache_ops
from vllm.utils import get_max_shared_memory_bytes
from vllm.utils import is_hip
from allclose_default import get_default_atol, get_default_rtol
from vllm._C import cache_ops, ops
from vllm.utils import get_max_shared_memory_bytes, is_hip
FLOAT32_BYTES = torch.finfo(torch.float).bits // 8
# This will change depending on the compute capability.
......
import random
from typing import Tuple
import pytest
import torch
from typing import Tuple
from vllm._C import cache_ops
COPYING_DIRECTION = [('cuda', 'cpu'), ('cuda', 'cuda'), ('cpu', 'cuda')]
......
......@@ -7,8 +7,8 @@ import torch
from transformers import MixtralConfig
from transformers.models.mixtral.modeling_mixtral import MixtralSparseMoeBlock
from vllm.model_executor.layers.fused_moe import fused_moe
from vllm.model_executor.layers.activation import SiluAndMul
from vllm.model_executor.layers.fused_moe import fused_moe
from vllm.model_executor.models.mixtral import MixtralMoE
......
from itertools import accumulate
from typing import List, Optional
import pytest
import torch
from allclose_default import get_default_atol, get_default_rtol
from itertools import accumulate
from vllm.model_executor.layers.rotary_embedding import get_rope
IS_NEOX_STYLE = [True, False]
......
import random
import pytest
import time
import pytest
import torch
from vllm.attention.ops.prefix_prefill import context_attention_fwd
from xformers import ops as xops
from xformers.ops.fmha.attn_bias import BlockDiagonalCausalFromBottomRightMask
from vllm.attention.ops.prefix_prefill import context_attention_fwd
NUM_HEADS = [64]
NUM_QUERIES_PER_KV = [1, 8, 64]
HEAD_SIZES = [128]
......
import torch
import pytest
import random
import pytest
import torch
from vllm.model_executor.layers.ops.rand import seeded_uniform
from vllm.model_executor.utils import set_random_seed
......
import gc
import torch
import pytest
import torch
import triton
import triton.language as tl
from vllm.model_executor.layers.ops.sample import (
_uniform_to_exponential, sample, get_num_triton_sampler_splits,
MAX_TRITON_N_COLS)
from vllm.model_executor.utils import set_random_seed
MAX_TRITON_N_COLS, _uniform_to_exponential, get_num_triton_sampler_splits,
sample)
from vllm.model_executor.sampling_metadata import SamplingTensors
from vllm.model_executor.utils import set_random_seed
SINGLE_SPLIT_VOCAB_SIZE = 32000 # llama/mistral/mixtral vocab size
MULTI_SPLIT_VOCAB_SIZE = MAX_TRITON_N_COLS + 100
......
......@@ -2,7 +2,7 @@ import contextlib
import gc
import tempfile
from collections import OrderedDict
from unittest.mock import patch, MagicMock
from unittest.mock import MagicMock, patch
import pytest
import ray
......@@ -12,13 +12,13 @@ from huggingface_hub import snapshot_download
import vllm
from vllm.config import LoRAConfig
from vllm.model_executor.layers.sampler import Sampler
from vllm.model_executor.layers.logits_processor import LogitsProcessor
from vllm.model_executor.model_loader import get_model
from vllm.model_executor.layers.linear import (ColumnParallelLinear,
MergedColumnParallelLinear,
RowParallelLinear)
from vllm.model_executor.layers.logits_processor import LogitsProcessor
from vllm.model_executor.layers.sampler import Sampler
from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
from vllm.model_executor.model_loader import get_model
from vllm.model_executor.parallel_utils.parallel_state import (
destroy_model_parallel, initialize_model_parallel)
......
import tempfile
from random import sample
from typing import List, Optional
import peft
import pytest
from random import sample
import tempfile
from transformers import AutoModelForCausalLM
import vllm
from vllm.lora.request import LoRARequest
from .conftest import cleanup
MODEL_PATH = "Felladrin/Llama-68M-Chat-v1"
......
import pytest
import random
from copy import deepcopy
from dataclasses import dataclass
from typing import List, Optional, Dict, Tuple
from typing import Dict, List, Optional, Tuple
import pytest
import torch
import torch.nn.functional as F
from vllm.lora.layers import (
ColumnParallelLinearWithLoRA,
MergedColumnParallelLinearWithLoRA,
QKVParallelLinearWithLora,
VocabParallelEmbeddingWithLoRA,
RowParallelLinearWithLoRA,
LogitsProcessorWithLoRA,
LoRAMapping,
BaseLayerWithLoRA,
)
from vllm.lora.models import (LoRALayerWeights, convert_mapping,
PackedLoRALayerWeights)
from vllm.config import LoRAConfig
from vllm.model_executor.layers.logits_processor import LogitsProcessor
from vllm.lora.layers import (BaseLayerWithLoRA, ColumnParallelLinearWithLoRA,
LogitsProcessorWithLoRA, LoRAMapping,
MergedColumnParallelLinearWithLoRA,
QKVParallelLinearWithLora,
RowParallelLinearWithLoRA,
VocabParallelEmbeddingWithLoRA)
from vllm.lora.models import (LoRALayerWeights, PackedLoRALayerWeights,
convert_mapping)
from vllm.model_executor.layers.linear import (ColumnParallelLinear,
MergedColumnParallelLinear,
RowParallelLinear,
QKVParallelLinear)
QKVParallelLinear,
RowParallelLinear)
from vllm.model_executor.layers.logits_processor import LogitsProcessor
from vllm.model_executor.layers.vocab_parallel_embedding import (
VocabParallelEmbedding, ParallelLMHead)
ParallelLMHead, VocabParallelEmbedding)
from vllm.model_executor.utils import set_random_seed
from .utils import DummyLoRAManager
......
......@@ -3,6 +3,7 @@ import ray
import vllm
from vllm.lora.request import LoRARequest
from .conftest import cleanup
MODEL_PATH = "meta-llama/Llama-2-7b-hf"
......
......@@ -8,11 +8,11 @@ from torch import nn
from vllm.config import LoRAConfig
from vllm.lora.layers import (ColumnParallelLinearWithLoRA,
RowParallelLinearWithLoRA,
MergedColumnParallelLinearWithLoRA)
MergedColumnParallelLinearWithLoRA,
RowParallelLinearWithLoRA)
from vllm.lora.lora import LoRALayerWeights, PackedLoRALayerWeights
from vllm.lora.models import (LoRAModel, LoRAModelManager,
LRUCacheLoRAModelManager, LoRAMapping)
from vllm.lora.models import (LoRAMapping, LoRAModel, LoRAModelManager,
LRUCacheLoRAModelManager)
from vllm.lora.request import LoRARequest
from vllm.lora.worker_manager import (LRUCacheWorkerLoRAManager,
WorkerLoRAManager)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment