Unverified commit 71b1c8b6, authored by Yeshwanth N, committed by GitHub
Browse files

[Chore]: Extract math and argparse utilities to separate modules (#27188)


Signed-off-by: Yeshwanth Surya <yeshsurya@gmail.com>
Signed-off-by: Yeshwanth N <yeshsurya@gmail.com>
Signed-off-by: yeshsurya <yeshsurya@gmail.com>
parent 8fb7b2fa
...@@ -5,7 +5,7 @@ import pytest ...@@ -5,7 +5,7 @@ import pytest
import torch import torch
from vllm.attention.ops.triton_decode_attention import decode_attention_fwd from vllm.attention.ops.triton_decode_attention import decode_attention_fwd
from vllm.utils import cdiv from vllm.utils.math_utils import cdiv
@pytest.mark.parametrize("B", [3, 5]) @pytest.mark.parametrize("B", [3, 5])
......
...@@ -13,8 +13,8 @@ from tests.kernels.moe.utils import per_token_cast_to_fp8 ...@@ -13,8 +13,8 @@ from tests.kernels.moe.utils import per_token_cast_to_fp8
from tests.kernels.utils import baseline_scaled_mm from tests.kernels.utils import baseline_scaled_mm
from vllm import _custom_ops as ops from vllm import _custom_ops as ops
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils import cdiv
from vllm.utils.deep_gemm import per_block_cast_to_fp8 from vllm.utils.deep_gemm import per_block_cast_to_fp8
from vllm.utils.math_utils import cdiv
@pytest.mark.parametrize( @pytest.mark.parametrize(
......
...@@ -27,7 +27,7 @@ from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import ( ...@@ -27,7 +27,7 @@ from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import (
triton_kernel_moe_forward, triton_kernel_moe_forward,
) )
from vllm.model_executor.layers.utils import shuffle_weight from vllm.model_executor.layers.utils import shuffle_weight
from vllm.utils import round_up from vllm.utils.math_utils import round_up
def deshuffle(w: torch.Tensor): def deshuffle(w: torch.Tensor):
......
...@@ -13,7 +13,7 @@ from vllm.model_executor.layers.fused_moe.moe_align_block_size import ( ...@@ -13,7 +13,7 @@ from vllm.model_executor.layers.fused_moe.moe_align_block_size import (
moe_align_block_size, moe_align_block_size,
) )
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils import round_up from vllm.utils.math_utils import round_up
NUM_TOKENS = [1, 3, 256, 2256, 4096] NUM_TOKENS = [1, 3, 256, 2256, 4096]
NUM_EXPERTS = [32, 160, 256, 257] NUM_EXPERTS = [32, 160, 256, 257]
......
...@@ -13,7 +13,7 @@ from vllm.model_executor.layers.fused_moe.cutlass_moe import CutlassBatchedExper ...@@ -13,7 +13,7 @@ from vllm.model_executor.layers.fused_moe.cutlass_moe import CutlassBatchedExper
from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk
from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularKernel from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularKernel
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils import cdiv from vllm.utils.math_utils import cdiv
from ...utils import multi_gpu_test from ...utils import multi_gpu_test
from .parallel_utils import ProcessGroupInfo, parallel_launch from .parallel_utils import ProcessGroupInfo, parallel_launch
......
...@@ -45,7 +45,7 @@ from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import ( ...@@ -45,7 +45,7 @@ from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
TopKWeightAndReduceDelegate, TopKWeightAndReduceDelegate,
) )
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils import round_up from vllm.utils.math_utils import round_up
from ...utils import multi_gpu_test from ...utils import multi_gpu_test
from .parallel_utils import ProcessGroupInfo, parallel_launch from .parallel_utils import ProcessGroupInfo, parallel_launch
......
...@@ -8,7 +8,7 @@ from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import ( ...@@ -8,7 +8,7 @@ from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
persistent_masked_m_silu_mul_quant, persistent_masked_m_silu_mul_quant,
) )
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils import cdiv from vllm.utils.math_utils import cdiv
fp8_dtype = torch.float8_e4m3fn fp8_dtype = torch.float8_e4m3fn
......
...@@ -16,8 +16,8 @@ from vllm.model_executor.layers.fused_moe.fused_batched_moe import ( ...@@ -16,8 +16,8 @@ from vllm.model_executor.layers.fused_moe.fused_batched_moe import (
) )
from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularKernel from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularKernel
from vllm.model_executor.layers.fused_moe.utils import moe_kernel_quantize_input from vllm.model_executor.layers.fused_moe.utils import moe_kernel_quantize_input
from vllm.utils import round_up
from vllm.utils.deep_gemm import per_block_cast_to_fp8 from vllm.utils.deep_gemm import per_block_cast_to_fp8
from vllm.utils.math_utils import round_up
def triton_moe( def triton_moe(
......
...@@ -6,7 +6,7 @@ import torch ...@@ -6,7 +6,7 @@ import torch
from vllm.model_executor.layers.quantization.utils.quant_utils import group_broadcast from vllm.model_executor.layers.quantization.utils.quant_utils import group_broadcast
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils import round_up from vllm.utils.math_utils import round_up
# Using the default value (240.0) from pytorch will cause accuracy # Using the default value (240.0) from pytorch will cause accuracy
# issue on dynamic quantization models. Here use 224.0 for rocm. # issue on dynamic quantization models. Here use 224.0 for rocm.
......
...@@ -13,7 +13,7 @@ import torch ...@@ -13,7 +13,7 @@ import torch
from tests.kernels.utils import baseline_scaled_mm, opcheck, to_fp8, to_int8 from tests.kernels.utils import baseline_scaled_mm, opcheck, to_fp8, to_int8
from vllm import _custom_ops as ops from vllm import _custom_ops as ops
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils import cdiv from vllm.utils.math_utils import cdiv
MNK_FACTORS = [ MNK_FACTORS = [
(1, 256, 128), (1, 256, 128),
......
...@@ -18,7 +18,7 @@ from tests.v1.attention.utils import ( ...@@ -18,7 +18,7 @@ from tests.v1.attention.utils import (
from vllm.attention.backends.registry import _Backend from vllm.attention.backends.registry import _Backend
from vllm.config import ModelConfig from vllm.config import ModelConfig
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils import cdiv from vllm.utils.math_utils import cdiv
from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE, is_torch_equal_or_newer from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE, is_torch_equal_or_newer
from vllm.v1.attention.backends.utils import ( from vllm.v1.attention.backends.utils import (
CommonAttentionMetadata, CommonAttentionMetadata,
......
...@@ -22,7 +22,7 @@ from vllm import _custom_ops as ops ...@@ -22,7 +22,7 @@ from vllm import _custom_ops as ops
from vllm.attention.backends.registry import _Backend from vllm.attention.backends.registry import _Backend
from vllm.attention.ops.flashmla import is_flashmla_dense_supported from vllm.attention.ops.flashmla import is_flashmla_dense_supported
from vllm.config.vllm import set_current_vllm_config from vllm.config.vllm import set_current_vllm_config
from vllm.utils import cdiv from vllm.utils.math_utils import cdiv
from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
from vllm.v1.attention.backends.utils import CommonAttentionMetadata from vllm.v1.attention.backends.utils import CommonAttentionMetadata
from vllm.v1.kv_cache_interface import FullAttentionSpec from vllm.v1.kv_cache_interface import FullAttentionSpec
......
...@@ -23,7 +23,7 @@ from tests.v1.attention.utils import ( ...@@ -23,7 +23,7 @@ from tests.v1.attention.utils import (
from vllm import _custom_ops as ops from vllm import _custom_ops as ops
from vllm.attention.ops import flashmla from vllm.attention.ops import flashmla
from vllm.model_executor.layers.linear import ColumnParallelLinear from vllm.model_executor.layers.linear import ColumnParallelLinear
from vllm.utils import cdiv from vllm.utils.math_utils import cdiv
from vllm.v1.attention.backends.mla.flashmla_sparse import FlashMLASparseBackend from vllm.v1.attention.backends.mla.flashmla_sparse import FlashMLASparseBackend
from vllm.v1.attention.backends.mla.indexer import split_prefill_chunks from vllm.v1.attention.backends.mla.indexer import split_prefill_chunks
......
...@@ -8,7 +8,7 @@ import pytest ...@@ -8,7 +8,7 @@ import pytest
from vllm.config import VllmConfig from vllm.config import VllmConfig
from vllm.engine.arg_utils import EngineArgs from vllm.engine.arg_utils import EngineArgs
from vllm.usage.usage_lib import UsageContext from vllm.usage.usage_lib import UsageContext
from vllm.utils import FlexibleArgumentParser from vllm.utils.argparse_utils import FlexibleArgumentParser
def test_prefix_caching_from_cli(): def test_prefix_caching_from_cli():
......
...@@ -16,7 +16,7 @@ from vllm.attention.layer import Attention ...@@ -16,7 +16,7 @@ from vllm.attention.layer import Attention
from vllm.attention.selector import get_attn_backend from vllm.attention.selector import get_attn_backend
from vllm.config import CacheConfig, VllmConfig from vllm.config import CacheConfig, VllmConfig
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.utils import cdiv from vllm.utils.math_utils import cdiv
from vllm.v1.attention.backends.utils import ( from vllm.v1.attention.backends.utils import (
CommonAttentionMetadata, CommonAttentionMetadata,
subclass_attention_backend, subclass_attention_backend,
......
...@@ -7,7 +7,7 @@ import jax ...@@ -7,7 +7,7 @@ import jax
from jax.experimental import pallas as pl from jax.experimental import pallas as pl
from jax.experimental.pallas import tpu as pltpu from jax.experimental.pallas import tpu as pltpu
from vllm.utils import cdiv from vllm.utils.math_utils import cdiv
def _kv_cache_update_kernel( def _kv_cache_update_kernel(
......
...@@ -6,7 +6,7 @@ import torch ...@@ -6,7 +6,7 @@ import torch
from vllm.attention.ops.paged_attn import PagedAttention from vllm.attention.ops.paged_attn import PagedAttention
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils import cdiv from vllm.utils.math_utils import cdiv
FP8_DTYPE = current_platform.fp8_dtype() FP8_DTYPE = current_platform.fp8_dtype()
......
...@@ -58,7 +58,7 @@ except ImportError: ...@@ -58,7 +58,7 @@ except ImportError:
librosa = PlaceholderModule("librosa") librosa = PlaceholderModule("librosa")
try: try:
from vllm.utils import FlexibleArgumentParser from vllm.utils.argparse_utils import FlexibleArgumentParser
except ImportError: except ImportError:
from argparse import ArgumentParser as FlexibleArgumentParser from argparse import ArgumentParser as FlexibleArgumentParser
......
...@@ -19,7 +19,7 @@ from vllm.model_executor.layers.quantization.utils.quant_utils import ( ...@@ -19,7 +19,7 @@ from vllm.model_executor.layers.quantization.utils.quant_utils import (
kStaticTensorScale, kStaticTensorScale,
) )
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils import round_up from vllm.utils.math_utils import round_up
from .fusion import QUANT_OPS, empty_bf16, empty_fp32, empty_i32 from .fusion import QUANT_OPS, empty_bf16, empty_fp32, empty_i32
from .fx_utils import is_func from .fx_utils import is_func
......
...@@ -82,7 +82,8 @@ from vllm.transformers_utils.config import ( ...@@ -82,7 +82,8 @@ from vllm.transformers_utils.config import (
maybe_override_with_speculators, maybe_override_with_speculators,
) )
from vllm.transformers_utils.utils import check_gguf_file from vllm.transformers_utils.utils import check_gguf_file
from vllm.utils import FlexibleArgumentParser, is_in_ray_actor from vllm.utils import is_in_ray_actor
from vllm.utils.argparse_utils import FlexibleArgumentParser
from vllm.utils.mem_constants import GiB_bytes from vllm.utils.mem_constants import GiB_bytes
from vllm.utils.network_utils import get_ip from vllm.utils.network_utils import get_ip
from vllm.v1.sample.logits_processor import LogitsProcessor from vllm.v1.sample.logits_processor import LogitsProcessor
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment