Commit cc7f22a8 authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge tag 'v0.9.1' into v0.9.1-ori

parents b9ea0c09 b6553be1
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# flake8: noqa
"""Tests experts_int8 quantization startup and generation,
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Tests whether FP8 computation is enabled correctly.
Run `pytest tests/quantization/test_fp8.py --forked`.
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Tests whether gptq models with dynamic quantized can be loaded.
Run `pytest tests/quantization/test_gptq_dynamic.py --forked`.
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Test model set-up and inference for quantized HF models supported
on the CPU/GPU backend using IPEX (including AWQ/GPTQ).
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Tests whether gptq models with quantized lm_head can be loaded.
Run `pytest tests/quantization/test_quant_lm_head_true.py --forked`.
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Tests whether PTPC w8a8 FP8 computation is enabled correctly.
Run `pytest tests/quantization/test_ptpc_fp8.py --forked`.
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Test model set-up and weight loading for quark-quantized models.
Run `pytest tests/quantization/test_quark.py`.
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Tests register custom quantization config.
See https://github.com/vllm-project/vllm/issues/11926 for more details.
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import importlib.metadata
import importlib.util
......@@ -12,7 +13,7 @@ TORCHAO_AVAILABLE = importlib.util.find_spec("torchao") is not None
@pytest.mark.skipif(not TORCHAO_AVAILABLE, reason="torchao is not available")
def test_pre_quantized_model(vllm_runner):
with vllm_runner("drisspg/float8_dynamic_act_float8_weight-opt-125m",
with vllm_runner("drisspg/fp8-opt-125m",
quantization="torchao",
dtype="bfloat16",
enforce_eager=True) as llm:
......@@ -29,10 +30,10 @@ def test_pre_quantized_model(vllm_runner):
"cuda:0",
# {"": "cuda"},
])
def test_opt_125m_int4wo_model_loading_with_params(vllm_runner,
def test_opt_125m_int8wo_model_loading_with_params(vllm_runner,
pt_load_map_location):
torch._dynamo.reset()
model_name = "jerryzh168/opt-125m-int4wo"
model_name = "jerryzh168/opt-125m-int8wo-partial-quant"
with vllm_runner(model_name=model_name,
quantization="torchao",
dtype="bfloat16",
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from vllm.model_executor.layers.quantization import get_quantization_config
from vllm.platforms import current_platform
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from transformers import AutoTokenizer
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from transformers import AutoTokenizer
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from transformers import AutoTokenizer
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Optional, Union
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from vllm import SamplingParams
from vllm.config import LoadConfig, LoadFormat
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import glob
import tempfile
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Compare the outputs of HF and vLLM when using beam search.
Run `pytest tests/samplers/test_beam_search.py`.
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Make sure ignore_eos works.
Run `pytest tests/samplers/test_ignore_eos.py`.
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import torch
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import torch
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment