Commit a3d96521 authored by zhuwenwen's avatar zhuwenwen
Browse files

remove unsupported tests from kernels and add pytest html

parent 4646fe24
......@@ -5,6 +5,8 @@ pytest-forked
pytest-asyncio
pytest-rerunfailures
pytest-shard
pytest-html
pytest-timeout
# testing utils
awscli
......
......@@ -8,6 +8,8 @@ from collections import UserList
from enum import Enum
from typing import (Any, Callable, Dict, List, Optional, Tuple, Type,
TypedDict, TypeVar, Union)
import pytest
import pytest_html
import numpy as np
import pytest
......@@ -898,3 +900,21 @@ def dummy_opt_path():
with open(json_path, "w") as f:
json.dump(config, f)
return _dummy_path
# Pytest hook that enriches the pytest-html report once a test phase finishes.
@pytest.hookimpl(tryfirst=True, hookwrapper=True)
def pytest_runtest_makereport(item, call):
    """Attach a browser screenshot to the HTML report when a test fails.

    The hook wraps report creation: it yields to let the report be built,
    then inspects it. Only a failure in the ``call`` phase of a test that
    requested a ``browser`` fixture produces a screenshot.

    Args:
        item: The test item being reported on; its ``funcargs`` mapping is
            searched for a ``browser`` fixture value.
        call: Call information supplied by pytest (unused here).
    """
    # Let the remaining hook implementations build the report first.
    outcome = yield
    report = outcome.get_result()

    # Only failures of the test body itself are of interest
    # (setup/teardown phases are ignored).
    if report.when != "call" or not report.failed:
        return

    # Not every item type exposes ``funcargs``; treat a missing mapping as
    # "no fixtures".
    funcargs = getattr(item, "funcargs", None) or {}
    if "browser" not in funcargs:
        return

    browser = funcargs["browser"]
    # NOTE: the path is fixed, so each failing test overwrites the file
    # written by the previous one.
    screenshot_path = "screenshot.png"
    browser.save_screenshot(screenshot_path)

    # The report may not carry an extras list yet, so create one on demand
    # and assign it back. The previous ``hasattr(result, "extra")`` guard
    # skipped the attachment whenever the attribute had not been created.
    # NOTE(review): pytest-html releases before 4.0 read ``report.extra``
    # instead of ``report.extras`` -- confirm the installed version.
    extras = getattr(report, "extras", [])
    extras.append(pytest_html.extras.image(screenshot_path))
    report.extras = extras
......@@ -12,7 +12,7 @@ from vllm.utils import STR_FLASH_ATTN_VAL, STR_INVALID_VAL
# "name", ["TORCH_SDPA", "ROCM_FLASH", "XFORMERS", "FLASHINFER", "OPENVINO"])
# @pytest.mark.parametrize("device", ["cpu", "openvino", "hip", "cuda"])
@pytest.mark.parametrize(
"name", ["TORCH_SDPA", "ROCM_FLASH", "XFORMERS", "FLASHINFER"])
"name", ["ROCM_FLASH"])
@pytest.mark.parametrize("device", ["cpu", "hip", "cuda"])
def test_env(name: str, device: str, monkeypatch):
"""Test that the attention selector can be set via environment variable.
......
......@@ -5,6 +5,7 @@ from tests.kernels.quant_utils import ref_dynamic_per_token_quant
from tests.kernels.utils import opcheck
from vllm._custom_ops import scaled_int8_quant
from vllm.utils import seed_everything
from vllm.utils import is_hip
DTYPES = [torch.half, torch.bfloat16, torch.float]
HIDDEN_SIZES = [16, 67, 768, 2048, 5120, 5137, 8192,
......@@ -37,7 +38,8 @@ def opcheck_int8_quant_dynamic(output, input, symmetric=True):
opcheck(torch.ops._C.dynamic_scaled_int8_quant,
(output, input, scale, azp))
@pytest.mark.skipif(is_hip(),
reason="Currently, there is not supported on ROCm.")
@pytest.mark.parametrize("num_tokens", NUM_TOKENS)
@pytest.mark.parametrize("hidden_size", HIDDEN_SIZES)
@pytest.mark.parametrize("dtype", DTYPES)
......@@ -60,7 +62,8 @@ def test_dynamic_scaled_int8_quant(num_tokens: int, hidden_size: int,
opcheck_int8_quant_dynamic(ops_out, x)
@pytest.mark.skipif(is_hip(),
reason="Currently, there is not supported on ROCm.")
@pytest.mark.parametrize("num_tokens", NUM_TOKENS)
@pytest.mark.parametrize("hidden_size", HIDDEN_SIZES)
@pytest.mark.parametrize("dtype", DTYPES)
......
......@@ -5,9 +5,15 @@ from transformers import PreTrainedTokenizerBase
from vllm.transformers_utils.tokenizer import get_tokenizer
from ..utils import models_path_prefix
# TOKENIZER_NAMES = [
# os.path.join(models_path_prefix, "facebook/opt-125m"),
# os.path.join(models_path_prefix, "gpt2"),
# ]
# export HF_ENDPOINT=https://hf-mirror.com
TOKENIZER_NAMES = [
os.path.join(models_path_prefix, "facebook/opt-125m"),
os.path.join(models_path_prefix, "gpt2"),
"facebook/opt-125m",
"gpt2",
]
......
......@@ -45,7 +45,7 @@ class CustomCacheManager(FileCacheManager):
self.cache_dir = os.getenv("TRITON_CACHE_DIR",
"").strip() or default_cache_dir()
if self.cache_dir:
self.cache_dir = f"{self.cache_dir}_{os.getpid()}"
# self.cache_dir = f"{self.cache_dir}_{os.getpid()}"
self.cache_dir = os.path.join(self.cache_dir, self.key)
self.lock_path = os.path.join(self.cache_dir, "lock")
os.makedirs(self.cache_dir, exist_ok=True)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment