Unverified Commit 93191613 authored by thatPepe's avatar thatPepe Committed by GitHub
Browse files

Merge pull request #1075 from InfiniTensor/RevertT_1-1-4

Revert T1-1-4
parents 6ab911c3 def22a08
import torch
import ctypes
from ctypes import c_uint64, c_float
from libinfiniop import (
LIBINFINIOP,
TestTensor,
get_test_devices,
check_error,
test_operator,
get_args,
debug,
get_tolerance,
profile_operation,
TestWorkspace,
InfiniDtype,
InfiniDtypeNames,
InfiniDeviceNames,
infiniopOperatorDescriptor_t,
)
from enum import Enum, auto
# ==============================================================================
# Configuration
# ==============================================================================
_TEST_CASES_ = [
# shape, input_stride, output_stride
((13, 4), None, None),
((13, 4), (10, 1), (10, 1)),
((16, 5632), None, None),
((4, 4, 5632), None, None),
]
class Inplace(Enum):
OUT_OF_PLACE = auto()
INPLACE = auto()
_INPLACE = [
Inplace.OUT_OF_PLACE,
Inplace.INPLACE,
]
# HardTanh 特有的参数测试组合 (min_val, max_val)
_PARAM_CASES = [
(-1.0, 1.0),
(0.0, 6.0), # 类似于 ReLU6
(-2.5, 2.5),
]
# 组合所有测试用例:shape + inplace + params
_TEST_CASES = [
test_case + (inplace_item, p_min, p_max)
for test_case in _TEST_CASES_
for inplace_item in _INPLACE
for p_min, p_max in _PARAM_CASES
]
# Element dtypes exercised by the test matrix.
_TENSOR_DTYPES = [InfiniDtype.BF16, InfiniDtype.F16, InfiniDtype.F32]

# Per-dtype absolute/relative tolerances passed to torch.allclose;
# looser for the low-precision float formats.
_TOLERANCE_MAP = {
    InfiniDtype.BF16: {"atol": 1e-2, "rtol": 1e-2},
    InfiniDtype.F16: {"atol": 1e-3, "rtol": 1e-3},
    InfiniDtype.F32: {"atol": 1e-7, "rtol": 1e-7},
}

# Defaults; all four are overwritten from CLI arguments in the __main__ block.
DEBUG = False
PROFILE = False
NUM_PRERUN = 10
NUM_ITERATIONS = 1000
def test(
    handle,
    device,
    shape,
    input_stride=None,
    output_stride=None,
    inplace=Inplace.OUT_OF_PLACE,
    min_val=-1.0,
    max_val=1.0,
    dtype=torch.float16,
    sync=None,
):
    """Run one HardTanh case against the infiniop C library and check it vs PyTorch.

    Builds the input/output tensors, computes the PyTorch reference result,
    creates and executes the library operator through its ctypes bindings,
    and asserts that the library output matches the reference within the
    dtype-dependent tolerance from _TOLERANCE_MAP.

    Args:
        handle: infiniop library handle (opaque, passed through to the C API).
        device: device id/enum understood by TestTensor and InfiniDeviceNames.
        shape: tensor shape for this case.
        input_stride / output_stride: strides, or None for contiguous.
        inplace: Inplace.INPLACE reuses the input tensor as the output.
        min_val / max_val: HardTanh clamp range.
        dtype: element dtype for the tensors.
        sync: optional callable to synchronize the device before running.
    """
    input = TestTensor(shape, input_stride, dtype, device)
    if inplace == Inplace.INPLACE:
        # In-place only makes sense when input and output share the same layout.
        if input_stride != output_stride:
            return
        output = input
    else:
        output = TestTensor(shape, output_stride, dtype, device, mode="ones")

    # Broadcast outputs are not writable targets; skip such cases.
    if output.is_broadcast():
        return

    print(
        f"Testing HardTanh on {InfiniDeviceNames[device]} | shape:{shape} "
        f"dtype:{InfiniDtypeNames[dtype]} inplace:{inplace} range:[{min_val}, {max_val}]"
    )

    # Compute the PyTorch ground truth.
    new_output = torch.nn.functional.hardtanh(input.torch_tensor(), min_val=min_val, max_val=max_val)
    output.update_torch_tensor(new_output)

    if sync is not None:
        sync()

    # Create the operator descriptor; HardTanh also takes the clamp bounds.
    descriptor = infiniopOperatorDescriptor_t()
    check_error(
        LIBINFINIOP.infiniopCreateHardTanhDescriptor(
            handle,
            ctypes.byref(descriptor),
            output.descriptor,
            input.descriptor,
            c_float(min_val),
            c_float(max_val),
        )
    )

    # Destroy the tensor descriptors now so the kernel cannot rely on them
    # after descriptor creation.
    for tensor in [input, output]:
        tensor.destroy_desc()

    # Query and allocate the workspace the operator needs.
    workspace_size = c_uint64(0)
    check_error(
        LIBINFINIOP.infiniopGetHardTanhWorkspaceSize(
            descriptor, ctypes.byref(workspace_size)
        )
    )
    workspace = TestWorkspace(workspace_size.value, output.device)

    def lib_hardtanh():
        # Execute the library operator (stream argument is None = default stream).
        check_error(
            LIBINFINIOP.infiniopHardTanh(
                descriptor,
                workspace.data(),
                workspace.size(),
                output.data(),
                input.data(),
                None,
            )
        )

    lib_hardtanh()

    # Compare library output against the PyTorch reference.
    atol, rtol = get_tolerance(_TOLERANCE_MAP, dtype)
    if DEBUG:
        debug(output.actual_tensor(), output.torch_tensor(), atol=atol, rtol=rtol)
    assert torch.allclose(
        output.actual_tensor(), output.torch_tensor(), atol=atol, rtol=rtol
    )

    # Optional timing comparison between PyTorch and the library kernel.
    if PROFILE:
        profile_operation("PyTorch", lambda: torch.nn.functional.hardtanh(input.torch_tensor(), min_val, max_val), device, NUM_PRERUN, NUM_ITERATIONS)
        profile_operation("    lib", lambda: lib_hardtanh(), device, NUM_PRERUN, NUM_ITERATIONS)

    check_error(LIBINFINIOP.infiniopDestroyHardTanhDescriptor(descriptor))
if __name__ == "__main__":
    args = get_args()
    # Propagate CLI flags into the module-level switches read by test().
    DEBUG = args.debug
    PROFILE = args.profile
    NUM_PRERUN = args.num_prerun
    NUM_ITERATIONS = args.num_iterations
    # Run the full test matrix on every device selected by the CLI args.
    for device in get_test_devices(args):
        test_operator(device, test, _TEST_CASES, _TENSOR_DTYPES)
    print("\033[92mHardTanh Test passed!\033[0m")
......@@ -54,54 +54,6 @@ def add_(lib):
infiniopOperatorDescriptor_t,
]
@OpRegister.operator
def equal_(lib):
    """Bind the ctypes signatures of the Equal operator's C entry points on *lib*.

    Covers create / workspace-size / execute / destroy so that later calls
    through *lib* are argument-checked by ctypes.
    """
    # Create: (handle, &desc, output_desc, input_a_desc, input_b_desc)
    create = lib.infiniopCreateEqualDescriptor
    create.restype = c_int32
    create.argtypes = [
        infiniopHandle_t,                      # handle
        POINTER(infiniopOperatorDescriptor_t), # desc_ptr (out)
        infiniopTensorDescriptor_t,            # output (c)
        infiniopTensorDescriptor_t,            # input_a
        infiniopTensorDescriptor_t,            # input_b
    ]

    # Workspace query: (desc, &size)
    query = lib.infiniopGetEqualWorkspaceSize
    query.restype = c_int32
    query.argtypes = [
        infiniopOperatorDescriptor_t,
        POINTER(c_size_t),
    ]

    # Execute: (desc, workspace, size, output_data, input_a_data, input_b_data, stream)
    execute = lib.infiniopEqual
    execute.restype = c_int32
    execute.argtypes = [
        infiniopOperatorDescriptor_t, # desc
        c_void_p,                     # workspace ptr
        c_size_t,                     # workspace size
        c_void_p,                     # output data ptr
        c_void_p,                     # input a data ptr
        c_void_p,                     # input b data ptr
        c_void_p,                     # stream
    ]

    # Destroy: (desc)
    destroy = lib.infiniopDestroyEqualDescriptor
    destroy.restype = c_int32
    destroy.argtypes = [
        infiniopOperatorDescriptor_t,
    ]
@OpRegister.operator
def attention_(lib):
......@@ -210,40 +162,6 @@ def clip_(lib):
]
@OpRegister.operator
def cross_entropy_(lib):
    """Bind the ctypes signatures of the CrossEntropy operator's C entry points on *lib*."""
    # Create: (handle, &desc, three tensor descriptors)
    create = lib.infiniopCreateCrossEntropyDescriptor
    create.restype = c_int32
    create.argtypes = [
        infiniopHandle_t,
        POINTER(infiniopOperatorDescriptor_t),
        infiniopTensorDescriptor_t,
        infiniopTensorDescriptor_t,
        infiniopTensorDescriptor_t,
    ]

    # Workspace query: (desc, &size)
    query = lib.infiniopGetCrossEntropyWorkspaceSize
    query.restype = c_int32
    query.argtypes = [
        infiniopOperatorDescriptor_t,
        POINTER(c_size_t),
    ]

    # Execute: (desc, workspace, size, three data pointers, stream)
    execute = lib.infiniopCrossEntropy
    execute.restype = c_int32
    execute.argtypes = [
        infiniopOperatorDescriptor_t,
        c_void_p,
        c_size_t,
        c_void_p,
        c_void_p,
        c_void_p,
        c_void_p,
    ]

    # Destroy: (desc)
    destroy = lib.infiniopDestroyCrossEntropyDescriptor
    destroy.restype = c_int32
    destroy.argtypes = [
        infiniopOperatorDescriptor_t,
    ]
@OpRegister.operator
def logsoftmax_(lib):
lib.infiniopCreateLogSoftmaxDescriptor.restype = c_int32
......@@ -991,112 +909,6 @@ def silu_(lib):
infiniopOperatorDescriptor_t,
]
@OpRegister.operator
def hardtanh_(lib):
    """Bind the ctypes signatures of the HardTanh operator's C entry points on *lib*."""
    # Create: unlike most unary ops, HardTanh takes two extra c_float bounds.
    create = lib.infiniopCreateHardTanhDescriptor
    create.restype = c_int32
    create.argtypes = [
        infiniopHandle_t,                      # handle
        POINTER(infiniopOperatorDescriptor_t), # desc_ptr
        infiniopTensorDescriptor_t,            # output
        infiniopTensorDescriptor_t,            # input
        c_float,                               # min_val
        c_float,                               # max_val
    ]

    # Workspace query: (desc, &size)
    query = lib.infiniopGetHardTanhWorkspaceSize
    query.restype = c_int32
    query.argtypes = [
        infiniopOperatorDescriptor_t, # desc
        POINTER(c_size_t),            # size
    ]

    # Execute: (desc, workspace, workspace_size, output, input, stream)
    execute = lib.infiniopHardTanh
    execute.restype = c_int32
    execute.argtypes = [
        infiniopOperatorDescriptor_t, # desc
        c_void_p,                     # workspace
        c_size_t,                     # workspace_size
        c_void_p,                     # output
        c_void_p,                     # input
        c_void_p,                     # stream
    ]

    # Destroy: (desc)
    destroy = lib.infiniopDestroyHardTanhDescriptor
    destroy.restype = c_int32
    destroy.argtypes = [
        infiniopOperatorDescriptor_t, # desc
    ]
@OpRegister.operator
def hardswish_(lib):
    """Bind the ctypes signatures of the HardSwish operator's C entry points on *lib*."""
    # Create: (handle, &desc, output_desc, input_desc)
    create = lib.infiniopCreateHardSwishDescriptor
    create.restype = c_int32
    create.argtypes = [
        infiniopHandle_t,
        POINTER(infiniopOperatorDescriptor_t),
        infiniopTensorDescriptor_t,
        infiniopTensorDescriptor_t,
    ]

    # Workspace query: (desc, &size)
    query = lib.infiniopGetHardSwishWorkspaceSize
    query.restype = c_int32
    query.argtypes = [
        infiniopOperatorDescriptor_t,
        POINTER(c_size_t),
    ]

    # Execute: (desc, workspace, size, output_data, input_data, stream)
    execute = lib.infiniopHardSwish
    execute.restype = c_int32
    execute.argtypes = [
        infiniopOperatorDescriptor_t,
        c_void_p,
        c_size_t,
        c_void_p,
        c_void_p,
        c_void_p,
    ]

    # Destroy: (desc)
    destroy = lib.infiniopDestroyHardSwishDescriptor
    destroy.restype = c_int32
    destroy.argtypes = [
        infiniopOperatorDescriptor_t,
    ]
@OpRegister.operator
def avg_pool1d_(lib):
    """Bind the ctypes signatures of the AvgPool1d operator's C entry points on *lib*."""
    # Create: (handle, *desc, y, x, kernel_size, stride, padding)
    create = lib.infiniopCreateAvgPool1dDescriptor
    create.restype = c_int32
    create.argtypes = [
        infiniopHandle_t,
        POINTER(infiniopOperatorDescriptor_t),
        infiniopTensorDescriptor_t, # y_desc (output)
        infiniopTensorDescriptor_t, # x_desc (input)
        c_size_t,                   # kernel_size
        c_size_t,                   # stride
        c_size_t,                   # padding
    ]

    # Workspace query: (desc, &size)
    query = lib.infiniopGetAvgPool1dWorkspaceSize
    query.restype = c_int32
    query.argtypes = [
        infiniopOperatorDescriptor_t,
        POINTER(c_size_t),
    ]

    # Execute: (desc, workspace, workspace_size, y, x, stream)
    execute = lib.infiniopAvgPool1d
    execute.restype = c_int32
    execute.argtypes = [
        infiniopOperatorDescriptor_t,
        c_void_p, # workspace
        c_size_t, # workspace_size
        c_void_p, # y (output pointer)
        c_void_p, # x (input pointer)
        c_void_p, # stream
    ]

    # Destroy: (desc)
    destroy = lib.infiniopDestroyAvgPool1dDescriptor
    destroy.restype = c_int32
    destroy.argtypes = [
        infiniopOperatorDescriptor_t,
    ]
@OpRegister.operator
def layer_norm_(lib):
......
......@@ -83,12 +83,8 @@ class TestTensor(CTensor):
InfiniDtype.BYTE,
InfiniDtype.BOOL,
]:
if dt == InfiniDtype.BOOL:
randint_low = 0 if randint_low is None else randint_low
randint_high = 2 if randint_high is None else randint_high
else:
randint_low = -2000000000 if randint_low is None else randint_low
randint_high = 2000000000 if randint_high is None else randint_high
randint_low = -2000000000 if randint_low is None else randint_low
randint_high = 2000000000 if randint_high is None else randint_high
self._torch_tensor = torch.randint(
randint_low,
randint_high,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment