Commit def22a08 authored by wooway777

Revert "Merge pull request #1056 from InfiniTensor/issue/1031"

This reverts commit 7f295448, reversing
changes made to e60985dc.
parent 1795b38a
......@@ -17,6 +17,7 @@ from framework import (
_TEST_CASES_DATA = [
((13, 4), None, -1.0, 1.0),
((13, 4), (10, 1), -0.5, 0.5),
((8, 8, 8), None, -2.0, 2.0),
]
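# Each tuple above appears to be read as (shape, stride, min_val, max_val)
# for the hardtanh cases; this layout is inferred from the operator below
# and the shared test framework, so treat it as an assumption, not a spec.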
......@@ -86,11 +87,9 @@ class OpTest(BaseOperatorTest):
def torch_operator(self, *args, **kwargs):
return torch.nn.functional.hardtanh(*args, **kwargs)
def infinicore_operator(self, *args, **kwargs):
"""InfiniCore implementation."""
import infinicore.nn.functional as F
return F.hardtanh(*args, **kwargs)
# def infinicore_operator(self, *args, **kwargs):
# """InfiniCore implementation (operator not yet available)."""
# return infinicore.nn.functional.hardtanh(*args, **kwargs)
def main():
......
import ctypes
from ctypes import c_uint64
import torch
from libinfiniop import (
LIBINFINIOP,
InfiniDeviceNames,
InfiniDtype,
InfiniDtypeNames,
TestTensor,
TestWorkspace,
check_error,
debug,
get_args,
get_test_devices,
get_tolerance,
infiniopOperatorDescriptor_t,
profile_operation,
test_operator,
)
# ==============================================================================
# Configuration (Internal Use Only)
# ==============================================================================
_TEST_CASES = [
# input_shape, x_stride, y_stride, kernel_size, stride, padding
((2, 3, 16), None, None, 3, None, 0),
((1, 4, 15), (60, 15, 1), (60, 15, 1), 5, 1, 2),
((2, 1, 32), None, (32, 16, 1), 2, 2, 0),
((3, 2, 7), (14, 7, 1), (9, 3, 1), 3, None, 1),
((4, 6, 31), None, None, 4, 2, 1),
((2, 8, 9), (72, 9, 1), (56, 7, 1), 3, 1, 0),
]
# Data types used for testing
_TENSOR_DTYPES = [InfiniDtype.F16, InfiniDtype.BF16, InfiniDtype.F32]
# Tolerance map for different data types
_TOLERANCE_MAP = {
InfiniDtype.F16: {"atol": 1e-3, "rtol": 1e-2},
InfiniDtype.BF16: {"atol": 1e-3, "rtol": 1e-2},
InfiniDtype.F32: {"atol": 1e-5, "rtol": 1e-4},
}
DEBUG = False
PROFILE = False
NUM_PRERUN = 10
NUM_ITERATIONS = 1000
def _effective_stride(stride, kernel_size):
if stride in (None, 0):
return kernel_size
return stride
def _compute_output_shape(input_shape, kernel_size, stride, padding):
stride = _effective_stride(stride, kernel_size)
width = input_shape[2]
out_width = (width + 2 * padding - kernel_size) // stride + 1
return (input_shape[0], input_shape[1], out_width)
def avg_pool1d_ref(x, kernel_size, stride, padding):
stride = _effective_stride(stride, kernel_size)
out = torch.nn.functional.avg_pool1d(
x.to(torch.float32), kernel_size=kernel_size, stride=stride, padding=padding
)
return out.to(x.dtype)
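# Sanity-check sketch (not wired into the harness; shapes are illustrative):
# verifies that _compute_output_shape agrees with the PyTorch reference for
# the first test case, where stride=None falls back to kernel_size just as
# in torch.nn.functional.avg_pool1d.
def _check_output_shape_example():
    x = torch.randn(2, 3, 16)
    expected = _compute_output_shape((2, 3, 16), kernel_size=3, stride=None, padding=0)
    got = tuple(avg_pool1d_ref(x, kernel_size=3, stride=None, padding=0).shape)
    # (16 + 2*0 - 3) // 3 + 1 == 5, so both sides yield (2, 3, 5)
    assert got == expected == (2, 3, 5)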
def test(
handle,
device,
input_shape,
x_stride,
y_stride,
kernel_size,
stride,
padding,
dtype=InfiniDtype.F16,
sync=None,
):
stride_value = _effective_stride(stride, kernel_size)
out_shape = _compute_output_shape(
input_shape, kernel_size, stride_value, padding
)
print(
f"Testing AvgPool1d on {InfiniDeviceNames[device]} with input_shape:{input_shape}, "
f"output_shape:{out_shape}, kernel_size:{kernel_size}, stride:{stride_value}, "
f"padding:{padding}, dtype:{InfiniDtypeNames[dtype]}"
)
x = TestTensor(input_shape, x_stride, dtype, device)
y = TestTensor(out_shape, y_stride, dtype, device, mode="zeros")
ans = avg_pool1d_ref(x.torch_tensor(), kernel_size, stride_value, padding)
if sync is not None:
sync()
descriptor = infiniopOperatorDescriptor_t()
check_error(
LIBINFINIOP.infiniopCreateAvgPool1dDescriptor(
handle,
ctypes.byref(descriptor),
y.descriptor,
x.descriptor,
kernel_size,
stride_value,
padding,
)
)
# Invalidate descriptors in tensors after creation to make sure kernels read from arguments
x.destroy_desc()
y.destroy_desc()
workspace_size = c_uint64(0)
check_error(
LIBINFINIOP.infiniopGetAvgPool1dWorkspaceSize(
descriptor, ctypes.byref(workspace_size)
)
)
workspace = TestWorkspace(workspace_size.value, x.device)
def lib_avg_pool1d():
check_error(
LIBINFINIOP.infiniopAvgPool1d(
descriptor,
workspace.data(),
workspace.size(),
y.data(),
x.data(),
None,
)
)
lib_avg_pool1d()
if sync is not None:
sync()
atol, rtol = get_tolerance(_TOLERANCE_MAP, dtype)
if DEBUG:
debug(y.actual_tensor(), ans, atol=atol, rtol=rtol)
assert torch.allclose(y.actual_tensor(), ans, atol=atol, rtol=rtol)
if PROFILE:
# fmt: off
profile_operation(
"PyTorch",
lambda: avg_pool1d_ref(x.torch_tensor(), kernel_size, stride_value, padding),
device,
NUM_PRERUN,
NUM_ITERATIONS,
)
profile_operation(
" lib",
lambda: lib_avg_pool1d(),
device,
NUM_PRERUN,
NUM_ITERATIONS,
)
# fmt: on
check_error(LIBINFINIOP.infiniopDestroyAvgPool1dDescriptor(descriptor))
if __name__ == "__main__":
args = get_args()
DEBUG = args.debug
PROFILE = args.profile
NUM_PRERUN = args.num_prerun
NUM_ITERATIONS = args.num_iterations
for device in get_test_devices(args):
test_operator(device, test, _TEST_CASES, _TENSOR_DTYPES)
print("\033[92mTest passed!\033[0m")
import torch
import ctypes
from ctypes import c_uint64
from libinfiniop import (
LIBINFINIOP,
TestTensor,
get_test_devices,
check_error,
test_operator,
get_args,
get_tolerance,
profile_operation,
TestWorkspace,
InfiniDtype,
InfiniDtypeNames,
InfiniDeviceNames,
infiniopOperatorDescriptor_t,
)
# ------------------------------------------------------------
# Test case configuration
# ------------------------------------------------------------
_TEST_CASES_ = [
((2, 4, 10), None, None), # logits shape, x_stride, y_stride
((1, 128, 32000), None, None),
((4, 512, 1000), None, None),
]
_TENSOR_DTYPES = [InfiniDtype.F16, InfiniDtype.BF16, InfiniDtype.F32]
_TOLERANCE_MAP = {
InfiniDtype.F16: {"atol": 1e-3, "rtol": 1e-2},
InfiniDtype.BF16: {"atol": 1e-2, "rtol": 2e-2},
InfiniDtype.F32: {"atol": 1e-5, "rtol": 1e-5},
}
# ------------------------------------------------------------
# PyTorch reference implementation
# ------------------------------------------------------------
def cross_entropy_ref(logits, target):
vocab = logits.shape[-1]
logits_flat = logits.reshape(-1, vocab).float()
target_flat = target.reshape(-1).long()
loss = torch.nn.functional.cross_entropy(logits_flat, target_flat, reduction="none")
return loss.view(target.shape).to(logits.dtype)
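# Equivalence sketch (illustrative, not used by the harness): per-token
# cross entropy with reduction="none" equals the negative log-softmax
# probability of the target class, which is the quantity the kernel is
# compared against.
def _cross_entropy_manual(logits, target):
    log_probs = torch.log_softmax(logits.float(), dim=-1)
    picked = log_probs.gather(-1, target.long().unsqueeze(-1)).squeeze(-1)
    return (-picked).to(logits.dtype)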
def test(handle, device, shape, x_stride=None, y_stride=None, dtype=InfiniDtype.F16, sync=None):
logits_shape = shape
label_shape = shape[:-1]
vocab = shape[-1]
print(f"Testing CrossEntropy on {InfiniDeviceNames[device]} logits:{logits_shape} dtype:{InfiniDtypeNames[dtype]}")
x = TestTensor(logits_shape, x_stride, dtype, device)
target = TestTensor(label_shape, None, InfiniDtype.I64, device)
# Generate valid labels
tgt = target.torch_tensor()
tgt.copy_(torch.randint(0, vocab, label_shape, dtype=torch.int64, device=tgt.device))
target.actual_tensor().copy_(tgt)
reference = cross_entropy_ref(x.torch_tensor(), target.torch_tensor())
y = TestTensor(label_shape, y_stride, dtype, device)
descriptor = infiniopOperatorDescriptor_t()
check_error(
LIBINFINIOP.infiniopCreateCrossEntropyDescriptor(
handle, ctypes.byref(descriptor), y.descriptor, x.descriptor, target.descriptor
)
)
for tensor in [x, y, target]:
tensor.destroy_desc()
workspace_size = c_uint64(0)
check_error(LIBINFINIOP.infiniopGetCrossEntropyWorkspaceSize(descriptor, ctypes.byref(workspace_size)))
workspace = TestWorkspace(workspace_size.value, x.device)
def run():
check_error(
LIBINFINIOP.infiniopCrossEntropy(
descriptor,
workspace.data(),
workspace.size(),
y.data(),
x.data(),
target.data(),
None,
)
)
run()
if sync:
sync()
atol, rtol = get_tolerance(_TOLERANCE_MAP, dtype)
assert torch.allclose(y.actual_tensor(), reference, atol=atol, rtol=rtol)
check_error(LIBINFINIOP.infiniopDestroyCrossEntropyDescriptor(descriptor))
if __name__ == "__main__":
args = get_args()
for device in get_test_devices(args):
test_operator(device, test, _TEST_CASES_, _TENSOR_DTYPES)
print("\033[92mTest passed!\033[0m")
import torch
import ctypes
from ctypes import c_uint64
from libinfiniop import (
LIBINFINIOP,
TestTensor,
get_test_devices,
check_error,
test_operator,
get_args,
debug,
get_tolerance,
profile_operation,
TestWorkspace,
InfiniDtype,
InfiniDtypeNames,
InfiniDeviceNames,
infiniopOperatorDescriptor_t,
)
from enum import Enum, auto
# ==============================================================================
# Configuration (Internal Use Only)
# ==============================================================================
_TEST_CASES_ = [
# shape, a_stride, b_stride, c_stride
((13, 4), None, None, None),
((13, 4), (10, 1), (10, 1), (10, 1)),
((13, 4), (0, 1), None, None),
((13, 4, 4), None, None, None),
((13, 4, 4), (20, 4, 1), (20, 4, 1), (20, 4, 1)),
((13, 4, 4), (4, 0, 1), (0, 4, 1), None),
((16, 5632), None, None, None),
((16, 5632), (13312, 1), (13312, 1), (13312, 1)),
((13, 16, 2), (128, 4, 1), (0, 2, 1), (64, 4, 1)),
((13, 16, 2), (128, 4, 1), (2, 0, 1), (64, 4, 1)),
((4, 4, 5632), None, None, None),
((4, 4, 5632), (45056, 5632, 1), (45056, 5632, 1), (45056, 5632, 1)),
]
# The Equal operator is typically out-of-place only (float input vs. bool output differ in memory size)
class Inplace(Enum):
OUT_OF_PLACE = auto()
_INPLACE = [
Inplace.OUT_OF_PLACE,
]
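# Illustration of the size mismatch noted above (a sketch, not used by the
# harness): a float16 element occupies 2 bytes while a bool element occupies
# 1 byte, so the output buffer cannot alias a float input in place.
def _element_size_note():
    assert torch.empty((), dtype=torch.float16).element_size() == 2
    assert torch.empty((), dtype=torch.bool).element_size() == 1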
_TEST_CASES = [
test_case + (inplace_item,)
for test_case in _TEST_CASES_
for inplace_item in _INPLACE
]
# Input data types under test
_TENSOR_DTYPES = [InfiniDtype.F16, InfiniDtype.F32, InfiniDtype.BF16, InfiniDtype.I32, InfiniDtype.I64]
# Tolerance settings (bool comparisons require an exact match)
_TOLERANCE_MAP = {
InfiniDtype.F16: {"atol": 0, "rtol": 0},
InfiniDtype.F32: {"atol": 0, "rtol": 0},
InfiniDtype.BF16: {"atol": 0, "rtol": 0},
InfiniDtype.I32: {"atol": 0, "rtol": 0},
InfiniDtype.I64: {"atol": 0, "rtol": 0},
InfiniDtype.BOOL: {"atol": 0, "rtol": 0},
}
DEBUG = False
PROFILE = False
NUM_PRERUN = 10
NUM_ITERATIONS = 1000
# PyTorch reference implementation
def equal_func(c, a, b):
torch.eq(a, b, out=c)
def test(
handle,
device,
shape,
a_stride=None,
b_stride=None,
c_stride=None,
inplace=Inplace.OUT_OF_PLACE,
dtype=InfiniDtype.F16,
sync=None,
):
# Input tensors use the requested dtype (e.g. float16)
a = TestTensor(shape, a_stride, dtype, device)
b = TestTensor(shape, b_stride, dtype, device)
# [Key change] The output tensor is forced to BOOL dtype
# Note: if c_stride were computed in bytes, a bool element is typically 1 byte
c = TestTensor(shape, c_stride, InfiniDtype.BOOL, device)
if c.is_broadcast():
return
print(
f"Testing Equal on {InfiniDeviceNames[device]} with shape:{shape} a_stride:{a_stride} b_stride:{b_stride} c_stride:{c_stride} "
f"input_dtype:{InfiniDtypeNames[dtype]} output_dtype:BOOL"
)
# Run the PyTorch reference as the control
equal_func(c.torch_tensor(), a.torch_tensor(), b.torch_tensor())
if sync is not None:
sync()
descriptor = infiniopOperatorDescriptor_t()
# [Key change] Call the Equal descriptor-creation function
check_error(
LIBINFINIOP.infiniopCreateEqualDescriptor(
handle,
ctypes.byref(descriptor),
c.descriptor, # Output (Bool)
a.descriptor, # Input A
b.descriptor, # Input B
)
)
# Invalidate descriptors
for tensor in [a, b, c]:
tensor.destroy_desc()
workspace_size = c_uint64(0)
check_error(
LIBINFINIOP.infiniopGetEqualWorkspaceSize(
descriptor, ctypes.byref(workspace_size)
)
)
workspace = TestWorkspace(workspace_size.value, c.device)
def lib_equal():
check_error(
LIBINFINIOP.infiniopEqual(
descriptor,
workspace.data(),
workspace.size(),
c.data(),
a.data(),
b.data(),
None,
)
)
lib_equal()
# Use the BOOL tolerance (effectively exact equality)
atol, rtol = get_tolerance(_TOLERANCE_MAP, InfiniDtype.BOOL)
if DEBUG:
debug(c.actual_tensor(), c.torch_tensor(), atol=atol, rtol=rtol)
# Verify the result
assert torch.allclose(c.actual_tensor(), c.torch_tensor(), atol=atol, rtol=rtol)
# Profiling workflow
if PROFILE:
# fmt: off
profile_operation("PyTorch", lambda: equal_func(c.torch_tensor(), a.torch_tensor(), b.torch_tensor()), device, NUM_PRERUN, NUM_ITERATIONS)
profile_operation(" lib", lambda: lib_equal(), device, NUM_PRERUN, NUM_ITERATIONS)
# fmt: on
check_error(LIBINFINIOP.infiniopDestroyEqualDescriptor(descriptor))
if __name__ == "__main__":
args = get_args()
# Configure testing options
DEBUG = args.debug
PROFILE = args.profile
NUM_PRERUN = args.num_prerun
NUM_ITERATIONS = args.num_iterations
for device in get_test_devices(args):
test_operator(device, test, _TEST_CASES, _TENSOR_DTYPES)
print("\033[92mTest passed!\033[0m")
import torch
import ctypes
from ctypes import c_uint64
from libinfiniop import (
LIBINFINIOP,
TestTensor,
get_test_devices,
check_error,
test_operator,
get_args,
debug,
get_tolerance,
profile_operation,
TestWorkspace,
InfiniDtype,
InfiniDtypeNames,
InfiniDeviceNames,
infiniopOperatorDescriptor_t,
)
from enum import Enum, auto
# ==============================================================================
# Configuration (Internal Use Only)
# ==============================================================================
# Reuse the same test-case configuration, since HardSwish is also element-wise
_TEST_CASES_ = [
# shape, input_stride, output_stride
((13, 4), None, None),
((13, 4), (10, 1), (10, 1)),
((13, 4), (0, 1), None),
((13, 4, 4), None, None),
((13, 4, 4), (20, 4, 1), (20, 4, 1)),
((13, 4, 4), (4, 0, 1), None),
((16, 5632), None, None),
((16, 5632), (13312, 1), (13312, 1)),
((4, 4, 5632), None, None),
((4, 4, 5632), (45056, 5632, 1), (45056, 5632, 1)),
]
class Inplace(Enum):
OUT_OF_PLACE = auto()
INPLACE = auto()
_INPLACE = [
Inplace.OUT_OF_PLACE,
Inplace.INPLACE,
]
_TEST_CASES = [
test_case + (inplace_item,)
for test_case in _TEST_CASES_
for inplace_item in _INPLACE
]
_TENSOR_DTYPES = [InfiniDtype.BF16, InfiniDtype.F16, InfiniDtype.F32]
_TOLERANCE_MAP = {
InfiniDtype.BF16: {"atol": 1e-2, "rtol": 1e-2},
InfiniDtype.F16: {"atol": 1e-3, "rtol": 1e-3},
InfiniDtype.F32: {"atol": 1e-7, "rtol": 1e-7},
InfiniDtype.F64: {"atol": 2.22e-15, "rtol": 2.22e-15},
}
DEBUG = False
PROFILE = False
NUM_PRERUN = 10
NUM_ITERATIONS = 1000
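# For context, a manual restatement of the reference semantics (a sketch;
# the test itself calls torch.nn.functional.hardswish directly):
# hardswish(x) = x * relu6(x + 3) / 6.
def _hardswish_manual(x):
    return x * torch.clamp(x + 3.0, min=0.0, max=6.0) / 6.0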
def test(
handle,
device,
shape,
input_stride=None,
output_stride=None,
inplace=Inplace.OUT_OF_PLACE,
dtype=InfiniDtype.F16,
sync=None,
):
input = TestTensor(shape, input_stride, dtype, device)
if inplace == Inplace.INPLACE:
if input_stride != output_stride:
return
output = input
else:
output = TestTensor(shape, output_stride, dtype, device, mode="ones")
if output.is_broadcast():
return
print(
f"Testing HardSwish on {InfiniDeviceNames[device]} with shape:{shape} input_stride:{input_stride} output_stride:{output_stride}"
f"dtype:{InfiniDtypeNames[dtype]} inplace:{inplace}"
)
new_output = torch.nn.functional.hardswish(input.torch_tensor())
output.update_torch_tensor(new_output)
if sync is not None:
sync()
descriptor = infiniopOperatorDescriptor_t()
check_error(
LIBINFINIOP.infiniopCreateHardSwishDescriptor(
handle,
ctypes.byref(descriptor),
output.descriptor,
input.descriptor,
)
)
# Invalidate the shape and strides in the descriptor to prevent them from being directly used by the kernel
for tensor in [input, output]:
tensor.destroy_desc()
workspace_size = c_uint64(0)
check_error(
LIBINFINIOP.infiniopGetHardSwishWorkspaceSize(
descriptor, ctypes.byref(workspace_size)
)
)
workspace = TestWorkspace(workspace_size.value, output.device)
def lib_hardswish():
check_error(
LIBINFINIOP.infiniopHardSwish(
descriptor,
workspace.data(),
workspace.size(),
output.data(),
input.data(),
None,
)
)
lib_hardswish()
atol, rtol = get_tolerance(_TOLERANCE_MAP, dtype)
if DEBUG:
debug(output.actual_tensor(), output.torch_tensor(), atol=atol, rtol=rtol)
assert torch.allclose(
output.actual_tensor(), output.torch_tensor(), atol=atol, rtol=rtol
)
# Profiling workflow
if PROFILE:
# fmt: off
profile_operation("PyTorch", lambda: torch.nn.functional.hardswish(input.torch_tensor()), device, NUM_PRERUN, NUM_ITERATIONS)
profile_operation(" lib", lambda: lib_hardswish(), device, NUM_PRERUN, NUM_ITERATIONS)
# fmt: on
check_error(LIBINFINIOP.infiniopDestroyHardSwishDescriptor(descriptor))
if __name__ == "__main__":
args = get_args()
# Configure testing options
DEBUG = args.debug
PROFILE = args.profile
NUM_PRERUN = args.num_prerun
NUM_ITERATIONS = args.num_iterations
for device in get_test_devices(args):
test_operator(device, test, _TEST_CASES, _TENSOR_DTYPES)
print("\033[92mTest passed!\033[0m")
import torch
import ctypes
from ctypes import c_uint64, c_float
from libinfiniop import (
LIBINFINIOP,
TestTensor,
get_test_devices,
check_error,
test_operator,
get_args,
debug,
get_tolerance,
profile_operation,
TestWorkspace,
InfiniDtype,
InfiniDtypeNames,
InfiniDeviceNames,
infiniopOperatorDescriptor_t,
)
from enum import Enum, auto
# ==============================================================================
# Configuration
# ==============================================================================
_TEST_CASES_ = [
# shape, input_stride, output_stride
((13, 4), None, None),
((13, 4), (10, 1), (10, 1)),
((16, 5632), None, None),
((4, 4, 5632), None, None),
]
class Inplace(Enum):
OUT_OF_PLACE = auto()
INPLACE = auto()
_INPLACE = [
Inplace.OUT_OF_PLACE,
Inplace.INPLACE,
]
# HardTanh-specific parameter combinations (min_val, max_val)
_PARAM_CASES = [
(-1.0, 1.0),
(0.0, 6.0),  # similar to ReLU6
(-2.5, 2.5),
]
# Combine all test cases: shape + inplace + params
_TEST_CASES = [
test_case + (inplace_item, p_min, p_max)
for test_case in _TEST_CASES_
for inplace_item in _INPLACE
for p_min, p_max in _PARAM_CASES
]
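# One combined case unpacks as (shape, input_stride, output_stride, inplace,
# min_val, max_val), matching test()'s parameter order below (this assumes
# the harness splats each tuple into test() after handle and device):
# _TEST_CASES[0] == ((13, 4), None, None, Inplace.OUT_OF_PLACE, -1.0, 1.0)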
_TENSOR_DTYPES = [InfiniDtype.BF16, InfiniDtype.F16, InfiniDtype.F32]
_TOLERANCE_MAP = {
InfiniDtype.BF16: {"atol": 1e-2, "rtol": 1e-2},
InfiniDtype.F16: {"atol": 1e-3, "rtol": 1e-3},
InfiniDtype.F32: {"atol": 1e-7, "rtol": 1e-7},
}
DEBUG = False
PROFILE = False
NUM_PRERUN = 10
NUM_ITERATIONS = 1000
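# For context, a manual restatement of the reference semantics (a sketch;
# the test calls torch.nn.functional.hardtanh directly): hardtanh clamps
# element-wise to [min_val, max_val].
def _hardtanh_manual(x, min_val=-1.0, max_val=1.0):
    return torch.clamp(x, min=min_val, max=max_val)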
def test(
handle,
device,
shape,
input_stride=None,
output_stride=None,
inplace=Inplace.OUT_OF_PLACE,
min_val=-1.0,
max_val=1.0,
dtype=InfiniDtype.F16,
sync=None,
):
input = TestTensor(shape, input_stride, dtype, device)
if inplace == Inplace.INPLACE:
if input_stride != output_stride:
return
output = input
else:
output = TestTensor(shape, output_stride, dtype, device, mode="ones")
if output.is_broadcast():
return
print(
f"Testing HardTanh on {InfiniDeviceNames[device]} | shape:{shape} "
f"dtype:{InfiniDtypeNames[dtype]} inplace:{inplace} range:[{min_val}, {max_val}]"
)
# Compute the PyTorch ground truth
new_output = torch.nn.functional.hardtanh(input.torch_tensor(), min_val=min_val, max_val=max_val)
output.update_torch_tensor(new_output)
if sync is not None:
sync()
descriptor = infiniopOperatorDescriptor_t()
check_error(
LIBINFINIOP.infiniopCreateHardTanhDescriptor(
handle,
ctypes.byref(descriptor),
output.descriptor,
input.descriptor,
c_float(min_val),
c_float(max_val),
)
)
for tensor in [input, output]:
tensor.destroy_desc()
workspace_size = c_uint64(0)
check_error(
LIBINFINIOP.infiniopGetHardTanhWorkspaceSize(
descriptor, ctypes.byref(workspace_size)
)
)
workspace = TestWorkspace(workspace_size.value, output.device)
def lib_hardtanh():
check_error(
LIBINFINIOP.infiniopHardTanh(
descriptor,
workspace.data(),
workspace.size(),
output.data(),
input.data(),
None,
)
)
lib_hardtanh()
atol, rtol = get_tolerance(_TOLERANCE_MAP, dtype)
if DEBUG:
debug(output.actual_tensor(), output.torch_tensor(), atol=atol, rtol=rtol)
assert torch.allclose(
output.actual_tensor(), output.torch_tensor(), atol=atol, rtol=rtol
)
if PROFILE:
profile_operation("PyTorch", lambda: torch.nn.functional.hardtanh(input.torch_tensor(), min_val, max_val), device, NUM_PRERUN, NUM_ITERATIONS)
profile_operation(" lib", lambda: lib_hardtanh(), device, NUM_PRERUN, NUM_ITERATIONS)
check_error(LIBINFINIOP.infiniopDestroyHardTanhDescriptor(descriptor))
if __name__ == "__main__":
args = get_args()
DEBUG = args.debug
PROFILE = args.profile
NUM_PRERUN = args.num_prerun
NUM_ITERATIONS = args.num_iterations
for device in get_test_devices(args):
test_operator(device, test, _TEST_CASES, _TENSOR_DTYPES)
print("\033[92mHardTanh Test passed!\033[0m")
......@@ -54,54 +54,6 @@ def add_(lib):
infiniopOperatorDescriptor_t,
]
@OpRegister.operator
def equal_(lib):
# =========================================================
# 1. Register the Create function
# C signature: (handle, &desc, output_desc, input_a_desc, input_b_desc)
# =========================================================
lib.infiniopCreateEqualDescriptor.restype = c_int32
lib.infiniopCreateEqualDescriptor.argtypes = [
infiniopHandle_t, # handle
POINTER(infiniopOperatorDescriptor_t),  # desc_ptr (output)
infiniopTensorDescriptor_t, # output (c)
infiniopTensorDescriptor_t, # input_a
infiniopTensorDescriptor_t, # input_b
]
# =========================================================
# 2. Register the GetWorkspaceSize function
# C signature: (desc, &size)
# =========================================================
lib.infiniopGetEqualWorkspaceSize.restype = c_int32
lib.infiniopGetEqualWorkspaceSize.argtypes = [
infiniopOperatorDescriptor_t,
POINTER(c_size_t),
]
# =========================================================
# 3. Register the Execute (compute) function
# C signature: (desc, workspace, size, output_data, input_a_data, input_b_data, stream)
# =========================================================
lib.infiniopEqual.restype = c_int32
lib.infiniopEqual.argtypes = [
infiniopOperatorDescriptor_t, # desc
c_void_p, # workspace ptr
c_size_t, # workspace size
c_void_p, # output data ptr
c_void_p, # input a data ptr
c_void_p, # input b data ptr
c_void_p, # stream
]
# =========================================================
# 4. Register the Destroy function
# C signature: (desc)
# =========================================================
lib.infiniopDestroyEqualDescriptor.restype = c_int32
lib.infiniopDestroyEqualDescriptor.argtypes = [
infiniopOperatorDescriptor_t,
]
@OpRegister.operator
def attention_(lib):
......@@ -210,40 +162,6 @@ def clip_(lib):
]
@OpRegister.operator
def cross_entropy_(lib):
lib.infiniopCreateCrossEntropyDescriptor.restype = c_int32
lib.infiniopCreateCrossEntropyDescriptor.argtypes = [
infiniopHandle_t,
POINTER(infiniopOperatorDescriptor_t),
infiniopTensorDescriptor_t,
infiniopTensorDescriptor_t,
infiniopTensorDescriptor_t,
]
lib.infiniopGetCrossEntropyWorkspaceSize.restype = c_int32
lib.infiniopGetCrossEntropyWorkspaceSize.argtypes = [
infiniopOperatorDescriptor_t,
POINTER(c_size_t),
]
lib.infiniopCrossEntropy.restype = c_int32
lib.infiniopCrossEntropy.argtypes = [
infiniopOperatorDescriptor_t,
c_void_p,
c_size_t,
c_void_p,
c_void_p,
c_void_p,
c_void_p,
]
lib.infiniopDestroyCrossEntropyDescriptor.restype = c_int32
lib.infiniopDestroyCrossEntropyDescriptor.argtypes = [
infiniopOperatorDescriptor_t,
]
@OpRegister.operator
def logsoftmax_(lib):
lib.infiniopCreateLogSoftmaxDescriptor.restype = c_int32
......@@ -991,112 +909,6 @@ def silu_(lib):
infiniopOperatorDescriptor_t,
]
@OpRegister.operator
def hardtanh_(lib):
# 1. Create Descriptor: note the two extra c_float parameters (min_val, max_val)
lib.infiniopCreateHardTanhDescriptor.restype = c_int32
lib.infiniopCreateHardTanhDescriptor.argtypes = [
infiniopHandle_t, # handle
POINTER(infiniopOperatorDescriptor_t), # desc_ptr
infiniopTensorDescriptor_t, # output
infiniopTensorDescriptor_t, # input
c_float, # min_val
c_float, # max_val
]
# 2. Get Workspace Size
lib.infiniopGetHardTanhWorkspaceSize.restype = c_int32
lib.infiniopGetHardTanhWorkspaceSize.argtypes = [
infiniopOperatorDescriptor_t, # desc
POINTER(c_size_t), # size
]
# 3. Execute Operator
lib.infiniopHardTanh.restype = c_int32
lib.infiniopHardTanh.argtypes = [
infiniopOperatorDescriptor_t, # desc
c_void_p, # workspace
c_size_t, # workspace_size
c_void_p, # output
c_void_p, # input
c_void_p, # stream
]
# 4. Destroy Descriptor
lib.infiniopDestroyHardTanhDescriptor.restype = c_int32
lib.infiniopDestroyHardTanhDescriptor.argtypes = [
infiniopOperatorDescriptor_t, # desc
]
@OpRegister.operator
def hardswish_(lib):
lib.infiniopCreateHardSwishDescriptor.restype = c_int32
lib.infiniopCreateHardSwishDescriptor.argtypes = [
infiniopHandle_t,
POINTER(infiniopOperatorDescriptor_t),
infiniopTensorDescriptor_t,
infiniopTensorDescriptor_t,
]
lib.infiniopGetHardSwishWorkspaceSize.restype = c_int32
lib.infiniopGetHardSwishWorkspaceSize.argtypes = [
infiniopOperatorDescriptor_t,
POINTER(c_size_t),
]
lib.infiniopHardSwish.restype = c_int32
lib.infiniopHardSwish.argtypes = [
infiniopOperatorDescriptor_t,
c_void_p,
c_size_t,
c_void_p,
c_void_p,
c_void_p,
]
lib.infiniopDestroyHardSwishDescriptor.restype = c_int32
lib.infiniopDestroyHardSwishDescriptor.argtypes = [
infiniopOperatorDescriptor_t,
]
@OpRegister.operator
def avg_pool1d_(lib):
# 1. Create function
# C signature: (handle, *desc, y, x, kernel_size, stride, padding)
lib.infiniopCreateAvgPool1dDescriptor.restype = c_int32
lib.infiniopCreateAvgPool1dDescriptor.argtypes = [
infiniopHandle_t,
POINTER(infiniopOperatorDescriptor_t),
infiniopTensorDescriptor_t, # y_desc (Output)
infiniopTensorDescriptor_t, # x_desc (Input)
c_size_t, # kernel_size
c_size_t, # stride
c_size_t, # padding
]
# 2. GetWorkspaceSize function
lib.infiniopGetAvgPool1dWorkspaceSize.restype = c_int32
lib.infiniopGetAvgPool1dWorkspaceSize.argtypes = [
infiniopOperatorDescriptor_t,
POINTER(c_size_t),
]
# 3. Execute function
lib.infiniopAvgPool1d.restype = c_int32
lib.infiniopAvgPool1d.argtypes = [
infiniopOperatorDescriptor_t,
c_void_p, # workspace
c_size_t, # workspace_size
c_void_p, # y (output pointer)
c_void_p, # x (input pointer)
c_void_p, # stream
]
# 4. Destroy function
lib.infiniopDestroyAvgPool1dDescriptor.restype = c_int32
lib.infiniopDestroyAvgPool1dDescriptor.argtypes = [
infiniopOperatorDescriptor_t,
]
@OpRegister.operator
def layer_norm_(lib):
......
......@@ -83,12 +83,8 @@ class TestTensor(CTensor):
InfiniDtype.BYTE,
InfiniDtype.BOOL,
]:
if dt == InfiniDtype.BOOL:
randint_low = 0 if randint_low is None else randint_low
randint_high = 2 if randint_high is None else randint_high
else:
randint_low = -2000000000 if randint_low is None else randint_low
randint_high = 2000000000 if randint_high is None else randint_high
randint_low = -2000000000 if randint_low is None else randint_low
randint_high = 2000000000 if randint_high is None else randint_high
self._torch_tensor = torch.randint(
randint_low,
randint_high,
......