Commit 6b8949ce authored by wooway777's avatar wooway777
Browse files

issue/556 - support more inplace cases

parent 2e5b2342
......@@ -7,3 +7,5 @@ def add(input, other, *, out=None):
return Tensor(_infinicore.add(input._underlying, other._underlying))
_infinicore.add_(out._underlying, input._underlying, other._underlying)
return out
......@@ -24,3 +24,5 @@ def attention(q, k, v, k_cache, v_cache, pos, *, out=None):
v_cache._underlying,
pos,
)
return out
......@@ -7,3 +7,5 @@ def causal_softmax(input, *, out=None):
return Tensor(_infinicore.causal_softmax(input._underlying))
_infinicore.causal_softmax_(out._underlying, input._underlying)
return out
......@@ -7,3 +7,5 @@ def matmul(input, other, *, out=None):
return Tensor(_infinicore.matmul(input._underlying, other._underlying))
_infinicore.matmul_(out._underlying, input._underlying, other._underlying)
return out
......@@ -7,3 +7,5 @@ def rearrange(input, other, *, out=None):
return Tensor(_infinicore.rearrange(input._underlying))
_infinicore.rearrange_(out._underlying, input._underlying)
return out
......@@ -11,3 +11,5 @@ def rms_norm(input, weight, epsilon=1e-5, *, out=None):
_infinicore.rms_norm_(
out._underlying, input._underlying, weight._underlying, epsilon
)
return out
......@@ -7,3 +7,5 @@ def silu(input, *, out=None):
return Tensor(_infinicore.silu(input._underlying))
_infinicore.silu_(out._underlying, input._underlying)
return out
......@@ -7,3 +7,5 @@ def swiglu(input, other, *, out=None):
return Tensor(_infinicore.swiglu(input._underlying, other._underlying))
_infinicore.swiglu_(out._underlying, input._underlying, other._underlying)
return out
# [file name]: __init__.py
# [file content begin]
from .base import TestConfig, TestRunner, TestCase, BaseOperatorTest
from .tensor import TensorSpec, TensorInitializer
from .utils import (
......@@ -16,7 +14,6 @@ from .config import get_test_devices, get_args
from .devices import InfiniDeviceEnum, InfiniDeviceNames, torch_device_map
from .datatypes import to_torch_dtype, to_infinicore_dtype
from .runner import GenericTestRunner
from .templates import BinaryOperatorTest, UnaryOperatorTest
__all__ = [
"TensorSpec",
......@@ -41,6 +38,4 @@ __all__ = [
"to_torch_dtype",
"to_infinicore_dtype",
"GenericTestRunner",
"BinaryOperatorTest",
"UnaryOperatorTest",
]
This diff is collapsed.
......@@ -20,6 +20,8 @@ def to_torch_dtype(infini_dtype):
return torch.int64
elif infini_dtype == infinicore.uint8:
return torch.uint8
elif infini_dtype == infinicore.bool:
return torch.bool
else:
raise ValueError(f"Unsupported infinicore dtype: {infini_dtype}")
......@@ -42,5 +44,7 @@ def to_infinicore_dtype(torch_dtype):
return infinicore.int64
elif torch_dtype == torch.uint8:
return infinicore.uint8
elif torch_dtype == torch.bool:
return infinicore.bool
else:
raise ValueError(f"Unsupported torch dtype: {torch_dtype}")
......@@ -20,13 +20,10 @@ class GenericTestRunner:
def run(self):
"""Execute the complete test suite"""
config = TestConfig(
tensor_dtypes=self.operator_test.tensor_dtypes,
tolerance_map=self.operator_test.tolerance_map,
debug=self.args.debug,
bench=self.args.bench,
num_prerun=self.args.num_prerun,
num_iterations=self.args.num_iterations,
dtype_combinations=self.operator_test.dtype_combinations,
)
runner = TestRunner(self.operator_test.test_cases, config)
......
This diff is collapsed.
......@@ -37,43 +37,16 @@ def profile_operation(desc, func, torch_device, num_prerun, num_iterations):
print(f" {desc} time: {elapsed * 1000 :6f} ms")
def is_integer_dtype(dtype):
    """Return True when *dtype* is one of the infinicore integer types."""
    integer_dtypes = (
        infinicore.int8,
        infinicore.int16,
        infinicore.int32,
        infinicore.int64,
        infinicore.uint8,
    )
    return dtype in integer_dtypes
def is_float_dtype(dtype):
    """Return True when *dtype* is one of the infinicore floating-point types."""
    float_dtypes = (infinicore.float16, infinicore.float32, infinicore.bfloat16)
    return dtype in float_dtypes
def debug(
actual, desired, atol=0, rtol=1e-2, equal_nan=False, verbose=True, dtype=None
):
def debug(actual, desired, atol=0, rtol=1e-2, equal_nan=False, verbose=True):
"""
Debug function to compare two tensors and print differences
"""
# Convert to float32 for bfloat16 comparison
if actual.dtype == torch.bfloat16 or desired.dtype == torch.bfloat16:
actual = actual.to(torch.float32)
desired = desired.to(torch.float32)
print_discrepancy(actual, desired, atol, rtol, equal_nan, verbose, dtype)
print_discrepancy(actual, desired, atol, rtol, equal_nan, verbose)
# Use appropriate comparison based on dtype
if dtype and is_integer_dtype(dtype):
# For integer types, require exact equality
import numpy as np
np.testing.assert_array_equal(actual.cpu(), desired.cpu())
else:
# For float types, use allclose
import numpy as np
np.testing.assert_allclose(
......@@ -82,7 +55,7 @@ def debug(
def print_discrepancy(
actual, expected, atol=0, rtol=1e-3, equal_nan=True, verbose=True, dtype=None
actual, expected, atol=0, rtol=1e-3, equal_nan=True, verbose=True
):
"""Print detailed tensor differences"""
if actual.shape != expected.shape:
......@@ -96,21 +69,13 @@ def print_discrepancy(
actual_isnan = torch.isnan(actual)
expected_isnan = torch.isnan(expected)
# Calculate difference mask based on dtype
if dtype and is_integer_dtype(dtype):
# For integer types, exact equality required
diff_mask = actual != expected
else:
# For float types, use tolerance-based comparison
# Calculate difference mask
nan_mismatch = (
actual_isnan ^ expected_isnan
if equal_nan
else actual_isnan | expected_isnan
actual_isnan ^ expected_isnan if equal_nan else actual_isnan | expected_isnan
)
diff_mask = nan_mismatch | (
torch.abs(actual - expected) > (atol + rtol * torch.abs(expected))
)
diff_indices = torch.nonzero(diff_mask, as_tuple=False)
delta = actual - expected
......@@ -142,7 +107,6 @@ def print_discrepancy(
print(f" - Actual dtype: {actual.dtype}")
print(f" - Desired dtype: {expected.dtype}")
if not (dtype and is_integer_dtype(dtype)):
print(f" - Atol: {atol}")
print(f" - Rtol: {rtol}")
print(
......@@ -166,10 +130,6 @@ def get_tolerance(tolerance_map, tensor_dtype, default_atol=0, default_rtol=1e-3
"""
Get tolerance settings based on data type
"""
# For integer types, return zero tolerance (exact match required)
if is_integer_dtype(tensor_dtype):
return 0, 0
tolerance = tolerance_map.get(
tensor_dtype, {"atol": default_atol, "rtol": default_rtol}
)
......@@ -202,6 +162,8 @@ def convert_infinicore_to_torch(infini_result, torch_reference):
Args:
infini_result: infinicore tensor result
torch_reference: PyTorch tensor reference (for shape and device)
dtype: infinicore data type
device_str: torch device string
Returns:
torch.Tensor: PyTorch tensor with infinicore data
......@@ -217,70 +179,103 @@ def convert_infinicore_to_torch(infini_result, torch_reference):
def compare_results(
infini_result, torch_result, atol=1e-5, rtol=1e-5, debug_mode=False, dtype=None
infini_result, torch_result, atol=1e-5, rtol=1e-5, debug_mode=False
):
"""
Generic function to compare infinicore result with PyTorch reference result
Supports both floating-point (with tolerance) and integer (exact) comparison
Args:
infini_result: infinicore tensor result
torch_result: PyTorch tensor reference result
atol: absolute tolerance
rtol: relative tolerance
atol: absolute tolerance (for floating-point only)
rtol: relative tolerance (for floating-point only)
debug_mode: whether to enable debug output
dtype: infinicore data type for comparison logic
Returns:
bool: True if results match within tolerance
bool: True if results match within tolerance (FP) or exactly (integer)
"""
# Convert infinicore result to PyTorch tensor for comparison
torch_result_from_infini = convert_infinicore_to_torch(infini_result, torch_result)
# Choose comparison method based on dtype
if dtype and is_integer_dtype(dtype):
# For integer types, require exact equality
result = torch.equal(torch_result_from_infini, torch_result)
# Handle scalar integer comparison
if isinstance(torch_result_from_infini, (int, float)) and isinstance(
torch_result, (int, float)
):
if isinstance(torch_result_from_infini, int) and isinstance(torch_result, int):
# Exact integer scalar comparison
result_equal = torch_result_from_infini == torch_result
if debug_mode and not result_equal:
print(
f"Integer scalar mismatch: {torch_result_from_infini} != {torch_result}"
)
return result_equal
else:
# For float types, use tolerance-based comparison
result = torch.allclose(
torch_result_from_infini, torch_result, atol=atol, rtol=rtol
# Floating-point scalar comparison with tolerance
return abs(torch_result_from_infini - torch_result) <= atol + rtol * abs(
torch_result
)
# Debug mode: detailed comparison
if debug_mode:
debug(torch_result_from_infini, torch_result, atol=atol, rtol=rtol, dtype=dtype)
return result
debug(torch_result_from_infini, torch_result, atol=atol, rtol=rtol)
# Choose comparison method based on data type
if is_integer_dtype(torch_result_from_infini.dtype) or is_integer_dtype(
torch_result.dtype
):
# Exact equality for integer types
result_equal = torch.equal(torch_result_from_infini, torch_result)
if debug_mode and not result_equal:
print("Integer tensor comparison failed - requiring exact equality")
return result_equal
else:
# Tolerance-based comparison for floating-point types
return torch.allclose(
torch_result_from_infini, torch_result, atol=atol, rtol=rtol
)
def create_test_comparator(config, dtype, tolerance_map=None, mode_name=""):
def create_test_comparator(config, atol, rtol, mode_name=""):
"""
Create a test-specific comparison function that handles test configuration
Create a test-specific comparison function
Args:
config: test configuration
dtype: infinicore data type
tolerance_map: optional tolerance map (defaults to config's tolerance_map)
atol: absolute tolerance (for floating-point only)
rtol: relative tolerance (for floating-point only)
mode_name: operation mode name for debug output
Returns:
callable: function that takes (infini_result, torch_result) and returns bool
"""
if tolerance_map is None:
tolerance_map = config.tolerance_map
atol, rtol = get_tolerance(tolerance_map, dtype)
def compare_test_results(infini_result, torch_result):
if config.debug and mode_name:
print(f"\033[94mDEBUG INFO - {mode_name}:\033[0m")
# For integer types, override tolerance to require exact equality
actual_atol = atol
actual_rtol = rtol
# Check if we're dealing with integer types
try:
# Try to get dtype from infinicore tensor
if hasattr(infini_result, "dtype"):
infini_dtype = infini_result.dtype
torch_dtype = to_torch_dtype(infini_dtype)
if is_integer_dtype(torch_dtype):
actual_atol = 0
actual_rtol = 0
except:
pass
return compare_results(
infini_result,
torch_result,
atol=atol,
rtol=rtol,
atol=actual_atol,
rtol=actual_rtol,
debug_mode=config.debug,
dtype=dtype,
)
return compare_test_results
......@@ -330,3 +325,30 @@ def rearrange_tensor(tensor, new_strides):
new_tensor.set_(new_tensor.untyped_storage(), offset, shape, tuple(new_strides))
return new_tensor
def is_broadcast(strides):
    """
    Determine whether a stride layout describes a broadcasted tensor.

    A tensor counts as broadcasted when any stride is zero, i.e. one
    dimension repeatedly reads the same memory elements.

    Args:
        strides: Tensor strides sequence, or None for a default layout

    Returns:
        bool: True if any stride equals zero; False otherwise (including None)
    """
    if strides is None:
        return False
    return 0 in tuple(strides)
def is_integer_dtype(dtype):
    """Return True for torch integer-like dtypes.

    torch.bool is deliberately included so boolean tensors get the same
    exact-equality comparison path as integer tensors.
    """
    return dtype in (
        torch.int8,
        torch.int16,
        torch.int32,
        torch.int64,
        torch.uint8,
        torch.bool,
    )
......@@ -7,93 +7,138 @@ import torch
import infinicore
from framework.base import BaseOperatorTest, TensorSpec, TestCase
from framework.runner import GenericTestRunner
from framework.utils import is_broadcast
# ==============================================================================
# Operator-specific configuration
# ==============================================================================
# Test cases format: (operation_mode, shape, a_strides, b_strides, c_strides)
# Test cases format: (shape, a_strides, b_strides, c_strides)
_TEST_CASES_DATA = [
(TestCase.BOTH, (13, 4), None, None, None),
(TestCase.BOTH, (13, 4), (10, 1), (10, 1), (10, 1)),
(TestCase.BOTH, (13, 4), (0, 1), None, None),
(TestCase.BOTH, (13, 4, 4), None, None, None),
(TestCase.BOTH, (13, 4, 4), (20, 4, 1), (20, 4, 1), (20, 4, 1)),
(TestCase.BOTH, (13, 4, 4), (4, 0, 1), (0, 4, 1), None),
(TestCase.BOTH, (16, 5632), None, None, None),
(TestCase.BOTH, (16, 5632), (13312, 1), (13312, 1), (13312, 1)),
# Basic cases
((13, 4), None, None, None),
((13, 4), (10, 1), (10, 1), None),
# Strided cases
((13, 4), None, None, (10, 1)),
((13, 4), (10, 1), (10, 1), (10, 1)),
# 3D cases
((13, 4, 4), None, None, None),
((13, 4, 4), (20, 4, 1), (20, 4, 1), None),
# Broadcast cases
((13, 4, 4), (4, 0, 1), (0, 4, 1), None),
# Large tensors
((16, 5632), None, None, None),
((16, 5632), (13312, 1), (13312, 1), None),
]
def parse_test_cases(data):
    """
    Build one TestCase from an add test-case tuple:
    (operation_mode, shape, a_strides, b_strides, c_strides)

    Trailing stride entries may be omitted; missing strides mean a
    contiguous tensor.
    """
    operation_mode, shape = data[0], data[1]
    a_strides = data[2] if len(data) > 2 else None
    b_strides = data[3] if len(data) > 3 else None
    c_strides = data[4] if len(data) > 4 else None

    def make_spec(strides):
        # Strided spec when strides are supplied, plain contiguous spec otherwise.
        if strides is not None:
            return TensorSpec.from_strided_tensor(shape, strides)
        return TensorSpec.from_tensor(shape)

    # Two same-shape inputs (a, b) and one output (c).
    inputs = [make_spec(a_strides), make_spec(b_strides)]
    output = make_spec(c_strides)
    return TestCase(operation_mode, inputs, output)
# Parse test cases
_TEST_CASES = [parse_test_cases(data) for data in _TEST_CASES_DATA]
# Data types
_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32]
# Tolerance
# Tolerance configuration
_TOLERANCE_MAP = {
infinicore.float16: {"atol": 0, "rtol": 1e-2},
infinicore.float32: {"atol": 0, "rtol": 1e-3},
infinicore.bfloat16: {"atol": 0, "rtol": 5e-2},
}
# Data types to test
_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32]
def parse_test_cases():
    """
    Build TestCase objects for the add operator.

    For every entry in _TEST_CASES_DATA and every dtype in _TENSOR_DTYPES,
    up to four cases are produced:
      1. out-of-place (result returned),
      2. explicit output tensor (out=c) when c is not broadcast,
      3. in-place on the first input (out=a) when a is not broadcast,
      4. in-place on the second input (out=b) when b is not broadcast.

    Returns:
        list[TestCase]: fully-specified test cases for execution/validation.
    """
    test_cases = []
    for data in _TEST_CASES_DATA:
        shape = data[0]
        a_strides = data[1] if len(data) > 1 else None
        b_strides = data[2] if len(data) > 2 else None
        c_strides = data[3] if len(data) > 3 else None

        # Broadcast (zero-stride) tensors cannot be written in place.
        a_supports_inplace = not is_broadcast(a_strides)
        b_supports_inplace = not is_broadcast(b_strides)
        c_supports_inplace = not is_broadcast(c_strides)

        for dtype in _TENSOR_DTYPES:
            tolerance = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 1e-3})

            # Create typed tensor specs.
            a_spec = TensorSpec.from_tensor(shape, a_strides, dtype)
            b_spec = TensorSpec.from_tensor(shape, b_strides, dtype)
            c_spec = TensorSpec.from_tensor(shape, c_strides, dtype)

            # Test Case 1: out-of-place (return value).
            # NOTE: plain strings instead of placeholder-free f-strings (F541).
            test_cases.append(
                TestCase(
                    inputs=[a_spec, b_spec],
                    kwargs={},
                    output_spec=None,
                    comparison_target=None,
                    tolerance=tolerance,
                    description="Add - OUT_OF_PLACE",
                )
            )

            # Test Case 2: explicit output tensor, add(a, b, out=c).
            if c_supports_inplace:
                test_cases.append(
                    TestCase(
                        inputs=[a_spec, b_spec],
                        kwargs=None,
                        output_spec=c_spec,  # specify the output tensor spec
                        comparison_target="out",
                        tolerance=tolerance,
                        description="Add - INPLACE(out)",
                    )
                )

            # Test Case 3: in-place on the first input, add(a, b, out=a).
            if a_supports_inplace:
                test_cases.append(
                    TestCase(
                        inputs=[a_spec, b_spec],
                        kwargs={"out": 0},  # index of the aliased input
                        output_spec=None,
                        comparison_target=0,  # compare first input
                        tolerance=tolerance,
                        description="Add - INPLACE(a)",
                    )
                )

            # Test Case 4: in-place on the second input, add(a, b, out=b).
            if b_supports_inplace:
                test_cases.append(
                    TestCase(
                        inputs=[a_spec, b_spec],
                        kwargs={"out": 1},  # index of the aliased input
                        output_spec=None,
                        comparison_target=1,  # compare second input
                        tolerance=tolerance,
                        description="Add - INPLACE(b)",
                    )
                )

    return test_cases
class OpTest(BaseOperatorTest):
"""Add test with simplified test case parsing"""
"""Add operator test with simplified implementation"""
def __init__(self):
super().__init__("Add")
def get_test_cases(self):
return _TEST_CASES
def get_tensor_dtypes(self):
return _TENSOR_DTYPES
def get_tolerance_map(self):
return _TOLERANCE_MAP
return parse_test_cases()
def torch_operator(self, a, b, out=None, **kwargs):
return torch.add(a, b, out=out)
def torch_operator(self, *args, **kwargs):
"""PyTorch add implementation"""
return torch.add(*args, **kwargs)
def infinicore_operator(self, a, b, out=None, **kwargs):
return infinicore.add(a, b, out=out)
def infinicore_operator(self, *args, **kwargs):
"""InfiniCore add implementation"""
return infinicore.add(*args, **kwargs)
def main():
......
......@@ -11,18 +11,17 @@ import torch
import infinicore
from framework.base import BaseOperatorTest, TensorSpec, TestCase
from framework.runner import GenericTestRunner
from framework.utils import is_broadcast
# ==============================================================================
# Operator-specific configuration
# ==============================================================================
# Test cases format: (operation_mode, n_q_head, n_kv_head, seq_len, head_dim, pos,
# k_cache_buf_len, v_cache_buf_len, q_strides, k_strides, v_strides,
# k_cache_strides, v_cache_strides)
# Test cases format: (n_q_head, n_kv_head, seq_len, head_dim, pos, k_cache_buf_len, v_cache_buf_len,
# q_strides, k_strides, v_strides, k_cache_strides, v_cache_strides)
_TEST_CASES_DATA = [
# Prefill stage
(
TestCase.OUT_OF_PLACE,
32,
4,
5,
......@@ -38,7 +37,6 @@ _TEST_CASES_DATA = [
),
# Decode stage
(
TestCase.OUT_OF_PLACE,
32,
4,
1,
......@@ -53,10 +51,9 @@ _TEST_CASES_DATA = [
[64, 11264, 1],
),
# Small test case
(TestCase.OUT_OF_PLACE, 8, 4, 2, 16, 1, 8, 8, None, None, None, None, None),
(8, 4, 2, 16, 1, 8, 8, None, None, None, None, None),
# Another prefill case
(
TestCase.OUT_OF_PLACE,
28,
28,
15,
......@@ -137,124 +134,114 @@ def torch_attention(q, k, v, k_cache, v_cache, pos):
return attn_output
def parse_test_cases(data):
def parse_test_cases():
"""
Parse attention test case data according to format:
(operation_mode, n_q_head, n_kv_head, seq_len, head_dim, pos,
k_cache_buf_len, v_cache_buf_len, q_strides, k_strides, v_strides,
k_cache_strides, v_cache_strides)
(n_q_head, n_kv_head, seq_len, head_dim, pos, k_cache_buf_len, v_cache_buf_len,
q_strides, k_strides, v_strides, k_cache_strides, v_cache_strides)
"""
operation_mode = data[0]
test_cases = []
for data in _TEST_CASES_DATA:
n_q_head, n_kv_head, seq_len, head_dim, pos = (
data[0],
data[1],
data[2],
data[3],
data[4],
data[5],
)
k_cache_buf_len, v_cache_buf_len = data[6], data[7]
q_strides = data[8] if len(data) > 8 else None
k_strides = data[9] if len(data) > 9 else None
v_strides = data[10] if len(data) > 10 else None
k_cache_strides = data[11] if len(data) > 11 else None
v_cache_strides = data[12] if len(data) > 12 else None
# Create input specifications
inputs = []
# Query tensor: (n_q_head, seq_len, head_dim)
if q_strides is not None:
inputs.append(
TensorSpec.from_strided_tensor((n_q_head, seq_len, head_dim), q_strides)
)
else:
inputs.append(TensorSpec.from_tensor((n_q_head, seq_len, head_dim)))
k_cache_buf_len, v_cache_buf_len = data[5], data[6]
q_strides = data[7] if len(data) > 7 else None
k_strides = data[8] if len(data) > 8 else None
v_strides = data[9] if len(data) > 9 else None
k_cache_strides = data[10] if len(data) > 10 else None
v_cache_strides = data[11] if len(data) > 11 else None
# Check if output tensor supports in-place operations
# For attention, output shape is (seq_len, n_q_head, head_dim)
output_shape = (seq_len, n_q_head, head_dim)
output_supports_inplace = True # Output is always contiguous for attention
# Key tensor: (n_kv_head, seq_len, head_dim)
if k_strides is not None:
inputs.append(
TensorSpec.from_strided_tensor((n_kv_head, seq_len, head_dim), k_strides)
)
else:
inputs.append(TensorSpec.from_tensor((n_kv_head, seq_len, head_dim)))
# Generate test cases for all data types
for dtype in [infinicore.float16, infinicore.bfloat16, infinicore.float32]:
tolerance = {
infinicore.float16: {"atol": 1e-4, "rtol": 1e-2},
infinicore.float32: {"atol": 1e-5, "rtol": 1e-3},
infinicore.bfloat16: {"atol": 1e-3, "rtol": 5e-2},
}.get(dtype, {"atol": 1e-5, "rtol": 1e-4})
# Value tensor: (n_kv_head, seq_len, head_dim)
if v_strides is not None:
inputs.append(
TensorSpec.from_strided_tensor((n_kv_head, seq_len, head_dim), v_strides)
# Create typed tensor specs
q_spec = TensorSpec.from_tensor(
(n_q_head, seq_len, head_dim), q_strides, dtype
)
else:
inputs.append(TensorSpec.from_tensor((n_kv_head, seq_len, head_dim)))
# Key cache: (n_kv_head, k_cache_buf_len, head_dim)
if k_cache_strides is not None:
inputs.append(
TensorSpec.from_strided_tensor(
(n_kv_head, k_cache_buf_len, head_dim), k_cache_strides
k_spec = TensorSpec.from_tensor(
(n_kv_head, seq_len, head_dim), k_strides, dtype
)
v_spec = TensorSpec.from_tensor(
(n_kv_head, seq_len, head_dim), v_strides, dtype
)
else:
inputs.append(TensorSpec.from_tensor((n_kv_head, k_cache_buf_len, head_dim)))
# Value cache: (n_kv_head, v_cache_buf_len, head_dim)
if v_cache_strides is not None:
inputs.append(
TensorSpec.from_strided_tensor(
(n_kv_head, v_cache_buf_len, head_dim), v_cache_strides
k_cache_spec = TensorSpec.from_tensor(
(n_kv_head, k_cache_buf_len, head_dim), k_cache_strides, dtype
)
v_cache_spec = TensorSpec.from_tensor(
(n_kv_head, v_cache_buf_len, head_dim), v_cache_strides, dtype
)
pos_spec = TensorSpec.from_scalar(pos)
output_spec = TensorSpec.from_tensor(
output_shape, None, dtype
) # Output is always contiguous
# Inputs list
inputs = [q_spec, k_spec, v_spec, k_cache_spec, v_cache_spec, pos_spec]
# Test Case 1: Out-of-place (return value)
test_cases.append(
TestCase(
inputs=inputs,
kwargs={},
output_spec=None,
comparison_target=None,
tolerance=tolerance,
description=f"Attention - OUT_OF_PLACE",
)
)
else:
inputs.append(TensorSpec.from_tensor((n_kv_head, v_cache_buf_len, head_dim)))
# Position (scalar)
inputs.append(TensorSpec.from_scalar(pos))
# Output tensor: (seq_len, n_q_head, head_dim)
output_shape = (seq_len, n_q_head, head_dim)
output = TensorSpec.from_tensor(output_shape)
return TestCase(operation_mode, inputs, output)
# Test Case 2: In-place with explicit output tensor (attention(q, k, v, k_cache, v_cache, pos, out=output))
if output_supports_inplace:
test_cases.append(
TestCase(
inputs=inputs,
kwargs=None,
output_spec=output_spec, # Specify the output tensor spec
comparison_target="out",
tolerance=tolerance,
description=f"Attention - INPLACE(out)",
)
)
# Parse test cases
_TEST_CASES = [parse_test_cases(data) for data in _TEST_CASES_DATA]
# Data types
_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32]
# Tolerance
_TOLERANCE_MAP = {
infinicore.float16: {"atol": 1e-4, "rtol": 1e-2},
infinicore.float32: {"atol": 1e-5, "rtol": 1e-3},
infinicore.bfloat16: {"atol": 1e-3, "rtol": 5e-2},
}
return test_cases
class OpTest(BaseOperatorTest):
"""Attention test with simplified test case parsing"""
"""Attention operator test with simplified implementation"""
def __init__(self):
super().__init__("Attention")
def get_test_cases(self):
return _TEST_CASES
def get_tensor_dtypes(self):
return _TENSOR_DTYPES
def get_tolerance_map(self):
return _TOLERANCE_MAP
return parse_test_cases()
def torch_operator(self, q, k, v, k_cache, v_cache, pos, out=None, **kwargs):
"""PyTorch attention implementation"""
result = torch_attention(q, k, v, k_cache, v_cache, pos)
if out is not None:
out.set_(result)
out.copy_(result)
return out
else:
return result
def infinicore_operator(self, q, k, v, k_cache, v_cache, pos, out=None, **kwargs):
"""InfiniCore attention implementation"""
return infinicore.attention(q, k, v, k_cache, v_cache, pos, out=out)
......
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import torch
import infinicore
from framework.base import BaseOperatorTest, TensorSpec, TestCase
from framework.runner import GenericTestRunner
from framework.utils import is_broadcast
# ==============================================================================
# Operator-specific configuration
# ==============================================================================
# Test cases format: (shape, a_strides, b_strides, c_strides)
# Test cases format: (shape, a_strides, b_strides, c_strides)
# Missing/None strides mean a contiguous tensor; zero strides mark broadcast dims.
_TEST_CASES_DATA = [
    # Basic cases
    ((13, 4), None, None, None),
    ((13, 4), (10, 1), (10, 1), None),
    # Strided cases
    ((13, 4), None, None, (10, 1)),
    ((13, 4), (10, 1), (10, 1), (10, 1)),
    # 3D cases
    ((13, 4, 4), None, None, None),
    ((13, 4, 4), (20, 4, 1), (20, 4, 1), None),
    # Broadcast cases
    ((13, 4, 4), (4, 0, 1), (0, 4, 1), None),
    # Large tensors
    ((16, 5632), None, None, None),
    ((16, 5632), (13312, 1), (13312, 1), None),
]
# Tolerance configuration - exact match required for bitwise operations
_TOLERANCE_MAP = {
    infinicore.int8: {"atol": 0, "rtol": 0},
    infinicore.int16: {"atol": 0, "rtol": 0},
    infinicore.int32: {"atol": 0, "rtol": 0},
    infinicore.int64: {"atol": 0, "rtol": 0},
    infinicore.uint8: {"atol": 0, "rtol": 0},
    infinicore.bool: {"atol": 0, "rtol": 0},
}
# Data types to test - integer types for bitwise operations
_TENSOR_DTYPES = [
    infinicore.int8,
    infinicore.int16,
    infinicore.int32,
    infinicore.int64,
    infinicore.uint8,
    infinicore.bool,  # XOR also supports boolean tensors
]
def parse_test_cases():
    """
    Build TestCase objects for the bitwise-XOR operator.

    For every entry in _TEST_CASES_DATA and every dtype in _TENSOR_DTYPES,
    up to four cases are produced:
      1. out-of-place (result returned),
      2. explicit output tensor (out=c) when c is not broadcast,
      3. in-place on the first input (out=a) when a is not broadcast,
      4. in-place on the second input (out=b) when b is not broadcast.

    Returns:
        list[TestCase]: fully-specified test cases for execution/validation.
    """
    test_cases = []
    for data in _TEST_CASES_DATA:
        shape = data[0]
        a_strides = data[1] if len(data) > 1 else None
        b_strides = data[2] if len(data) > 2 else None
        c_strides = data[3] if len(data) > 3 else None

        # Broadcast (zero-stride) tensors cannot be written in place.
        a_supports_inplace = not is_broadcast(a_strides)
        b_supports_inplace = not is_broadcast(b_strides)
        c_supports_inplace = not is_broadcast(c_strides)

        for dtype in _TENSOR_DTYPES:
            # Bitwise ops must match exactly, so the fallback tolerance is zero.
            tolerance = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 0})

            # Create typed tensor specs.
            a_spec = TensorSpec.from_tensor(shape, a_strides, dtype)
            b_spec = TensorSpec.from_tensor(shape, b_strides, dtype)
            c_spec = TensorSpec.from_tensor(shape, c_strides, dtype)

            # Test Case 1: out-of-place (return value).
            # NOTE: plain strings instead of placeholder-free f-strings (F541).
            test_cases.append(
                TestCase(
                    inputs=[a_spec, b_spec],
                    kwargs={},
                    output_spec=None,
                    comparison_target=None,
                    tolerance=tolerance,
                    description="BitwiseXor - OUT_OF_PLACE",
                )
            )

            # Test Case 2: explicit output tensor, bitwise_xor(a, b, out=c).
            if c_supports_inplace:
                test_cases.append(
                    TestCase(
                        inputs=[a_spec, b_spec],
                        kwargs=None,
                        output_spec=c_spec,  # specify the output tensor spec
                        comparison_target="out",
                        tolerance=tolerance,
                        description="BitwiseXor - INPLACE(out)",
                    )
                )

            # Test Case 3: in-place on the first input, bitwise_xor(a, b, out=a).
            if a_supports_inplace:
                test_cases.append(
                    TestCase(
                        inputs=[a_spec, b_spec],
                        kwargs={"out": 0},  # index of the aliased input
                        output_spec=None,
                        comparison_target=0,  # compare first input
                        tolerance=tolerance,
                        description="BitwiseXor - INPLACE(a)",
                    )
                )

            # Test Case 4: in-place on the second input, bitwise_xor(a, b, out=b).
            if b_supports_inplace:
                test_cases.append(
                    TestCase(
                        inputs=[a_spec, b_spec],
                        kwargs={"out": 1},  # index of the aliased input
                        output_spec=None,
                        comparison_target=1,  # compare second input
                        tolerance=tolerance,
                        description="BitwiseXor - INPLACE(b)",
                    )
                )

    return test_cases
class OpTest(BaseOperatorTest):
    """Bitwise XOR operator test with simplified implementation"""
    def __init__(self):
        # Operator name passed to the framework base class.
        super().__init__("BitwiseXor")
    def get_test_cases(self):
        # Cases are regenerated on each call by parse_test_cases().
        return parse_test_cases()
    def torch_operator(self, *args, **kwargs):
        """PyTorch bitwise_xor implementation"""
        return torch.bitwise_xor(*args, **kwargs)
    # NOTE(review): the InfiniCore operator is left disabled below —
    # presumably infinicore.bitwise_xor is not available yet; confirm
    # before enabling.
    # def infinicore_operator(self, *args, **kwargs):
    #     """InfiniCore bitwise_xor implementation"""
    #     return infinicore.bitwise_xor(*args, **kwargs)
def main():
    """Build the generic runner for this operator test and execute it."""
    GenericTestRunner(OpTest).run_and_exit()


if __name__ == "__main__":
    main()
......@@ -7,6 +7,7 @@ import torch
import infinicore
from framework.base import BaseOperatorTest, TensorSpec, TestCase
from framework.runner import GenericTestRunner
from framework.utils import is_broadcast
# ==============================================================================
# Operator-specific configuration
......@@ -16,52 +17,17 @@ from framework.runner import GenericTestRunner
# Causal softmax is a single-input function that applies causal masking before softmax
_TEST_CASES_DATA = [
# Basic 2D causal softmax
(TestCase.BOTH, (3, 3), None, None),
(TestCase.BOTH, (32, 512), None, None),
((3, 3), None, None),
((32, 512), None, None),
# Strided tensors
(TestCase.BOTH, (32, 512), (1024, 1), (1024, 1)),
((32, 512), (1024, 1), (1024, 1)),
# 3D causal softmax
(TestCase.BOTH, (32, 5, 5), None, None),
(TestCase.BOTH, (32, 20, 512), None, None),
(TestCase.BOTH, (32, 20, 512), (20480, 512, 1), None),
(TestCase.BOTH, (28, 15, 15), None, None),
((32, 5, 5), None, None),
((32, 20, 512), None, None),
((32, 20, 512), (20480, 512, 1), None),
((28, 15, 15), None, None),
]
def parse_test_cases(data):
    """
    Build one TestCase from a causal_softmax test-case tuple:
    (operation_mode, shape, input_strides, output_strides)

    Trailing stride entries may be omitted; missing strides mean a
    contiguous tensor.
    """
    operation_mode, shape = data[0], data[1]
    input_strides = data[2] if len(data) > 2 else None
    output_strides = data[3] if len(data) > 3 else None

    def make_spec(strides):
        # Strided spec when strides are supplied, plain contiguous spec otherwise.
        if strides is not None:
            return TensorSpec.from_strided_tensor(shape, strides)
        return TensorSpec.from_tensor(shape)

    # Single input tensor and one output tensor of the same shape.
    inputs = [make_spec(input_strides)]
    output = make_spec(output_strides)
    return TestCase(operation_mode, inputs, output)
# Parse test cases
_TEST_CASES = [parse_test_cases(data) for data in _TEST_CASES_DATA]
# Data types
_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32]
# Tolerance
_TOLERANCE_MAP = {
infinicore.float16: {"atol": 1e-3, "rtol": 1e-2},
......@@ -69,6 +35,74 @@ _TOLERANCE_MAP = {
infinicore.bfloat16: {"atol": 5e-3, "rtol": 5e-2},
}
# Data types
_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32]
def parse_test_cases():
    """
    Parse causal_softmax test case data according to format:
    (shape, input_strides, output_strides)

    For every entry in _TEST_CASES_DATA and every dtype in _TENSOR_DTYPES,
    up to three cases are produced:
      1. out-of-place (result returned),
      2. explicit output tensor (out=output) when output is not broadcast,
      3. in-place on the input (out=input) when input is not broadcast.

    Returns:
        list[TestCase]: fully-specified test cases for execution/validation.
    """
    test_cases = []
    for data in _TEST_CASES_DATA:
        shape = data[0]
        input_strides = data[1] if len(data) > 1 else None
        output_strides = data[2] if len(data) > 2 else None

        # Broadcast (zero-stride) tensors cannot be written in place.
        input_supports_inplace = not is_broadcast(input_strides)
        output_supports_inplace = not is_broadcast(output_strides)

        for dtype in _TENSOR_DTYPES:
            tolerance = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 1e-3})

            # Create typed tensor specs.
            input_spec = TensorSpec.from_tensor(shape, input_strides, dtype)
            output_spec = TensorSpec.from_tensor(shape, output_strides, dtype)

            # Test Case 1: out-of-place (return value).
            # NOTE: plain strings instead of placeholder-free f-strings (F541).
            test_cases.append(
                TestCase(
                    inputs=[input_spec],
                    kwargs={},
                    output_spec=None,
                    comparison_target=None,
                    tolerance=tolerance,
                    description="Causal Softmax - OUT_OF_PLACE",
                )
            )

            # Test Case 2: explicit output tensor, causal_softmax(input, out=output).
            if output_supports_inplace:
                test_cases.append(
                    TestCase(
                        inputs=[input_spec],
                        kwargs=None,
                        output_spec=output_spec,  # specify the output tensor spec
                        comparison_target="out",
                        tolerance=tolerance,
                        description="Causal Softmax - INPLACE(out)",
                    )
                )

            # Test Case 3: in-place on the input, causal_softmax(input, out=input).
            if input_supports_inplace:
                test_cases.append(
                    TestCase(
                        inputs=[input_spec],
                        kwargs={"out": 0},  # index of the aliased input
                        output_spec=None,
                        comparison_target=0,  # compare first input
                        tolerance=tolerance,
                        description="Causal Softmax - INPLACE(input)",
                    )
                )

    return test_cases
class OpTest(BaseOperatorTest):
"""CausalSoftmax test with simplified test case parsing"""
......@@ -77,15 +111,9 @@ class OpTest(BaseOperatorTest):
super().__init__("CausalSoftmax")
def get_test_cases(self):
return _TEST_CASES
return parse_test_cases()
def get_tensor_dtypes(self):
return _TENSOR_DTYPES
def get_tolerance_map(self):
return _TOLERANCE_MAP
def torch_operator(self, input, out=None, **kwargs):
def torch_causal_softmax(self, input, out=None, **kwargs):
# Causal softmax implementation: apply causal mask then softmax
dtype = input.dtype
......@@ -100,8 +128,11 @@ class OpTest(BaseOperatorTest):
return out
return result
def infinicore_operator(self, input, out=None, **kwargs):
return infinicore.causal_softmax(input, out=out)
def torch_operator(self, *args, **kwargs):
return self.torch_causal_softmax(*args, **kwargs)
def infinicore_operator(self, *args, **kwargs):
return infinicore.causal_softmax(*args, **kwargs)
def main():
......
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import torch
import infinicore
from framework.base import BaseOperatorTest, TensorSpec, TestCase
from framework.runner import GenericTestRunner
from framework.utils import is_broadcast
# ==============================================================================
# Operator-specific configuration
# ==============================================================================
# Test cases format: (shape, input_strides, alpha)
# - shape: shape of the ELU input tensor
# - input_strides: explicit strides (None = contiguous; a 0 stride marks a
#   broadcast view, which disables the in-place test variant)
# - alpha: ELU alpha parameter (None = operator default)
_TEST_CASES_DATA = [
    # Basic ELU tests without alpha (default alpha=1.0)
    ((13, 4), None, None),
    ((13, 4), (10, 1), None),
    ((13, 4), (0, 1), None),
    # 3D tensor tests
    ((13, 4, 4), None, None),
    ((13, 4, 4), (20, 4, 1), None),
    ((13, 4, 4), (4, 0, 1), None),
    # Large tensor tests
    ((16, 5632), None, None),
    ((16, 5632), (13312, 1), None),
    # ELU with different alpha values
    ((8, 4), None, 0.5),
    ((8, 4), (10, 1), 0.5),
    ((8, 4), None, 1.5),
    ((8, 4), (10, 1), 1.5),
    ((16, 8), None, 2.0),
    ((16, 8), (20, 1), 2.0),
    ((16, 8), None, 0.3),
    ((16, 8), (20, 1), 0.3),
    ((32, 16), None, 1.0),
    ((32, 16), (40, 1), 1.0),
    ((32, 16), None, 1.8),
    ((32, 16), (40, 1), 1.8),
]
# Tolerance configuration: per-dtype absolute/relative tolerances used when
# comparing InfiniCore results against the PyTorch reference.
_TOLERANCE_MAP = {
    infinicore.float16: {"atol": 1e-3, "rtol": 1e-2},
    infinicore.float32: {"atol": 1e-5, "rtol": 1e-4},
    infinicore.bfloat16: {"atol": 1e-2, "rtol": 5e-2},
}
# Data types to test: every test case row is expanded once per dtype.
_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32]
def parse_test_cases():
    """
    Expand the ELU test-case table into concrete TestCase objects.

    Each row of ``_TEST_CASES_DATA`` has the layout
    ``(shape, input_strides, alpha)``; missing trailing entries default to
    ``None``.  Every row is exercised out-of-place for each dtype, and
    additionally in-place (via PyTorch's ``inplace=True`` keyword) whenever
    the input tensor is not a broadcast view.

    Returns:
        list[TestCase]: the expanded test cases.
    """
    cases = []
    for row in _TEST_CASES_DATA:
        # Pad short rows so shape/strides/alpha always unpack cleanly.
        padded = tuple(row) + (None, None)
        shape, input_strides, alpha = padded[0], padded[1], padded[2]

        # Zero-stride (broadcast) inputs cannot be mutated in place.
        inplace_ok = not is_broadcast(input_strides)

        for dtype in _TENSOR_DTYPES:
            tol = _TOLERANCE_MAP.get(dtype, {"atol": 1e-5, "rtol": 1e-4})
            spec = TensorSpec.from_tensor(shape, input_strides, dtype)

            # Human-readable label for the runner's report.
            label_bits = ["ELU"]
            if alpha is not None:
                label_bits.append(f"alpha={alpha}")
            if input_strides is not None:
                label_bits.append(f"input_strides={input_strides}")
            label = " - ".join(label_bits)

            # Alpha is only forwarded when the row specifies one.
            alpha_kwargs = {} if alpha is None else {"alpha": alpha}

            # Variant 1: out-of-place — the operator returns a fresh tensor.
            cases.append(
                TestCase(
                    inputs=[spec],
                    kwargs=dict(alpha_kwargs),
                    output_spec=None,
                    comparison_target=None,
                    tolerance=tol,
                    description=f"{label} - OUT_OF_PLACE",
                )
            )

            # Variant 2: in-place — torch mutates the input when inplace=True.
            if inplace_ok:
                cases.append(
                    TestCase(
                        inputs=[spec],
                        kwargs={"inplace": True, **alpha_kwargs},
                        output_spec=None,
                        comparison_target=0,  # compare the mutated first input
                        tolerance=tol,
                        description=f"{label} - INPLACE",
                    )
                )
    return cases
class OpTest(BaseOperatorTest):
    """ELU operator test backed by the PyTorch reference implementation."""

    def __init__(self):
        super().__init__("ELU")

    def get_test_cases(self):
        # Test cases are derived on demand from the module-level table.
        return parse_test_cases()

    def torch_operator(self, *args, **kwargs):
        """Reference ELU via torch.nn.functional (supports alpha/inplace)."""
        return torch.nn.functional.elu(*args, **kwargs)

    def infinicore_operator(self, x, alpha=1.0, out=None, **kwargs):
        """InfiniCore ELU implementation.

        NOTE(review): returns None — presumably the framework treats a None
        result as "operator not implemented" and skips the comparison;
        confirm against the runner before relying on this.
        """
        return None
def main():
    """Entry point: run the ELU operator test suite and exit with its status."""
    GenericTestRunner(OpTest).run_and_exit()


if __name__ == "__main__":
    main()
......@@ -7,40 +7,53 @@ import torch
import infinicore
from framework.base import BaseOperatorTest, TensorSpec, TestCase
from framework.runner import GenericTestRunner
from framework.utils import is_broadcast
# ==============================================================================
# Operator-specific configuration
# ==============================================================================
# Test cases format: (operation_mode, nbatch, m, n, k, a_strides, b_strides, c_strides)
# Test cases format: (nbatch, m, n, k, a_strides, b_strides, c_strides)
# If nbatch is None: a_shape=(m, k), b_shape=(k, n), c_shape=(m, n)
# If nbatch is provided: a_shape=(nbatch, m, k), b_shape=(nbatch, k, n), c_shape=(nbatch, m, n)
_TEST_CASES_DATA = [
# Basic 2D matmul
(TestCase.BOTH, None, 2, 4, 3, None, None, None),
(TestCase.BOTH, None, 128, 64, 256, None, None, None),
(None, 2, 4, 3, None, None, None),
(None, 128, 64, 256, None, None, None),
# Batched matmul
(TestCase.BOTH, 2, 4, 2048, 2048, None, None, None),
(TestCase.BOTH, 4, 48, 6, 64, None, None, None),
(2, 4, 2048, 2048, None, None, None),
(4, 48, 6, 64, None, None, None),
# Strided tensors
(TestCase.BOTH, None, 1, 2048, 2048, (4096, 1), (4096, 1), (4096, 1)),
(TestCase.BOTH, None, 6, 2560, 2048, (2048, 1), (1, 2048), (2560, 1)),
(None, 1, 2048, 2048, (4096, 1), (4096, 1), (4096, 1)),
(None, 6, 2560, 2048, (2048, 1), (1, 2048), (2560, 1)),
# Mixed cases
(TestCase.BOTH, 8, 16, 32, 16, None, None, None),
(8, 16, 32, 16, None, None, None),
]
# Tolerance configuration
_TOLERANCE_MAP = {
infinicore.float16: {"atol": 0, "rtol": 1e-2},
infinicore.float32: {"atol": 0, "rtol": 1e-3},
infinicore.bfloat16: {"atol": 0, "rtol": 5e-2},
}
# Data types to test
_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32]
def parse_test_cases(data):
def parse_test_cases():
"""
Parse matmul test case data according to format:
(operation_mode, nbatch, m, n, k, a_strides, b_strides, c_strides)
Parse test case data and return list of TestCase objects for matmul operation.
Each test case contains all necessary information for execution and validation.
"""
operation_mode = data[0]
nbatch = data[1]
m, n, k = data[2], data[3], data[4]
a_strides = data[5] if len(data) > 5 else None
b_strides = data[6] if len(data) > 6 else None
c_strides = data[7] if len(data) > 7 else None
test_cases = []
for data in _TEST_CASES_DATA:
nbatch = data[0]
m, n, k = data[1], data[2], data[3]
a_strides = data[4] if len(data) > 4 else None
b_strides = data[5] if len(data) > 5 else None
c_strides = data[6] if len(data) > 6 else None
# Determine shapes based on batch dimension
if nbatch is None:
......@@ -52,64 +65,62 @@ def parse_test_cases(data):
b_shape = (nbatch, k, n)
c_shape = (nbatch, m, n)
# Create input specifications
inputs = []
# Tensor a
if a_strides is not None:
inputs.append(TensorSpec.from_strided_tensor(a_shape, a_strides))
else:
inputs.append(TensorSpec.from_tensor(a_shape))
# Tensor b
if b_strides is not None:
inputs.append(TensorSpec.from_strided_tensor(b_shape, b_strides))
else:
inputs.append(TensorSpec.from_tensor(b_shape))
# Output tensor
if c_strides is not None:
output = TensorSpec.from_strided_tensor(c_shape, c_strides)
else:
output = TensorSpec.from_tensor(c_shape)
return TestCase(operation_mode, inputs, output)
# Parse test cases
_TEST_CASES = [parse_test_cases(data) for data in _TEST_CASES_DATA]
# Data types
_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32]
# Tolerance
_TOLERANCE_MAP = {
infinicore.float16: {"atol": 0, "rtol": 1e-2},
infinicore.float32: {"atol": 0, "rtol": 1e-3},
infinicore.bfloat16: {"atol": 0, "rtol": 5e-2},
}
# Check if tensors support in-place operations
c_supports_inplace = not is_broadcast(c_strides)
# Generate test cases for all data types
for dtype in _TENSOR_DTYPES:
tolerance = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 1e-3})
# Create typed tensor specs
a_spec = TensorSpec.from_tensor(a_shape, a_strides, dtype)
b_spec = TensorSpec.from_tensor(b_shape, b_strides, dtype)
c_spec = TensorSpec.from_tensor(c_shape, c_strides, dtype)
# Test Case 1: Out-of-place (return value)
test_cases.append(
TestCase(
inputs=[a_spec, b_spec],
kwargs={},
output_spec=None,
comparison_target=None,
tolerance=tolerance,
description=f"Matmul - OUT_OF_PLACE",
)
)
# Test Case 2: In-place with explicit output tensor (matmul(a, b, out=c))
if c_supports_inplace:
test_cases.append(
TestCase(
inputs=[a_spec, b_spec],
kwargs=None,
output_spec=c_spec, # Specify the output tensor spec
comparison_target="out",
tolerance=tolerance,
description=f"Matmul - INPLACE(out)",
)
)
return test_cases
class OpTest(BaseOperatorTest):
"""Matmul test with simplified test case parsing"""
"""Matmul operator test with simplified implementation"""
def __init__(self):
super().__init__("Matmul")
def get_test_cases(self):
return _TEST_CASES
def get_tensor_dtypes(self):
return _TENSOR_DTYPES
def get_tolerance_map(self):
return _TOLERANCE_MAP
return parse_test_cases()
def torch_operator(self, a, b, out=None, **kwargs):
return torch.matmul(a, b, out=out)
def torch_operator(self, *args, **kwargs):
"""PyTorch matmul implementation"""
return torch.matmul(*args, **kwargs)
def infinicore_operator(self, a, b, out=None, **kwargs):
return infinicore.matmul(a, b, out=out)
def infinicore_operator(self, *args, **kwargs):
"""InfiniCore matmul implementation"""
return infinicore.matmul(*args, **kwargs)
def main():
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment