Commit 9a4fb9bb authored by baominghelly's avatar baominghelly
Browse files

Merge from main && resolve conflict && format code

parents 32bd2f82 0ead67fc
......@@ -168,6 +168,19 @@ public:
/// View APIs
///
/**
* Returns a new tensor with a dimension of size one removed at the specified position.
* Throws runtime_error if the dimension to be removed is not of size 1.
*
* @param dim The dimension index to remove
* @return A new tensor with the removed dimension
*
* Example:
* // For a 3D tensor with shape [1, 3, 4], squeeze at dim 0 results in shape [3, 4]
* tensor->squeeze(0);
*/
Tensor squeeze(size_t dim) const;
/**
* Returns a new tensor with a dimension of size one inserted at the specified position.
* The returned tensor shares the same underlying storage with the original tensor.
......
......@@ -45,6 +45,8 @@ from infinicore.ops.matmul import matmul
from infinicore.ops.mul import mul
from infinicore.ops.narrow import narrow
from infinicore.ops.rearrange import rearrange
from infinicore.ops.squeeze import squeeze
from infinicore.ops.unsqueeze import unsqueeze
from infinicore.tensor import (
Tensor,
empty,
......@@ -104,6 +106,8 @@ __all__ = [
"matmul",
"mul",
"narrow",
"squeeze",
"unsqueeze",
"rearrange",
"empty",
"empty_like",
......
......@@ -20,16 +20,6 @@ def rope(
) -> Tensor:
r"""Rotary Position Embedding(RoPE)."""
bs, seq_len, num_heads, head_dim = x.shape
x_stride = x.stride()
assert seq_len * x_stride[1] == x_stride[0], (
"x need to be continuous in dim=0 and dim=1"
)
x = x.view((bs * seq_len, num_heads, head_dim))
bs, num = pos_ids.shape
pos_ids = pos_ids.view((bs * num,))
if out is None:
return Tensor(
_infinicore.rope(
......@@ -39,9 +29,8 @@ def rope(
cos_table._underlying,
algo,
)
).view((bs, seq_len, num_heads, head_dim))
)
out = out.view((bs * seq_len, num_heads, head_dim))
_infinicore.rope_(
out._underlying,
x._underlying,
......@@ -50,4 +39,4 @@ def rope(
cos_table._underlying,
algo,
)
return out.view((bs, seq_len, num_heads, head_dim))
return out
from infinicore.tensor import Tensor
def squeeze(input: Tensor, dim: int) -> Tensor:
    """Return a view of ``input`` with the size-one dimension at ``dim`` removed.

    Thin functional wrapper around the underlying tensor's ``squeeze``;
    the result shares storage with ``input``.
    """
    squeezed = input._underlying.squeeze(dim)
    return Tensor(squeezed)
from infinicore.tensor import Tensor
def unsqueeze(input: Tensor, dim: int) -> Tensor:
    """Return a view of ``input`` with a size-one dimension inserted at ``dim``.

    Thin functional wrapper around the underlying tensor's ``unsqueeze``;
    the result shares storage with ``input``.
    """
    unsqueezed = input._underlying.unsqueeze(dim)
    return Tensor(unsqueezed)
......@@ -92,6 +92,12 @@ class Tensor:
def view(self, shape):
    """Return a tensor viewing the same data with the requested ``shape``."""
    reshaped = self._underlying.view(shape)
    return Tensor(reshaped)
def squeeze(self, dim):
    """Remove the size-one dimension at ``dim``; delegates to ``infinicore.squeeze``."""
    result = infinicore.squeeze(self, dim)
    return result
def unsqueeze(self, dim):
    """Insert a size-one dimension at ``dim``; delegates to ``infinicore.unsqueeze``."""
    result = infinicore.unsqueeze(self, dim)
    return result
def debug(self, filename=None):
"""Print tensor data or save to file for debugging
......
......@@ -16,25 +16,27 @@ inline void bind(py::module &m) {
.def_property_readonly("ndim", [](const Tensor &tensor) { return tensor->ndim(); })
.def_property_readonly("dtype", [](const Tensor &tensor) { return tensor->dtype(); })
.def_property_readonly("device", [](const Tensor &tensor) { return tensor->device(); })
.def("data_ptr", [](const Tensor &tensor) { return reinterpret_cast<std::uintptr_t>(tensor->data()); })
.def("size", [](const Tensor &tensor, std::size_t dim) { return tensor->size(dim); })
.def("stride", [](const Tensor &tensor, std::size_t dim) { return tensor->stride(dim); })
.def("numel", [](const Tensor &tensor) { return tensor->numel(); })
.def("is_contiguous", [](const Tensor &tensor) { return tensor->is_contiguous(); })
.def("is_pinned", [](const Tensor &tensor) { return tensor->is_pinned(); })
.def("info", [](const Tensor &tensor) { return tensor->info(); })
.def("debug", [](const Tensor &tensor) { return tensor->debug(); })
.def("debug", [](const Tensor &tensor, const std::string &filename) { return tensor->debug(filename); })
.def("copy_", [](Tensor &tensor, const Tensor &other) { tensor->copy_from(other); })
.def("to", [](const Tensor &tensor, const Device &device) { return tensor->to(device); })
.def("as_strided", [](const Tensor &tensor, const Shape &shape, const Strides &strides) { return tensor->as_strided(shape, strides); })
.def("contiguous", [](const Tensor &tensor) { return tensor->contiguous(); })
.def("as_strided", [](const Tensor &tensor, const Shape &shape, const Strides &strides) { return tensor->as_strided(shape, strides); })
.def("narrow", [](const Tensor &tensor, std::size_t dim, std::size_t start, std::size_t length) { return tensor->narrow({{dim, start, length}}); })
.def("permute", [](const Tensor &tensor, const Shape &dims) { return tensor->permute(dims); })
.def("view", [](const Tensor &tensor, const Shape &shape) { return tensor->view(shape); });
.def("view", [](const Tensor &tensor, const Shape &shape) { return tensor->view(shape); })
.def("unsqueeze", [](const Tensor &tensor, std::size_t dim) { return tensor->unsqueeze(dim); })
.def("squeeze", [](const Tensor &tensor, std::size_t dim) { return tensor->squeeze(dim); });
m.def("empty", &Tensor::empty,
py::arg("shape"),
......
......@@ -6,6 +6,23 @@
#include <stdexcept>
namespace infinicore {
Tensor TensorImpl::squeeze(size_t dim) const {
    // Return a view with the size-one dimension at `dim` removed.
    // The result shares the underlying storage with this tensor (no copy).
    //
    // Guard against an out-of-range index before touching meta_.shape[dim]:
    // vector::operator[] on an invalid index is undefined behavior.
    if (dim >= meta_.shape.size()) {
        spdlog::error("Dimension index {} is out of range for squeeze operation on {}.", dim, this->info());
        throw std::runtime_error("Invalid squeeze operation on tensor.");
    }
    if (meta_.shape[dim] != 1) {
        spdlog::error("Dimension {} is not of size 1 for squeeze operation on {}.", dim, this->info());
        throw std::runtime_error("Invalid squeeze operation on tensor.");
    }
    // Drop the dimension from both shape and strides; the remaining strides
    // are unchanged, so the new tensor aliases the same memory layout.
    Shape new_shape = meta_.shape;
    new_shape.erase(new_shape.begin() + dim);
    Strides new_strides = meta_.strides;
    new_strides.erase(new_strides.begin() + dim);
    auto tensor_impl = std::make_shared<TensorImpl>(new_shape, new_strides, meta_.dtype);
    tensor_impl->data_ = data_; // share storage — view semantics
    return Tensor(tensor_impl);
}
Tensor TensorImpl::unsqueeze(size_t dim) const {
// Create new shape with dimension of size one inserted at dim
Shape new_shape = meta_.shape;
......
#ifndef INFINIUTILS_H
#define INFINIUTILS_H
#include "infinicore.h"
#include "utils/custom_types.h"
#include "utils/rearrange.h"
......
......@@ -3,8 +3,19 @@
#include <iostream>
#include <tuple>
#include "../utils.h"
#include "infini_status_string.h"
// CHECK_OR_DO(CONDITION, ACTION): generic check helper.
// If CONDITION evaluates to false, prints a diagnostic to stderr
// (the stringified condition, the enclosing function, and file:line)
// and then executes ACTION in the caller's scope — typically a
// `return <status>;` so the enclosing function bails out.
#define CHECK_OR_DO(CONDITION, ACTION) \
    do { \
        if (!(CONDITION)) { \
            std::cerr << "Check Failed: `(" << #CONDITION << ")` is False" \
                      << " from " << __func__ \
                      << " at " << __FILE__ << ":" << __LINE__ << std::endl; \
            { ACTION; } \
        } \
    } while (0)
#define CHECK_OR_RETURN(CONDITION, ERROR) \
do { \
if (!(CONDITION)) { \
......@@ -33,17 +44,19 @@
std::cerr << "Error: " << infini_status_string(api_result_) << std::endl; \
return api_result_)
#define CHECK_DTYPE(DT, ...) \
do { \
auto found_supported_dtype = false; \
for (auto dt : {__VA_ARGS__}) { \
if (dt == DT) { \
found_supported_dtype = true; \
break; \
} \
} \
CHECK_API_OR(found_supported_dtype, true, \
return INFINI_STATUS_BAD_TENSOR_DTYPE); \
// CHECK_DTYPE(DT, ...): verifies that dtype DT is one of the supported
// dtypes listed in the variadic arguments. On failure it prints the
// offending dtype name to stderr and makes the enclosing function
// return INFINI_STATUS_BAD_TENSOR_DTYPE (via CHECK_OR_DO's ACTION).
#define CHECK_DTYPE(DT, ...) \
    do { \
        auto dtype_is_supported = false; \
        for (auto dt : {__VA_ARGS__}) { \
            if (dt == DT) { \
                dtype_is_supported = true; \
                break; \
            } \
        } \
        CHECK_OR_DO(dtype_is_supported, \
                    { std::cerr << "Unsupported dtype: " << \
                        infiniDtypeToString(DT) << ". "; \
                      return INFINI_STATUS_BAD_TENSOR_DTYPE; }); \
    } while (0)
#define CHECK_DTYPE_ANY_INT(DT) \
......
......@@ -4,6 +4,7 @@ from .devices import InfiniDeviceEnum
from .tensor import TensorSpec
from .utils.json_utils import save_json_report
@dataclass
class CaseResult:
"""Test case result data structure"""
......@@ -63,6 +64,7 @@ class OperatorResult:
return "PARTIAL"
return "FAILED"
class TestSummary:
"""
Test Summary class:
......
......@@ -231,7 +231,6 @@ def print_discrepancy(
import sys
is_terminal = sys.stdout.isatty()
actual_isnan = torch.isnan(actual)
expected_isnan = torch.isnan(expected)
......
......@@ -62,7 +62,9 @@ def save_json_report(save_path, total_results):
f, c_key, c_val, I16, I20, close_comma=c_comma
)
else:
f.write(f'{I16}"{c_key}": {_to_json(c_val)}{c_comma}\n')
f.write(
f'{I16}"{c_key}": {_to_json(c_val)}{c_comma}\n'
)
# Handle trailing comparison/tolerance fields uniformly
if "comparison_target" in case_item:
......
......@@ -114,7 +114,7 @@ def rearrange_tensor(tensor, new_strides):
new_positions += offset
# Copy the original data to the new tensor
new_tensor.view(-1).index_add_(0, new_positions, tensor.view(-1))
new_tensor.reshape(-1).index_add_(0, new_positions, tensor.reshape(-1))
new_tensor.set_(new_tensor.untyped_storage(), offset, shape, tuple(new_strides))
return new_tensor
......
......@@ -22,11 +22,121 @@ import infinicore
# RoPE test cases. Each entry is a 7-tuple:
#   (bs, seq_len, num, head_dim, src strides, dst strides, Algo)
# `None` strides mean a contiguous tensor. The stale 5-tuple entries from
# the pre-strides format are removed: parse_test_cases() indexes data[5]
# and data[6], so mixed-arity tuples would raise IndexError.
_TEST_CASES_DATA = [
    # Contiguous cases
    (1, 1, 1, 64, None, None, RopeAlgo.GPT_NEOX),
    (1, 5, 32, 64, None, None, RopeAlgo.GPT_NEOX),
    (1, 1, 1, 128, None, None, RopeAlgo.GPT_J),
    (1, 10, 1, 64, None, None, RopeAlgo.GPT_J),
    (2, 20, 16, 128, None, None, RopeAlgo.GPT_NEOX),
    (4, 50, 32, 256, None, None, RopeAlgo.GPT_J),
    # Explicitly-strided cases (padded / non-contiguous layouts)
    (2, 20, 16, 128, (655360, 8192, 256, 1), (655360, 8192, 256, 1), RopeAlgo.GPT_NEOX),
    (2, 20, 16, 128, (655360, 8192, 256, 1), (655360, 8192, 256, 1), RopeAlgo.GPT_J),
    (4, 50, 32, 8, (204800, 1024, 16, 1), (460800, 1536, 24, 1), RopeAlgo.GPT_NEOX),
    (4, 50, 32, 8, (204800, 1024, 16, 1), (460800, 1536, 24, 1), RopeAlgo.GPT_J),
    (32, 64, 8, 128, (1048576, 4096, 256, 1), (1048576, 4096, 256, 1), RopeAlgo.GPT_NEOX),
    (32, 64, 8, 128, (1048576, 4096, 256, 1), (1048576, 4096, 256, 1), RopeAlgo.GPT_J),
    (64, 17, 32, 64, (557056, 8192, 128, 1), (1253376, 12288, 192, 1), RopeAlgo.GPT_NEOX),
    (64, 17, 32, 64, (557056, 8192, 128, 1), (1253376, 12288, 192, 1), RopeAlgo.GPT_J),
    # Permuted-layout cases (dim-1 stride smaller than dim-2 stride)
    (8, 20, 4, 64, (1048576, 64, 262144, 1), (1048576, 64, 262144, 1), RopeAlgo.GPT_NEOX),
    (8, 20, 4, 64, (1048576, 64, 262144, 1), (1048576, 64, 262144, 1), RopeAlgo.GPT_J),
    (8, 20, 32, 64, (40960, 64, 1280, 1), (40960, 64, 1280, 1), RopeAlgo.GPT_NEOX),
    (8, 20, 32, 64, (40960, 64, 1280, 1), (40960, 64, 1280, 1), RopeAlgo.GPT_J),
]
# Tolerance configuration
......@@ -49,7 +159,8 @@ def parse_test_cases():
for data in _TEST_CASES_DATA:
bs, seq_len, num, head_dim = data[0], data[1], data[2], data[3]
algo = data[4]
src_strides, dst_strides = data[4], data[5]
algo = data[6]
# Determine shapes based on batch dimension
out_shape = (bs, seq_len, num, head_dim)
......@@ -58,15 +169,16 @@ def parse_test_cases():
cos_table_shape = (seq_len, head_dim // 2)
# Check if tensors support in-place operations
c_supports_inplace = not is_broadcast(out_shape)
# x tensor supports in-place if it's not a broadcasted tensor
x_supports_inplace = not is_broadcast(src_strides)
# Generate test cases for all data types
for dtype in _TENSOR_DTYPES:
tolerance = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 1e-3})
# Create typed tensor specs
out_spec = TensorSpec.from_tensor(out_shape, None, dtype)
x_spec = TensorSpec.from_tensor(x_shape, None, dtype)
out_spec = TensorSpec.from_tensor(out_shape, dst_strides, dtype)
x_spec = TensorSpec.from_tensor(x_shape, src_strides, dtype)
sin_table_spec = TensorSpec.from_tensor(sin_table_shape, None, dtype)
cos_table_spec = TensorSpec.from_tensor(cos_table_shape, None, dtype)
......@@ -83,7 +195,7 @@ def parse_test_cases():
)
# Test Case 2: In-place with explicit output tensor
if c_supports_inplace:
if dst_strides is None or not is_broadcast(dst_strides):
test_cases.append(
TestCase(
inputs=[x_spec, sin_table_spec, cos_table_spec],
......@@ -95,6 +207,19 @@ def parse_test_cases():
)
)
# Test Case 3: In-place on input tensor (x)
if x_supports_inplace:
test_cases.append(
TestCase(
inputs=[x_spec, sin_table_spec, cos_table_spec],
kwargs={"algo": algo, "out": 0}, # Use index 0 for first input
output_spec=None,
comparison_target=0, # Compare first input (x tensor)
tolerance=tolerance,
description=f"Rope - INPLACE(x)",
)
)
return test_cases
......@@ -107,15 +232,22 @@ def rotary_embedding(t, sin, cos, algo, *, out=None):
return t_out_1, t_out_2
ans = t.clone()
# If out parameter is provided and it's the same as input t, operate in-place
if out is not None:
if out.data_ptr() == t.data_ptr():
ans = t # Use the same tensor for in-place operation
else:
ans = out # Use provided output tensor
else:
ans = t.clone()
dh = t.shape[-1]
dt = t.dtype
assert dh % 2 == 0, "Embedding dimension must be even."
if RopeAlgo.GPT_J == algo:
t_even = t[..., 0::2] # [seq_len, n_head, dh // 2]
t_odd = t[..., 1::2] # [seq_len, n_head, dh // 2]
t_even = t[..., 0::2] # [bs, seq_len, n_head, dh // 2]
t_odd = t[..., 1::2] # [bs, seq_len, n_head, dh // 2]
t_out_even, t_out_odd = _torch_rope(sin, cos, t_even, t_odd)
......@@ -131,9 +263,10 @@ def rotary_embedding(t, sin, cos, algo, *, out=None):
ans[..., :half_dim] = t_out_first.to(dt)
ans[..., half_dim:] = t_out_second.to(dt)
else:
raise KeyError("error Algo ")
raise KeyError("Unsupported RoPE algorithm")
if out is not None:
# If operating in-place on t, we don't need to copy back
if out is not None and out.data_ptr() != t.data_ptr():
out.copy_(ans)
return out
return ans
......
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import torch
import infinicore
from framework.base import BaseOperatorTest, TensorSpec, TestCase
from framework.runner import GenericTestRunner
from framework.utils import is_broadcast
# ==============================================================================
# Operator-specific configuration
# ==============================================================================
# Test cases format: (shape, strides, dim)
# `strides=None` means a contiguous tensor; explicit strides exercise
# non-contiguous layouts (including a zero stride in one case).
_TEST_CASES_DATA = [
    # Basic contiguous cases
    ((1, 1, 1), None, 1),
    ((1, 1, 1), None, 0),
    ((1, 2, 4), None, 0),
    # Strided cases: (4, 0, 1) has a zero stride on a non-squeezed dim
    ((2, 1, 4), (4, 0, 1), 1),
    ((1, 4, 1, 32), (32, 32, 32, 1), 2),
]
# Tolerance configuration — zero tolerance: squeeze must not change values,
# so output is expected to match the reference exactly.
_TOLERANCE_MAP = {
    infinicore.float16: {"atol": 0, "rtol": 0},
    infinicore.float32: {"atol": 0, "rtol": 0},
    infinicore.bfloat16: {"atol": 0, "rtol": 0},
}
# Data types to test
_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32]
def parse_test_cases():
    """Expand the (shape, strides, dim) table into TestCase objects.

    One TestCase is produced per table row and per dtype in
    ``_TENSOR_DTYPES``; the framework compares the operator output
    against the reference under the per-dtype tolerance.
    """
    cases = []
    for shape, strides, dim in _TEST_CASES_DATA:
        for dtype in _TENSOR_DTYPES:
            tol = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 0})
            spec = TensorSpec.from_tensor(shape, strides, dtype)
            cases.append(
                TestCase(
                    inputs=[spec, dim],
                    kwargs={},
                    output_spec=None,
                    comparison_target=None,  # compare the returned output
                    tolerance=tol,
                    description="squeeze",
                )
            )
    return cases
class OpTest(BaseOperatorTest):
    """Squeeze operator test: runs infinicore.squeeze against torch.squeeze."""

    def __init__(self):
        # Operator name used by the framework for registration/reporting.
        super().__init__("squeeze")

    def get_test_cases(self):
        # All (shape, strides, dim) x dtype combinations from the tables above.
        return parse_test_cases()

    def torch_operator(self, *args, **kwargs):
        """PyTorch reference implementation."""
        return torch.squeeze(*args, **kwargs)

    def infinicore_operator(self, *args, **kwargs):
        """InfiniCore implementation under test."""
        return infinicore.squeeze(*args, **kwargs)
def main():
    """Entry point: run the squeeze test suite and exit with its status."""
    GenericTestRunner(OpTest).run_and_exit()


if __name__ == "__main__":
    main()
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import torch
import infinicore
from framework.base import BaseOperatorTest, TensorSpec, TestCase
from framework.runner import GenericTestRunner
from framework.utils import is_broadcast
# ==============================================================================
# Operator-specific configuration
# ==============================================================================
# Test cases format: (shape, strides, dim)
# `strides=None` means a contiguous tensor; explicit strides exercise
# non-contiguous layouts (including a zero stride in one case).
_TEST_CASES_DATA = [
    # Basic contiguous cases
    ((1, 1, 1), None, 1),
    ((1, 1, 1), None, 0),
    ((1, 2, 4), None, 0),
    # Strided cases: (4, 0, 1) has a zero stride on one dim
    ((2, 1, 4), (4, 0, 1), 1),
    ((1, 4, 1, 32), (32, 32, 32, 1), 2),
]
# Tolerance configuration — zero tolerance: unsqueeze must not change values,
# so output is expected to match the reference exactly.
_TOLERANCE_MAP = {
    infinicore.float16: {"atol": 0, "rtol": 0},
    infinicore.float32: {"atol": 0, "rtol": 0},
    infinicore.bfloat16: {"atol": 0, "rtol": 0},
}
# Data types to test
_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32]
def parse_test_cases():
    """Expand the (shape, strides, dim) table into TestCase objects.

    One TestCase is produced per table row and per dtype in
    ``_TENSOR_DTYPES``; the framework compares the operator output
    against the reference under the per-dtype tolerance.
    """
    cases = []
    for shape, strides, dim in _TEST_CASES_DATA:
        for dtype in _TENSOR_DTYPES:
            tol = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 0})
            spec = TensorSpec.from_tensor(shape, strides, dtype)
            cases.append(
                TestCase(
                    inputs=[spec, dim],
                    kwargs={},
                    output_spec=None,
                    comparison_target=None,  # compare the returned output
                    tolerance=tol,
                    description="unsqueeze",
                )
            )
    return cases
class OpTest(BaseOperatorTest):
    """Unsqueeze operator test: runs infinicore.unsqueeze against torch.unsqueeze."""

    def __init__(self):
        # Operator name used by the framework for registration/reporting.
        super().__init__("unsqueeze")

    def get_test_cases(self):
        # All (shape, strides, dim) x dtype combinations from the tables above.
        return parse_test_cases()

    def torch_operator(self, *args, **kwargs):
        """PyTorch reference implementation."""
        return torch.unsqueeze(*args, **kwargs)

    def infinicore_operator(self, *args, **kwargs):
        """InfiniCore implementation under test."""
        return infinicore.unsqueeze(*args, **kwargs)
def main():
    """Entry point: run the unsqueeze test suite and exit with its status."""
    GenericTestRunner(OpTest).run_and_exit()


if __name__ == "__main__":
    main()
......@@ -296,7 +296,7 @@ def rearrange_tensor(tensor, new_strides):
left = 0
right = 0
for i in range(len(shape)):
if new_strides[i] > 0:
if new_strides[i] >= 0:
new_size[i] = (shape[i] - 1) * new_strides[i] + 1
right += new_strides[i] * (shape[i] - 1)
else: # TODO: Support negative strides in the future
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment