Commit 9a4fb9bb authored by baominghelly's avatar baominghelly
Browse files

Merge from main && resolve conflict && format code

parents 32bd2f82 0ead67fc
......@@ -168,6 +168,19 @@ public:
/// View APIs
///
/**
* Returns a new tensor with a dimension of size one removed at the specified position.
* Throws runtime_error if the dimension to be removed is not of size 1.
*
* @param dim The dimension index to remove
* @return A new tensor with the removed dimension
*
* Example:
* // For a 3D tensor with shape [1, 3, 4], squeeze at dim 0 results in shape [3, 4]
* tensor->squeeze(0);
*/
Tensor squeeze(size_t dim) const;
/**
* Returns a new tensor with a dimension of size one inserted at the specified position.
* The returned tensor shares the same underlying storage with the original tensor.
......
......@@ -45,6 +45,8 @@ from infinicore.ops.matmul import matmul
from infinicore.ops.mul import mul
from infinicore.ops.narrow import narrow
from infinicore.ops.rearrange import rearrange
from infinicore.ops.squeeze import squeeze
from infinicore.ops.unsqueeze import unsqueeze
from infinicore.tensor import (
Tensor,
empty,
......@@ -104,6 +106,8 @@ __all__ = [
"matmul",
"mul",
"narrow",
"squeeze",
"unsqueeze",
"rearrange",
"empty",
"empty_like",
......
......@@ -20,16 +20,6 @@ def rope(
) -> Tensor:
r"""Rotary Position Embedding(RoPE)."""
bs, seq_len, num_heads, head_dim = x.shape
x_stride = x.stride()
assert seq_len * x_stride[1] == x_stride[0], (
"x need to be continuous in dim=0 and dim=1"
)
x = x.view((bs * seq_len, num_heads, head_dim))
bs, num = pos_ids.shape
pos_ids = pos_ids.view((bs * num,))
if out is None:
return Tensor(
_infinicore.rope(
......@@ -39,9 +29,8 @@ def rope(
cos_table._underlying,
algo,
)
).view((bs, seq_len, num_heads, head_dim))
)
out = out.view((bs * seq_len, num_heads, head_dim))
_infinicore.rope_(
out._underlying,
x._underlying,
......@@ -50,4 +39,4 @@ def rope(
cos_table._underlying,
algo,
)
return out.view((bs, seq_len, num_heads, head_dim))
return out
from infinicore.tensor import Tensor
def squeeze(input: Tensor, dim: int) -> Tensor:
    """Return a view of ``input`` with the size-one dimension at ``dim`` removed.

    Thin functional wrapper around the underlying tensor's ``squeeze``;
    the result shares storage with ``input``.
    """
    squeezed = input._underlying.squeeze(dim)
    return Tensor(squeezed)
from infinicore.tensor import Tensor
def unsqueeze(input: Tensor, dim: int) -> Tensor:
    """Return a view of ``input`` with a size-one dimension inserted at ``dim``.

    Thin functional wrapper around the underlying tensor's ``unsqueeze``;
    the result shares storage with ``input``.
    """
    unsqueezed = input._underlying.unsqueeze(dim)
    return Tensor(unsqueezed)
......@@ -92,6 +92,12 @@ class Tensor:
def view(self, shape):
    """Return a tensor viewing the same data with the requested ``shape``."""
    reshaped = self._underlying.view(shape)
    return Tensor(reshaped)
def squeeze(self, dim):
    """Remove the size-one dimension at ``dim``; delegates to ``infinicore.squeeze``."""
    result = infinicore.squeeze(self, dim)
    return result
def unsqueeze(self, dim):
    """Insert a size-one dimension at ``dim``; delegates to ``infinicore.unsqueeze``."""
    result = infinicore.unsqueeze(self, dim)
    return result
def debug(self, filename=None):
"""Print tensor data or save to file for debugging
......
......@@ -16,25 +16,27 @@ inline void bind(py::module &m) {
.def_property_readonly("ndim", [](const Tensor &tensor) { return tensor->ndim(); })
.def_property_readonly("dtype", [](const Tensor &tensor) { return tensor->dtype(); })
.def_property_readonly("device", [](const Tensor &tensor) { return tensor->device(); })
.def("data_ptr", [](const Tensor &tensor) { return reinterpret_cast<std::uintptr_t>(tensor->data()); })
.def("size", [](const Tensor &tensor, std::size_t dim) { return tensor->size(dim); })
.def("stride", [](const Tensor &tensor, std::size_t dim) { return tensor->stride(dim); })
.def("numel", [](const Tensor &tensor) { return tensor->numel(); })
.def("is_contiguous", [](const Tensor &tensor) { return tensor->is_contiguous(); })
.def("is_pinned", [](const Tensor &tensor) { return tensor->is_pinned(); })
.def("info", [](const Tensor &tensor) { return tensor->info(); })
.def("debug", [](const Tensor &tensor) { return tensor->debug(); })
.def("debug", [](const Tensor &tensor, const std::string &filename) { return tensor->debug(filename); })
.def("copy_", [](Tensor &tensor, const Tensor &other) { tensor->copy_from(other); })
.def("to", [](const Tensor &tensor, const Device &device) { return tensor->to(device); })
.def("as_strided", [](const Tensor &tensor, const Shape &shape, const Strides &strides) { return tensor->as_strided(shape, strides); })
.def("contiguous", [](const Tensor &tensor) { return tensor->contiguous(); })
.def("as_strided", [](const Tensor &tensor, const Shape &shape, const Strides &strides) { return tensor->as_strided(shape, strides); })
.def("narrow", [](const Tensor &tensor, std::size_t dim, std::size_t start, std::size_t length) { return tensor->narrow({{dim, start, length}}); })
.def("permute", [](const Tensor &tensor, const Shape &dims) { return tensor->permute(dims); })
.def("view", [](const Tensor &tensor, const Shape &shape) { return tensor->view(shape); });
.def("view", [](const Tensor &tensor, const Shape &shape) { return tensor->view(shape); })
.def("unsqueeze", [](const Tensor &tensor, std::size_t dim) { return tensor->unsqueeze(dim); })
.def("squeeze", [](const Tensor &tensor, std::size_t dim) { return tensor->squeeze(dim); });
m.def("empty", &Tensor::empty,
py::arg("shape"),
......
......@@ -6,6 +6,23 @@
#include <stdexcept>
namespace infinicore {
Tensor TensorImpl::squeeze(size_t dim) const {
    // Return a view with the size-one dimension at `dim` removed.
    // The result shares the underlying storage with this tensor (no copy).
    //
    // Guard against an out-of-range index before touching meta_.shape[dim]:
    // vector::operator[] on an invalid index is undefined behavior.
    if (dim >= meta_.shape.size()) {
        spdlog::error("Dimension index {} is out of range for squeeze operation on {}.", dim, this->info());
        throw std::runtime_error("Invalid squeeze operation on tensor.");
    }
    if (meta_.shape[dim] != 1) {
        spdlog::error("Dimension {} is not of size 1 for squeeze operation on {}.", dim, this->info());
        throw std::runtime_error("Invalid squeeze operation on tensor.");
    }
    // Drop the dimension from both shape and strides; the remaining strides
    // are unchanged, so the new tensor aliases the same memory layout.
    Shape new_shape = meta_.shape;
    new_shape.erase(new_shape.begin() + dim);
    Strides new_strides = meta_.strides;
    new_strides.erase(new_strides.begin() + dim);
    auto tensor_impl = std::make_shared<TensorImpl>(new_shape, new_strides, meta_.dtype);
    tensor_impl->data_ = data_; // share storage — view semantics
    return Tensor(tensor_impl);
}
Tensor TensorImpl::unsqueeze(size_t dim) const {
// Create new shape with dimension of size one inserted at dim
Shape new_shape = meta_.shape;
......
#ifndef INFINIUTILS_H
#define INFINIUTILS_H
#include "infinicore.h"
#include "utils/custom_types.h"
#include "utils/rearrange.h"
......
......@@ -3,8 +3,19 @@
#include <iostream>
#include <tuple>
#include "../utils.h"
#include "infini_status_string.h"
// CHECK_OR_DO(CONDITION, ACTION): generic check helper.
// If CONDITION evaluates to false, prints a diagnostic to stderr
// (the stringified condition, the enclosing function, and file:line)
// and then executes ACTION in the caller's scope — typically a
// `return <status>;` so the enclosing function bails out.
#define CHECK_OR_DO(CONDITION, ACTION) \
    do { \
        if (!(CONDITION)) { \
            std::cerr << "Check Failed: `(" << #CONDITION << ")` is False" \
                      << " from " << __func__ \
                      << " at " << __FILE__ << ":" << __LINE__ << std::endl; \
            { ACTION; } \
        } \
    } while (0)
#define CHECK_OR_RETURN(CONDITION, ERROR) \
do { \
if (!(CONDITION)) { \
......@@ -33,17 +44,19 @@
std::cerr << "Error: " << infini_status_string(api_result_) << std::endl; \
return api_result_)
#define CHECK_DTYPE(DT, ...) \
do { \
auto found_supported_dtype = false; \
for (auto dt : {__VA_ARGS__}) { \
if (dt == DT) { \
found_supported_dtype = true; \
break; \
} \
} \
CHECK_API_OR(found_supported_dtype, true, \
return INFINI_STATUS_BAD_TENSOR_DTYPE); \
// CHECK_DTYPE(DT, ...): verifies that dtype DT is one of the supported
// dtypes listed in the variadic arguments. On failure it prints the
// offending dtype name to stderr and makes the enclosing function
// return INFINI_STATUS_BAD_TENSOR_DTYPE (via CHECK_OR_DO's ACTION).
#define CHECK_DTYPE(DT, ...) \
    do { \
        auto dtype_is_supported = false; \
        for (auto dt : {__VA_ARGS__}) { \
            if (dt == DT) { \
                dtype_is_supported = true; \
                break; \
            } \
        } \
        CHECK_OR_DO(dtype_is_supported, \
                    { std::cerr << "Unsupported dtype: " << \
                        infiniDtypeToString(DT) << ". "; \
                      return INFINI_STATUS_BAD_TENSOR_DTYPE; }); \
    } while (0)
#define CHECK_DTYPE_ANY_INT(DT) \
......
......@@ -4,6 +4,7 @@ from .devices import InfiniDeviceEnum
from .tensor import TensorSpec
from .utils.json_utils import save_json_report
@dataclass
class CaseResult:
"""Test case result data structure"""
......@@ -63,6 +64,7 @@ class OperatorResult:
return "PARTIAL"
return "FAILED"
class TestSummary:
"""
Test Summary class:
......
......@@ -231,7 +231,6 @@ def print_discrepancy(
import sys
is_terminal = sys.stdout.isatty()
actual_isnan = torch.isnan(actual)
expected_isnan = torch.isnan(expected)
......
......@@ -62,7 +62,9 @@ def save_json_report(save_path, total_results):
f, c_key, c_val, I16, I20, close_comma=c_comma
)
else:
f.write(f'{I16}"{c_key}": {_to_json(c_val)}{c_comma}\n')
f.write(
f'{I16}"{c_key}": {_to_json(c_val)}{c_comma}\n'
)
# Handle trailing comparison/tolerance fields uniformly
if "comparison_target" in case_item:
......
......@@ -114,7 +114,7 @@ def rearrange_tensor(tensor, new_strides):
new_positions += offset
# Copy the original data to the new tensor
new_tensor.view(-1).index_add_(0, new_positions, tensor.view(-1))
new_tensor.reshape(-1).index_add_(0, new_positions, tensor.reshape(-1))
new_tensor.set_(new_tensor.untyped_storage(), offset, shape, tuple(new_strides))
return new_tensor
......
......@@ -22,11 +22,121 @@ import infinicore
# RoPE test cases. Each entry is a 7-tuple:
#   (bs, seq_len, num, head_dim, src strides, dst strides, Algo)
# `None` strides mean a contiguous tensor. The stale 5-tuple entries from
# the pre-strides format are removed: parse_test_cases() indexes data[5]
# and data[6], so mixed-arity tuples would raise IndexError.
_TEST_CASES_DATA = [
    # Contiguous cases
    (1, 1, 1, 64, None, None, RopeAlgo.GPT_NEOX),
    (1, 5, 32, 64, None, None, RopeAlgo.GPT_NEOX),
    (1, 1, 1, 128, None, None, RopeAlgo.GPT_J),
    (1, 10, 1, 64, None, None, RopeAlgo.GPT_J),
    (2, 20, 16, 128, None, None, RopeAlgo.GPT_NEOX),
    (4, 50, 32, 256, None, None, RopeAlgo.GPT_J),
    # Explicitly-strided cases (padded / non-contiguous layouts)
    (2, 20, 16, 128, (655360, 8192, 256, 1), (655360, 8192, 256, 1), RopeAlgo.GPT_NEOX),
    (2, 20, 16, 128, (655360, 8192, 256, 1), (655360, 8192, 256, 1), RopeAlgo.GPT_J),
    (4, 50, 32, 8, (204800, 1024, 16, 1), (460800, 1536, 24, 1), RopeAlgo.GPT_NEOX),
    (4, 50, 32, 8, (204800, 1024, 16, 1), (460800, 1536, 24, 1), RopeAlgo.GPT_J),
    (32, 64, 8, 128, (1048576, 4096, 256, 1), (1048576, 4096, 256, 1), RopeAlgo.GPT_NEOX),
    (32, 64, 8, 128, (1048576, 4096, 256, 1), (1048576, 4096, 256, 1), RopeAlgo.GPT_J),
    (64, 17, 32, 64, (557056, 8192, 128, 1), (1253376, 12288, 192, 1), RopeAlgo.GPT_NEOX),
    (64, 17, 32, 64, (557056, 8192, 128, 1), (1253376, 12288, 192, 1), RopeAlgo.GPT_J),
    # Permuted-layout cases (dim-1 stride smaller than dim-2 stride)
    (8, 20, 4, 64, (1048576, 64, 262144, 1), (1048576, 64, 262144, 1), RopeAlgo.GPT_NEOX),
    (8, 20, 4, 64, (1048576, 64, 262144, 1), (1048576, 64, 262144, 1), RopeAlgo.GPT_J),
    (8, 20, 32, 64, (40960, 64, 1280, 1), (40960, 64, 1280, 1), RopeAlgo.GPT_NEOX),
    (8, 20, 32, 64, (40960, 64, 1280, 1), (40960, 64, 1280, 1), RopeAlgo.GPT_J),
]
# Tolerance configuration
......@@ -49,7 +159,8 @@ def parse_test_cases():
for data in _TEST_CASES_DATA:
bs, seq_len, num, head_dim = data[0], data[1], data[2], data[3]
algo = data[4]
src_strides, dst_strides = data[4], data[5]
algo = data[6]
# Determine shapes based on batch dimension
out_shape = (bs, seq_len, num, head_dim)
......@@ -58,15 +169,16 @@ def parse_test_cases():
cos_table_shape = (seq_len, head_dim // 2)
# Check if tensors support in-place operations
c_supports_inplace = not is_broadcast(out_shape)
# x tensor supports in-place if it's not a broadcasted tensor
x_supports_inplace = not is_broadcast(src_strides)
# Generate test cases for all data types
for dtype in _TENSOR_DTYPES:
tolerance = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 1e-3})
# Create typed tensor specs
out_spec = TensorSpec.from_tensor(out_shape, None, dtype)
x_spec = TensorSpec.from_tensor(x_shape, None, dtype)
out_spec = TensorSpec.from_tensor(out_shape, dst_strides, dtype)
x_spec = TensorSpec.from_tensor(x_shape, src_strides, dtype)
sin_table_spec = TensorSpec.from_tensor(sin_table_shape, None, dtype)
cos_table_spec = TensorSpec.from_tensor(cos_table_shape, None, dtype)
......@@ -83,7 +195,7 @@ def parse_test_cases():
)
# Test Case 2: In-place with explicit output tensor
if c_supports_inplace:
if dst_strides is None or not is_broadcast(dst_strides):
test_cases.append(
TestCase(
inputs=[x_spec, sin_table_spec, cos_table_spec],
......@@ -95,6 +207,19 @@ def parse_test_cases():
)
)
# Test Case 3: In-place on input tensor (x)
if x_supports_inplace:
test_cases.append(
TestCase(
inputs=[x_spec, sin_table_spec, cos_table_spec],
kwargs={"algo": algo, "out": 0}, # Use index 0 for first input
output_spec=None,
comparison_target=0, # Compare first input (x tensor)
tolerance=tolerance,
description=f"Rope - INPLACE(x)",
)
)
return test_cases
......@@ -107,15 +232,22 @@ def rotary_embedding(t, sin, cos, algo, *, out=None):
return t_out_1, t_out_2
ans = t.clone()
# If out parameter is provided and it's the same as input t, operate in-place
if out is not None:
if out.data_ptr() == t.data_ptr():
ans = t # Use the same tensor for in-place operation
else:
ans = out # Use provided output tensor
else:
ans = t.clone()
dh = t.shape[-1]
dt = t.dtype
assert dh % 2 == 0, "Embedding dimension must be even."
if RopeAlgo.GPT_J == algo:
t_even = t[..., 0::2] # [seq_len, n_head, dh // 2]
t_odd = t[..., 1::2] # [seq_len, n_head, dh // 2]
t_even = t[..., 0::2] # [bs, seq_len, n_head, dh // 2]
t_odd = t[..., 1::2] # [bs, seq_len, n_head, dh // 2]
t_out_even, t_out_odd = _torch_rope(sin, cos, t_even, t_odd)
......@@ -131,9 +263,10 @@ def rotary_embedding(t, sin, cos, algo, *, out=None):
ans[..., :half_dim] = t_out_first.to(dt)
ans[..., half_dim:] = t_out_second.to(dt)
else:
raise KeyError("error Algo ")
raise KeyError("Unsupported RoPE algorithm")
if out is not None:
# If operating in-place on t, we don't need to copy back
if out is not None and out.data_ptr() != t.data_ptr():
out.copy_(ans)
return out
return ans
......
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import torch
import infinicore
from framework.base import BaseOperatorTest, TensorSpec, TestCase
from framework.runner import GenericTestRunner
from framework.utils import is_broadcast
# ==============================================================================
# Operator-specific configuration
# ==============================================================================
# Test cases format: (shape, strides, dim)
# `strides=None` means a contiguous tensor; explicit strides exercise
# non-contiguous layouts (including a zero stride in one case).
_TEST_CASES_DATA = [
    # Basic contiguous cases
    ((1, 1, 1), None, 1),
    ((1, 1, 1), None, 0),
    ((1, 2, 4), None, 0),
    # Strided cases: (4, 0, 1) has a zero stride on a non-squeezed dim
    ((2, 1, 4), (4, 0, 1), 1),
    ((1, 4, 1, 32), (32, 32, 32, 1), 2),
]
# Tolerance configuration — zero tolerance: squeeze must not change values,
# so output is expected to match the reference exactly.
_TOLERANCE_MAP = {
    infinicore.float16: {"atol": 0, "rtol": 0},
    infinicore.float32: {"atol": 0, "rtol": 0},
    infinicore.bfloat16: {"atol": 0, "rtol": 0},
}
# Data types to test
_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32]
def parse_test_cases():
    """Expand the (shape, strides, dim) table into TestCase objects.

    One TestCase is produced per table row and per dtype in
    ``_TENSOR_DTYPES``; the framework compares the operator output
    against the reference under the per-dtype tolerance.
    """
    cases = []
    for shape, strides, dim in _TEST_CASES_DATA:
        for dtype in _TENSOR_DTYPES:
            tol = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 0})
            spec = TensorSpec.from_tensor(shape, strides, dtype)
            cases.append(
                TestCase(
                    inputs=[spec, dim],
                    kwargs={},
                    output_spec=None,
                    comparison_target=None,  # compare the returned output
                    tolerance=tol,
                    description="squeeze",
                )
            )
    return cases
class OpTest(BaseOperatorTest):
    """Squeeze operator test: runs infinicore.squeeze against torch.squeeze."""

    def __init__(self):
        # Operator name used by the framework for registration/reporting.
        super().__init__("squeeze")

    def get_test_cases(self):
        # All (shape, strides, dim) x dtype combinations from the tables above.
        return parse_test_cases()

    def torch_operator(self, *args, **kwargs):
        """PyTorch reference implementation."""
        return torch.squeeze(*args, **kwargs)

    def infinicore_operator(self, *args, **kwargs):
        """InfiniCore implementation under test."""
        return infinicore.squeeze(*args, **kwargs)
def main():
    """Entry point: run the squeeze test suite and exit with its status."""
    GenericTestRunner(OpTest).run_and_exit()


if __name__ == "__main__":
    main()
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import torch
import infinicore
from framework.base import BaseOperatorTest, TensorSpec, TestCase
from framework.runner import GenericTestRunner
from framework.utils import is_broadcast
# ==============================================================================
# Operator-specific configuration
# ==============================================================================
# Test cases format: (shape, strides, dim)
# `strides=None` means a contiguous tensor; explicit strides exercise
# non-contiguous layouts (including a zero stride in one case).
_TEST_CASES_DATA = [
    # Basic contiguous cases
    ((1, 1, 1), None, 1),
    ((1, 1, 1), None, 0),
    ((1, 2, 4), None, 0),
    # Strided cases: (4, 0, 1) has a zero stride on one dim
    ((2, 1, 4), (4, 0, 1), 1),
    ((1, 4, 1, 32), (32, 32, 32, 1), 2),
]
# Tolerance configuration — zero tolerance: unsqueeze must not change values,
# so output is expected to match the reference exactly.
_TOLERANCE_MAP = {
    infinicore.float16: {"atol": 0, "rtol": 0},
    infinicore.float32: {"atol": 0, "rtol": 0},
    infinicore.bfloat16: {"atol": 0, "rtol": 0},
}
# Data types to test
_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32]
def parse_test_cases():
    """Expand the (shape, strides, dim) table into TestCase objects.

    One TestCase is produced per table row and per dtype in
    ``_TENSOR_DTYPES``; the framework compares the operator output
    against the reference under the per-dtype tolerance.
    """
    cases = []
    for shape, strides, dim in _TEST_CASES_DATA:
        for dtype in _TENSOR_DTYPES:
            tol = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 0})
            spec = TensorSpec.from_tensor(shape, strides, dtype)
            cases.append(
                TestCase(
                    inputs=[spec, dim],
                    kwargs={},
                    output_spec=None,
                    comparison_target=None,  # compare the returned output
                    tolerance=tol,
                    description="unsqueeze",
                )
            )
    return cases
class OpTest(BaseOperatorTest):
    """Unsqueeze operator test: runs infinicore.unsqueeze against torch.unsqueeze."""

    def __init__(self):
        # Operator name used by the framework for registration/reporting.
        super().__init__("unsqueeze")

    def get_test_cases(self):
        # All (shape, strides, dim) x dtype combinations from the tables above.
        return parse_test_cases()

    def torch_operator(self, *args, **kwargs):
        """PyTorch reference implementation."""
        return torch.unsqueeze(*args, **kwargs)

    def infinicore_operator(self, *args, **kwargs):
        """InfiniCore implementation under test."""
        return infinicore.unsqueeze(*args, **kwargs)
def main():
    """Entry point: run the unsqueeze test suite and exit with its status."""
    GenericTestRunner(OpTest).run_and_exit()


if __name__ == "__main__":
    main()
......@@ -296,7 +296,7 @@ def rearrange_tensor(tensor, new_strides):
left = 0
right = 0
for i in range(len(shape)):
if new_strides[i] > 0:
if new_strides[i] >= 0:
new_size[i] = (shape[i] - 1) * new_strides[i] + 1
right += new_strides[i] * (shape[i] - 1)
else: # TODO: Support negative strides in the future
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment