Unverified commit 98fff64d, authored by PanZezhong1725 and committed by GitHub

Merge pull request #616 from pengcheng888/issue/608

issue/608 - Modify rope in functional; add the nn.RoPE implementation and tests
parents 674120e1 1715b204
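
For context, a minimal usage sketch of the new module. Only the names that appear in this diff (infinicore.nn.RoPE, infinicore.from_torch) are taken from the change itself; the shapes and values are illustrative:

import torch
import infinicore

bs, seq_len, num_heads, head_dim = 2, 16, 32, 64

# Module holding precomputed sin/cos tables (defaults to CPU / float32).
rope = infinicore.nn.RoPE(
    max_position_embeddings=1024,
    rope_theta=10000.0,
    head_dim=head_dim,
)

# states: (bs, seq_len, num_heads, head_dim); position_ids: (bs, seq_len).
states = torch.randn(bs, seq_len, num_heads, head_dim, dtype=torch.float32)
pos_ids = torch.arange(seq_len, dtype=torch.int32).unsqueeze(0).expand(bs, seq_len).contiguous()

out = rope(infinicore.from_torch(states), infinicore.from_torch(pos_ids))  # rotated in place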
@@ -20,6 +20,16 @@ def rope(
 ) -> Tensor:
     r"""Rotary Position Embedding (RoPE)."""
+    bs, seq_len, num_heads, head_dim = x.shape
+    x_stride = x.stride()
+    assert seq_len * x_stride[1] == x_stride[0], (
+        "x needs to be contiguous in dim=0 and dim=1"
+    )
+    x = x.view((bs * seq_len, num_heads, head_dim))
+
+    bs, num = pos_ids.shape
+    pos_ids = pos_ids.view((bs * num,))
+
     if out is None:
         return Tensor(
             _infinicore.rope(
@@ -29,8 +39,9 @@ def rope(
                 cos_table._underlying,
                 algo,
             )
-        )
+        ).view((bs, seq_len, num_heads, head_dim))

+    out = out.view((bs * seq_len, num_heads, head_dim))
     _infinicore.rope_(
         out._underlying,
         x._underlying,
@@ -39,4 +50,4 @@ def rope(
         cos_table._underlying,
         algo,
     )
-    return out
+    return out.view((bs, seq_len, num_heads, head_dim))
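
The reshapes above flatten (bs, seq_len) into a single token dimension for the underlying kernel, which is only valid when dim 0 and dim 1 are jointly contiguous; the new assert checks exactly that stride condition. An illustrative PyTorch sketch (not part of the diff):

import torch

x = torch.randn(2, 10, 32, 64)   # (bs, seq_len, num_heads, head_dim), contiguous
s = x.stride()
assert 10 * s[1] == s[0]         # seq_len * stride[1] == stride[0] holds
flat = x.view(2 * 10, 32, 64)    # merging bs and seq_len is safe

xt = x.transpose(0, 1)           # (seq_len, bs, num_heads, head_dim)
assert xt.shape[1] * xt.stride()[1] != xt.stride()[0]  # condition fails: the view would be rejected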
@@ -2,6 +2,7 @@ from .container import InfiniCoreModuleList as ModuleList
 from .linear import Linear
 from .module import InfiniCoreModule as Module
 from .normalization import RMSNorm
+from .rope import RoPE
 from .sparse import Embedding

-__all__ = ["Linear", "RMSNorm", "Embedding", "ModuleList", "Module"]
+__all__ = ["Linear", "RMSNorm", "Embedding", "RoPE", "ModuleList", "Module"]
import numpy as np

import infinicore
from infinicore.nn import functional as F

from ...tensor import Tensor
from ..functional import RopeAlgo
from .module import InfiniCoreModule as Module


def create_sin_cos_table_numpy(max_position, head_dim, theta=10000.0):
    assert head_dim % 2 == 0, "Embedding dimension must be even."

    pos = np.arange(0, max_position)
    freqs = 1.0 / (
        theta ** (np.arange(0, head_dim, 2)[: (head_dim // 2)].astype(float) / head_dim)
    )
    angles = np.outer(pos, freqs)

    sin_table = np.sin(angles, dtype=np.float32)
    cos_table = np.cos(angles, dtype=np.float32)
    return sin_table, cos_table
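
# Sanity check on the table layout (illustrative, not part of the module):
# angles[p, i] = p * theta ** (-2 * i / head_dim), so with head_dim=4 the
# per-dimension frequencies are [1.0, 0.01], and position 0 rotates by angle 0:
#
#   sin_table, cos_table = create_sin_cos_table_numpy(max_position=8, head_dim=4)
#   assert sin_table.shape == (8, 2)            # one row per position, head_dim // 2 columns
#   assert cos_table[0].tolist() == [1.0, 1.0]  # cos(0) == 1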
def create_sin_cos_table(
    max_position,
    head_dim,
    theta=10000.0,
    device=None,
    dtype=None,
):
    sin_table_np, cos_table_np = create_sin_cos_table_numpy(
        max_position, head_dim, theta
    )
    sin_table_infini = infinicore.from_numpy(sin_table_np, dtype=dtype, device=device)
    cos_table_infini = infinicore.from_numpy(cos_table_np, dtype=dtype, device=device)
    return sin_table_infini, cos_table_infini
class RoPE(Module):
    r"""Rotary Position Embedding (RoPE).

    Args:
        max_position_embeddings (int): The maximum sequence length that this model might ever be used with.
        rope_theta (float): The base period of the RoPE embeddings.
        head_dim (int): The attention head dimension.

    Shape:
        - Input: hidden_states, (bs, seq_len, num_heads, head_dim).
        - Output: hidden_states, (bs, seq_len, num_heads, head_dim).
    """

    __constants__ = ["max_position_embeddings", "rope_theta", "head_dim"]
    max_position_embeddings: int
    rope_theta: float
    head_dim: int

    def __init__(
        self,
        max_position_embeddings: int,
        rope_theta: float,
        head_dim: int,
        device=None,
        dtype=None,
    ):
        factory_kwargs = {
            "device": infinicore.device("cpu", 0) if device is None else device,
            "dtype": infinicore.float32 if dtype is None else dtype,
        }
        super().__init__()
        self.max_position_embeddings = max_position_embeddings
        self.rope_theta = rope_theta
        self.head_dim = head_dim

        self._sin_table, self._cos_table = create_sin_cos_table(
            self.max_position_embeddings,
            head_dim=self.head_dim,
            theta=self.rope_theta,
            **factory_kwargs,
        )

    def forward(self, states: Tensor, position_ids: Tensor, algo=RopeAlgo.GPT_NEOX):
        F.rope(
            states,
            position_ids,
            self._sin_table,
            self._cos_table,
            algo=algo,
            out=states,
        )
        return states
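
The module is a thin wrapper over the functional form; an equivalent direct call, using the names defined above (states and position_ids are placeholder tensors of the documented shapes):

sin_table, cos_table = create_sin_cos_table(1024, head_dim=64)
y = F.rope(states, position_ids, sin_table, cos_table, algo=RopeAlgo.GPT_NEOX)           # out-of-place
F.rope(states, position_ids, sin_table, cos_table, algo=RopeAlgo.GPT_NEOX, out=states)   # in-place, what forward() does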
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import torch
from framework.base import BaseOperatorTest, TensorSpec, TestCase
from framework.runner import GenericTestRunner
from infinicore.nn.functional import RopeAlgo
import infinicore
# ==============================================================================
# Operator-specific configuration
# ==============================================================================
# Test cases format: (x_shape)
# bs, seq_len, num_heads, head_dim
_TEST_CASES_DATA = [
    # Basic cases
    (1, 10, 32, 64),
    (2, 2, 32, 64),
    (5, 10, 32, 64),
]
# Tolerance configuration
_TOLERANCE_MAP = {
    infinicore.float16: {"atol": 1e-2, "rtol": 1e-2},
    infinicore.float32: {"atol": 1e-3, "rtol": 1e-3},
    infinicore.bfloat16: {"atol": 5e-2, "rtol": 5e-2},
}
# Data types to test
_TENSOR_DTYPES = [infinicore.float16, infinicore.float32]
def parse_test_cases():
    """
    Parse test case data and return a list of TestCase objects for all operation types.
    Each test case contains all necessary information for execution and validation.
    """
    test_cases = []

    for bs, seq_len, num_heads, head_dim in _TEST_CASES_DATA:
        strides = None

        # Generate test cases for all data types
        for dtype in _TENSOR_DTYPES:
            tolerance = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 1e-3})
            x_shape = [bs, seq_len, num_heads, head_dim]

            # Create typed tensor specs
            x_spec = TensorSpec.from_tensor(x_shape, strides, dtype, name="x")

            max_position_embeddings = 1024
            rope_theta = 10000.0

            # Test Case 1: Out-of-place (return value)
            test_cases.append(
                TestCase(
                    inputs=[x_spec],
                    kwargs={
                        "max_position_embeddings": max_position_embeddings,
                        "rope_theta": rope_theta,
                    },
                    output_spec=None,
                    comparison_target=None,
                    tolerance=tolerance,
                    description="nn.RoPE - OUT_OF_PLACE",
                )
            )

    return test_cases
def rotary_embedding(
    t,
    max_position_embeddings,
    rope_theta,
    head_dim,
    algo=RopeAlgo.GPT_NEOX,
):
    def create_sin_cos_table(
        max_position,
        head_dim,
        theta=10000.0,
        torch_dtype=torch.float32,
        torch_device="cpu",
    ):
        assert head_dim % 2 == 0, "Embedding dimension must be even."

        pos = torch.arange(0, max_position)
        freqs = 1.0 / (
            theta
            ** (torch.arange(0, head_dim, 2)[: (head_dim // 2)].float() / head_dim)
        )
        angles = torch.outer(pos, freqs)
        return torch.sin(angles).to(dtype=torch_dtype, device=torch_device), torch.cos(
            angles
        ).to(dtype=torch_dtype, device=torch_device)

    def _torch_rope(sin, cos, t1, t2):
        cos = cos.unsqueeze(1)  # [seq_len, 1, dh // 2]
        sin = sin.unsqueeze(1)  # [seq_len, 1, dh // 2]
        t_out_1 = t1 * cos - t2 * sin
        t_out_2 = t1 * sin + t2 * cos
        return t_out_1, t_out_2

    sin, cos = create_sin_cos_table(
        max_position_embeddings, head_dim, rope_theta, torch_device=t.device
    )

    ans = t.clone()
    dh = t.shape[-1]
    dt = t.dtype
    assert dh % 2 == 0, "Embedding dimension must be even."

    if RopeAlgo.GPT_J == algo:
        t_even = t[..., 0::2]  # [seq_len, n_head, dh // 2]
        t_odd = t[..., 1::2]  # [seq_len, n_head, dh // 2]
        t_out_even, t_out_odd = _torch_rope(sin, cos, t_even, t_odd)
        ans[..., 0::2] = t_out_even.to(dt)
        ans[..., 1::2] = t_out_odd.to(dt)
    elif RopeAlgo.GPT_NEOX == algo:
        half_dim = dh // 2
        t_first = t[..., :half_dim]
        t_second = t[..., half_dim:]
        t_out_first, t_out_second = _torch_rope(sin, cos, t_first, t_second)
        ans[..., :half_dim] = t_out_first.to(dt)
        ans[..., half_dim:] = t_out_second.to(dt)
    else:
        raise KeyError(f"Unsupported RoPE algo: {algo}")

    return ans
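
# Note on the two algos (both rotate dh // 2 two-dimensional pairs; they only
# differ in which dimensions are paired):
#   GPT_J:    interleaved pairs t[..., 0::2] / t[..., 1::2]      -> (0, 1), (2, 3), ...
#   GPT_NEOX: half-split pairs  t[..., :dh//2] / t[..., dh//2:]  -> (0, dh//2), (1, dh//2 + 1), ...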
class OpTest(BaseOperatorTest):
    """nn.RoPE test with simplified implementation"""

    def __init__(self):
        super().__init__("nn.RoPE")

    def get_test_cases(self):
        return parse_test_cases()

    def torch_operator(self, x, max_position_embeddings, rope_theta):
        """PyTorch nn.RoPE reference implementation"""
        bs, seq_len, num_heads, head_dim = x.shape
        # Build the reference table with exactly seq_len rows so it broadcasts against x.
        return rotary_embedding(x, seq_len, rope_theta, head_dim)

    def infinicore_operator(self, x, max_position_embeddings, rope_theta):
        """InfiniCore nn.RoPE implementation"""
        bs, seq_len, num_heads, head_dim = x.shape

        torch_device = "cpu"
        if x.device.type != "cpu":
            torch_device = "cuda"

        # Create the pos_ids variable
        pos_ids_torch = torch.arange(0, seq_len, dtype=torch.int32, device=torch_device)
        pos_ids_torch = pos_ids_torch.unsqueeze(0)
        pos_ids_torch = pos_ids_torch.expand(bs, seq_len).contiguous()
        pos_ids_infini = infinicore.from_torch(pos_ids_torch)

        # Create the module
        rope_instance = infinicore.nn.RoPE(
            max_position_embeddings,
            rope_theta,
            head_dim,
            device=x.device,
            dtype=x.dtype,
        )

        # Compute
        y = rope_instance(x, pos_ids_infini)
        return y
def main():
    """Main entry point"""
    runner = GenericTestRunner(OpTest)
    runner.run_and_exit()


if __name__ == "__main__":
    main()
@@ -3,10 +3,10 @@ import sys
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

-import torch
 from framework.base import BaseOperatorTest, TensorSpec, TestCase
 from framework.runner import GenericTestRunner
-from framework.utils import infinicore_tensor_from_torch, is_broadcast
+from framework.utils import is_broadcast
 from infinicore.nn.functional import RopeAlgo

 import infinicore
@@ -17,11 +17,11 @@ import infinicore
 _TEST_CASES_DATA = [
-    # ntok, num, head_dim, Algo
-    (1, 1, 64, RopeAlgo.GPT_NEOX),
-    (5, 32, 64, RopeAlgo.GPT_NEOX),
-    (1, 1, 128, RopeAlgo.GPT_J),
-    (10, 1, 64, RopeAlgo.GPT_J),
+    # bs, seq_len, num, head_dim, Algo
+    (1, 1, 1, 64, RopeAlgo.GPT_NEOX),
+    (1, 5, 32, 64, RopeAlgo.GPT_NEOX),
+    (1, 1, 1, 128, RopeAlgo.GPT_J),
+    (1, 10, 1, 64, RopeAlgo.GPT_J),
 ]

 # Tolerance configuration
@@ -43,14 +43,14 @@ def parse_test_cases():
     test_cases = []

     for data in _TEST_CASES_DATA:
-        ntok, num, head_dim = data[0], data[1], data[2]
-        algo = data[3]
+        bs, seq_len, num, head_dim = data[0], data[1], data[2], data[3]
+        algo = data[4]

         # Determine shapes based on batch dimension
-        out_shape = (ntok, num, head_dim)
-        x_shape = (ntok, num, head_dim)
-        sin_table_shape = (ntok, head_dim // 2)
-        cos_table_shape = (ntok, head_dim // 2)
+        out_shape = (bs, seq_len, num, head_dim)
+        x_shape = (bs, seq_len, num, head_dim)
+        sin_table_shape = (seq_len, head_dim // 2)
+        cos_table_shape = (seq_len, head_dim // 2)

         # Check if tensors support in-place operations
         c_supports_inplace = not is_broadcast(out_shape)
@@ -151,18 +151,13 @@ class OpTest(BaseOperatorTest):
     def infinicore_operator(self, x, sin_table, cos_table, algo, out=None, **kwargs):
         """InfiniCore Rope implementation"""
-        ntok = x.shape[0]
-        torch_device = "cpu"
-        if x.device.type != "cpu":
-            torch_device = "cuda"
+        bs, seq_len, num, head_dim = x.shape

         # Create the pos_ids variable
-        pos_ids_torch = torch.arange(0, ntok, dtype=torch.int32, device=torch_device)
-        pos_ids_ref = infinicore_tensor_from_torch(pos_ids_torch)
-        pos_ids_infini = infinicore.empty(
-            list(pos_ids_ref.shape), dtype=pos_ids_ref.dtype, device=pos_ids_ref.device
-        )
-        pos_ids_infini.copy_(pos_ids_ref)
+        cache_position_list = [list(range(0, seq_len)) for i in range(bs)]
+        pos_ids_infini = infinicore.from_list(
+            cache_position_list, dtype=infinicore.int64, device=x.device
+        )

         # Compute
         pos_ids = pos_ids_infini
...