Unverified Commit 98fff64d authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

Merge pull request #616 from pengcheng888/issue/608

issue/608- 修改functional中的rope, 添加nn.RoPE的实现和测试
parents 674120e1 1715b204
......@@ -20,6 +20,16 @@ def rope(
) -> Tensor:
r"""Rotary Position Embedding(RoPE)."""
bs, seq_len, num_heads, head_dim = x.shape
x_stride = x.stride()
assert seq_len * x_stride[1] == x_stride[0], (
"x need to be continuous in dim=0 and dim=1"
)
x = x.view((bs * seq_len, num_heads, head_dim))
bs, num = pos_ids.shape
pos_ids = pos_ids.view((bs * num,))
if out is None:
return Tensor(
_infinicore.rope(
......@@ -29,8 +39,9 @@ def rope(
cos_table._underlying,
algo,
)
)
).view((bs, seq_len, num_heads, head_dim))
out = out.view((bs * seq_len, num_heads, head_dim))
_infinicore.rope_(
out._underlying,
x._underlying,
......@@ -39,4 +50,4 @@ def rope(
cos_table._underlying,
algo,
)
return out
return out.view((bs, seq_len, num_heads, head_dim))
......@@ -2,6 +2,7 @@ from .container import InfiniCoreModuleList as ModuleList
from .linear import Linear
from .module import InfiniCoreModule as Module
from .normalization import RMSNorm
from .rope import RoPE
from .sparse import Embedding
__all__ = ["Linear", "RMSNorm", "Embedding", "ModuleList", "Module"]
__all__ = ["Linear", "RMSNorm", "Embedding", "RoPE", "ModuleList", "Module"]
import numpy as np
import infinicore
from infinicore.nn import functional as F
from ...tensor import Tensor
from ..functional import RopeAlgo
from .module import InfiniCoreModule as Module
def create_sin_cos_table_numpy(max_position, head_dim, theta=10000.0):
    """Precompute RoPE sin/cos lookup tables as float32 NumPy arrays.

    Args:
        max_position: number of positions (rows) in each table.
        head_dim: attention head dimension; must be even.
        theta: base period of the rotary frequencies.

    Returns:
        Tuple ``(sin_table, cos_table)``, each of shape
        ``(max_position, head_dim // 2)`` and dtype float32.
    """
    assert head_dim % 2 == 0, "Embedding dimension must be even."
    half_dim = head_dim // 2
    # freqs[i] = theta^(-2i / head_dim) for i in [0, head_dim // 2).
    exponents = (2.0 * np.arange(half_dim)) / head_dim
    freqs = 1.0 / (theta**exponents)
    # angles[p, i] = p * freqs[i]
    angles = np.outer(np.arange(max_position), freqs)
    return np.sin(angles, dtype=np.float32), np.cos(angles, dtype=np.float32)
def create_sin_cos_table(
    max_position,
    head_dim,
    theta=10000.0,
    device=None,
    dtype=None,
):
    """Build RoPE sin/cos tables and wrap them as InfiniCore tensors.

    Args:
        max_position: number of positions (rows) in each table.
        head_dim: attention head dimension; must be even.
        theta: base period of the rotary frequencies.
        device: target infinicore device (passed through to ``from_numpy``).
        dtype: target infinicore dtype (passed through to ``from_numpy``).

    Returns:
        Tuple ``(sin_table, cos_table)`` of infinicore tensors, each of
        shape ``(max_position, head_dim // 2)``.
    """
    sin_np, cos_np = create_sin_cos_table_numpy(max_position, head_dim, theta)
    sin_table = infinicore.from_numpy(sin_np, dtype=dtype, device=device)
    cos_table = infinicore.from_numpy(cos_np, dtype=dtype, device=device)
    return sin_table, cos_table
class RoPE(Module):
    r"""Rotary Position Embedding (RoPE) layer.

    Sin/cos lookup tables are precomputed at construction time; ``forward``
    applies the rotary embedding to the hidden states in place.

    Args:
        max_position_embeddings (int): The maximum sequence length that this model might ever be used with.
        rope_theta (float): The base period of the RoPE embeddings.
        head_dim (int): The attention head dimension.

    Shape:
        - Input: hidden_states, ( bs, seq_len, num_heads, head_dim).
        - Output: hidden_states, ( bs, seq_len, num_heads, head_dim).
    """

    __constants__ = ["max_position_embeddings", "rope_theta", "head_dim"]
    max_position_embeddings: int
    rope_theta: float
    head_dim: int

    def __init__(
        self,
        max_position_embeddings: int,
        rope_theta: float,
        head_dim: int,
        device=None,
        dtype=None,
    ):
        super().__init__()
        # Default to CPU / float32 when the caller does not specify.
        if device is None:
            device = infinicore.device("cpu", 0)
        if dtype is None:
            dtype = infinicore.float32
        self.max_position_embeddings = max_position_embeddings
        self.rope_theta = rope_theta
        self.head_dim = head_dim
        self._sin_table, self._cos_table = create_sin_cos_table(
            max_position_embeddings,
            head_dim=head_dim,
            theta=rope_theta,
            device=device,
            dtype=dtype,
        )

    def forward(self, states: Tensor, position_ids: Tensor, algo=RopeAlgo.GPT_NEOX):
        # NOTE: the embedding is written back into `states` (out=states), so
        # the input tensor is mutated and then returned.
        F.rope(
            states,
            position_ids,
            self._sin_table,
            self._cos_table,
            algo=algo,
            out=states,
        )
        return states
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import torch
from framework.base import BaseOperatorTest, TensorSpec, TestCase
from framework.runner import GenericTestRunner
from infinicore.nn.functional import RopeAlgo
import infinicore
# ==============================================================================
# Operator-specific configuration
# ==============================================================================
# Test cases format: (x_shape)
# bs, seq_len, num_heads, head_dim
# (bs, seq_len, num_heads, head_dim) input shapes exercised by the suite.
_TEST_CASES_DATA = [
    # Basic cases
    (1, 10, 32, 64),
    (2, 2, 32, 64),
    (5, 10, 32, 64),
]
# Tolerance configuration
# Per-dtype absolute/relative tolerances for result comparison.
_TOLERANCE_MAP = {
    infinicore.float16: {"atol": 1e-2, "rtol": 1e-2},
    infinicore.float32: {"atol": 1e-3, "rtol": 1e-3},
    # NOTE(review): bfloat16 has a tolerance entry but is absent from
    # _TENSOR_DTYPES below — presumably kept for future enablement; confirm.
    infinicore.bfloat16: {"atol": 5e-2, "rtol": 5e-2},
}
# Data types to test
_TENSOR_DTYPES = [infinicore.float16, infinicore.float32]
def parse_test_cases():
    """Parse test case data and return a list of TestCase objects.

    One out-of-place case is generated per (shape, dtype) combination from
    ``_TEST_CASES_DATA`` x ``_TENSOR_DTYPES``; each case carries the tensor
    spec, the nn.RoPE constructor kwargs, and the comparison tolerance.

    Returns:
        list[TestCase]: fully-populated test cases ready for execution.
    """
    test_cases = []
    # Module construction parameters are identical for every case, so build
    # them once instead of on each loop iteration.
    max_position_embeddings = 1024
    rope_theta = 10000.0
    for bs, seq_len, num_heads, head_dim in _TEST_CASES_DATA:
        strides = None  # contiguous layout
        # Generate test cases for all data types
        for dtype in _TENSOR_DTYPES:
            tolerance = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 1e-3})
            x_shape = [bs, seq_len, num_heads, head_dim]
            # Create typed tensor specs
            x_spec = TensorSpec.from_tensor(x_shape, strides, dtype, name="x")
            # Test Case 1: Out-of-place (return value)
            test_cases.append(
                TestCase(
                    inputs=[x_spec],
                    kwargs={
                        "max_position_embeddings": max_position_embeddings,
                        "rope_theta": rope_theta,
                    },
                    output_spec=None,
                    comparison_target=None,
                    tolerance=tolerance,
                    # Plain string: the original f-string had no placeholders.
                    description="nn.RoPE - OUT_OF_PLACE",
                )
            )
    return test_cases
def rotary_embedding(
    t,
    max_position_embeddings,
    rope_theta,
    head_dim,
    algo=RopeAlgo.GPT_NEOX,
):
    """Reference PyTorch implementation of rotary position embedding.

    Positions are implicitly 0..seq_len-1, where seq_len is ``t.shape[-3]``.

    Args:
        t: input tensor of shape (..., seq_len, num_heads, head_dim).
        max_position_embeddings: number of rows precomputed in the sin/cos tables;
            must be >= seq_len.
        rope_theta: base period of the RoPE frequencies.
        head_dim: attention head dimension (must be even).
        algo: RopeAlgo.GPT_J (interleaved channel pairs) or
            RopeAlgo.GPT_NEOX (first-half/second-half channel pairs).

    Returns:
        A new tensor with the rotary embedding applied, same shape and dtype as ``t``.

    Raises:
        KeyError: if ``algo`` is not a supported algorithm.
    """

    def create_sin_cos_table(
        max_position,
        head_dim,
        theta=10000.0,
        torch_dtype=torch.float32,
        torch_device="cpu",
    ):
        # Standard RoPE frequency table: freqs[i] = theta^(-2i / head_dim).
        assert head_dim % 2 == 0, "Embedding dimension must be even."
        pos = torch.arange(0, max_position)
        freqs = 1.0 / (
            theta
            ** (torch.arange(0, head_dim, 2)[: (head_dim // 2)].float() / head_dim)
        )
        angles = torch.outer(pos, freqs)
        return torch.sin(angles).to(dtype=torch_dtype, device=torch_device), torch.cos(
            angles
        ).to(dtype=torch_dtype, device=torch_device)

    def _torch_rope(sin, cos, t1, t2):
        cos = cos.unsqueeze(1)  # [seq_len, 1, dh // 2]
        sin = sin.unsqueeze(1)  # [seq_len, 1, dh // 2]
        t_out_1 = t1 * cos - t2 * sin
        t_out_2 = t1 * sin + t2 * cos
        return t_out_1, t_out_2

    sin, cos = create_sin_cos_table(
        max_position_embeddings, head_dim, rope_theta, torch_device=t.device
    )
    # FIX: only the first seq_len rows of the tables apply to `t`. Without
    # this slice, broadcasting fails whenever max_position_embeddings differs
    # from seq_len (callers previously had to pass seq_len as the table size).
    seq_len = t.shape[-3]
    sin, cos = sin[:seq_len], cos[:seq_len]
    ans = t.clone()
    dh = t.shape[-1]
    dt = t.dtype
    assert dh % 2 == 0, "Embedding dimension must be even."
    if RopeAlgo.GPT_J == algo:
        # GPT-J style: rotate interleaved (even, odd) channel pairs.
        t_even = t[..., 0::2]  # (..., seq_len, n_head, dh // 2)
        t_odd = t[..., 1::2]  # (..., seq_len, n_head, dh // 2)
        t_out_even, t_out_odd = _torch_rope(sin, cos, t_even, t_odd)
        ans[..., 0::2] = t_out_even.to(dt)
        ans[..., 1::2] = t_out_odd.to(dt)
    elif RopeAlgo.GPT_NEOX == algo:
        # GPT-NeoX style: rotate (first half, second half) channel pairs.
        half_dim = dh // 2
        t_first = t[..., :half_dim]
        t_second = t[..., half_dim:]
        t_out_first, t_out_second = _torch_rope(sin, cos, t_first, t_second)
        ans[..., :half_dim] = t_out_first.to(dt)
        ans[..., half_dim:] = t_out_second.to(dt)
    else:
        raise KeyError("error Algo ")
    return ans
class OpTest(BaseOperatorTest):
    """nn.RoPE test with simplified implementation"""

    def __init__(self):
        super().__init__("nn.RoPE")

    def get_test_cases(self):
        return parse_test_cases()

    def torch_operator(self, x, max_position_embeddings, rope_theta):
        """PyTorch nn.RoPE implementation"""
        bs, seq_len, num_heads, head_dim = x.shape
        # NOTE(review): seq_len is passed where the reference expects the
        # table size (max_position_embeddings); the tables then exactly cover
        # positions 0..seq_len-1 — confirm this sizing is intentional.
        return rotary_embedding(x, seq_len, rope_theta, head_dim)

    def infinicore_operator(self, x, max_position_embeddings, rope_theta):
        """InfiniCore nn.RoPE implementation"""
        bs, seq_len, num_heads, head_dim = x.shape
        torch_device = "cuda" if x.device.type != "cpu" else "cpu"
        # Build position ids: one 0..seq_len-1 row per batch element.
        pos_ids_torch = (
            torch.arange(0, seq_len, dtype=torch.int32, device=torch_device)
            .unsqueeze(0)
            .expand(bs, seq_len)
            .contiguous()
        )
        pos_ids_infini = infinicore.from_torch(pos_ids_torch)
        # Construct the module under test and apply it.
        rope_module = infinicore.nn.RoPE(
            max_position_embeddings,
            rope_theta,
            head_dim,
            device=x.device,
            dtype=x.dtype,
        )
        return rope_module(x, pos_ids_infini)
def main():
    """Main entry point: run the nn.RoPE test suite and exit with its status."""
    GenericTestRunner(OpTest).run_and_exit()


if __name__ == "__main__":
    main()
......@@ -3,10 +3,10 @@ import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import torch
from framework.base import BaseOperatorTest, TensorSpec, TestCase
from framework.runner import GenericTestRunner
from framework.utils import infinicore_tensor_from_torch, is_broadcast
from framework.utils import is_broadcast
from infinicore.nn.functional import RopeAlgo
import infinicore
......@@ -17,11 +17,11 @@ import infinicore
_TEST_CASES_DATA = [
# ntok, num, head_dim, Algo
(1, 1, 64, RopeAlgo.GPT_NEOX),
(5, 32, 64, RopeAlgo.GPT_NEOX),
(1, 1, 128, RopeAlgo.GPT_J),
(10, 1, 64, RopeAlgo.GPT_J),
# bs, seq_len, num, head_dim, Algo
(1, 1, 1, 64, RopeAlgo.GPT_NEOX),
(1, 5, 32, 64, RopeAlgo.GPT_NEOX),
(1, 1, 1, 128, RopeAlgo.GPT_J),
(1, 10, 1, 64, RopeAlgo.GPT_J),
]
# Tolerance configuration
......@@ -43,14 +43,14 @@ def parse_test_cases():
test_cases = []
for data in _TEST_CASES_DATA:
ntok, num, head_dim = data[0], data[1], data[2]
algo = data[3]
bs, seq_len, num, head_dim = data[0], data[1], data[2], data[3]
algo = data[4]
# Determine shapes based on batch dimension
out_shape = (ntok, num, head_dim)
x_shape = (ntok, num, head_dim)
sin_table_shape = (ntok, head_dim // 2)
cos_table_shape = (ntok, head_dim // 2)
out_shape = (bs, seq_len, num, head_dim)
x_shape = (bs, seq_len, num, head_dim)
sin_table_shape = (seq_len, head_dim // 2)
cos_table_shape = (seq_len, head_dim // 2)
# Check if tensors support in-place operations
c_supports_inplace = not is_broadcast(out_shape)
......@@ -151,18 +151,13 @@ class OpTest(BaseOperatorTest):
def infinicore_operator(self, x, sin_table, cos_table, algo, out=None, **kwargs):
"""InfiniCore Rope implementation"""
ntok = x.shape[0]
torch_device = "cpu"
if x.device.type != "cpu":
torch_device = "cuda"
bs, seq_len, num, head_dim = x.shape
# 创建 pos_ids的变量
pos_ids_torch = torch.arange(0, ntok, dtype=torch.int32, device=torch_device)
pos_ids_ref = infinicore_tensor_from_torch(pos_ids_torch)
pos_ids_infini = infinicore.empty(
list(pos_ids_ref.shape), dtype=pos_ids_ref.dtype, device=pos_ids_ref.device
## 创建 pos_ids的变量
cache_position_list = [list(range(0, seq_len)) for i in range(bs)]
pos_ids_infini = infinicore.from_list(
cache_position_list, dtype=infinicore.int64, device=x.device
)
pos_ids_infini.copy_(pos_ids_ref)
# 计算
pos_ids = pos_ids_infini
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment