import pytest
import numpy as np
import fastllm


def np_rms_norm(inputs, weights, eps):
    # RMSNorm: normalize by the root-mean-square over the last axis.
    channel = inputs.shape[-1]
    rms = np.sqrt(np.sum(inputs**2, axis=-1, keepdims=True) / channel + eps)
    return inputs / rms * weights


def np_layer_norm(inputs, gamma, beta, axis=-1, eps=1e-5):
    assert axis < len(inputs.shape), "axis should be less than inputs dims"
    mean = np.mean(inputs, axis=axis, keepdims=True)
    var = np.var(inputs, axis=axis, keepdims=True)
    # LayerNorm divides by the standard deviation, not the variance.
    output = (inputs - mean) / np.sqrt(var + eps) * gamma + beta
    return output


def np_linear(inputs, weights, bias):
    output = np.matmul(inputs, weights.T) + bias
    return output


def np_softmax(inputs, axis=None):
    # Subtract the max for numerical stability; keepdims so the
    # division broadcasts correctly along the reduced axis.
    maxv = inputs.max(axis=axis, keepdims=True)
    exp_v = np.exp(inputs - maxv)
    exp_sum = np.sum(exp_v, axis=axis, keepdims=True)
    return exp_v / exp_sum


def np_silu(inputs):
    return inputs / (1 + np.exp(-inputs))


def np_attention(q, k, v, mask=None, group=None, scale=None):
    # Scaled dot-product attention; mask and group are accepted for
    # signature parity with fastllm.ops.attention but are unused here.
    if scale is None:
        scale = 1 / np.sqrt(q.shape[-1])
    qk = np_softmax(q @ k.T * scale, axis=-1)
    attn = qk @ v
    return attn


def test_linear():
    inputs = np.array([[1, 2]])
    weight = np.array([[3, 4, 5, 5, 6, 7]]).reshape([3, 2])
    bias = np.array([0, 1, 1])
    np_output = np_linear(inputs, weight, bias)
    print(np_output)

    input = fastllm.Tensor(fastllm.float32, [1, 2], [1, 2])
    weights = fastllm.Tensor(fastllm.float32, [3, 2], [3, 4, 5, 5, 6, 7])
    bias = fastllm.Tensor(fastllm.float32, [3], [0, 1, 1])
    out = fastllm.ops.linear(input, weights, bias)
    print(out)


def test_rms_norm():
    inputs = np.array([1, 5]).reshape([1, 2])
    weights = np.array([1, 3]).reshape([1, 2])
    eps = 1e-6
    np_out = np_rms_norm(inputs, weights, eps)
    print(np_out)

    input = fastllm.Tensor(fastllm.float32, [1, 2], [1, 5])
    weights = fastllm.Tensor(fastllm.float32, [1, 2], [1, 3])
    out = fastllm.ops.rms_norm(input, weights, eps=1e-6)
    print(out)


def test_silu():
    inputs = np.array([1, 5]).reshape([1, 2])
    output = np_softmax(inputs, axis=-1)
    # output = np_silu(inputs)
    print(output)

    inputs = fastllm.Tensor(fastllm.float32, [1, 2], [1, 5])
    out = fastllm.ops.activation(input=inputs, activate_type="softmax")
    # out = fastllm.ops.activation(input=inputs, activate_type="silu")
    print(out)


def test_attention():
    q = np.array([1, 2, 3, 4, 5, 6]).reshape([2, 3])
    k = np.array([5, 6, 7, 8, 9, 10]).reshape([2, 3])
    v = np.array([1, 1, 1, 2, 1, 3]).reshape([2, 3])
    scale = 1 / np.sqrt(q.shape[-1])
    output = np_attention(q, k, v, scale=scale)
    print(output)

    q = fastllm.Tensor(fastllm.float32, [1, 2, 3], [1, 2, 3, 4, 5, 6])
    k = fastllm.Tensor(fastllm.float32, [1, 2, 3], [5, 6, 7, 8, 9, 10])
    v = fastllm.Tensor(fastllm.float32, [1, 2, 3], [1, 1, 1, 2, 1, 3])
    mask = fastllm.Tensor()
    output = fastllm.ops.attention(q, k, v, mask, group=1, scale=scale,
                                   attentionType=0)
    print(output)


if __name__ == "__main__":
    test_attention()
    test_silu()
    test_linear()
    test_rms_norm()
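
# np_layer_norm is defined above but never exercised. Below is a minimal
# NumPy-only sanity check (a sketch, not part of the original suite); it
# deliberately calls no fastllm op, since this file shows no layer-norm
# binding and any such op name would be an assumption. pytest discovers it
# by name; it is not wired into the __main__ block above.
def test_layer_norm_numpy_only():
    inputs = np.array([[1.0, 5.0, -2.0, 4.0]])
    gamma = np.ones(4)
    beta = np.zeros(4)
    out = np_layer_norm(inputs, gamma, beta, axis=-1)
    # With unit gamma and zero beta the output should have ~zero mean and
    # ~unit variance along the normalized axis (up to the eps correction).
    assert np.allclose(out.mean(axis=-1), 0.0, atol=1e-6)
    assert np.allclose(out.var(axis=-1), 1.0, atol=1e-3)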
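
# np_attention ignores its mask argument, and test_attention passes an empty
# mask Tensor to fastllm.ops.attention. As a sketch only: a common convention
# (assumed here, not confirmed by this file or by fastllm's semantics) is to
# push masked logits to a large negative value before the softmax.
def np_masked_attention(q, k, v, mask, scale=None):
    if scale is None:
        scale = 1 / np.sqrt(q.shape[-1])
    logits = q @ k.T * scale
    # mask == 1 marks positions to hide; -1e9 drives their weight to ~0.
    logits = np.where(mask.astype(bool), -1e9, logits)
    return np_softmax(logits, axis=-1) @ v


def test_masked_attention_numpy_only():
    q = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64).reshape([2, 3])
    k = np.array([5, 6, 7, 8, 9, 10], dtype=np.float64).reshape([2, 3])
    v = np.array([1, 1, 1, 2, 1, 3], dtype=np.float64).reshape([2, 3])
    # Causal mask: position 0 may not attend to position 1.
    mask = np.array([[0, 1], [0, 0]])
    out = np_masked_attention(q, k, v, mask)
    # Row 0 attends only to position 0, so it reproduces v[0] exactly.
    assert np.allclose(out[0], v[0])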