# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Cross-check a fused QKV linear projection in float16 across backends.

The same ``hidden_states @ Wqkv^T + Bqkv`` computation is evaluated with
Paddle (CUDA device 0), PyTorch (CUDA) and NumPy (host), and the three
results are compared pairwise with ``np.testing.assert_allclose``.
"""

import numpy as np
import torch
import paddle
import torch.nn.functional as F

bsz = 32
hidden_size = 256
x_type = np.float16

# Every fixture is drawn with np.random.rand, cast to float16 and then
# scaled by 0.25.
hidden_states = np.random.rand(bsz, hidden_size).astype(x_type) * 0.25

# Separate query / key / value projection weights, each
# (hidden_size, hidden_size).
Wq = np.random.rand(hidden_size, hidden_size).astype(x_type) * 0.25
Wk = np.random.rand(hidden_size, hidden_size).astype(x_type) * 0.25
Wv = np.random.rand(hidden_size, hidden_size).astype(x_type) * 0.25

# Matching bias vectors, each (hidden_size,).
Bq = np.random.rand(hidden_size).astype(x_type) * 0.25
Bk = np.random.rand(hidden_size).astype(x_type) * 0.25
Bv = np.random.rand(hidden_size).astype(x_type) * 0.25

# Fused parameters: weights stacked along the first axis give
# (3 * hidden_size, hidden_size); biases joined give (3 * hidden_size,).
Wqkv = np.concatenate([Wq, Wk, Wv], axis=0)
Bqkv = np.concatenate([Bq, Bk, Bv], axis=0)


def run_paddle_linear():
    """Compute the fused projection with Paddle on CUDA device 0.

    Switches Paddle to dynamic-graph mode and sets its default dtype to
    ``x_type`` before building any tensor.

    Returns:
        The Paddle tensor ``hidden_states @ Wqkv^T + Bqkv``.
    """
    paddle.disable_static(place=paddle.CUDAPlace(0))
    paddle.set_default_dtype(x_type)

    # Alternative for debugging precision: cast the inputs, the weights and
    # the bias to 'float32' with paddle.cast before the matmul and the add.
    activations = paddle.to_tensor(hidden_states)
    fused_weight = paddle.to_tensor(Wqkv)
    projected = paddle.matmul(
        activations, fused_weight, transpose_x=False, transpose_y=True)
    return projected + paddle.to_tensor(Bqkv)


def run_pytorch_linear():
    """Compute the fused projection with ``torch.nn.functional.linear``.

    All operands are moved to the CUDA device before the call.

    Returns:
        The CUDA tensor ``hidden_states @ Wqkv^T + Bqkv``.
    """
    activations = torch.from_numpy(hidden_states).cuda()
    fused_weight = torch.from_numpy(Wqkv).cuda()
    fused_bias = torch.from_numpy(Bqkv).cuda()
    # Alternative for debugging: call F.linear with bias=None and add
    # fused_bias to the result afterwards.
    return F.linear(activations, fused_weight, fused_bias)


def run_numpy_linear():
    """Compute the fused projection on the host with NumPy.

    Returns:
        The ndarray ``hidden_states @ Wqkv^T + Bqkv``.
    """
    projected = hidden_states @ Wqkv.T
    return projected + Bqkv


paddle_out = run_paddle_linear()
pytorch_out = run_pytorch_linear()
np_out = run_numpy_linear()

# Pairwise comparisons. The actual/desired order is significant because
# assert_allclose scales rtol by the magnitude of ``desired``.
print("compare with pytorch:")
np.testing.assert_allclose(
    actual=pytorch_out.cpu().detach().numpy(),
    desired=paddle_out.numpy(),
    rtol=1e-5,
    atol=1e-2)
print("Success!")

print("paddle compare with numpy:")
np.testing.assert_allclose(
    actual=np_out,
    desired=paddle_out.numpy(),
    rtol=1e-5,
    atol=1e-2)
print("Success!")

print("pytorch compare with numpy:")
np.testing.assert_allclose(
    actual=np_out,
    desired=pytorch_out.cpu().detach().numpy(),
    rtol=1e-5,
    atol=1e-2)
print("Success!")