import time

import torch
import warpctc_pytorch as warp_ctc              # stock build ("old" version)
import warpctc_pytorch_change1 as warp_ctc_new  # modified build ("new" version)


def test_compare_cpu(repeat_num=20):
    """Run the old and new CPU bindings on the same small batch and time both."""
    # Activations of shape (seq_len=2, batch=2, num_classes=5); warp-ctc requires contiguous memory.
    probs = torch.FloatTensor([
        [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]],
        [[0.6, 0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.5, 0.2, 0.1]]
    ]).contiguous()
    labels = torch.IntTensor([1, 2])        # flattened label sequences for the whole batch
    label_sizes = torch.IntTensor([2, 0])   # first sample has 2 labels, second sample is empty
    sizes = torch.IntTensor([2, 2])         # input lengths per sample
    minibatch_size = probs.size(1)
    costs = torch.zeros(minibatch_size)
    grads = torch.zeros(probs.size())

    # 1. Run the old-version CPU binding.
    time_st = time.perf_counter()
    for i in range(repeat_num):
        probs_old = probs.clone()
        costs_old = costs.clone()
        grads_old = grads.clone()
        warp_ctc.cpu_ctc(probs_old, grads_old, labels, label_sizes, sizes,
                         minibatch_size, costs_old, 0)
        if i == 0:
            print('CPU_costs_old: %f' % costs_old.sum())
            print('CPU probs_old={}\ngrads_old={}\ncosts_old={}'.format(probs_old, grads_old, costs_old))
    time_used = (time.perf_counter() - time_st) / repeat_num
    print('CPU warp_ctc old version, average time per call: ', time_used)

    # 2. Run the new-version CPU binding.
    time_st = time.perf_counter()
    for i in range(repeat_num):
        probs_new = probs.clone()
        costs_new = costs.clone()
        grads_new = grads.clone()
        warp_ctc_new.cpu_ctc(probs_new, grads_new, labels, label_sizes, sizes,
                             minibatch_size, costs_new, 0)
        if i == 0:
            print('CPU_costs_new: %f' % costs_new.sum())
            print('CPU probs_new={}\ngrads_new={}\ncosts_new={}'.format(probs_new, grads_new, costs_new))
    time_used = (time.perf_counter() - time_st) / repeat_num
    print('CPU warp_ctc new version, average time per call: ', time_used)


def test_compare_gpu():
    """Run the old and new GPU bindings on the same batch as the CPU test."""
    probs0 = torch.FloatTensor([
        [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]],
        [[0.6, 0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.5, 0.2, 0.1]]
    ]).contiguous().cuda()
    labels = torch.IntTensor([1, 2])
    label_sizes = torch.IntTensor([2, 0])
    sizes = torch.IntTensor([2, 2])
    minibatch_size = probs0.size(1)

    # 1. Run the new-version GPU binding (assumed to expose gpu_ctc with the same
    #    signature as the stock warpctc_pytorch build). gpu_ctc expects the
    #    activations and gradients on the GPU; labels, sizes and costs stay on the CPU.
    probs_new = probs0.clone()
    costs_new = torch.zeros(minibatch_size)
    grads_new = torch.zeros(probs0.size()).cuda()
    warp_ctc_new.gpu_ctc(probs_new, grads_new, labels, label_sizes, sizes,
                         minibatch_size, costs_new, 0)
    print('GPU_costs_new: %f' % costs_new.sum())
    print('GPU probs_new={}\ngrads_new={}\ncosts_new={}'.format(probs_new, grads_new, costs_new))

    # 2. Run the old-version GPU binding on a fresh clone of the same activations.
    probs = probs0.clone()
    costs = torch.zeros(minibatch_size)
    grads = torch.zeros(probs0.size()).cuda()
    warp_ctc.gpu_ctc(probs, grads, labels, label_sizes, sizes,
                     minibatch_size, costs, 0)
    print('GPU_costs_old: %f' % costs.sum())
    print('GPU probs={}\ngrads={}\ncosts={}'.format(probs, grads, costs))


if __name__ == '__main__':
    print('torch.cuda.is_available() ', torch.cuda.is_available())
    test_compare_cpu()
    test_compare_gpu()