# Initialise the model's wrapper once. The third argument (max_batch_tokens)
# is taken from Config().chunk_size rather than the former hard-coded 1024,
# so the token budget follows the configured chunk size.
# NOTE(review): init_wrapper used to be invoked twice back to back (first with
# 1024, then with Config().chunk_size). The first call is dropped on the
# assumption that a later init_wrapper call fully replaces the state set up by
# an earlier one -- confirm against init_wrapper's implementation.
self.model.init_wrapper(
    self.args.use_cuda_graph,
    self.device,
    Config().chunk_size,  # max_batch_tokens
    args.max_batch_size,
    self.block_num,
)