#pragma once #include "common.h" #include "Tensor.h"
// Tensor depthwise_conv2d_kernel(Tensor A, Tensor B);
Tensor dwconv_f16(Tensor input, Tensor weight, Tensor out, Tensor bias);