TORCH_CHECK(dim%8==0,"causal_conv1d only supports channel dimension divisible by 8 for now");
// Both x.stride(0) and x.stride(2) must be multiples of 8 on this path
// (see the message below). `&&` is used instead of the alternative token
// `and` to match the neighbouring width check.
TORCH_CHECK(x.stride(2) % 8 == 0 && x.stride(0) % 8 == 0, "causal_conv1d with channel last layout requires strides (x.stride(0) and x.stride(2)) to be multiples of 8");
}
TORCH_CHECK(width>=2&&width<=4,"causal_conv1d only supports width between 2 and 4");
// Validate the optional bias tensor; all checks are skipped when bias_ is
// empty.
if (bias_.has_value()) {
    // Bind by const reference: the checks below only read from the tensor,
    // so there is no need to copy the handle.
    const auto& bias = bias_.value();
    // The bias must have the same scalar type as weight_type.
    TORCH_CHECK(bias.scalar_type() == weight_type);
    // The bias must be a CUDA tensor.
    TORCH_CHECK(bias.is_cuda());
    // The last dimension must have unit stride.
    TORCH_CHECK(bias.stride(-1) == 1);
    // CHECK_SHAPE is defined elsewhere in the project; presumably it asserts
    // that bias has shape (dim) -- TODO confirm against the macro definition.
    CHECK_SHAPE(bias, dim);
}
if(seq_idx_.has_value()){
TORCH_CHECK(is_channel_last,"seq_idx is only supported for channel last layout");
std::cerr<<"Warning (causal_conv1d fwd launch): attempting to set maxDynamicSharedMemorySize on an AMD GPU which is currently a non-op (in ROCm versions <= 6.1). This might lead to undefined behavior. \n"<<std::endl;