// mla_combine.h
#pragma once

#include "params.h"

/**
 * Declaration of the host-callable entry point for the flash-MLA combine step.
 *
 * Only the declaration lives in this header; no definition is visible here.
 * NOTE(review): because the template body is not in the header, every
 * ElementT used by callers presumably needs an explicit instantiation in the
 * translation unit that defines it -- confirm against the matching source file.
 *
 * @tparam ElementT  Type the implementation is specialized on; presumably the
 *                   tensor element type -- confirm against the definition.
 * @param params     Parameter bundle, taken by non-const reference, so the
 *                   callee is permitted to modify it. The type is presumably
 *                   declared in "params.h".
 * @param stream     CUDA stream handle; presumably the stream the kernel is
 *                   enqueued on -- confirm against the definition.
 */
template <class ElementT>
void run_flash_mla_combine_kernel(
    Flash_fwd_mla_params &params,
    cudaStream_t stream);