"tests/pytorch/test_batched_graph.py" did not exist on "96179b0c96a5dfed18f961cdd0635ae6a8ce84b8"
llama_decoder_kernels.h 324 Bytes
Newer Older
Li Zhang's avatar
Li Zhang committed
1
2
3
4
5
6
7
// Copyright (c) OpenMMLab. All rights reserved.

#include <cuda_runtime.h>

namespace fastertransformer {

template<typename T>
Li Zhang's avatar
Li Zhang committed
8
void invokeFusedAddBiasResidualRMSNorm(
AllentDan's avatar
AllentDan committed
9
    T* residual, T* in_out, const T* bias, const T* scale, float eps, int batch_size, int n_dims, cudaStream_t stream);
Li Zhang's avatar
Li Zhang committed
10
11

}  // namespace fastertransformer