Blame · src/turbomind/models/llama/llama_decoder_kernels.h · 35d644628fd31b1df00e2bc5b601b89c5fd335c6 · OpenDAS / Lmdeploy · GitLab

Switch branch/tag

lmdeploy

src

turbomind

models

llama

llama_decoder_kernels.h
Find file
Normal view · History · Permalink

llama_decoder_kernels.h

308 Bytes

Newer

Older

check-in fastertransformer (#7)

Li Zhang
committed
Jun 20, 2023

// Copyright (c) OpenMMLab. All rights reserved.

#include <cuda_runtime.h>

build turbomind (#35)

lvhan028
committed
Jul 01, 2023

namespace turbomind {

check-in fastertransformer (#7)

Li Zhang
committed
Jun 20, 2023

// Host-side launcher declaration; the definition is not part of this header.
//
// NOTE(review): judging by the name, this presumably fuses a bias add, a
// residual add and an RMS normalization into one launch -- confirm against
// the implementation before relying on that.
//
// Parameters (as visible in the signature):
//   residual   - mutable T buffer (non-const, so it may be written).
//   in_out     - mutable T buffer; the name suggests it is both the input and
//                the output of the operation -- TODO confirm.
//   bias       - read-only T buffer (const). NOTE(review): whether nullptr is
//                accepted cannot be determined from this declaration.
//   scale      - read-only T buffer (const); presumably the normalization
//                weight -- TODO confirm.
//   eps        - float epsilon; presumably the numerical-stability term of
//                the normalization -- TODO confirm.
//   batch_size - int; presumably the number of rows/tokens processed.
//   n_dims     - int; presumably the per-row element count (hidden size).
//   stream     - CUDA stream handle; presumably the stream the work is
//                enqueued on -- TODO confirm.
//
// Being a template declared here without a body, every T used by callers
// needs an explicit instantiation in the translation unit that defines it.
template<typename T>

Support attention bias (#14)

Li Zhang
committed
Jun 24, 2023

void invokeFusedAddBiasResidualRMSNorm(

Add lint action (#32)

AllentDan
committed
Jul 01, 2023

    T* residual, T* in_out, const T* bias, const T* scale, float eps, int batch_size, int n_dims, cudaStream_t stream);

check-in fastertransformer (#7)

Li Zhang
committed
Jun 20, 2023

build turbomind (#35)

lvhan028
committed
Jul 01, 2023

}  // namespace turbomind