activation_kernels.h 1.02 KB
Newer Older
Zhekai Zhang's avatar
Zhekai Zhang committed
1
2
3
4
5
#pragma once

#include "common.h"
#include "Tensor.h"

Muyang Li's avatar
Muyang Li committed
6
7
// Declaration only; the definition is not in this header.
// NOTE(review): by its name this presumably applies the SiLU activation
// (x * sigmoid(x)) elementwise from `input` into `out` — confirm against the
// definition. The shape of `input` is not annotated here; presumably it
// matches `out`.
void silu(Tensor &out, // [..., d]
          Tensor &input);
Zhekai Zhang's avatar
Zhekai Zhang committed
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26

// Declaration only; the definition is not in this header.
// Per the shape annotations, `input`'s last dimension is twice that of `out`.
// NOTE(review): by its name this presumably applies SiLU to one half of
// `input`'s last dimension and multiplies it by the other half — which half
// is the gate is not visible here; confirm against the definition.
void silu_and_mul(Tensor &out,    // [..., d]
                  Tensor &input); // [..., 2 * d]

// Declaration only; the definition is not in this header.
// NOTE(review): presumably a GELU variant applied from `input` into `out`;
// which approximation "new" refers to, and the expected shapes, are not
// shown here — confirm against the definition.
void gelu_new(Tensor &out, Tensor &input);

// Declaration only; the definition is not in this header.
// NOTE(review): presumably a fast GELU approximation applied from `input`
// into `out`; the exact formula and expected shapes are not shown here —
// confirm against the definition.
void gelu_fast(Tensor &out, Tensor &input);

// Overload taking all three scales as scalar floats.
// Per the shape annotations, `input`'s last dimension is twice that of `out`.
// NOTE(review): by its name this presumably dequantizes `input` (using
// scale_gate / scale_up for its two halves), applies silu-and-mul, and
// re-quantizes into `out` using scale_out — whether each scale multiplies or
// divides is not visible here; confirm against the definition.
void invoke_dequant_silu_and_mul_quant(Tensor &out,   // [..., d]
                                       Tensor &input, // [..., 2 * d]
                                       const float scale_gate,
                                       const float scale_up,
                                       const float scale_out);

void invoke_dequant_silu_and_mul_quant(Tensor &out,   // [..., d]
                                       Tensor &input, // [..., 2 * d]
                                       const float scale_gate,
                                       const float scale_up,
                                       Tensor &scale_out, // [num_tokens]
Muyang Li's avatar
Muyang Li committed
27
28
                                       Tensor &tmp        // [num_tokens, d]
);