// activation_kernels.h
// Last committed by Zhekai Zhang.
#pragma once

#include "common.h"
#include "Tensor.h"

// Declaration of the `silu` entry point; no definition is visible in this
// header.
//
// NOTE(review): judging by the name this applies the SiLU activation
// element-wise -- confirm against the definition.
//
//   out   - annotated as [..., d]; presumably the tensor that receives the
//           result.
//   input - no shape annotation in the original; presumably matches `out`
//           -- TODO confirm.
void silu(Tensor &out,    // [..., d]
          Tensor &input);

// Declaration of the `silu_and_mul` entry point; no definition is visible in
// this header.
//
// The annotated shapes show `input` carrying twice as many trailing elements
// as `out` (2 * d versus d).
// NOTE(review): presumably one half of `input` goes through SiLU and is
// multiplied by the other half -- verify against the definition.
void silu_and_mul(
    Tensor &out,     // [..., d]
    Tensor &input    // [..., 2 * d]
);

// Declaration of the `gelu_new` entry point; no definition is visible in this
// header, and the original carries no shape annotations.
//
// NOTE(review): by name this is a GELU variant reading `input` and filling
// `out` -- which approximation it uses must be checked in the definition.
void gelu_new(
    Tensor &out,
    Tensor &input
);

// Declaration of the `gelu_fast` entry point; no definition is visible in
// this header, and the original carries no shape annotations.
//
// NOTE(review): by name this is a faster/approximate GELU reading `input`
// and filling `out` -- confirm the exact formula in the definition.
void gelu_fast(
    Tensor &out,
    Tensor &input
);

void invoke_dequant_silu_and_mul_quant(Tensor &out,   // [..., d]
                                       Tensor &input, // [..., 2 * d]
                                       const float scale_gate,
                                       const float scale_up,
                                       const float scale_out);

void invoke_dequant_silu_and_mul_quant(Tensor &out,   // [..., d]
                                       Tensor &input, // [..., 2 * d]
                                       const float scale_gate,
                                       const float scale_up,
                                       Tensor &scale_out, // [num_tokens]
                                       Tensor &tmp // [num_tokens, d]
);