/*************************************************************************
 * Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 * See LICENSE for license information.
 ************************************************************************/

#include "../util/math.h"
#include "./activation_template.h"

void nvte_silu(const NVTETensor input, NVTETensor output, cudaStream_t stream) {
11
  NVTE_API_CALL(nvte_silu);
12
  using namespace transformer_engine;
13
  act_fn<fp32, Empty, silu<fp32, fp32>>(input, output, stream);
14
15
}

void nvte_dsilu(const NVTETensor grad, const NVTETensor input, NVTETensor output,
17
                cudaStream_t stream) {
18
  NVTE_API_CALL(nvte_dsilu);
19
  using namespace transformer_engine;
20
  dact_fn<fp32, Empty, dsilu<fp32, fp32>>(grad, input, output, stream);
21
22
}

void nvte_swiglu(const NVTETensor input, NVTETensor output, cudaStream_t stream) {
24
25
  NVTE_API_CALL(nvte_swiglu);
  using namespace transformer_engine;
26
27
  Empty e = {};
  gated_act_fn<fp32, Empty, silu<fp32, fp32>>(input, output, e, stream);
28
29
}

void nvte_dswiglu(const NVTETensor grad, const NVTETensor input, NVTETensor output,
31
32
33
                  cudaStream_t stream) {
  NVTE_API_CALL(nvte_dswiglu);
  using namespace transformer_engine;
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
  Empty e = {};
  dgated_act_fn<fp32, Empty, silu<fp32, fp32>, dsilu<fp32, fp32>>(grad, input, output, e, stream);
}

void nvte_clamped_swiglu(const NVTETensor input, NVTETensor output, float limit, float alpha,
                         cudaStream_t stream) {
  NVTE_API_CALL(nvte_clamped_swiglu);
  using namespace transformer_engine;
  ClampedSwiGLUParam param = {limit, alpha};
  gated_act_fn<fp32, ClampedSwiGLUParam, clamped_silu<fp32, fp32>>(input, output, param, stream);
}

// Host-side entry point for the clamped SwiGLU backward pass.
//
// Packs `limit` and `alpha` (in that order) into a `ClampedSwiGLUParam` and
// hands it, with `grad`, `input`, `output` and `stream`, to `dgated_act_fn`.
// The launcher is instantiated with `fp32` as the compute type and with both
// the forward (`clamped_silu<fp32, fp32>`) and derivative
// (`clamped_dsilu<fp32, fp32>`) operations.
//
// NOTE(review): `limit` and `alpha` should presumably match the values used in
// the corresponding forward call - confirm with callers.
void nvte_clamped_dswiglu(const NVTETensor grad, const NVTETensor input, NVTETensor output,
                          float limit, float alpha, cudaStream_t stream) {
  NVTE_API_CALL(nvte_clamped_dswiglu);  // API-call marker macro, defined outside this file.
  using namespace transformer_engine;
  ClampedSwiGLUParam param = {limit, alpha};  // Runtime parameters for the clamped ops.
  dgated_act_fn<fp32, ClampedSwiGLUParam, clamped_silu<fp32, fp32>, clamped_dsilu<fp32, fp32>>(
      grad, input, output, param, stream);
}