"transformer_engine/common/activation/relu.cu" did not exist on "a5ba71f3f7379acad9c2292a289aa58ab8a489a8"
relu.cu 2.28 KB
Newer Older
1
/*************************************************************************
2
 * Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
4
5
6
7
 *
 * See LICENSE for license information.
 ************************************************************************/

#include "../util/math.h"
8
#include "./activation_template.h"
9

10
void nvte_relu(const NVTETensor input, NVTETensor output, cudaStream_t stream) {
11
12
  NVTE_API_CALL(nvte_relu);
  using namespace transformer_engine;
13
  act_fn<fp32, Empty, relu<fp32, fp32>>(input, output, stream);
14
15
}

16
void nvte_drelu(const NVTETensor grad, const NVTETensor input, NVTETensor output,
17
18
19
                cudaStream_t stream) {
  NVTE_API_CALL(nvte_drelu);
  using namespace transformer_engine;
20
  dact_fn<fp32, Empty, drelu<fp32, fp32>>(grad, input, output, stream);
21
22
}

23
void nvte_reglu(const NVTETensor input, NVTETensor output, cudaStream_t stream) {
24
25
  NVTE_API_CALL(nvte_reglu);
  using namespace transformer_engine;
26
  gated_act_fn<fp32, Empty, relu<fp32, fp32>>(input, output, stream);
27
28
}

29
void nvte_dreglu(const NVTETensor grad, const NVTETensor input, NVTETensor output,
30
31
32
                 cudaStream_t stream) {
  NVTE_API_CALL(nvte_dreglu);
  using namespace transformer_engine;
33
  dgated_act_fn<fp32, Empty, relu<fp32, fp32>, drelu<fp32, fp32>>(grad, input, output, stream);
34
}
35

36
void nvte_srelu(const NVTETensor input, NVTETensor output, cudaStream_t stream) {
37
38
  NVTE_API_CALL(nvte_srelu);
  using namespace transformer_engine;
39
  act_fn<fp32, Empty, srelu<fp32, fp32>>(input, output, stream);
40
41
}

42
43
void nvte_dsrelu(const NVTETensor grad, const NVTETensor input, NVTETensor output,
                 cudaStream_t stream) {
44
45
  NVTE_API_CALL(nvte_dsrelu);
  using namespace transformer_engine;
46
  dact_fn<fp32, Empty, dsrelu<fp32, fp32>>(grad, input, output, stream);
47
48
}

49
void nvte_sreglu(const NVTETensor input, NVTETensor output, cudaStream_t stream) {
50
51
  NVTE_API_CALL(nvte_sreglu);
  using namespace transformer_engine;
52
  gated_act_fn<fp32, Empty, srelu<fp32, fp32>>(input, output, stream);
53
54
}

55
56
void nvte_dsreglu(const NVTETensor grad, const NVTETensor input, NVTETensor output,
                  cudaStream_t stream) {
57
58
  NVTE_API_CALL(nvte_dsreglu);
  using namespace transformer_engine;
59
  dgated_act_fn<fp32, Empty, srelu<fp32, fp32>, dsrelu<fp32, fp32>>(grad, input, output, stream);
60
}