/*************************************************************************
 * Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 * See LICENSE for license information.
 ************************************************************************/

#include "../util/math.h"
#include "./activation_template.h"

// Host-side C API entry points for the SiLU / SwiGLU activation family.
//
// Every function below follows the same shape: register the call with
// NVTE_API_CALL, open the transformer_engine namespace, and forward the
// arguments unchanged to a templated helper that is instantiated for the
// matching activation functor. No computation happens in this file itself.
//
// NOTE(review): the explicit template argument lists (compute type `fp32`,
// parameter type, and the `silu` / `dsilu` / `clamped_silu` / `clamped_dsilu`
// functors) were restored after having been stripped from this file. Confirm
// them against the helper declarations in activation_template.h.

// SiLU forward: `input` -> `output`, on `stream`.
void nvte_silu(const NVTETensor input, NVTETensor output, cudaStream_t stream) {
  NVTE_API_CALL(nvte_silu);
  using namespace transformer_engine;
  act_fn<fp32, Empty, silu<fp32, fp32>>(input, output, stream);
}

// SiLU forward for grouped tensors.
void nvte_group_silu(const NVTEGroupedTensor input, NVTEGroupedTensor output,
                     cudaStream_t stream) {
  NVTE_API_CALL(nvte_group_silu);
  using namespace transformer_engine;

  constexpr bool IS_ACT = true;

  // The nullptr is forwarded as-is to the helper.
  // NOTE(review): presumably an optional quantization config - confirm against
  // the helper's signature.
  dispatch::group_quantize_fwd_helper<IS_ACT, Empty, silu<fp32, fp32>>(input, output, nullptr,
                                                                        stream);
}

// SiLU backward: combines the incoming gradient `grad` with the forward
// `input` and writes the result to `output`.
void nvte_dsilu(const NVTETensor grad, const NVTETensor input, NVTETensor output,
                cudaStream_t stream) {
  NVTE_API_CALL(nvte_dsilu);
  using namespace transformer_engine;
  dact_fn<fp32, Empty, dsilu<fp32, fp32>>(grad, input, output, stream);
}

// SiLU backward for grouped tensors, without a bias-gradient reduction.
void nvte_group_dsilu(const NVTEGroupedTensor grad, const NVTEGroupedTensor input,
                      NVTEGroupedTensor output, cudaStream_t stream) {
  NVTE_API_CALL(nvte_group_dsilu);
  using namespace transformer_engine;

  // IS_DBIAS is false here, so the dbias and workspace handles are passed as
  // null placeholders.
  NVTETensor dbias = nullptr;
  NVTETensor workspace = nullptr;

  constexpr bool IS_DBIAS = false;
  constexpr bool IS_DACT = true;

  dispatch::group_quantize_bwd_helper<IS_DBIAS, IS_DACT, Empty, dsilu<fp32, fp32>>(
      grad, input, output, dbias, workspace, nullptr, stream);
}

// SiLU backward with both the activation-gradient and bias-gradient paths
// enabled (IS_DACT and IS_DBIAS are true). `dbias` and `workspace` are
// supplied by the caller and forwarded untouched.
void nvte_quantize_dbias_dsilu(const NVTETensor input, const NVTETensor activation_input,
                               NVTETensor output, NVTETensor dbias, NVTETensor workspace,
                               cudaStream_t stream) {
  NVTE_API_CALL(nvte_quantize_dbias_dsilu);
  using namespace transformer_engine;

  constexpr bool IS_DBIAS = true;
  constexpr bool IS_DACT = true;

  dispatch::quantize_bwd_helper<IS_DBIAS, IS_DACT, Empty, dsilu<fp32, fp32>>(
      input, activation_input, output, dbias, workspace, nullptr, stream);
}

// Grouped-tensor counterpart of nvte_quantize_dbias_dsilu.
void nvte_group_quantize_dbias_dsilu(const NVTEGroupedTensor input,
                                     const NVTEGroupedTensor activation_input,
                                     NVTEGroupedTensor output, NVTETensor dbias,
                                     NVTETensor workspace, cudaStream_t stream) {
  NVTE_API_CALL(nvte_group_quantize_dbias_dsilu);
  using namespace transformer_engine;

  constexpr bool IS_DBIAS = true;
  constexpr bool IS_DACT = true;

  dispatch::group_quantize_bwd_helper<IS_DBIAS, IS_DACT, Empty, dsilu<fp32, fp32>>(
      input, activation_input, output, dbias, workspace, nullptr, stream);
}

// SwiGLU forward: gated activation built on SiLU. This variant has no extra
// parameters, so a default-initialised Empty is passed to the helper.
void nvte_swiglu(const NVTETensor input, NVTETensor output, cudaStream_t stream) {
  NVTE_API_CALL(nvte_swiglu);
  using namespace transformer_engine;
  Empty e = {};
  gated_act_fn<fp32, Empty, silu<fp32, fp32>>(input, output, e, stream);
}

// SwiGLU backward: the helper is instantiated with both the forward functor
// and its derivative.
void nvte_dswiglu(const NVTETensor grad, const NVTETensor input, NVTETensor output,
                  cudaStream_t stream) {
  NVTE_API_CALL(nvte_dswiglu);
  using namespace transformer_engine;
  Empty e = {};
  dgated_act_fn<fp32, Empty, silu<fp32, fp32>, dsilu<fp32, fp32>>(grad, input, output, e,
                                                                  stream);
}

// Clamped SwiGLU forward. `limit` and `alpha` are packed, in that order, into
// a ClampedSwiGLUParam and handed to the gated-activation helper.
void nvte_clamped_swiglu(const NVTETensor input, NVTETensor output, float limit, float alpha,
                         cudaStream_t stream) {
  NVTE_API_CALL(nvte_clamped_swiglu);
  using namespace transformer_engine;
  ClampedSwiGLUParam param = {limit, alpha};
  gated_act_fn<fp32, ClampedSwiGLUParam, clamped_silu<fp32, fp32>>(input, output, param, stream);
}

// Clamped SwiGLU backward. Uses the same {limit, alpha} parameter pack as the
// forward entry point.
void nvte_clamped_dswiglu(const NVTETensor grad, const NVTETensor input, NVTETensor output,
                          float limit, float alpha, cudaStream_t stream) {
  NVTE_API_CALL(nvte_clamped_dswiglu);
  using namespace transformer_engine;
  ClampedSwiGLUParam param = {limit, alpha};
  dgated_act_fn<fp32, ClampedSwiGLUParam, clamped_silu<fp32, fp32>, clamped_dsilu<fp32, fp32>>(
      grad, input, output, param, stream);
}